R/summary.file_engine.R
summary.file_engine.Rd
Given a KGX file-based KG engine, provides summary information in the form of
node counts, category counts across nodes, relationship type counts, and available properties.
General information about the graph is printed to the console, and a list of
dataframes with this information is returned invisibly. Also returned are
`cats`, `preds`, and `props` entries, containing lists of available
categories/predicates/properties for convenient auto-completion in RStudio.
# S3 method for class 'file_engine'
summary(object, ..., quiet = FALSE)
A list of dataframes and named lists
When applied to a `file_engine`, also included are node-specific and edge-specific properties.
# Using example KGX file packaged with monarchr
data(eds_marfan_kg)
# prints a readable summary and returns a list of dataframes
res <- eds_marfan_kg |> summary()
#>
#> A KGX file-backed knowledge graph engine.
#> Total nodes: 3000
#> Total edges: 7148
#>
#> Node category counts:
#> category count
#> biolink:Entity 3000
#> biolink:NamedThing 3000
#> biolink:BiologicalEntity 2977
#> biolink:ThingWithTaxon 2977
#> biolink:DiseaseOrPhenotypicFeature 2846
#> biolink:PhenotypicFeature 2736
#> biolink:PhysicalEssence 145
#> biolink:PhysicalEssenceOrOccurrent 145
#> biolink:GenomicEntity 131
#> biolink:OntologyClass 131
#> biolink:Disease 110
#> biolink:SequenceVariant 81
#> biolink:ChemicalEntityOrGeneOrGeneProduct 37
#> biolink:Genotype 27
#> biolink:Gene 23
#> biolink:GeneOrGeneProduct 23
#> biolink:MacromolecularMachineMixin 23
#> biolink:ChemicalEntity 14
#> biolink:ChemicalEntityOrProteinOrPolypeptide 14
#> biolink:ChemicalOrDrugOrTreatment 14
#> biolink:MolecularEntity 13
#>
#> Edge type counts:
#> predicate count
#> biolink:subclass_of 5244
#> biolink:has_phenotype 1709
#> biolink:causes 56
#> biolink:associated_with_increased_likelihood_of 38
#> biolink:gene_associated_with_condition 28
#> biolink:model_of 27
#> biolink:has_mode_of_inheritance 26
#> biolink:genetically_associated_with 11
#> biolink:related_to 8
#> biolink:treats_or_applied_or_studied_to_treat 1
#>
#> Node property counts:
#> property count
#> pcategory 3000
#> provided_by 3000
#> category 3000
#> id 3000
#> name 2999
#> namespace 2992
#> iri 2868
#> description 2668
#> synonym 2320
#> xref 1469
#> in_taxon 120
#> in_taxon_label 120
#> has_gene 69
#> type 33
#> full_name 23
#> symbol 23
#>
#> Edge property counts:
#> property count
#> category 7148
#> id 7148
#> provided_by 7148
#> primary_knowledge_source 7148
#> aggregator_knowledge_source 7148
#> knowledge_source 7148
#> knowledge_level 7148
#> agent_type 7148
#> object 7148
#> predicate 7148
#> subject 7148
#> to 7148
#> from 7148
#> original_subject 1782
#> has_evidence 1729
#> frequency_qualifier 1080
#> publications 537
#> has_total 463
#> has_percentage 453
#> has_count 453
#> has_quotient 453
#> original_predicate 81
#> original_object 80
#> qualifiers 62
#> onset_qualifier 21
#>
#>
#> For more information about Biolink node (Class) and edge (Association) properties, see https://biolink.github.io/biolink-model/.
print(res)
#> $node_summary
#> category count
#> 1 biolink:Entity 3000
#> 2 biolink:NamedThing 3000
#> 3 biolink:BiologicalEntity 2977
#> 4 biolink:ThingWithTaxon 2977
#> 5 biolink:DiseaseOrPhenotypicFeature 2846
#> 6 biolink:PhenotypicFeature 2736
#> 7 biolink:PhysicalEssence 145
#> 8 biolink:PhysicalEssenceOrOccurrent 145
#> 9 biolink:GenomicEntity 131
#> 10 biolink:OntologyClass 131
#> 11 biolink:Disease 110
#> 12 biolink:SequenceVariant 81
#> 13 biolink:ChemicalEntityOrGeneOrGeneProduct 37
#> 14 biolink:Genotype 27
#> 15 biolink:Gene 23
#> 16 biolink:GeneOrGeneProduct 23
#> 17 biolink:MacromolecularMachineMixin 23
#> 18 biolink:ChemicalEntity 14
#> 19 biolink:ChemicalEntityOrProteinOrPolypeptide 14
#> 20 biolink:ChemicalOrDrugOrTreatment 14
#> 21 biolink:MolecularEntity 13
#>
#> $edge_summary
#> predicate count
#> 1 biolink:subclass_of 5244
#> 2 biolink:has_phenotype 1709
#> 3 biolink:causes 56
#> 4 biolink:associated_with_increased_likelihood_of 38
#> 5 biolink:gene_associated_with_condition 28
#> 6 biolink:model_of 27
#> 7 biolink:has_mode_of_inheritance 26
#> 8 biolink:genetically_associated_with 11
#> 9 biolink:related_to 8
#> 10 biolink:treats_or_applied_or_studied_to_treat 1
#>
#> $total_nodes
#> [1] 3000
#>
#> $total_edges
#> [1] 7148
#>
#> $node_properties_summary
#> property count
#> 16 pcategory 3000
#> 11 provided_by 3000
#> 7 category 3000
#> 1 id 3000
#> 2 name 2999
#> 10 namespace 2992
#> 8 iri 2868
#> 5 description 2668
#> 6 synonym 2320
#> 9 xref 1469
#> 12 in_taxon 120
#> 4 in_taxon_label 120
#> 15 has_gene 69
#> 14 type 33
#> 13 full_name 23
#> 3 symbol 23
#>
#> $edge_properties_summary
#> property count
#> 13 category 7148
#> 12 id 7148
#> 11 provided_by 7148
#> 10 primary_knowledge_source 7148
#> 9 aggregator_knowledge_source 7148
#> 8 knowledge_source 7148
#> 7 knowledge_level 7148
#> 6 agent_type 7148
#> 5 object 7148
#> 4 predicate 7148
#> 3 subject 7148
#> 2 to 7148
#> 1 from 7148
#> 15 original_subject 1782
#> 17 has_evidence 1729
#> 16 frequency_qualifier 1080
#> 23 publications 537
#> 18 has_total 463
#> 21 has_percentage 453
#> 20 has_count 453
#> 19 has_quotient 453
#> 25 original_predicate 81
#> 14 original_object 80
#> 24 qualifiers 62
#> 22 onset_qualifier 21
#>
#> $cats
#> $cats$`biolink:Entity`
#> [1] "biolink:Entity"
#>
#> $cats$`biolink:NamedThing`
#> [1] "biolink:NamedThing"
#>
#> $cats$`biolink:BiologicalEntity`
#> [1] "biolink:BiologicalEntity"
#>
#> $cats$`biolink:ThingWithTaxon`
#> [1] "biolink:ThingWithTaxon"
#>
#> $cats$`biolink:DiseaseOrPhenotypicFeature`
#> [1] "biolink:DiseaseOrPhenotypicFeature"
#>
#> $cats$`biolink:PhenotypicFeature`
#> [1] "biolink:PhenotypicFeature"
#>
#> $cats$`biolink:PhysicalEssence`
#> [1] "biolink:PhysicalEssence"
#>
#> $cats$`biolink:PhysicalEssenceOrOccurrent`
#> [1] "biolink:PhysicalEssenceOrOccurrent"
#>
#> $cats$`biolink:GenomicEntity`
#> [1] "biolink:GenomicEntity"
#>
#> $cats$`biolink:OntologyClass`
#> [1] "biolink:OntologyClass"
#>
#> $cats$`biolink:Disease`
#> [1] "biolink:Disease"
#>
#> $cats$`biolink:SequenceVariant`
#> [1] "biolink:SequenceVariant"
#>
#> $cats$`biolink:ChemicalEntityOrGeneOrGeneProduct`
#> [1] "biolink:ChemicalEntityOrGeneOrGeneProduct"
#>
#> $cats$`biolink:Genotype`
#> [1] "biolink:Genotype"
#>
#> $cats$`biolink:Gene`
#> [1] "biolink:Gene"
#>
#> $cats$`biolink:GeneOrGeneProduct`
#> [1] "biolink:GeneOrGeneProduct"
#>
#> $cats$`biolink:MacromolecularMachineMixin`
#> [1] "biolink:MacromolecularMachineMixin"
#>
#> $cats$`biolink:ChemicalEntity`
#> [1] "biolink:ChemicalEntity"
#>
#> $cats$`biolink:ChemicalEntityOrProteinOrPolypeptide`
#> [1] "biolink:ChemicalEntityOrProteinOrPolypeptide"
#>
#> $cats$`biolink:ChemicalOrDrugOrTreatment`
#> [1] "biolink:ChemicalOrDrugOrTreatment"
#>
#> $cats$`biolink:MolecularEntity`
#> [1] "biolink:MolecularEntity"
#>
#>
#> $preds
#> $preds$`biolink:subclass_of`
#> [1] "biolink:subclass_of"
#>
#> $preds$`biolink:has_phenotype`
#> [1] "biolink:has_phenotype"
#>
#> $preds$`biolink:causes`
#> [1] "biolink:causes"
#>
#> $preds$`biolink:associated_with_increased_likelihood_of`
#> [1] "biolink:associated_with_increased_likelihood_of"
#>
#> $preds$`biolink:gene_associated_with_condition`
#> [1] "biolink:gene_associated_with_condition"
#>
#> $preds$`biolink:model_of`
#> [1] "biolink:model_of"
#>
#> $preds$`biolink:has_mode_of_inheritance`
#> [1] "biolink:has_mode_of_inheritance"
#>
#> $preds$`biolink:genetically_associated_with`
#> [1] "biolink:genetically_associated_with"
#>
#> $preds$`biolink:related_to`
#> [1] "biolink:related_to"
#>
#> $preds$`biolink:treats_or_applied_or_studied_to_treat`
#> [1] "biolink:treats_or_applied_or_studied_to_treat"
#>
#>
#> $props
#> $props$id
#> [1] "id"
#>
#> $props$name
#> [1] "name"
#>
#> $props$symbol
#> [1] "symbol"
#>
#> $props$in_taxon_label
#> [1] "in_taxon_label"
#>
#> $props$description
#> [1] "description"
#>
#> $props$synonym
#> [1] "synonym"
#>
#> $props$category
#> [1] "category"
#>
#> $props$iri
#> [1] "iri"
#>
#> $props$xref
#> [1] "xref"
#>
#> $props$namespace
#> [1] "namespace"
#>
#> $props$provided_by
#> [1] "provided_by"
#>
#> $props$in_taxon
#> [1] "in_taxon"
#>
#> $props$full_name
#> [1] "full_name"
#>
#> $props$type
#> [1] "type"
#>
#> $props$has_gene
#> [1] "has_gene"
#>
#> $props$pcategory
#> [1] "pcategory"
#>
#> $props$from
#> [1] "from"
#>
#> $props$to
#> [1] "to"
#>
#> $props$subject
#> [1] "subject"
#>
#> $props$predicate
#> [1] "predicate"
#>
#> $props$object
#> [1] "object"
#>
#> $props$agent_type
#> [1] "agent_type"
#>
#> $props$knowledge_level
#> [1] "knowledge_level"
#>
#> $props$knowledge_source
#> [1] "knowledge_source"
#>
#> $props$aggregator_knowledge_source
#> [1] "aggregator_knowledge_source"
#>
#> $props$primary_knowledge_source
#> [1] "primary_knowledge_source"
#>
#> $props$original_object
#> [1] "original_object"
#>
#> $props$original_subject
#> [1] "original_subject"
#>
#> $props$frequency_qualifier
#> [1] "frequency_qualifier"
#>
#> $props$has_evidence
#> [1] "has_evidence"
#>
#> $props$has_total
#> [1] "has_total"
#>
#> $props$has_quotient
#> [1] "has_quotient"
#>
#> $props$has_count
#> [1] "has_count"
#>
#> $props$has_percentage
#> [1] "has_percentage"
#>
#> $props$onset_qualifier
#> [1] "onset_qualifier"
#>
#> $props$publications
#> [1] "publications"
#>
#> $props$qualifiers
#> [1] "qualifiers"
#>
#> $props$original_predicate
#> [1] "original_predicate"
#>
#>