R/summary.file_engine.R
summary.file_engine.Rd
Given a KGX file-based KG engine, provides summary information in the form of
node counts, category counts across nodes, and relationship type counts.
General information about the graph is printed to the console, and a list of
dataframes describing node and edge counts is returned invisibly. Also returned
are `cats` and `preds` entries, containing lists of available node categories and
edge predicates, respectively, for convenient auto-completion in RStudio.
# S3 method for class 'file_engine'
summary(object, ..., quiet = FALSE)
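A list containing the dataframes `node_summary` and `edge_summary`, the counts `total_nodes` and `total_edges`, and the named lists `cats` and `preds`.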
# Using example KGX file packaged with monarchr
filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
# prints a readable summary and returns a list of dataframes
res <- file_engine(filename) |> summary()
#>
#> A KGX file-backed knowledge graph engine.
#> Total nodes: 3000
#> Total edges: 7148
#>
#> Node category counts:
#> category count
#> biolink:Entity 3000
#> biolink:NamedThing 3000
#> biolink:BiologicalEntity 2977
#> biolink:ThingWithTaxon 2977
#> biolink:DiseaseOrPhenotypicFeature 2846
#> biolink:PhenotypicFeature 2736
#> biolink:PhysicalEssence 145
#> biolink:PhysicalEssenceOrOccurrent 145
#> biolink:GenomicEntity 131
#> biolink:OntologyClass 131
#> biolink:Disease 110
#> biolink:SequenceVariant 81
#> biolink:ChemicalEntityOrGeneOrGeneProduct 37
#> biolink:Genotype 27
#> biolink:Gene 23
#> biolink:GeneOrGeneProduct 23
#> biolink:MacromolecularMachineMixin 23
#> biolink:ChemicalEntity 14
#> biolink:ChemicalEntityOrProteinOrPolypeptide 14
#> biolink:ChemicalOrDrugOrTreatment 14
#> biolink:MolecularEntity 13
#>
#> Edge type counts:
#> predicate count
#> biolink:subclass_of 5244
#> biolink:has_phenotype 1709
#> biolink:causes 56
#> biolink:associated_with_increased_likelihood_of 38
#> biolink:gene_associated_with_condition 28
#> biolink:model_of 27
#> biolink:has_mode_of_inheritance 26
#> biolink:genetically_associated_with 11
#> biolink:related_to 8
#> biolink:treats_or_applied_or_studied_to_treat 1
print(res)
#> $node_summary
#> category count
#> 1 biolink:Entity 3000
#> 2 biolink:NamedThing 3000
#> 3 biolink:BiologicalEntity 2977
#> 4 biolink:ThingWithTaxon 2977
#> 5 biolink:DiseaseOrPhenotypicFeature 2846
#> 6 biolink:PhenotypicFeature 2736
#> 7 biolink:PhysicalEssence 145
#> 8 biolink:PhysicalEssenceOrOccurrent 145
#> 9 biolink:GenomicEntity 131
#> 10 biolink:OntologyClass 131
#> 11 biolink:Disease 110
#> 12 biolink:SequenceVariant 81
#> 13 biolink:ChemicalEntityOrGeneOrGeneProduct 37
#> 14 biolink:Genotype 27
#> 15 biolink:Gene 23
#> 16 biolink:GeneOrGeneProduct 23
#> 17 biolink:MacromolecularMachineMixin 23
#> 18 biolink:ChemicalEntity 14
#> 19 biolink:ChemicalEntityOrProteinOrPolypeptide 14
#> 20 biolink:ChemicalOrDrugOrTreatment 14
#> 21 biolink:MolecularEntity 13
#>
#> $edge_summary
#> predicate count
#> 1 biolink:subclass_of 5244
#> 2 biolink:has_phenotype 1709
#> 3 biolink:causes 56
#> 4 biolink:associated_with_increased_likelihood_of 38
#> 5 biolink:gene_associated_with_condition 28
#> 6 biolink:model_of 27
#> 7 biolink:has_mode_of_inheritance 26
#> 8 biolink:genetically_associated_with 11
#> 9 biolink:related_to 8
#> 10 biolink:treats_or_applied_or_studied_to_treat 1
#>
#> $total_nodes
#> [1] 3000
#>
#> $total_edges
#> [1] 7148
#>
#> $cats
#> $cats$`biolink:Entity`
#> [1] "biolink:Entity"
#>
#> $cats$`biolink:NamedThing`
#> [1] "biolink:NamedThing"
#>
#> $cats$`biolink:BiologicalEntity`
#> [1] "biolink:BiologicalEntity"
#>
#> $cats$`biolink:ThingWithTaxon`
#> [1] "biolink:ThingWithTaxon"
#>
#> $cats$`biolink:DiseaseOrPhenotypicFeature`
#> [1] "biolink:DiseaseOrPhenotypicFeature"
#>
#> $cats$`biolink:PhenotypicFeature`
#> [1] "biolink:PhenotypicFeature"
#>
#> $cats$`biolink:PhysicalEssence`
#> [1] "biolink:PhysicalEssence"
#>
#> $cats$`biolink:PhysicalEssenceOrOccurrent`
#> [1] "biolink:PhysicalEssenceOrOccurrent"
#>
#> $cats$`biolink:GenomicEntity`
#> [1] "biolink:GenomicEntity"
#>
#> $cats$`biolink:OntologyClass`
#> [1] "biolink:OntologyClass"
#>
#> $cats$`biolink:Disease`
#> [1] "biolink:Disease"
#>
#> $cats$`biolink:SequenceVariant`
#> [1] "biolink:SequenceVariant"
#>
#> $cats$`biolink:ChemicalEntityOrGeneOrGeneProduct`
#> [1] "biolink:ChemicalEntityOrGeneOrGeneProduct"
#>
#> $cats$`biolink:Genotype`
#> [1] "biolink:Genotype"
#>
#> $cats$`biolink:Gene`
#> [1] "biolink:Gene"
#>
#> $cats$`biolink:GeneOrGeneProduct`
#> [1] "biolink:GeneOrGeneProduct"
#>
#> $cats$`biolink:MacromolecularMachineMixin`
#> [1] "biolink:MacromolecularMachineMixin"
#>
#> $cats$`biolink:ChemicalEntity`
#> [1] "biolink:ChemicalEntity"
#>
#> $cats$`biolink:ChemicalEntityOrProteinOrPolypeptide`
#> [1] "biolink:ChemicalEntityOrProteinOrPolypeptide"
#>
#> $cats$`biolink:ChemicalOrDrugOrTreatment`
#> [1] "biolink:ChemicalOrDrugOrTreatment"
#>
#> $cats$`biolink:MolecularEntity`
#> [1] "biolink:MolecularEntity"
#>
#>
#> $preds
#> $preds$`biolink:subclass_of`
#> [1] "biolink:subclass_of"
#>
#> $preds$`biolink:has_phenotype`
#> [1] "biolink:has_phenotype"
#>
#> $preds$`biolink:causes`
#> [1] "biolink:causes"
#>
#> $preds$`biolink:associated_with_increased_likelihood_of`
#> [1] "biolink:associated_with_increased_likelihood_of"
#>
#> $preds$`biolink:gene_associated_with_condition`
#> [1] "biolink:gene_associated_with_condition"
#>
#> $preds$`biolink:model_of`
#> [1] "biolink:model_of"
#>
#> $preds$`biolink:has_mode_of_inheritance`
#> [1] "biolink:has_mode_of_inheritance"
#>
#> $preds$`biolink:genetically_associated_with`
#> [1] "biolink:genetically_associated_with"
#>
#> $preds$`biolink:related_to`
#> [1] "biolink:related_to"
#>
#> $preds$`biolink:treats_or_applied_or_studied_to_treat`
#> [1] "biolink:treats_or_applied_or_studied_to_treat"
#>
#>