Given a KGX file-based knowledge graph (KG) engine, provides summary information in the form of total node and edge counts, category counts across nodes, and predicate (relationship type) counts across edges. Unless quiet = TRUE, general information about the graph is printed to the console; a list of data frames describing node and edge counts is returned invisibly. The returned list also includes cats and preds entries, containing named lists of available node categories and edge predicates, respectively, for convenient auto-completion in RStudio.

# S3 method for class 'file_engine'
summary(object, ..., quiet = FALSE)

Arguments

object

A file_engine object

...

Other parameters (not used)

quiet

Logical; if TRUE, suppresses printing of the summary to the console (default: FALSE)

Value

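A list, returned invisibly, with the following entries: node_summary and edge_summary (data frames of counts per node category and per edge predicate), total_nodes and total_edges (the total numbers of nodes and edges), and cats and preds (named lists of the available node categories and edge predicates)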

Examples

# Using example KGX file packaged with monarchr
filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
# prints a readable summary and returns a list of dataframes
res <- file_engine(filename) |> summary()
#> 
#> A KGX file-backed knowledge graph engine.
#> Total nodes:  3000 
#> Total edges:  7148 
#> 
#> Node category counts:
#>                                      category count
#>                                biolink:Entity  3000
#>                            biolink:NamedThing  3000
#>                      biolink:BiologicalEntity  2977
#>                        biolink:ThingWithTaxon  2977
#>            biolink:DiseaseOrPhenotypicFeature  2846
#>                     biolink:PhenotypicFeature  2736
#>                       biolink:PhysicalEssence   145
#>            biolink:PhysicalEssenceOrOccurrent   145
#>                         biolink:GenomicEntity   131
#>                         biolink:OntologyClass   131
#>                               biolink:Disease   110
#>                       biolink:SequenceVariant    81
#>     biolink:ChemicalEntityOrGeneOrGeneProduct    37
#>                              biolink:Genotype    27
#>                                  biolink:Gene    23
#>                     biolink:GeneOrGeneProduct    23
#>            biolink:MacromolecularMachineMixin    23
#>                        biolink:ChemicalEntity    14
#>  biolink:ChemicalEntityOrProteinOrPolypeptide    14
#>             biolink:ChemicalOrDrugOrTreatment    14
#>                       biolink:MolecularEntity    13
#> 
#> Edge type counts:
#>                                        predicate count
#>                              biolink:subclass_of  5244
#>                            biolink:has_phenotype  1709
#>                                   biolink:causes    56
#>  biolink:associated_with_increased_likelihood_of    38
#>           biolink:gene_associated_with_condition    28
#>                                 biolink:model_of    27
#>                  biolink:has_mode_of_inheritance    26
#>              biolink:genetically_associated_with    11
#>                               biolink:related_to     8
#>    biolink:treats_or_applied_or_studied_to_treat     1
print(res)
#> $node_summary
#>                                        category count
#> 1                                biolink:Entity  3000
#> 2                            biolink:NamedThing  3000
#> 3                      biolink:BiologicalEntity  2977
#> 4                        biolink:ThingWithTaxon  2977
#> 5            biolink:DiseaseOrPhenotypicFeature  2846
#> 6                     biolink:PhenotypicFeature  2736
#> 7                       biolink:PhysicalEssence   145
#> 8            biolink:PhysicalEssenceOrOccurrent   145
#> 9                         biolink:GenomicEntity   131
#> 10                        biolink:OntologyClass   131
#> 11                              biolink:Disease   110
#> 12                      biolink:SequenceVariant    81
#> 13    biolink:ChemicalEntityOrGeneOrGeneProduct    37
#> 14                             biolink:Genotype    27
#> 15                                 biolink:Gene    23
#> 16                    biolink:GeneOrGeneProduct    23
#> 17           biolink:MacromolecularMachineMixin    23
#> 18                       biolink:ChemicalEntity    14
#> 19 biolink:ChemicalEntityOrProteinOrPolypeptide    14
#> 20            biolink:ChemicalOrDrugOrTreatment    14
#> 21                      biolink:MolecularEntity    13
#> 
#> $edge_summary
#>                                          predicate count
#> 1                              biolink:subclass_of  5244
#> 2                            biolink:has_phenotype  1709
#> 3                                   biolink:causes    56
#> 4  biolink:associated_with_increased_likelihood_of    38
#> 5           biolink:gene_associated_with_condition    28
#> 6                                 biolink:model_of    27
#> 7                  biolink:has_mode_of_inheritance    26
#> 8              biolink:genetically_associated_with    11
#> 9                               biolink:related_to     8
#> 10   biolink:treats_or_applied_or_studied_to_treat     1
#> 
#> $total_nodes
#> [1] 3000
#> 
#> $total_edges
#> [1] 7148
#> 
#> $cats
#> $cats$`biolink:Entity`
#> [1] "biolink:Entity"
#> 
#> $cats$`biolink:NamedThing`
#> [1] "biolink:NamedThing"
#> 
#> $cats$`biolink:BiologicalEntity`
#> [1] "biolink:BiologicalEntity"
#> 
#> $cats$`biolink:ThingWithTaxon`
#> [1] "biolink:ThingWithTaxon"
#> 
#> $cats$`biolink:DiseaseOrPhenotypicFeature`
#> [1] "biolink:DiseaseOrPhenotypicFeature"
#> 
#> $cats$`biolink:PhenotypicFeature`
#> [1] "biolink:PhenotypicFeature"
#> 
#> $cats$`biolink:PhysicalEssence`
#> [1] "biolink:PhysicalEssence"
#> 
#> $cats$`biolink:PhysicalEssenceOrOccurrent`
#> [1] "biolink:PhysicalEssenceOrOccurrent"
#> 
#> $cats$`biolink:GenomicEntity`
#> [1] "biolink:GenomicEntity"
#> 
#> $cats$`biolink:OntologyClass`
#> [1] "biolink:OntologyClass"
#> 
#> $cats$`biolink:Disease`
#> [1] "biolink:Disease"
#> 
#> $cats$`biolink:SequenceVariant`
#> [1] "biolink:SequenceVariant"
#> 
#> $cats$`biolink:ChemicalEntityOrGeneOrGeneProduct`
#> [1] "biolink:ChemicalEntityOrGeneOrGeneProduct"
#> 
#> $cats$`biolink:Genotype`
#> [1] "biolink:Genotype"
#> 
#> $cats$`biolink:Gene`
#> [1] "biolink:Gene"
#> 
#> $cats$`biolink:GeneOrGeneProduct`
#> [1] "biolink:GeneOrGeneProduct"
#> 
#> $cats$`biolink:MacromolecularMachineMixin`
#> [1] "biolink:MacromolecularMachineMixin"
#> 
#> $cats$`biolink:ChemicalEntity`
#> [1] "biolink:ChemicalEntity"
#> 
#> $cats$`biolink:ChemicalEntityOrProteinOrPolypeptide`
#> [1] "biolink:ChemicalEntityOrProteinOrPolypeptide"
#> 
#> $cats$`biolink:ChemicalOrDrugOrTreatment`
#> [1] "biolink:ChemicalOrDrugOrTreatment"
#> 
#> $cats$`biolink:MolecularEntity`
#> [1] "biolink:MolecularEntity"
#> 
#> 
#> $preds
#> $preds$`biolink:subclass_of`
#> [1] "biolink:subclass_of"
#> 
#> $preds$`biolink:has_phenotype`
#> [1] "biolink:has_phenotype"
#> 
#> $preds$`biolink:causes`
#> [1] "biolink:causes"
#> 
#> $preds$`biolink:associated_with_increased_likelihood_of`
#> [1] "biolink:associated_with_increased_likelihood_of"
#> 
#> $preds$`biolink:gene_associated_with_condition`
#> [1] "biolink:gene_associated_with_condition"
#> 
#> $preds$`biolink:model_of`
#> [1] "biolink:model_of"
#> 
#> $preds$`biolink:has_mode_of_inheritance`
#> [1] "biolink:has_mode_of_inheritance"
#> 
#> $preds$`biolink:genetically_associated_with`
#> [1] "biolink:genetically_associated_with"
#> 
#> $preds$`biolink:related_to`
#> [1] "biolink:related_to"
#> 
#> $preds$`biolink:treats_or_applied_or_studied_to_treat`
#> [1] "biolink:treats_or_applied_or_studied_to_treat"
#> 
#>