Given a KGX file-based knowledge graph (KG) engine, provides summary information in the form of total node and edge counts, category counts across nodes, relationship type (predicate) counts, and available properties. General information about the graph is printed to the console, and a list of dataframes with this information is returned invisibly. Also returned are cats, preds, and props entries, containing lists of available categories/predicates/properties for convenient auto-completion in RStudio.

# S3 method for class 'file_engine'
summary(object, ..., quiet = FALSE)

Arguments

object

A file_engine object

...

Other parameters (not used)

quiet

Logical, whether to suppress printing of the summary

Value

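A list, returned invisibly, containing the dataframes node_summary, edge_summary, node_properties_summary, and edge_properties_summary; the counts total_nodes and total_edges; and the named lists cats, preds, and props.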

Details

For a file_engine, the summary also includes counts of node-specific and edge-specific properties (the node_properties_summary and edge_properties_summary entries).

Examples

# Using example KGX file packaged with monarchr
data(eds_marfan_kg)

# prints a readable summary and returns a list of dataframes
res <- eds_marfan_kg |> summary()
#> 
#> A KGX file-backed knowledge graph engine.
#> Total nodes:  3000 
#> Total edges:  7148 
#> 
#> Node category counts:
#>                                      category count
#>                                biolink:Entity  3000
#>                            biolink:NamedThing  3000
#>                      biolink:BiologicalEntity  2977
#>                        biolink:ThingWithTaxon  2977
#>            biolink:DiseaseOrPhenotypicFeature  2846
#>                     biolink:PhenotypicFeature  2736
#>                       biolink:PhysicalEssence   145
#>            biolink:PhysicalEssenceOrOccurrent   145
#>                         biolink:GenomicEntity   131
#>                         biolink:OntologyClass   131
#>                               biolink:Disease   110
#>                       biolink:SequenceVariant    81
#>     biolink:ChemicalEntityOrGeneOrGeneProduct    37
#>                              biolink:Genotype    27
#>                                  biolink:Gene    23
#>                     biolink:GeneOrGeneProduct    23
#>            biolink:MacromolecularMachineMixin    23
#>                        biolink:ChemicalEntity    14
#>  biolink:ChemicalEntityOrProteinOrPolypeptide    14
#>             biolink:ChemicalOrDrugOrTreatment    14
#>                       biolink:MolecularEntity    13
#> 
#> Edge type counts:
#>                                        predicate count
#>                              biolink:subclass_of  5244
#>                            biolink:has_phenotype  1709
#>                                   biolink:causes    56
#>  biolink:associated_with_increased_likelihood_of    38
#>           biolink:gene_associated_with_condition    28
#>                                 biolink:model_of    27
#>                  biolink:has_mode_of_inheritance    26
#>              biolink:genetically_associated_with    11
#>                               biolink:related_to     8
#>    biolink:treats_or_applied_or_studied_to_treat     1
#> 
#> Node property counts:
#>        property count
#>       pcategory  3000
#>     provided_by  3000
#>        category  3000
#>              id  3000
#>            name  2999
#>       namespace  2992
#>             iri  2868
#>     description  2668
#>         synonym  2320
#>            xref  1469
#>        in_taxon   120
#>  in_taxon_label   120
#>        has_gene    69
#>            type    33
#>       full_name    23
#>          symbol    23
#> 
#> Edge property counts:
#>                     property count
#>                     category  7148
#>                           id  7148
#>                  provided_by  7148
#>     primary_knowledge_source  7148
#>  aggregator_knowledge_source  7148
#>             knowledge_source  7148
#>              knowledge_level  7148
#>                   agent_type  7148
#>                       object  7148
#>                    predicate  7148
#>                      subject  7148
#>                           to  7148
#>                         from  7148
#>             original_subject  1782
#>                 has_evidence  1729
#>          frequency_qualifier  1080
#>                 publications   537
#>                    has_total   463
#>               has_percentage   453
#>                    has_count   453
#>                 has_quotient   453
#>           original_predicate    81
#>              original_object    80
#>                   qualifiers    62
#>              onset_qualifier    21
#> 
#> 
#> For more information about Biolink node (Class) and edge (Association) properties, see https://biolink.github.io/biolink-model/.
print(res)
#> $node_summary
#>                                        category count
#> 1                                biolink:Entity  3000
#> 2                            biolink:NamedThing  3000
#> 3                      biolink:BiologicalEntity  2977
#> 4                        biolink:ThingWithTaxon  2977
#> 5            biolink:DiseaseOrPhenotypicFeature  2846
#> 6                     biolink:PhenotypicFeature  2736
#> 7                       biolink:PhysicalEssence   145
#> 8            biolink:PhysicalEssenceOrOccurrent   145
#> 9                         biolink:GenomicEntity   131
#> 10                        biolink:OntologyClass   131
#> 11                              biolink:Disease   110
#> 12                      biolink:SequenceVariant    81
#> 13    biolink:ChemicalEntityOrGeneOrGeneProduct    37
#> 14                             biolink:Genotype    27
#> 15                                 biolink:Gene    23
#> 16                    biolink:GeneOrGeneProduct    23
#> 17           biolink:MacromolecularMachineMixin    23
#> 18                       biolink:ChemicalEntity    14
#> 19 biolink:ChemicalEntityOrProteinOrPolypeptide    14
#> 20            biolink:ChemicalOrDrugOrTreatment    14
#> 21                      biolink:MolecularEntity    13
#> 
#> $edge_summary
#>                                          predicate count
#> 1                              biolink:subclass_of  5244
#> 2                            biolink:has_phenotype  1709
#> 3                                   biolink:causes    56
#> 4  biolink:associated_with_increased_likelihood_of    38
#> 5           biolink:gene_associated_with_condition    28
#> 6                                 biolink:model_of    27
#> 7                  biolink:has_mode_of_inheritance    26
#> 8              biolink:genetically_associated_with    11
#> 9                               biolink:related_to     8
#> 10   biolink:treats_or_applied_or_studied_to_treat     1
#> 
#> $total_nodes
#> [1] 3000
#> 
#> $total_edges
#> [1] 7148
#> 
#> $node_properties_summary
#>          property count
#> 16      pcategory  3000
#> 11    provided_by  3000
#> 7        category  3000
#> 1              id  3000
#> 2            name  2999
#> 10      namespace  2992
#> 8             iri  2868
#> 5     description  2668
#> 6         synonym  2320
#> 9            xref  1469
#> 12       in_taxon   120
#> 4  in_taxon_label   120
#> 15       has_gene    69
#> 14           type    33
#> 13      full_name    23
#> 3          symbol    23
#> 
#> $edge_properties_summary
#>                       property count
#> 13                    category  7148
#> 12                          id  7148
#> 11                 provided_by  7148
#> 10    primary_knowledge_source  7148
#> 9  aggregator_knowledge_source  7148
#> 8             knowledge_source  7148
#> 7              knowledge_level  7148
#> 6                   agent_type  7148
#> 5                       object  7148
#> 4                    predicate  7148
#> 3                      subject  7148
#> 2                           to  7148
#> 1                         from  7148
#> 15            original_subject  1782
#> 17                has_evidence  1729
#> 16         frequency_qualifier  1080
#> 23                publications   537
#> 18                   has_total   463
#> 21              has_percentage   453
#> 20                   has_count   453
#> 19                has_quotient   453
#> 25          original_predicate    81
#> 14             original_object    80
#> 24                  qualifiers    62
#> 22             onset_qualifier    21
#> 
#> $cats
#> $cats$`biolink:Entity`
#> [1] "biolink:Entity"
#> 
#> $cats$`biolink:NamedThing`
#> [1] "biolink:NamedThing"
#> 
#> $cats$`biolink:BiologicalEntity`
#> [1] "biolink:BiologicalEntity"
#> 
#> $cats$`biolink:ThingWithTaxon`
#> [1] "biolink:ThingWithTaxon"
#> 
#> $cats$`biolink:DiseaseOrPhenotypicFeature`
#> [1] "biolink:DiseaseOrPhenotypicFeature"
#> 
#> $cats$`biolink:PhenotypicFeature`
#> [1] "biolink:PhenotypicFeature"
#> 
#> $cats$`biolink:PhysicalEssence`
#> [1] "biolink:PhysicalEssence"
#> 
#> $cats$`biolink:PhysicalEssenceOrOccurrent`
#> [1] "biolink:PhysicalEssenceOrOccurrent"
#> 
#> $cats$`biolink:GenomicEntity`
#> [1] "biolink:GenomicEntity"
#> 
#> $cats$`biolink:OntologyClass`
#> [1] "biolink:OntologyClass"
#> 
#> $cats$`biolink:Disease`
#> [1] "biolink:Disease"
#> 
#> $cats$`biolink:SequenceVariant`
#> [1] "biolink:SequenceVariant"
#> 
#> $cats$`biolink:ChemicalEntityOrGeneOrGeneProduct`
#> [1] "biolink:ChemicalEntityOrGeneOrGeneProduct"
#> 
#> $cats$`biolink:Genotype`
#> [1] "biolink:Genotype"
#> 
#> $cats$`biolink:Gene`
#> [1] "biolink:Gene"
#> 
#> $cats$`biolink:GeneOrGeneProduct`
#> [1] "biolink:GeneOrGeneProduct"
#> 
#> $cats$`biolink:MacromolecularMachineMixin`
#> [1] "biolink:MacromolecularMachineMixin"
#> 
#> $cats$`biolink:ChemicalEntity`
#> [1] "biolink:ChemicalEntity"
#> 
#> $cats$`biolink:ChemicalEntityOrProteinOrPolypeptide`
#> [1] "biolink:ChemicalEntityOrProteinOrPolypeptide"
#> 
#> $cats$`biolink:ChemicalOrDrugOrTreatment`
#> [1] "biolink:ChemicalOrDrugOrTreatment"
#> 
#> $cats$`biolink:MolecularEntity`
#> [1] "biolink:MolecularEntity"
#> 
#> 
#> $preds
#> $preds$`biolink:subclass_of`
#> [1] "biolink:subclass_of"
#> 
#> $preds$`biolink:has_phenotype`
#> [1] "biolink:has_phenotype"
#> 
#> $preds$`biolink:causes`
#> [1] "biolink:causes"
#> 
#> $preds$`biolink:associated_with_increased_likelihood_of`
#> [1] "biolink:associated_with_increased_likelihood_of"
#> 
#> $preds$`biolink:gene_associated_with_condition`
#> [1] "biolink:gene_associated_with_condition"
#> 
#> $preds$`biolink:model_of`
#> [1] "biolink:model_of"
#> 
#> $preds$`biolink:has_mode_of_inheritance`
#> [1] "biolink:has_mode_of_inheritance"
#> 
#> $preds$`biolink:genetically_associated_with`
#> [1] "biolink:genetically_associated_with"
#> 
#> $preds$`biolink:related_to`
#> [1] "biolink:related_to"
#> 
#> $preds$`biolink:treats_or_applied_or_studied_to_treat`
#> [1] "biolink:treats_or_applied_or_studied_to_treat"
#> 
#> 
#> $props
#> $props$id
#> [1] "id"
#> 
#> $props$name
#> [1] "name"
#> 
#> $props$symbol
#> [1] "symbol"
#> 
#> $props$in_taxon_label
#> [1] "in_taxon_label"
#> 
#> $props$description
#> [1] "description"
#> 
#> $props$synonym
#> [1] "synonym"
#> 
#> $props$category
#> [1] "category"
#> 
#> $props$iri
#> [1] "iri"
#> 
#> $props$xref
#> [1] "xref"
#> 
#> $props$namespace
#> [1] "namespace"
#> 
#> $props$provided_by
#> [1] "provided_by"
#> 
#> $props$in_taxon
#> [1] "in_taxon"
#> 
#> $props$full_name
#> [1] "full_name"
#> 
#> $props$type
#> [1] "type"
#> 
#> $props$has_gene
#> [1] "has_gene"
#> 
#> $props$pcategory
#> [1] "pcategory"
#> 
#> $props$from
#> [1] "from"
#> 
#> $props$to
#> [1] "to"
#> 
#> $props$subject
#> [1] "subject"
#> 
#> $props$predicate
#> [1] "predicate"
#> 
#> $props$object
#> [1] "object"
#> 
#> $props$agent_type
#> [1] "agent_type"
#> 
#> $props$knowledge_level
#> [1] "knowledge_level"
#> 
#> $props$knowledge_source
#> [1] "knowledge_source"
#> 
#> $props$aggregator_knowledge_source
#> [1] "aggregator_knowledge_source"
#> 
#> $props$primary_knowledge_source
#> [1] "primary_knowledge_source"
#> 
#> $props$original_object
#> [1] "original_object"
#> 
#> $props$original_subject
#> [1] "original_subject"
#> 
#> $props$frequency_qualifier
#> [1] "frequency_qualifier"
#> 
#> $props$has_evidence
#> [1] "has_evidence"
#> 
#> $props$has_total
#> [1] "has_total"
#> 
#> $props$has_quotient
#> [1] "has_quotient"
#> 
#> $props$has_count
#> [1] "has_count"
#> 
#> $props$has_percentage
#> [1] "has_percentage"
#> 
#> $props$onset_qualifier
#> [1] "onset_qualifier"
#> 
#> $props$publications
#> [1] "publications"
#> 
#> $props$qualifiers
#> [1] "qualifiers"
#> 
#> $props$original_predicate
#> [1] "original_predicate"
#> 
#>