BOOMER-PY Tutorial: Probabilistic Ontological Reasoning
This tutorial walks through the core functionality of BOOMER-PY, a Bayesian OWL Ontology Merger for probabilistic reasoning over knowledge bases.
Table of Contents
1. Setup and Basic Concepts
BOOMER-PY enables reasoning over probabilistic ontological statements. Key concepts:
- Knowledge Base (KB): A collection of facts and probabilistic facts
- Facts: Logical assertions about relationships
- Probabilistic Facts (PFacts): Facts with assigned probabilities
- Reasoning: Finding consistent interpretations of conflicting assertions
Note on Timeouts: The walkthrough examples in this tutorial share a default timeout of 60 seconds per search so that the notebook executes in a reasonable time; the Advanced Configuration section sets its own, shorter limits. For production use, adjust timeouts to match your dataset's size and complexity.
# Import core BOOMER-PY components
from boomer.model import KB, PFact, EquivalentTo, SubClassOf, ProperSubClassOf
from boomer.search import solve, SearchConfig
from boomer.renderers.markdown_renderer import MarkdownRenderer
from boomer.io import ptable_to_kb
# Shared search configuration for the walkthrough examples.
# The explicit 60-second timeout bounds each search so that executing the
# notebook cannot run indefinitely.
DEFAULT_CONFIG = SearchConfig(timeout_seconds=60)
print("BOOMER-PY imported successfully!")
Create a simple knowledge base with probabilistic facts
# Build a small knowledge base from (common name, scientific name, probability)
# triples; every triple becomes one probabilistic equivalence.
simple_kb = KB(
    name="Animal Classification",
    description="Mapping common names to scientific names",
    pfacts=[
        PFact(fact=EquivalentTo(sub=common, equivalent=scientific), prob=prob)
        for common, scientific, prob in (
            ("cat", "Felix", 0.9),  # high-confidence mapping
            ("dog", "Canus", 0.9),  # high-confidence mapping
            ("cat", "Canus", 0.1),  # low confidence (likely wrong)
        )
    ],
)

# Echo the KB back so the reader can see what was stored.
print(f"Knowledge Base: {simple_kb.name}")
print(f"Number of probabilistic facts: {len(simple_kb.pfacts)}")
for position, entry in enumerate(simple_kb.pfacts, start=1):
    print(f" {position}. {entry.fact} (probability: {entry.prob})")
from boomer.model import NotInSubsumptionWith, MemberOfDisjointGroup
# One example of each fact type imported so far in this tutorial.
facts_demo = KB(
    name="Fact Types Demo",
    pfacts=[
        # A ≡ B (equivalence)
        PFact(prob=0.95, fact=EquivalentTo(sub="cat", equivalent="domestic_cat")),
        # A ⊆ B (subsumption)
        PFact(prob=0.99, fact=SubClassOf(sub="cat", sup="mammal")),
        # A ⊂ B (proper subsumption: subset, but not equal)
        PFact(prob=0.98, fact=ProperSubClassOf(sub="kitten", sup="cat")),
        # Neither term subsumes the other
        PFact(prob=0.99, fact=NotInSubsumptionWith(sub="cat", sibling="plant")),
        # Membership in named disjoint groups
        PFact(prob=0.99, fact=MemberOfDisjointGroup(sub="cat", group="vertebrates")),
        PFact(prob=0.99, fact=MemberOfDisjointGroup(sub="oak_tree", group="plants")),
    ],
)

# List every fact alongside the name of the class that models it.
print("Fact types:")
for entry in facts_demo.pfacts:
    fact_type = type(entry.fact).__name__
    print(f" {fact_type}: {entry.fact} (p={entry.prob})")
4. Probabilistic Facts and Reasoning
The core power of BOOMER-PY is handling conflicting probabilistic assertions. Let's solve our simple example:
# Run the search over the small animal KB, bounded by the shared timeout.
solution = solve(simple_kb, config=DEFAULT_CONFIG)

# Headline numbers reported on the returned solution object.
print(f"Solution found with confidence: {solution.confidence:.3f}")
print(f"Prior probability: {solution.prior_prob:.6f}")
print(f"Posterior probability: {solution.posterior_prob:.6f}")
print(f"Search statistics: {solution.number_of_combinations} combinations explored")
print(f"Time elapsed: {solution.time_elapsed:.3f} seconds")
print()

# Per-fact verdicts: whether each input fact was accepted, plus its posterior.
print("Resolved facts:")
for solved in solution.solved_pfacts:
    if solved.truth_value:
        status = "✓ TRUE"
    else:
        status = "✗ FALSE"
    print(f" {status}: {solved.pfact.fact} (posterior: {solved.posterior_prob:.3f})")
More complex reasoning scenario
# A larger scenario: two classification systems (A and B) with competing
# cross-system equivalences plus a small hierarchy inside each system.
complex_kb = KB(
    name="Taxonomy Alignment",
    description="Aligning multiple classification systems",
    pfacts=[
        # Candidate System A -> System B equivalences.
        *(
            PFact(fact=EquivalentTo(sub=a_term, equivalent=b_term), prob=prob)
            for a_term, b_term, prob in (
                ("A1", "B1", 0.8),
                ("A1", "B2", 0.3),
                ("A2", "B2", 0.9),
                ("A2", "B3", 0.2),
            )
        ),
        # Hierarchy within System A, then within System B (all at 0.95).
        *(
            PFact(fact=SubClassOf(sub=child, sup=parent), prob=0.95)
            for child, parent in (
                ("A1", "A_parent"),
                ("A2", "A_parent"),
                ("B1", "B_parent"),
                ("B2", "B_parent"),
                ("B3", "B_parent"),
            )
        ),
    ],
)

# Solve under the shared timeout configuration.
complex_solution = solve(complex_kb, config=DEFAULT_CONFIG)
print(f"Complex solution confidence: {complex_solution.confidence:.3f}")
print(f"Satisfiable combinations: {complex_solution.number_of_satisfiable_combinations}")
print()

# Keep only facts that were accepted AND have a posterior above 0.7.
high_confidence_facts = [
    solved
    for solved in complex_solution.solved_pfacts
    if solved.truth_value and solved.posterior_prob > 0.7
]
print("High-confidence mappings (>0.7):")
for solved in high_confidence_facts:
    print(f" ✓ {solved.pfact.fact} (posterior: {solved.posterior_prob:.3f})")
Create a sample PTable
import os
import tempfile

# Sample PTable: tab-separated, a header row followed by one row per
# (subject, object) pair with four probability columns.
ptable_content = """subject\tobject\tP(sub_sub_obj)\tP(obj_sub_sub)\tP(equiv)\tP(disjoint)
cat\tFelix\t0.05\t0.95\t0.90\t0.05
dog\tCanus\t0.05\t0.95\t0.85\t0.10
cat\tCanus\t0.01\t0.01\t0.05\t0.90
mouse\tMus\t0.05\t0.95\t0.80\t0.15"""

# Write the table into the platform's temp directory rather than a hard-coded
# "/tmp", which does not exist on Windows and ignores TMPDIR overrides.
ptable_path = os.path.join(tempfile.gettempdir(), "example.ptable.tsv")
# Explicit encoding so the file's bytes do not depend on the locale default.
with open(ptable_path, "w", encoding="utf-8") as f:
    f.write(ptable_content)

# Load the PTable file into a knowledge base.
ptable_kb = ptable_to_kb(
    ptable_path,
    name="Animal Species Mapping",
    description="Common names to scientific genus mapping",
)
print(f"PTable KB: {ptable_kb.name}")
print(f"Loaded {len(ptable_kb.pfacts)} probabilistic facts from PTable")
print()

# Solve the PTable-derived KB under the shared timeout configuration.
ptable_solution = solve(ptable_kb, config=DEFAULT_CONFIG)
print(f"PTable solution confidence: {ptable_solution.confidence:.3f}")

# Keep only the accepted facts that are equivalences.
equivalences = [
    spf for spf in ptable_solution.solved_pfacts
    if spf.truth_value and isinstance(spf.pfact.fact, EquivalentTo)
]
print("\nConfirmed equivalences:")
for spf in equivalences:
    print(f" {spf.pfact.fact.sub} ≡ {spf.pfact.fact.equivalent} (p={spf.posterior_prob:.3f})")
7. Rendering Results
BOOMER-PY provides flexible rendering of solutions:
# Custom analysis of solution
def analyze_solution(solution):
    """Print a short report about a solved knowledge base.

    Reads ``solved_pfacts``, ``confidence`` and ``time_elapsed`` from
    *solution*. Each solved fact must expose ``truth_value``,
    ``posterior_prob`` and ``pfact.fact``. Output goes to stdout; nothing
    is returned.
    """
    solved = solution.solved_pfacts
    accepted = [item for item in solved if item.truth_value]
    rejected = [item for item in solved if not item.truth_value]

    print("Solution Analysis:")
    print(f" Confidence: {solution.confidence:.3f}")
    print(f" Facts accepted: {len(accepted)}")
    print(f" Facts rejected: {len(rejected)}")
    print(f" Search time: {solution.time_elapsed:.3f}s")

    # Bucket accepted facts by the class name of the underlying fact,
    # keeping first-seen order of the class names.
    by_type = {}
    for item in accepted:
        by_type.setdefault(type(item.pfact.fact).__name__, []).append(item)

    preview_size = 3  # at most this many facts are listed per type
    print("\nAccepted facts by type:")
    for type_name, members in by_type.items():
        print(f" {type_name}: {len(members)} facts")
        for item in members[:preview_size]:
            print(f" - {item.pfact.fact} (p={item.posterior_prob:.3f})")
        hidden = len(members) - preview_size
        if hidden > 0:
            print(f" ... and {hidden} more")
analyze_solution(ptable_solution)
Simulate disease ontology mapping
# Simulated disease-ontology alignment between MONDO terms and ICD-10 codes.
disease_kb = KB(
    name="Disease Classification Mapping",
    description="Mapping MONDO to ICD-10 disease codes",
    pfacts=[
        # Term-level candidate equivalences (MONDO -> ICD-10).
        *(
            PFact(fact=EquivalentTo(sub=mondo_id, equivalent=icd_id), prob=prob)
            for mondo_id, icd_id, prob in (
                ("MONDO:0005015", "ICD10:E10", 0.95),   # diabetes (high confidence)
                ("MONDO:0005233", "ICD10:C78", 0.90),   # cancer (high confidence)
                # Ambiguous: one MONDO term with two candidate ICD codes.
                ("MONDO:0005148", "ICD10:I25.1", 0.7),  # heart disease, option 1
                ("MONDO:0005148", "ICD10:I25.9", 0.6),  # heart disease, option 2
            )
        ),
        # Hierarchy inside MONDO, then inside ICD-10 (all at 0.99).
        *(
            PFact(fact=SubClassOf(sub=child, sup=parent), prob=0.99)
            for child, parent in (
                ("MONDO:0005015", "MONDO:0005066"),  # diabetes is a metabolic disease
                ("MONDO:0005148", "MONDO:0005267"),  # heart disease is cardiovascular
                ("ICD10:E10", "ICD10:E00-E89"),      # endocrine chapter
                ("ICD10:I25.1", "ICD10:I00-I99"),    # circulatory chapter
                ("ICD10:I25.9", "ICD10:I00-I99"),    # circulatory chapter
            )
        ),
        # Cross-system equivalences between the parent groupings.
        *(
            PFact(fact=EquivalentTo(sub=mondo_id, equivalent=icd_id), prob=0.8)
            for mondo_id, icd_id in (
                ("MONDO:0005066", "ICD10:E00-E89"),  # metabolic diseases
                ("MONDO:0005267", "ICD10:I00-I99"),  # cardiovascular diseases
            )
        ),
    ],
)

# Solve under the shared timeout configuration.
disease_solution = solve(disease_kb, config=DEFAULT_CONFIG)
print(f"Disease mapping solution confidence: {disease_solution.confidence:.3f}")
print()

# Accepted equivalences whose posterior exceeds 0.8.
confirmed_mappings = [
    solved
    for solved in disease_solution.solved_pfacts
    if solved.truth_value
    and isinstance(solved.pfact.fact, EquivalentTo)
    and solved.posterior_prob > 0.8
]
print("High-confidence disease mappings:")
for solved in confirmed_mappings:
    mapping = solved.pfact.fact
    # Print the MONDO side first regardless of how the fact was oriented.
    if mapping.sub.startswith('MONDO'):
        mondo_term, icd_term = mapping.sub, mapping.equivalent
    else:
        mondo_term, icd_term = mapping.equivalent, mapping.sub
    print(f" {mondo_term} → {icd_term} (confidence: {solved.posterior_prob:.3f})")

# Equivalences the solver rejected (potential conflicts).
rejected_mappings = [
    solved
    for solved in disease_solution.solved_pfacts
    if not solved.truth_value and isinstance(solved.pfact.fact, EquivalentTo)
]
if rejected_mappings:
    print("\nRejected mappings (due to conflicts):")
    for solved in rejected_mappings:
        print(f" ✗ {solved.pfact.fact} (would have posterior: {solved.posterior_prob:.3f})")
9. Advanced Configuration
For complex problems, you can configure the search process:
from boomer.search import SearchConfig
# Custom search settings: bound the search by iterations, candidate count and
# wall time, and select the reasoner implementation by dotted path.
config = SearchConfig(
    reasoner_class="boomer.reasoners.nx_reasoner.NxReasoner",  # NetworkX reasoner
    timeout_seconds=10,            # time limit
    max_candidate_solutions=1000,  # limit on candidates to explore
    max_iterations=50000,          # limit on search iterations
)

# Re-solve the disease KB with the custom settings and report the outcome.
configured_solution = solve(disease_kb, config=config)
print("Configured search results:")
print(f" Confidence: {configured_solution.confidence:.3f}")
print(f" Time elapsed: {configured_solution.time_elapsed:.3f}s")
print(f" Iterations: {configured_solution.number_of_combinations}")
print(f" Satisfiable combinations: {configured_solution.number_of_satisfiable_combinations}")
# Performance comparison
import time
def compare_search_strategies(kb):
    """Solve *kb* under several search configurations and collect the results.

    Args:
        kb: Knowledge base passed unchanged to ``solve`` for every strategy.

    Returns:
        A list with one dict per strategy, in the order tried. A run that
        completes yields the keys ``strategy``, ``confidence``, ``time``
        (seconds measured around the ``solve`` call), ``combinations`` and
        ``satisfiable``. A run that raises yields only ``strategy`` and
        ``error`` (the exception message).
    """
    strategies = [
        ("Default", SearchConfig(timeout_seconds=10)),
        ("Fast", SearchConfig(max_iterations=1000, timeout_seconds=5)),
        ("Thorough", SearchConfig(max_iterations=100000, timeout_seconds=20)),
    ]
    results = []
    for name, config in strategies:
        # perf_counter() is monotonic, so the measured duration cannot be
        # skewed (or go negative) if the system clock is adjusted mid-run.
        start_time = time.perf_counter()
        try:
            solution = solve(kb, config=config)
            elapsed = time.perf_counter() - start_time
            results.append({
                'strategy': name,
                'confidence': solution.confidence,
                'time': elapsed,
                'combinations': solution.number_of_combinations,
                'satisfiable': solution.number_of_satisfiable_combinations,
            })
        except Exception as e:
            # Deliberately broad: one failing strategy is recorded as an
            # error row instead of aborting the whole comparison.
            results.append({
                'strategy': name,
                'error': str(e),
            })
    return results
# Run the comparison on the disease KB and print one summary line per strategy.
comparison = compare_search_strategies(disease_kb)
print("Search strategy comparison:")
for outcome in comparison:
    label = outcome['strategy']
    if 'error' not in outcome:
        print(f" {label}: confidence={outcome['confidence']:.3f}, "
              f"time={outcome['time']:.3f}s, combinations={outcome['combinations']}")
        continue
    # Failed strategies carry only an error message.
    print(f" {label}: ERROR - {outcome['error']}")
Summary
This tutorial covered:
- Basic Usage: Creating knowledge bases with probabilistic facts
- Fact Types: Different logical relationships (equivalence, subsumption, disjointness)
- Reasoning: How BOOMER-PY finds consistent interpretations
- Search Process: Exploring combinations and calculating probabilities
- Real Applications: Disease ontology mapping example
- Configuration: Tuning search parameters for performance
BOOMER-PY is particularly useful for:
- Ontology alignment: Mapping terms between different knowledge systems
- Knowledge integration: Combining information from multiple sources
- Uncertainty handling: Reasoning with probabilistic assertions
- Conflict resolution: Finding the most likely consistent interpretations
For more details, see the BOOMER-PY documentation and explore the example datasets in the repository.