from pydantic_ai.settings import ModelSettings
from aurelian.agents.draw.draw_config import DrawDependencies
from aurelian.agents.gocam.gocam_config import GOCAMDependencies
from aurelian.agents.draw.draw_agent import draw_agent
# Dependency object handed to the agents' run_sync() calls further down.
deps = DrawDependencies()
# GO-CAM dependencies; used below as the source of the model collection.
gocam_deps = GOCAMDependencies()
import os
# API key for the CBORG endpoint, read from the environment.
# os.environ.get returns None when CBORG_API_KEY is not set.
cborg_api_key = os.environ.get("CBORG_API_KEY")
from pydantic_ai.providers.openai import OpenAIProvider
from pydantic_ai.models.openai import OpenAIModel
# The LLM used by the agents -- not to be confused with a GO-CAM Model.
# It is reached through an OpenAIProvider pointed at the CBORG endpoint.
# (Alternative model name previously tried here: "openai/gpt-4o".)
ai_model = OpenAIModel(
    "anthropic/claude-sonnet",
    provider=OpenAIProvider(
        base_url="https://api.cborg.lbl.gov",
        api_key=cborg_api_key,
    ),
)

# Per-request model settings passed alongside ai_model when drawing.
settings = ModelSettings(max_tokens=32000)
# Fetch GO-CAM records from the collection exposed by the GO-CAM dependencies.
collection = gocam_deps.collection
# Empty query = no filter.
# NOTE(review): limit=-1 presumably means "no limit" -- confirm against the collection API.
qr = collection.find({}, limit=-1)
# Notebook-style inspection of the row count; has no effect when run as a script.
len(qr.rows)
from gocam.datamodel import Model
def is_causal(m: dict):
    """Tell whether a raw GO-CAM record has more than two causal associations.

    The record is parsed with ``Model(**m)`` and the causal associations of
    every activity are counted. A model without activities, or an activity
    without causal associations, contributes zero to the count.

    Args:
        m: Mapping of keyword arguments accepted by ``Model``.

    Returns:
        ``True`` if the total number of causal associations across all
        activities is greater than two, otherwise ``False``.
    """
    parsed = Model(**m)
    total = sum(
        len(activity.causal_associations or [])
        for activity in parsed.activities or []
    )
    return total > 2
# Keep only the records accepted by is_causal() (more than two causal associations).
models = [m for m in qr.rows if is_causal(m)]
# Notebook-style inspection of how many records survived the filter.
len(models)
# Make sure the output directory for the figures exists.
# os.makedirs replaces the IPython-only "!mkdir -p" shell escape so this line
# is valid plain Python and does not depend on a POSIX shell; exist_ok=True
# keeps the "-p" behaviour of not failing when the directory is already there.
os.makedirs("go-cam-figures", exist_ok=True)
import nest_asyncio
# NOTE(review): presumably needed so the agents' run_sync() calls work when an
# event loop is already running (e.g. inside Jupyter) -- confirm.
nest_asyncio.apply()
from aurelian.agents.draw.draw_agent import DrawingFeedback
import os
import asyncio
# Instruction text for the drawing agent. When a command is built, the
# serialized GO-CAM JSON is appended after this text, separated by ": ".
PROMPT = """
Draw the following GO-CAM as a pathway diagram. Draw this in the style of a figure
for inclusion in a journal like Cell or Nature. Be faithful to the content, but
use artistic license when drawing things such as cell components, tissue compartments,
molecular tagging, subcellular transport, etc. While you should generally not include gene
products not in the GO-CAM, fill in implicit details where completely necessary, but
mark them as not being in the source.
"""
# Draw one figure per selected GO-CAM model.
#
# For each model three files are written under go-cam-figures/:
#   FIG-<num>-<title>.svg       the SVG returned by the drawing agent
#   FIG-<num>-<title>.svg.json  the full agent result as JSON (also the "done" marker)
#   FIG-<num>-<title>.svg.md    title, id link, legend and the agent's feedback
# A model whose .svg.json already exists is skipped, so the loop can be re-run
# after an interruption without redrawing finished figures.
import json  # local import: json is used below but not imported at the top of the file

for m in models:
    model = Model(**m)
    print(f"## {model.id} {model.title}")
    # make safe for file name; replace ALL non-alphanumeric characters with "_"
    # (`or ""` guards against a model without a title -- join() would fail on None)
    title = "".join(c if c.isalnum() else "_" for c in (model.title or ""))
    model_num = model.id.split(":")[-1]
    fn = f"go-cam-figures/FIG-{model_num}-{title}.svg"
    fn_json = f"{fn}.json"

    if os.path.exists(fn_json):
        print(f"File {fn_json} already exists, skipping.")
        # NOTE(review): the cache file is written from result.data below; confirm
        # that DrawingFeedback is the matching type for reading it back.
        with open(fn_json, encoding="utf-8") as f:
            data = DrawingFeedback(**json.load(f))
        # Explicitly move on: nothing below may run for a cached model, otherwise
        # it would use a `result` left over from an earlier iteration (or none at all).
        continue

    gocam_json = model.model_dump_json(exclude_unset=True)
    command = f"{PROMPT}: {gocam_json}"
    try:
        result = draw_agent.run_sync(command, deps=deps, model=ai_model, model_settings=settings)
    except Exception as e:
        # Best-effort batch run: report the failure and continue with the next model.
        print(f"Error: {e}")
        continue

    print(result.data)
    # Explicit UTF-8: the generated SVG/JSON/legend may contain non-ASCII
    # characters, which would fail under a non-UTF-8 platform default encoding.
    with open(fn, "w", encoding="utf-8") as f:
        f.write(result.data.svg_content)
    with open(fn_json, "w", encoding="utf-8") as f:
        f.write(result.data.model_dump_json())
    with open(f"{fn}.md", "w", encoding="utf-8") as f:
        f.write(f"# Figure: {model.title}\n\n")
        f.write(f"id: [{model.id}](https://bioregistry.io/{model.id})\n\n")
        f.write("\n\n")
        f.write(result.data.legend)
        f.write("\n\n")
        f.write(f"Feedback from AI on figure:\n\n```json\n{result.data.feedback.model_dump_json()}\n```\n")
print("done")
# find all reviews in go-cam-reviews/ by glob searching *md
# (glob.glob returns the matches in arbitrary order)
import glob
files = glob.glob("go-cam-reviews/*.md")
# Model used for summarizing the reviews. Same model name, endpoint and
# API key (cborg_api_key) as the drawing model, but a separate instance.
summarization_model = OpenAIModel(
    "anthropic/claude-sonnet",
    provider=OpenAIProvider(
        base_url="https://api.cborg.lbl.gov",
        api_key=cborg_api_key,
    ),
)
# Summarize every review file and store the structured result next to it as
# "<review file>.json" (e.g. foo.md -> foo.md.json).
#
# NOTE(review): `gocam_review_summarizer_agent` is not imported anywhere in the
# visible part of this file -- make sure it is in scope before running this.
# NOTE(review): `deps` is the DrawDependencies instance created at the top;
# confirm the summarizer agent should not receive `gocam_deps` instead.
for fn in files:
    print(fn)
    # Use a context manager so the file handle is closed right away instead of
    # being left for the garbage collector; read as UTF-8 explicitly.
    with open(fn, encoding="utf-8") as f:
        content = f.read()
    command = f"Summarize this review: {content}"
    result = gocam_review_summarizer_agent.run_sync(command, deps=deps, model=summarization_model)
    # save the json in the same dir
    with open(fn + ".json", "w", encoding="utf-8") as f:
        f.write(result.data.model_dump_json(indent=2))
print("Done")