Skip to content

GO-CAM Figures

The results of this notebook can be seen here: GO-CAM Reviews

Example:

img

from pydantic_ai.settings import ModelSettings

from aurelian.agents.draw.draw_config import DrawDependencies
from aurelian.agents.gocam.gocam_config import GOCAMDependencies
from aurelian.agents.draw.draw_agent import draw_agent

# Default-constructed dependency objects: `deps` is handed to the agents'
# run_sync calls, and `gocam_deps` supplies the collection that is queried.
deps = DrawDependencies() 
gocam_deps = GOCAMDependencies()
/Users/cjm/repos/aurelian/.venv/lib/python3.11/site-packages/pydantic/_internal/_config.py:345: UserWarning: Valid config keys have changed in V2:
* 'schema_extra' has been renamed to 'json_schema_extra'
  warnings.warn(message, UserWarning)


import os
# Read the API key from the CBORG_API_KEY environment variable; the value is
# None when the variable is unset, so a missing key is not detected here.
cborg_api_key = os.environ.get("CBORG_API_KEY")


from pydantic_ai.providers.openai import OpenAIProvider
from pydantic_ai.models.openai import OpenAIModel


# not to be confused with GO-CAM Model
# LLM handle passed to draw_agent.run_sync: an OpenAIModel whose provider is
# pointed at the base_url below and authenticated with cborg_api_key.
# NOTE(review): presumably an OpenAI-compatible gateway that routes the
# "anthropic/claude-sonnet" model name — confirm with the service docs.
ai_model = OpenAIModel(
    "anthropic/claude-sonnet",
    #"openai/gpt-4o",
    provider=OpenAIProvider(
        base_url="https://api.cborg.lbl.gov",
        api_key=cborg_api_key),
)

# Model settings passed alongside ai_model on each drawing request; sets the
# max_tokens value to 32000.
settings = ModelSettings(
    max_tokens=32000,
)
# Collection handle exposed by the GO-CAM dependencies object; queried for models.
collection = gocam_deps.collection
['gocams']

# Query the collection with an empty filter and display how many rows came back.
# NOTE(review): limit=-1 presumably means "no limit" — confirm against the
# collection API.
qr = collection.find({}, limit=-1) 
len(qr.rows)
944
from gocam.datamodel import Model


def is_causal(m: dict):
    """Tell whether a raw GO-CAM record carries more than two causal associations.

    The dict is parsed into a ``Model``; the causal associations of every
    activity are counted (a missing activity list or a missing association
    list counts as empty) and the total is compared against 2.

    Args:
        m: Keyword arguments accepted by the ``Model`` constructor.

    Returns:
        True when the total number of causal associations across all
        activities is strictly greater than 2, otherwise False.
    """
    parsed = Model(**m)
    total_links = sum(
        len(activity.causal_associations or [])
        for activity in (parsed.activities or [])
    )
    return total_links > 2

# Keep only the rows accepted by is_causal, then display how many remain.
models = list(filter(is_causal, qr.rows))
len(models)
740
!mkdir -p go-cam-figures
import nest_asyncio
# Patch asyncio so that nested run_until_complete calls are allowed; the
# agents' run_sync drives the event loop that way, which would otherwise fail
# inside an already-running loop such as a notebook kernel's.
nest_asyncio.apply()
from aurelian.agents.draw.draw_agent import DrawingFeedback
import os
import asyncio

# Instruction text for the drawing agent. When the command is built, the
# GO-CAM model's JSON is appended after this text, separated by ": ".
PROMPT = """
Draw the following GO-CAM as a pathway diagram. Draw this in the style of a figure
for inclusion in a journal like Cell or Nature. Be faithful to the content, but
use artistic license when drawing things such as cell components, tissue compartments,
molecular tagging, subcellular transport, etc. While you should generally not include gene
products not in the GO-CAM, fill in implicit details where completely necessary, but
mark them as not being in the source.
"""

# json is needed to reload cached drawing results; it was never imported in
# the notebook, so the cached branch used to fail with a NameError.
import json

# For every selected GO-CAM model: draw (or reload) an SVG figure, then write
# a markdown page that embeds the figure, its legend and the AI feedback.
for m in models:
    model = Model(**m)
    title = model.title
    print(f"## {model.id} {model.title}")
    # make safe for file name; replace ALL non-alphanumeric characters with "_"
    title = "".join(c if c.isalnum() else "_" for c in title)
    # Local part of the identifier, i.e. everything after the last ":".
    model_num = model.id.split(":")[-1]
    fn = f"go-cam-figures/FIG-{model_num}-{title}.svg"
    # The JSON sidecar doubles as the "already generated" marker.
    fn_json = f"{fn}.json"

    if os.path.exists(fn_json):
        print(f"File {fn_json} already exists, skipping.")
        # Reload the cached result so the markdown page below is rebuilt from
        # THIS model's data rather than from whatever `result` was left over
        # from a previous iteration (or from nothing at all).
        # NOTE(review): assumes DrawingFeedback is the class of the agent's
        # result.data, since the sidecar is written from
        # result.data.model_dump_json() — confirm.
        with open(fn_json) as f:
            data = DrawingFeedback(**json.load(f))
    else:
        gocam_json = model.model_dump_json(exclude_unset=True)
        command = f"{PROMPT}: {gocam_json}"

        # Best effort: a failure on one model is reported and the loop moves
        # on. KeyboardInterrupt is not an Exception, so interrupts still stop
        # the whole run.
        try:
            result = draw_agent.run_sync(command, deps=deps, model=ai_model, model_settings=settings)
        except Exception as e:
            print(f"Error: {e}")
            continue
        data = result.data
        print(data)

        with open(fn, "w") as f:
            f.write(data.svg_content)
        with open(fn_json, "w") as f:
            f.write(data.model_dump_json())

    # Written in both branches, always from `data`, so cached and freshly
    # generated figures produce the same kind of page.
    with open(f"{fn}.md", "w") as f:
        f.write(f"# Figure: {model.title}\n\n")
        f.write(f"id: [{model.id}](https://bioregistry.io/{model.id})\n\n")
        f.write(f"![img](FIG-{model_num}-{title}.svg)\n\n")
        f.write(data.legend)
        f.write("\n\n")
        f.write(f"Feedback from AI on figure:\n\n```json\n{data.feedback.model_dump_json()}\n```\n")
## gomodel:56170d5200000012 kctd10 in heart development PMID:24430697 

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[10], line 34
     31 #print(f"COMMAND: {command}")
     33 try:
---> 34     result = draw_agent.run_sync(command, deps=deps, model=ai_model, model_settings=settings)
     35 except Exception as e:
     36     print(f"Error: {e}")

File ~/repos/aurelian/.venv/lib/python3.11/site-packages/pydantic_ai/agent.py:558, in Agent.run_sync(self, user_prompt, result_type, message_history, model, deps, model_settings, usage_limits, usage, infer_name)
    556 if infer_name and self.name is None:
    557     self._infer_name(inspect.currentframe())
--> 558 return get_event_loop().run_until_complete(
    559     self.run(
    560         user_prompt,
    561         result_type=result_type,
    562         message_history=message_history,
    563         model=model,
    564         deps=deps,
    565         model_settings=model_settings,
    566         usage_limits=usage_limits,
    567         usage=usage,
    568         infer_name=False,
    569     )
    570 )

File ~/repos/aurelian/.venv/lib/python3.11/site-packages/nest_asyncio.py:92, in _patch_loop.<locals>.run_until_complete(self, future)
     90     f._log_destroy_pending = False
     91 while not f.done():
---> 92     self._run_once()
     93     if self._stopping:
     94         break

File ~/repos/aurelian/.venv/lib/python3.11/site-packages/nest_asyncio.py:115, in _patch_loop.<locals>._run_once(self)
    108     heappop(scheduled)
    110 timeout = (
    111     0 if ready or self._stopping
    112     else min(max(
    113         scheduled[0]._when - self.time(), 0), 86400) if scheduled
    114     else None)
--> 115 event_list = self._selector.select(timeout)
    116 self._process_events(event_list)
    118 end_time = self.time() + self._clock_resolution

File /opt/homebrew/Cellar/python@3.11/3.11.9/Frameworks/Python.framework/Versions/3.11/lib/python3.11/selectors.py:566, in KqueueSelector.select(self, timeout)
    564 ready = []
    565 try:
--> 566     kev_list = self._selector.control(None, max_ev, timeout)
    567 except InterruptedError:
    568     return ready

KeyboardInterrupt: 
Judging drawing for: A pathway diagram illustrating the role of kctd10 in heart development, featuring protein interactions and gene regulation that occur in the zebrafish heart development process, particularly showing how kctd10 negatively regulates tbx5a which positively regulates has2 in atrioventricular canal development.
Converting SVG to PNG
Judging drawing for: An improved pathway diagram illustrating the role of kctd10 in zebrafish heart development, showing how kctd10 protein binding negatively regulates tbx5a transcription factor activity, which in turn positively regulates has2 expression, all within the context of regionalization and atrioventricular canal development. The diagram includes a cellular context with a clearly marked nucleus where kctd10 and tbx5a function, and includes an improved layout that minimizes crossing arrows.
Converting SVG to PNG

print("done")
done

# find all reviews in go-cam-reviews/ by glob searching *md
import glob
files = glob.glob("go-cam-reviews/*.md")
#cborg_api_key
# Model used for the review summaries; built with the same model name,
# base_url and API key as ai_model.
summarization_model = OpenAIModel(
    "anthropic/claude-sonnet",
    provider=OpenAIProvider(
        base_url="https://api.cborg.lbl.gov",
        api_key=cborg_api_key),
)
# Summarize every review file and store the structured summary next to it as
# "<review>.md.json".
for fn in files:
    print(fn)
    # Read through a context manager so the handle is closed right away
    # instead of being left to the garbage collector.
    with open(fn) as f:
        content = f.read()
    command = f"Summarize this review: {content}"
    # NOTE(review): gocam_review_summarizer_agent is not imported anywhere in
    # the visible notebook — it must be brought into scope before this runs.
    # NOTE(review): `deps` is the DrawDependencies instance created for the
    # drawing agent; confirm the summarizer does not expect gocam_deps.
    result = gocam_review_summarizer_agent.run_sync(command, deps=deps, model=summarization_model)
    # save the json in the same dir
    with open(fn + ".json", "w") as f:
        f.write(result.data.model_dump_json(indent=2))
print("Done")