import os
import pickle
import typing
import hpotk
from gpsea.model import ProteinMetadata
from ._api import ProteinMetadataService
class ProteinAnnotationCache:
    """Store and retrieve `ProteinMetadata` objects as pickle files in a directory.

    Each entry is kept in its own file named ``<protein_id>.pickle`` inside
    the data directory given to the constructor.

    Methods:
        get_annotations(protein_id: str): Look up the pickle file for the given
            ID and return the stored `ProteinMetadata`, or `None` if absent.
        store_annotations(protein_id: str, annotation: ProteinMetadata): Write
            the given `ProteinMetadata` to the pickle file for the given ID.
    """

    def __init__(self, datadir: str) -> None:
        """Create a cache backed by an existing directory.

        Args:
            datadir (str): Path to an existing directory that contains, or
                will contain, the pickle files.

        Raises:
            ValueError: If `datadir` is not an existing directory.
        """
        if not os.path.isdir(datadir):
            raise ValueError(f'datadir {datadir} must be an existing directory')
        self._datadir = datadir

    def get_annotations(self, protein_id: str) -> typing.Optional[ProteinMetadata]:
        """Return the cached `ProteinMetadata` for a protein ID, or `None` on a miss.

        Args:
            protein_id (str): The protein ID whose metadata is requested.

        Returns:
            The object unpickled from the cache file, or `None` if no regular
            file exists for `protein_id`.
        """
        fpath = self._create_file_name(protein_id)
        if not os.path.isfile(fpath):
            return None
        try:
            fh = open(fpath, 'rb')
        except FileNotFoundError:
            # The file was removed between the check above and the open;
            # report it as a cache miss rather than an error.
            return None
        with fh:
            # NOTE: unpickling can execute arbitrary code, so the cache
            # directory must only contain files written by a trusted source.
            return pickle.load(fh)

    def store_annotations(self, protein_id: str, annotation: ProteinMetadata) -> None:
        """Write the `ProteinMetadata` to the pickle file for the given protein ID.

        The data is first written to a temporary file in the same directory
        and then moved into place, so a failed or interrupted write never
        leaves a truncated cache entry and never clobbers an existing one.

        Args:
            protein_id (str): The protein ID the metadata belongs to.
            annotation (ProteinMetadata): The metadata object to store.
        """
        fpath = self._create_file_name(protein_id)
        # Same directory as the target so that `os.replace` is a plain rename.
        tmp_path = f'{fpath}.{os.getpid()}.tmp'
        try:
            with open(tmp_path, 'wb') as fh:
                pickle.dump(annotation, fh)
            os.replace(tmp_path, fpath)
        finally:
            # After a successful replace the temporary file is gone; it only
            # still exists here if pickling or the rename failed.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    def _create_file_name(self, prot_id: str) -> str:
        """Build the full path of the cache file for a protein ID.

        For example, ``"/path/to/desired/directory/NP_037407.4.pickle"``.

        Args:
            prot_id (str): The protein ID the file belongs to.

        Returns:
            The data directory joined with ``<prot_id>.pickle``.
        """
        fname = f'{prot_id}.pickle'
        return os.path.join(self._datadir, fname)