# Source code for modelindex.models.Metadata

from typing import Union, Dict, List

from modelindex.models.BaseModelIndex import BaseModelIndex
from modelindex.utils import lowercase_keys, full_filepath, load_any_file, merge_dicts


class Metadata(BaseModelIndex):
    """Metadata for a model.

    The well-known fields are stored under the display names listed in
    ``COMMON_FIELDS``; the properties below read and write those entries
    of ``self.data``. Any other custom key is stored as given.
    """

    # Canonical (display) names of the well-known metadata fields.
    COMMON_FIELDS = [
        "FLOPs",
        "Parameters",
        "Epochs",
        "Batch Size",
        "Training Data",
        "Training Techniques",
        "Training Resources",
        "Architecture",
    ]

    def __init__(
        self,
        flops: Union[str, int] = None,
        parameters: Union[str, int] = None,
        epochs: Union[str, int] = None,
        batch_size: Union[str, int] = None,
        training_data: Union[str, List] = None,
        training_techniques: Union[str, List] = None,
        training_resources: str = None,
        architecture: Union[str, List] = None,
        _filepath: str = None,
        **kwargs,
    ):
        """
        Args:
            flops (str,int): number of FLOPs
            parameters (str,int): total number of parameters for the model
            epochs (str,int): how many epochs the model was trained
            batch_size (str,int): batch size for the model
            training_data (str,list): one or a list of datasets used in training
            training_techniques (str,list): one or a list of training techniques
            training_resources (str): hardware used to train
            architecture (str, List): one or a list of architectures used in the model
            _filepath (str): path to the file where the data is coming from
            **kwargs: any other custom metadata
        """
        # kwargs are unpacked last on purpose: from_dict() passes the common
        # fields under their display names (e.g. "Batch Size"), which land in
        # kwargs and must win over the None-valued named parameters.
        d = {
            "FLOPs": flops,
            "Parameters": parameters,
            "Epochs": epochs,
            "Batch Size": batch_size,
            "Training Data": training_data,
            "Training Techniques": training_techniques,
            "Training Resources": training_resources,
            "Architecture": architecture,
            **kwargs,
        }

        # only save non-None values
        data = {k: v for k, v in d.items() if v is not None}

        super().__init__(data=data, filepath=_filepath)

    def _check(self, silent=True):
        """Record an error if the stored data is neither a dict nor a list.

        Args:
            silent: not used by this check.
        """
        if not isinstance(self.data, (dict, list)):
            self.check_errors.add("Metadata should be either a list or a dict")

    @staticmethod
    def from_dict(d: Dict, _filepath: str = None):
        """Construct Metadata from a dict.

        Keys matching a common field are matched case-insensitively, with
        either a space or an underscore between words, and are renamed to
        the canonical name from ``COMMON_FIELDS``. All other keys are passed
        through unchanged as custom metadata.

        Args:
            d (dict): A dictionary of values. A list is also accepted: it is
                passed through ``merge_dicts``, and an empty list is treated
                as empty metadata.
            _filepath (str): path to the file where the data is coming from

        Returns:
            Metadata: the constructed instance.
        """
        # be flexible to a common error where the metadata is given as a
        # list instead of a single dict
        if isinstance(d, list):
            # an empty list has nothing to merge; treat it as "no metadata"
            # rather than failing later when unpacking it as keyword args
            d = merge_dicts(d) if d else {}

        # lc_keys is indexed by lowercased key and yields the key to pop from d
        lc_keys = lowercase_keys(d)

        # common error: have metadata twice
        if len(lc_keys) == 1 and "metadata" in lc_keys:
            d = d[lc_keys["metadata"]]
            lc_keys = lowercase_keys(d)

        # work on a copy so the caller's dict is not modified by the renames
        dd = d.copy()

        for field_name in Metadata.COMMON_FIELDS:
            key = field_name.lower()
            if key in lc_keys:
                dd[field_name] = dd.pop(lc_keys[key])

            # try with _ instead of space in the field name
            if " " in field_name:
                key = field_name.lower().replace(" ", "_")
                if key in lc_keys:
                    dd[field_name] = dd.pop(lc_keys[key])

        return Metadata(
            _filepath=_filepath,
            **dd,
        )

    @staticmethod
    def from_file(filepath: str = None, parent_filepath: str = None):
        """Load a Metadata from a file.

        Args:
            filepath (str): File from which to load the metadata
            parent_filepath (str): Parent filename (if file is imported from another file)

        Returns:
            Metadata: the metadata built from the file content.

        Raises:
            ValueError: if the loaded content is not a dictionary.
        """
        fullpath = full_filepath(filepath, parent_filepath)
        # the second element returned by load_any_file is not needed here
        raw, _ = load_any_file(filepath, parent_filepath)

        if isinstance(raw, dict):
            return Metadata.from_dict(raw, fullpath)

        raise ValueError(
            f"Expected a dictionary with metadata, "
            f"but got something else in file at "
            f"'{fullpath}'"
        )

    # Getters

    @property
    def flops(self):
        """Get the FLOPs"""
        return self.data.get("FLOPs")

    @property
    def parameters(self):
        """Get number of parameters"""
        return self.data.get("Parameters")

    @property
    def epochs(self):
        """Get epochs"""
        return self.data.get("Epochs")

    @property
    def batch_size(self):
        """Get batch size"""
        return self.data.get("Batch Size")

    @property
    def training_data(self):
        """Get training data used"""
        return self.data.get("Training Data")

    @property
    def training_techniques(self):
        """Get techniques used"""
        return self.data.get("Training Techniques")

    @property
    def training_resources(self):
        """Get training resources used"""
        return self.data.get("Training Resources")

    @property
    def architecture(self):
        """Get the architecture(s) used."""
        return self.data.get("Architecture")

    # Setters

    @flops.setter
    def flops(self, value):
        self.data["FLOPs"] = value

    @parameters.setter
    def parameters(self, value):
        self.data["Parameters"] = value

    @epochs.setter
    def epochs(self, value):
        self.data["Epochs"] = value

    @batch_size.setter
    def batch_size(self, value):
        self.data["Batch Size"] = value

    @training_data.setter
    def training_data(self, value):
        self.data["Training Data"] = value

    @training_techniques.setter
    def training_techniques(self, value):
        self.data["Training Techniques"] = value

    @training_resources.setter
    def training_resources(self, value):
        self.data["Training Resources"] = value

    @architecture.setter
    def architecture(self, value):
        self.data["Architecture"] = value