Source code for modelindex.models.Model

import os
import copy
from typing import Dict, Union, List

from ordered_set import OrderedSet

from modelindex.models.Metadata import Metadata
from modelindex.models.BaseModelIndex import BaseModelIndex
from modelindex.models.Result import Result
from modelindex.models.ResultList import ResultList
from modelindex.utils import lowercase_keys, full_filepath, load_any_file, expand_wildcard_path, merge_lists_data


class Model(BaseModelIndex):
    """Model represents the ML model.

    The model's fields are stored in ``self.data`` under the capitalised keys
    listed in ``COMMON_FIELDS`` and are exposed through the properties defined
    at the bottom of the class.
    """

    # Keys of the well-known (non-custom) fields as they appear in ``self.data``.
    COMMON_FIELDS = [
        "Name",
        "Metadata",
        "Results",
        "Paper",
        "Code",
        "Weights",
        "Config",
        "README",
        "In Collection",
        "Image",
    ]

    def __init__(
        self,
        name: str = None,
        metadata: Union[Dict, Metadata, str] = None,
        results: Union[List, ResultList, Result, Dict, str] = None,
        paper: Union[str, Dict] = None,
        code: str = None,
        weights: str = None,
        config: str = None,
        readme: str = None,
        in_collection: Union[str, List[str]] = None,
        image: str = None,
        _filepath: str = None,
        _path_to_readme: str = None,
        **kwargs,
    ):
        """
        Args:
            name (str): Name of the model
            metadata (Metadata, dict, str): Metadata object, metadata dict or a
                filepath to the metadata file
            results (ResultList, Result, list, dict, str): ResultList, a single
                Result, a list of result dicts, a single result dict, or a
                filepath to the result file
            paper (str, dict): URL to the paper, or a structured dict with paper
                metadata (title, url)
            code (str): URL to the code snippet
            weights (str): URL to the pretrained weights
            config (str): URL to the config file
            readme (str): path to the README file for the model
            in_collection (str, List): name of the collection to which the
                model belongs to
            image (str): path or URL to an image for the model
            _filepath: The file path to where the data was loaded from
            _path_to_readme: Path to the markdown readme file if data is coming
                from there
            **kwargs: Any other custom fields
        """
        # Load problems are collected here instead of being raised, and are
        # handed to the base class together with the data.
        check_errors = OrderedSet()

        if isinstance(metadata, str):
            # link to a metadata file
            try:
                metadata = Metadata.from_file(metadata, _filepath)
            except (IOError, ValueError) as e:
                # On failure the original string is kept as the metadata value.
                check_errors.add(str(e))
        elif metadata is not None and not isinstance(metadata, Metadata):
            metadata = Metadata.from_dict(metadata, _filepath)

        if isinstance(results, str):
            # link to 1+ result files
            results_list = []
            for results_file in expand_wildcard_path(results, _filepath):
                try:
                    results_list.append(ResultList.from_file(results_file, _filepath))
                except (IOError, ValueError) as e:
                    check_errors.add(str(e))
            results = merge_lists_data(results_list)
        elif results is not None and not isinstance(results, ResultList):
            results = ResultList(results, _filepath)

        d = {
            "Name": name,
            "Metadata": metadata,
            "Results": results,
            "Paper": paper,
            "Code": code,
            "Weights": weights,
            "Config": config,
            "README": readme,
            "In Collection": in_collection,
            "Image": image,
            **kwargs,
        }

        # Only non-empty items
        data = {k: v for k, v in d.items() if v is not None}

        self._path_to_readme = _path_to_readme
        # Until build_full_model() is called the "full" model is the model itself.
        self._full_model = self

        super().__init__(
            data=data,
            filepath=_filepath,
            check_errors=check_errors,
        )

    def _check(self, silent=True):
        """Validate the model and record any problems in ``self.check_errors``.

        Checks that the name is non-empty and that a README / image given as a
        local path points to an existing file.

        Args:
            silent (bool): Not used by this implementation.
        """
        if self.name is None or self.name == "":
            self.check_errors.add("Field 'Name' cannot be empty")

        # When the data itself came from a README (``_path_to_readme`` is set)
        # there is no separate README path to verify.
        if self._readme_is_filepath() and not self._path_to_readme:
            # check if the README exists
            fullpath = full_filepath(self.readme, self.filepath)
            if not os.path.isfile(fullpath):
                self.check_errors.add(f"Path to README file {self.readme} is not a valid file.")

        # Anything not starting with "http" is treated as a local file path.
        if self.image and not self.image.startswith("http"):
            fullpath = full_filepath(self.image, self.filepath)
            if not os.path.isfile(fullpath):
                self.check_errors.add(f"Path to Image file {self.image} is not a valid file.")

    @classmethod
    def from_dict(cls, d: Dict, _filepath: str = None, _path_to_readme: str = None):
        """Create a Model from a dictionary.

        Keys matching one of the known fields case-insensitively are passed to
        the constructor as the corresponding keyword argument. Multi-word
        fields are accepted both with an underscore (``in_collection``) and
        with a space (``In Collection``). All remaining keys are forwarded
        unchanged as custom fields.

        Args:
            d (dict): dictionary containing models data
            _filepath (str): The file path to where the data was loaded from
            _path_to_readme (str): Path to the README file if metadata was
                extracted from a README
        """
        # Used below as a mapping: lower-cased key -> key as spelled in ``d``.
        lc_keys = lowercase_keys(d)

        copy_fields = [
            "name",
            "paper",
            "code",
            "weights",
            "config",
            "readme",
            "metadata",
            "results",
            "in_collection",
            "image",
        ]

        dd = d.copy()
        for field_name in copy_fields:
            # Try the underscore spelling first, then the space-separated one.
            variants = [field_name]
            if "_" in field_name:
                variants.append(field_name.replace("_", " "))

            for key in variants:
                if key in lc_keys:
                    dd[field_name] = dd.pop(lc_keys[key])

        if _path_to_readme:
            dd["readme"] = _path_to_readme

        return cls(
            _filepath=_filepath,
            _path_to_readme=_path_to_readme,
            **dd,
        )

    @staticmethod
    def from_file(filepath: str = None, parent_filepath: str = None):
        """Load a Model from a file.

        If the file contains a top-level ``models`` list instead of a single
        model, a ``ModelList`` is returned instead.

        Args:
            filepath (str): File from which to load the model
            parent_filepath (str): Parent filename (if file is imported from
                another file)

        Raises:
            ValueError: If the loaded content is not a dict.
        """
        fullpath = full_filepath(filepath, parent_filepath)
        # The second returned value is forwarded as ``_path_to_readme``.
        raw, md_path = load_any_file(filepath, parent_filepath)

        d = raw
        if isinstance(raw, dict):
            lc_keys = lowercase_keys(raw)
            if "model" in lc_keys:
                d = raw[lc_keys["model"]]
            elif "models" in lc_keys:
                # called Model.from_file() on a model list, fallback to ModelList
                d = raw[lc_keys["models"]]
                if isinstance(d, list):
                    # Imported here rather than at module level
                    # (presumably to avoid a circular import - TODO confirm).
                    from modelindex.models.ModelList import ModelList

                    return ModelList(d, fullpath, md_path)

            return Model.from_dict(d, fullpath, md_path)
        else:
            raise ValueError(f"Expected a model dict, but got "
                             f"something else in file '{fullpath}'")

    def _readme_is_filepath(self):
        """Return a truthy value if the README field looks like a path to a ``.md`` file."""
        return self.readme and self.readme.endswith(".md") and len(self.readme) < 256

    def build_full_model(self, col):
        """Build the full model by layering this model on top of its collection.

        A deep copy of ``col`` is used as the base. Metadata dicts are merged,
        result lists are concatenated, and every other field of this model
        replaces the value copied from the collection. The result is cached
        in ``self._full_model``.

        Args:
            col: The parent collection.

        Returns:
            The merged deep copy of ``col``.
        """
        model_full = copy.deepcopy(col)
        self_copy = copy.deepcopy(self)

        # Merge from src to dest dictionary by this key
        def merge_by_key(d_dest, d_src):
            for key in d_src.keys():
                # if doesn't exist, just copy over
                if key not in d_dest:
                    d_dest[key] = d_src[key]
                else:
                    # if exists try to merge dicts and lists
                    if isinstance(d_dest[key], list):
                        if isinstance(d_src[key], list):
                            d_dest[key].extend(d_src[key])
                        else:
                            d_dest[key].append(d_src[key])
                    elif isinstance(d_dest[key], dict):
                        if isinstance(d_src[key], dict):
                            # copy values that don't exist
                            # (.items() is required: iterating the dict itself
                            # yields only keys and cannot be unpacked as pairs)
                            for k, v in d_src[key].items():
                                if k not in d_dest[key]:
                                    d_dest[key][k] = v
                        else:
                            d_dest[key] = d_src[key]
                    else:
                        # overwrite if not a list or dict
                        d_dest[key] = d_src[key]

        # merge all fields from this model
        for key, value in self_copy.data.items():
            if key == "Metadata":
                if isinstance(model_full.metadata, Metadata) and isinstance(value, Metadata):
                    merge_by_key(model_full.metadata.data, value.data)
                else:
                    model_full.metadata = value
            elif key == "Results":
                if isinstance(model_full.results, ResultList) and isinstance(value, ResultList):
                    model_full.results.data.extend(value.data)
                else:
                    model_full.results = value
            else:
                # the model's own value takes precedence over the collection's
                model_full.data[key] = value

        self._full_model = model_full

        return model_full

    def readme_content(self):
        """Get the content of the README file (instead of just the path as returned by .readme)"""
        if not self.readme:
            return None
        elif self._path_to_readme:
            # The model data was loaded from a README, so the file the data
            # came from is read.
            with open(self.filepath, "r") as f:
                return f.read()
        elif self._readme_is_filepath():
            if self.filepath:
                fullpath = full_filepath(self.readme, self.filepath)
            else:
                fullpath = self.readme
            with open(fullpath, "r") as f:
                return f.read()
        else:
            # Not a path: the field value itself is returned as the content.
            return self.readme

    # Getters

    @property
    def name(self):
        """Get the model name"""
        return self.data.get("Name", None)

    @property
    def paper(self):
        """Get the model paper"""
        return self.data.get("Paper", None)

    @property
    def code(self):
        """Get the URL to code"""
        return self.data.get("Code", None)

    @property
    def weights(self):
        """Get the URL to weights"""
        return self.data.get("Weights", None)

    @property
    def config(self):
        """Get the URL to the config file"""
        return self.data.get("Config", None)

    @property
    def readme(self):
        """Get the path to the model README"""
        return self.data.get("README", None)

    @property
    def metadata(self):
        """Get the metadata as a Metadata object"""
        return self.data.get("Metadata", None)

    @property
    def results(self):
        """Get the results as a ResultList object"""
        return self.data.get("Results", None)

    @property
    def in_collection(self):
        """Get the name of the collection of which this model is part of."""
        return self.data.get("In Collection", None)

    @property
    def image(self):
        """Get the path or URL to the image for the model"""
        return self.data.get("Image", None)

    @property
    def full_model(self):
        """Get the model with all the inherited properties from the collection (read-only property)"""
        return self._full_model

    # Setters

    @name.setter
    def name(self, value):
        self.data["Name"] = value

    @paper.setter
    def paper(self, value):
        self.data["Paper"] = value

    @code.setter
    def code(self, value):
        self.data["Code"] = value

    @weights.setter
    def weights(self, value):
        self.data["Weights"] = value

    @config.setter
    def config(self, value):
        self.data["Config"] = value

    @readme.setter
    def readme(self, value):
        self.data["README"] = value

    @in_collection.setter
    def in_collection(self, value):
        self.data["In Collection"] = value

    @image.setter
    def image(self, value):
        self.data["Image"] = value

    @metadata.setter
    def metadata(self, value):
        # Strings and None are stored as-is; anything else that is not already
        # a Metadata object is converted with Metadata.from_dict.
        if value is not None and not isinstance(value, Metadata) and not isinstance(value, str):
            self.data["Metadata"] = Metadata.from_dict(value)
        else:
            self.data["Metadata"] = value

    @results.setter
    def results(self, value):
        # Strings and None are stored as-is; anything else that is not already
        # a ResultList is wrapped in one.
        if value is not None and not isinstance(value, ResultList) and not isinstance(value, str):
            self.data["Results"] = ResultList(value)
        else:
            self.data["Results"] = value