import logging
from _collections_abc import dict_keys, dict_values
from typing import Any, Dict, List, Literal, Optional, Union
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
[docs]
class DeepChemModelConfigMapper:
    """Mappings between models and their configuration in Deepchem.

    This class contains mappings between the models and their configuration in
    Deepchem. It is used to generate the model cards while uploading
    models.

    The main purpose of this class is to validate and parse the model parameters
    from the config.yaml file and generate the model cards.

    **The config.yaml file contains the following parameters:**

    1. model_class (required): The model class in Deepchem.
    2. init_args (optional): The init arguments for the model.
    3. train_args (optional): The train arguments for the model.
    4. description (optional): The description of the model (will be stored in the model card).
    5. featurizer (optional): The featurizer for the model (will be stored in the model card).

    Sample config.yaml file:
    ------------------------

    .. code-block:: yaml

        model_class: GCNModel
        init_args:
          n_tasks: 1
          mode: classification
          batch: 2
          learning_rate: 0.0003
        train_args:
          nb_epoch: 1
        description: Description of the model (will be stored in the model card)

    Parameters
    ----------
    model_class : Any
        The model class in Deepchem.
    model_class_name : str, optional
        The name of the model class. If not provided, will be inferred.
    required_init_params : list, optional
        A list of required init parameters.
    optional_init_params : list, optional
        A list of optional init parameters.
    required_train_params : list, optional
        A list of required train parameters.
    optional_train_params : list, optional
        A list of optional train parameters.
    tasks : dict, optional
        A Dictionary of tasks mapped to their respective parameter name supported by the model.

    Examples
    --------
    >>> from deepchem_server.core.model_config_mapper import DeepChemModelConfigMapper
    >>> from deepchem.models import GCNModel
    >>> model = DeepChemModelConfigMapper(
    ...     model_class=GCNModel,
    ...     required_init_params=["init_param"],
    ...     optional_init_params=["init_param1", "init_param2"],
    ...     required_train_params=["train_param"],
    ...     optional_train_params=["train_param1", "train_param2"])
    >>> model.get_model_class_name()
    'GCNModel'
    >>> model.get_model_class()
    <class 'deepchem.models.torch_models.gcn.GCNModel'>
    >>> model
    <class 'deepchem.models.torch_models.gcn.GCNModel'>
    >>> model.add_init_params(["test_required_init_param"])
    >>> model.get_init_params("required")
    ['init_param', 'test_required_init_param']
    >>> model.add_init_params(["test_optional_init_param"], "optional")
    >>> model.get_init_params("optional")
    ['init_param1', 'init_param2', 'test_optional_init_param']
    >>> model.get_init_params()
    {'required': ['init_param', 'test_required_init_param'],
    'optional': ['init_param1', 'init_param2', 'test_optional_init_param']}
    >>> model.add_tasks({"task1": "task", "task2": "mode"})
    >>> model.get_tasks()
    {'task1': 'task', 'task2': 'mode'}

    In the above example, the model tasks are mapped to their respective parameter name supported by the model.
    For example, the task "task1" is mapped to the parameter "task", so during model initialization,
    if "task1" is provided as a task, then the parameter "task" will be used to initialize the model. Similarly,
    if "task2" is provided as a task, then the parameter "mode" will be used to initialize the model.
    """

    @staticmethod
    def parse_params(required_params: Optional[List], optional_params: Optional[List]) -> Dict:
        """Parse the required and optional parameters of the model.

        Returns a dictionary with the required and optional parameters.
        The returned lists are fresh copies, so extending them later does
        not modify the sequences passed in by the caller.

        Parameters
        ----------
        required_params : list, optional
            A list of required parameters. ``None`` is treated as empty.
        optional_params : list, optional
            A list of optional parameters. ``None`` is treated as empty.

        Returns
        -------
        dict
            A dictionary with the keys ``'required'`` and ``'optional'``,
            each mapped to a list of parameter names.
        """
        return {
            'required': [] if required_params is None else list(required_params),
            'optional': [] if optional_params is None else list(optional_params),
        }

    @staticmethod
    def get_class_name(model_class: Any) -> str:
        """Try to detect the model name for the model.

        Parameters
        ----------
        model_class : Any
            The model class.

        Returns
        -------
        str
            The model class name. This is ``model_class.__name__`` when that
            attribute exists. Otherwise, for an object whose class is named
            ``SklearnModel``, it is the class name of its ``model`` attribute;
            for anything else it is ``str(model_class.__class__)`` and an
            error is logged.
        """
        try:
            return model_class.__name__
        except AttributeError:
            pass

        # No ``__name__``: ``model_class`` is presumably an instance rather than a class.
        if model_class.__class__.__name__ == 'SklearnModel':
            return model_class.model.__class__.__name__

        logger.error("%s: Model class name not provided and could not be inferred.", model_class.__class__)
        return str(model_class.__class__)

    def __init__(
        self,
        model_class: Any,
        model_class_name: Optional[str] = None,
        required_init_params: Optional[List] = None,
        optional_init_params: Optional[List] = None,
        required_train_params: Optional[List] = None,
        optional_train_params: Optional[List] = None,
        tasks: Optional[Dict] = None,
    ) -> None:
        """Initialize DeepChemModelConfigMapper.

        Parameters
        ----------
        model_class : Any
            The model class.
        model_class_name : str, optional
            The name of the model class. Inferred with ``get_class_name``
            when not provided.
        required_init_params : list, optional
            A list of required init parameters.
        optional_init_params : list, optional
            A list of optional init parameters.
        required_train_params : list, optional
            A list of required train parameters.
        optional_train_params : list, optional
            A list of optional train parameters.
        tasks : dict, optional
            A dictionary of tasks mapped to their respective parameter name.
        """
        if model_class_name is None:
            model_class_name = self.get_class_name(model_class)

        self.model_config_mapping = {
            "model_class": model_class,
            "model_class_name": model_class_name,
            "init_params": self.parse_params(required_init_params, optional_init_params),
            "train_params": self.parse_params(required_train_params, optional_train_params),
            # Copy so that ``add_tasks`` never mutates the caller's dictionary.
            "tasks": {} if tasks is None else dict(tasks),
        }

    def add_init_params(self, init_params: List, kind: Literal["required", "optional"] = "required") -> None:
        """Add the init parameters to the model config mapping.

        Parameters
        ----------
        init_params : list
            A list of init parameters.
        kind : {'required', 'optional'}, optional
            Whether the init parameters are required or optional, by default 'required'.

        Returns
        -------
        None
        """
        self.model_config_mapping['init_params'][kind].extend(init_params)

    def add_train_params(self, train_params: List, kind: Literal["required", "optional"] = "required") -> None:
        """Add the train parameters to the model config mapping.

        Parameters
        ----------
        train_params : list
            A list of train parameters.
        kind : {'required', 'optional'}, optional
            Whether the train parameters are required or optional, by default 'required'.

        Returns
        -------
        None
        """
        self.model_config_mapping['train_params'][kind].extend(train_params)

    def add_tasks(self, tasks: Dict) -> None:
        """Add the tasks to the model config mapping.

        Parameters
        ----------
        tasks : dict
            A dictionary of tasks mapped to their respective parameter name supported by the model.
            Existing entries with the same key are overwritten.

        Returns
        -------
        None
        """
        self.model_config_mapping['tasks'].update(tasks)

    def get_model_class(self) -> Any:
        """Return the model class for the model.

        Returns
        -------
        Any
            The model class for the model.
        """
        return self.model_config_mapping['model_class']

    def get_model_class_name(self) -> str:
        """Return the model class name for the model.

        Returns
        -------
        str
            The model class name for the model.
        """
        return self.model_config_mapping['model_class_name']

    def get_init_params(self, kind: Literal["required", "optional", None] = None) -> Union[Dict, List]:
        """Return the initialization parameters for the model.

        Parameters
        ----------
        kind : {'required', 'optional', None}, optional
            If kind is None, then the function returns all the init parameters
            for the model. If kind is "required", then the function returns
            only the required init parameters. If kind is "optional", then the
            function returns only the optional init parameters.

        Returns
        -------
        dict or list
            The dictionary with both the ``'required'`` and ``'optional'``
            lists when ``kind`` is None, otherwise the list for that kind.
        """
        init_params = self.model_config_mapping['init_params']
        return init_params if kind is None else init_params[kind]

    def get_train_params(self, kind: Literal["required", "optional", None] = None) -> Union[Dict, List]:
        """Return the train parameters for the model.

        Parameters
        ----------
        kind : {'required', 'optional', None}, optional
            If kind is None, then the function returns all the train parameters
            for the model. If kind is "required", then the function returns
            only the required train parameters. If kind is "optional", then the
            function returns only the optional train parameters.

        Returns
        -------
        dict or list
            The dictionary with both the ``'required'`` and ``'optional'``
            lists when ``kind`` is None, otherwise the list for that kind.
        """
        train_params = self.model_config_mapping['train_params']
        return train_params if kind is None else train_params[kind]

    def get_tasks(self) -> Dict:
        """Return the tasks for the model.

        Returns
        -------
        dict
            Returns a Dictionary containing the tasks mapped to their respective parameter name of the model.
        """
        return self.model_config_mapping['tasks']

    def __getitem__(self, item: str) -> Any:
        """Return the mentioned item from the model config mapping.

        Parameters
        ----------
        item : str
            The item to be returned from the model config mapping. The mapping
            holds the keys ``"model_class"``, ``"model_class_name"``,
            ``"init_params"``, ``"train_params"`` and ``"tasks"``.

        Returns
        -------
        Any
            The item from the model config mapping.

        Raises
        ------
        KeyError
            If ``item`` is not a key of the model config mapping.
        """
        return self.model_config_mapping[item]

    def __str__(self) -> str:
        """Return the model class name for the model.

        Returns
        -------
        str
            The model class name.

        Examples
        --------
        >>> from deepchem_server.core.model_config_mapper import DeepChemModelConfigMapper
        >>> from deepchem.models import GCNModel
        >>> model = DeepChemModelConfigMapper(
        ...     model_class=GCNModel,
        ...     required_init_params=["init_param"],
        ...     optional_init_params=["init_param1", "init_param2"],
        ...     required_train_params=["train_param"],
        ...     optional_train_params=["train_param1", "train_param2"])
        >>> str(model)
        'GCNModel'
        """
        return self.model_config_mapping['model_class_name']

    def __repr__(self) -> str:
        """Return the ``repr`` of the model class for the model.

        Returns
        -------
        str
            The string representation of the model class.

        Examples
        --------
        >>> from deepchem_server.core.model_config_mapper import DeepChemModelConfigMapper
        >>> from deepchem.models import GCNModel
        >>> model = DeepChemModelConfigMapper(
        ...     model_class=GCNModel,
        ...     required_init_params=["init_param"],
        ...     optional_init_params=["init_param1", "init_param2"],
        ...     required_train_params=["train_param"],
        ...     optional_train_params=["train_param1", "train_param2"])
        >>> model
        <class 'deepchem.models.torch_models.gcn.GCNModel'>
        """
        return repr(self.get_model_class())
[docs]
class ModelAddressWrapper(dict):
    """Wrapper for deepchem-server model name and deepchem model config.

    This class is used to wrap the deepchem-server model name and deepchem model config.
    It is used as a custom dictionary to map the deepchem-server model name to the
    deepchem model config.

    Entries are stored in the dictionary itself, so ``len()``, iteration,
    ``items()`` and ``repr()`` all reflect the registered models. Note that
    indexing with ``wrapper[key]`` returns the *model class*, whereas
    ``values()``, ``items()`` and ``get_model_config()`` return the stored
    model config mapper.

    Examples
    --------
    >>> from deepchem_server.core.model_config_mapper import ModelAddressWrapper, DeepChemModelConfigMapper
    >>> from deepchem.models import GCNModel
    >>> model = DeepChemModelConfigMapper(
    ...     model_class=GCNModel,
    ...     required_init_params=["init_param"],
    ...     optional_init_params=["init_param1", "init_param2"],
    ...     required_train_params=["train_param"],
    ...     optional_train_params=["train_param1", "train_param2"])
    >>> model_address_map = ModelAddressWrapper({"gcn": model})
    >>> model_address_map
    {'gcn': <class 'deepchem.models.torch_models.gcn.GCNModel'>}
    >>> model_address_map['gcn']
    <class 'deepchem.models.torch_models.gcn.GCNModel'>
    >>> model_address_map.get_model_class_name('gcn')
    'GCNModel'
    >>> # using key value pairs
    >>> from sklearn.linear_model import LinearRegression
    >>> from deepchem.models import SklearnModel
    >>> model = DeepChemModelConfigMapper(
    ...     model_class=SklearnModel,
    ...     required_init_params=None,
    ...     optional_init_params=["fit_intercept", "copy_X", "n_jobs", "positive"],
    ...     required_train_params=None,
    ...     optional_train_params=None)
    >>> model_address_map['linear_regression'] = model
    >>> model_address_map['linear_regression']
    <class 'deepchem.models.sklearn_models.SklearnModel'>
    """

    def __init__(self, *args, **kwargs) -> None:
        """Initialize ModelAddressWrapper.

        Parameters
        ----------
        *args
            Variable length argument list. Expected dict as first argument.
        **kwargs
            Arbitrary keyword arguments for model mappings.

        Raises
        ------
        TypeError
            If more than 1 positional argument is provided or if the first
            argument is not a dict.
        """
        super().__init__()
        if len(args) > 1:
            raise TypeError(f"ModelAddressWrapper expected at most 1 arguments, got {len(args)}")
        if args:
            arg = args[0]
            if not isinstance(arg, dict):
                raise TypeError(f"ModelAddressWrapper expected dict, got {type(arg)}")
            for key, value in arg.items():
                self[key] = value
        for key, value in kwargs.items():
            self[key] = value

    def get_model_config(
            self,
            key: str,
            kind: Literal["model_name", "class_name"] = "model_name") -> Optional["DeepChemModelConfigMapper"]:
        """Return the model config map given the model key.

        Parameters
        ----------
        key : str
            The name/key of the model.
        kind : {'model_name', 'class_name'}, optional
            Whether the key is the model name or the model class, by default 'model_name'.

        Returns
        -------
        DeepChemModelConfigMapper or None
            The model config map for the model. When looking up by class name,
            None is returned if no registered model has that class name; None
            is also returned for an unrecognised ``kind``.

        Raises
        ------
        KeyError
            If ``kind`` is ``"model_name"`` and ``key`` is not registered.
        """
        if kind == "model_name":
            # Bypass the overridden ``__getitem__``, which returns the model class.
            return super().__getitem__(key)
        if kind == "class_name":
            for model_config_map in self.values():
                if model_config_map.get_model_class_name() == key:
                    return model_config_map
        return None

    def get_model_name_from_class_name(self, model_class_name: str) -> Optional[str]:
        """Return the model name for the model class name.

        The class will be used when parsing the config.yaml file,
        since we don't have the model name in the config.yaml file.

        Parameters
        ----------
        model_class_name : str
            The model class name for the model.

        Returns
        -------
        str or None
            The model name of the first registered model with that class
            name, or None if not found.
        """
        for model_name, model_config_map in self.items():
            if model_config_map.get_model_class_name() == model_class_name:
                return model_name
        return None

    def get_model_class_name(self, key: str) -> str:
        """Return the model class name for the model key.

        Since using a key to access the ModelAddressWrapper returns the model
        class, this function reduces the code complexity.

        Parameters
        ----------
        key : str
            The name/key of the model.

        Returns
        -------
        str
            The model class name for the model.

        Raises
        ------
        KeyError
            If ``key`` is not registered.

        Examples
        --------
        The below code snippets are equivalent:

        >>> from deepchem.models import GCNModel
        >>> model_address_map = ModelAddressWrapper({"gcn": DeepChemModelConfigMapper(model_class=GCNModel)})
        >>> model_address_map.get_model_class_name("gcn")
        'GCNModel'
        >>> model_address_map.get_model_config("gcn").get_model_class_name()
        'GCNModel'
        """
        return super().__getitem__(key).get_model_class_name()

    def get_model_class_names(self) -> List[str]:
        """Return the model class names for the models.

        Returns
        -------
        list of str
            The model class names for the models, in insertion order.
        """
        return [model_config_map.get_model_class_name() for model_config_map in self.values()]

    def __setitem__(self, key: str, value: "DeepChemModelConfigMapper") -> None:
        """Set item in the wrapper.

        Parameters
        ----------
        key : str
            The model name key.
        value : DeepChemModelConfigMapper
            The model config mapper to store.

        Returns
        -------
        None
        """
        super().__setitem__(key, value)

    def __getitem__(self, key: str) -> Any:
        """Get the model class registered under the key.

        Parameters
        ----------
        key : str
            The model name key.

        Returns
        -------
        Any
            The model class of the stored model config mapper (not the
            mapper itself; use ``get_model_config`` for that).

        Raises
        ------
        KeyError
            If ``key`` is not registered.
        """
        return super().__getitem__(key).get_model_class()

    def __contains__(self, key) -> bool:
        """Check if the key is in the wrapper.

        Parameters
        ----------
        key : str
            The model name key.

        Returns
        -------
        bool
            True if the key is in the wrapper, False otherwise.
        """
        return super().__contains__(key)

    def keys(self) -> dict_keys:
        """Return the keys of the wrapper.

        Returns
        -------
        dict_keys
            The model name keys of the wrapper.
        """
        return super().keys()

    def values(self) -> dict_values:
        """Return the values of the wrapper.

        Returns
        -------
        dict_values
            The stored model config mappers (not the model classes).
        """
        return super().values()