Source code for deepchem_server.core.address

import os
from typing import Optional

from deepchem_server.core import config


# Scheme prefix that marks a string as a deepchem address; it has the same
# value as ``DeepchemAddress.address_prefix``.
DEEPCHEM_ADDRESS_PREFIX = 'deepchem://'


class DeepchemAddress:
    """A uniform representation to refer to deepchem objects.

    A deepchem address has the form ``deepchem://<profile>/<project>/<key>``,
    where ``<key>`` may itself contain ``/`` separators. The class splits an
    address into these components and converts it into a storage path.

    Parameters
    ----------
    address : str
        The address of the object. The ``deepchem://`` prefix is added when
        it is missing.
    kind : str, optional
        The kind of object. Can be 'data' or 'model', by default 'data'.

    Attributes
    ----------
    address : str
        The full address, always starting with ``address_prefix``.
    kind : str or None
        The value passed as ``kind``; stored without validation.
    profile, project, key : str
        The components returned by :meth:`parse_address`.

    Examples
    --------
    >>> deepchem_address = DeepchemAddress('deepchem://profile/project/zinc.csv')
    >>> deepchem_address.key
    'zinc.csv'
    """

    # Scheme prefix shared by every deepchem address.
    address_prefix: str = 'deepchem://'

    def __init__(self, address: str, kind: Optional[str] = "data") -> None:
        """Initialize DeepchemAddress.

        Parameters
        ----------
        address : str
            The address of the object, with or without the ``deepchem://``
            prefix.
        kind : str, optional
            The kind of object, by default 'data'.

        Raises
        ------
        ValueError
            If the address has fewer than three ``/``-separated components.
        """
        prefix = self.address_prefix
        # Normalise so that ``self.address`` always carries the prefix.
        self.address = address if address.startswith(prefix) else prefix + address
        self.kind = kind

        parsed_address = self.parse_address(self.address)
        self.profile = parsed_address['profile']
        self.project = parsed_address['project']
        self.key = parsed_address['key']

    @classmethod
    def make_deepchem_address_from_filename(cls, end: str) -> str:
        """Return a deepchem address string from a filename.

        Parameters
        ----------
        end : str
            The filename whose deepchem address we are creating.

        Returns
        -------
        str
            The address in the format ``deepchem://<storage_loc>/<end>``,
            where ``storage_loc`` is read from the configured datastore.

        Raises
        ------
        ValueError
            If no datastore is configured.

        Examples
        --------
        >>> DeepchemAddress.make_deepchem_address_from_filename('temp.txt')  # doctest: +SKIP
        'deepchem://test_company/test_user/working_dir/temp.txt'
        """
        datastore = config.get_datastore()
        if datastore is None:
            raise ValueError("No datastore configured")
        # NOTE(review): os.path.join uses the OS separator, so on Windows the
        # resulting address would contain backslashes - confirm whether that
        # is intended for a URL-style address.
        return cls.address_prefix + os.path.join(datastore.storage_loc, end)

    @classmethod
    def get_key(cls, address: str) -> str:
        """Return the key from an address.

        When ``address`` starts with the ``deepchem://`` prefix, the first two
        components (profile and project) are dropped and the remainder is
        returned. An address without the prefix is treated as already being a
        key and is returned unchanged.

        Parameters
        ----------
        address : str
            The address string whose key we are extracting.

        Returns
        -------
        str
            The extracted key.

        Examples
        --------
        >>> DeepchemAddress.get_key('deepchem://deepchem/data/delaney')
        'delaney'
        >>> DeepchemAddress.get_key('deepchem/data/delaney')
        'deepchem/data/delaney'
        >>> DeepchemAddress.get_key('delaney')
        'delaney'
        """
        prefix = cls.address_prefix
        if not address.startswith(prefix):
            return address
        # Skip the profile and project tokens; everything after is the key.
        return '/'.join(address[len(prefix):].split('/')[2:])

    @classmethod
    def parse_address(cls, address: str) -> dict:
        """Return the different components of the address.

        Parameters
        ----------
        address : str
            The deepchem address of the object, with or without the
            ``deepchem://`` prefix.

        Returns
        -------
        dict
            Dictionary containing the 'profile', 'project' and 'key'
            components.

        Raises
        ------
        ValueError
            If the address has fewer than three ``/``-separated components.

        Examples
        --------
        >>> DeepchemAddress.parse_address('deepchem://user/test/file')
        {'profile': 'user', 'project': 'test', 'key': 'file'}
        """
        prefix = cls.address_prefix
        if address.startswith(prefix):
            address = address[len(prefix):]

        tokens = address.split('/')
        if len(tokens) < 3:
            raise ValueError('Invalid deepchem address')

        profile, project, *key_tokens = tokens
        return {
            'profile': profile,
            'project': project,
            'key': '/'.join(key_tokens),
        }

    @classmethod
    def get_path(cls,
                 storage_loc: str,
                 address: str,
                 format: Optional[str] = 's3',
                 base_dir: Optional[str] = None) -> str:
        """Return the path of the object in the storage from the address.

        If ``address`` parses as ``profile/project/key`` (with or without the
        ``deepchem://`` prefix), the path is built from those components.
        Otherwise ``address`` is treated as a key relative to ``storage_loc``.

        Parameters
        ----------
        storage_loc : str
            The storage location of the object (used when the address is not
            a full ``profile/project/key`` address).
        address : str
            The deepchem address of the object.
        format : {'s3', 'local'}, optional
            The format of the path to be returned, by default 's3'. With
            'local', ``/`` separators in the key are replaced by ``os.sep``.
        base_dir : str, optional
            Base directory prepended to the path for the 'local' format when
            the address is a full ``profile/project/key`` address.

        Returns
        -------
        str
            The path of the object in the specified format.

        Raises
        ------
        ValueError
            If the format is not 's3' or 'local'.

        Examples
        --------
        All the following examples return the same path:

        >>> DeepchemAddress.get_path('profile/project', 'deepchem://profile/project/key')
        'profile/project/key'
        >>> DeepchemAddress.get_path('profile/project', 'profile/project/key')
        'profile/project/key'
        >>> DeepchemAddress.get_path('profile/project', 'key')
        'profile/project/key'
        """
        if format not in ('s3', 'local'):
            raise ValueError(f"Unknown format: {format}")

        # Only the parse can signal "not a full address"; keep the try narrow
        # so unrelated errors are not mistaken for that case.
        try:
            parsed_address = cls.parse_address(address)
        except ValueError:
            parsed_address = None

        if parsed_address is not None:
            profile = parsed_address['profile']
            project = parsed_address['project']
            key = parsed_address['key']
            if format == 's3':
                return '/'.join((profile, project, key))
            key = key.replace('/', os.sep)
            if base_dir is not None:
                return os.path.join(base_dir, profile, project, key)
            return os.path.join(profile, project, key)

        # Address is not in the form deepchem://profile/project/key: rebuild a
        # full address under storage_loc so the key can be extracted from it.
        if not address.startswith(storage_loc):
            address = cls.address_prefix + storage_loc.strip('/') + '/' + address
        address_key = cls.get_key(address)

        if format == 's3':
            # Insert the separator only when storage_loc lacks one, so that
            # 'profile/project' and 'profile/project/' give the same path.
            if storage_loc and not storage_loc.endswith('/'):
                return storage_loc + '/' + address_key
            return storage_loc + address_key

        # NOTE(review): base_dir is not applied in this branch - confirm
        # whether storage_loc is expected to already include it.
        return os.path.join(storage_loc, address_key.replace('/', os.sep))

    @classmethod
    def get_parent_key(cls, address: str) -> str:
        """Return the parent key of the object.

        The parent key is everything in the object's key up to and including
        the last ``/``. An empty string is returned when the key contains no
        ``/``. The key is obtained with :meth:`get_key`, so an address without
        the ``deepchem://`` prefix is treated entirely as a key.

        Parameters
        ----------
        address : str
            The deepchem address of the object or the key of the object.

        Returns
        -------
        str
            The parent key path, ending with ``/`` when non-empty.

        Examples
        --------
        >>> DeepchemAddress.get_parent_key('deepchem://profile/project/parent1/parent2/key')
        'parent1/parent2/'
        >>> DeepchemAddress.get_parent_key('parent1/parent2/key')
        'parent1/parent2/'
        >>> DeepchemAddress.get_parent_key('deepchem://profile/project/key')
        ''
        """
        parent, separator, _ = cls.get_key(address).rpartition('/')
        # ``separator`` is '' when the key has no '/', giving '' overall.
        return parent + separator

    @classmethod
    def get_object_name(cls, address: str) -> str:
        """Return the name of the object.

        The name is the part of the object's key after the last ``/``, or the
        whole key when it contains no ``/``.

        Parameters
        ----------
        address : str
            The deepchem address of the object or the key of the object.

        Returns
        -------
        str
            The object name.

        Examples
        --------
        >>> DeepchemAddress.get_object_name('deepchem://profile/project/parent1/parent2/key')
        'key'
        >>> DeepchemAddress.get_object_name('profile/project/parent1/parent2/key')
        'key'
        """
        return cls.get_key(address).rpartition('/')[2]

    def __str__(self) -> str:
        """Return the full address string, including the prefix."""
        return self.address

    def __repr__(self) -> str:
        """Return the full address string, including the prefix."""
        return self.address