Skip to content

blend.config

Config¤

Utilities to deal with the configuration files.

Config ¤

Config makes it easy to load and use config files.

A Config object will load the config file and enhance the paths, e.g., prepend the base folder, if needed. The config object can also be used like a dictionary or, even more conveniently, by using a list as the path to the value.

conf = Config(file_path="test.json", base_folder="/tmp")
conf[["etl", "raw"]]

Parameters:

Name Type Description Default
file_path str

path to the config file. If the path is relative path, please specify the base folder.

required
base_folder str, optional

the base folder for our working directory, defaults to None

required

_enhance_local_paths(config, base_folder=None) private staticmethod ¤

Prepends base_folder to the local paths in the configs and, if a name key is present, also builds the full file path.

Parameters:

Name Type Description Default
config dict

dictionary of configuration.

required
base_folder str, optional

base folder of all the artifacts, defaults to None

None
Source code in haferml/blend/config.py
@staticmethod
def _enhance_local_paths(config, base_folder=None):
    """
    Add absolute-path entries next to every `local` key in the config.

    Every key path reported by `_get_all_paths_in_dict` that ends in `"local"`
    is processed: its value is joined onto `base_folder` (when one is given)
    and written under the sibling key `local_absolute`. When the parent entry
    also contains a `name` key, the resulting folder joined with that name is
    written under the sibling key `name_absolute`.

    :param config: dictionary of configuration.
    :type config: dict
    :param base_folder: base folder of all the artifacts, defaults to None
    :type base_folder: str, optional
    """

    for key_path in _get_all_paths_in_dict(config):
        # Only entries whose last key is "local" describe a local folder.
        if key_path[-1] != "local":
            continue

        local_dir = get_config(config, key_path)
        parent_path = key_path[:-1]

        # Without a base folder the local value is used as it is.
        if base_folder is not None:
            local_dir = os.path.join(base_folder, local_dir)
        _update_dict_recursively(config, parent_path + ["local_absolute"], local_dir)

        # Read the parent entry only after the update above, as the original does.
        parent_entry = get_config(config, parent_path)
        if "name" not in parent_entry:
            continue

        file_location = os.path.join(local_dir, parent_entry["name"])
        _update_dict_recursively(
            config, parent_path + ["name_absolute"], file_location
        )

get(self, path) ¤

Retrieve config for a given path down in the configs.

conf = Config(file_path="test.json", base_folder="/tmp")
conf.get(["etl", "raw"])

Parameters:

Name Type Description Default
path list

path to the specific configurations

required

Returns:

Type Description

configuration for the specific path

Source code in haferml/blend/config.py
def get(self, path):
    """
    Look up the configuration stored under a path of nested keys.

    ```
    conf = Config(file_path="test.json", base_folder="/tmp")
    conf.get(["etl", "raw"])
    ```

    :param path: path to the specific configurations
    :type path: list
    :return: whatever `get_config` finds in `self.config` under `path`
    """

    # The lookup itself is delegated to the module-level helper.
    return get_config(self.config, path)

construct_paths(config, base_folder) ¤

construct_paths reconstructs the path based on base folder.

The local key in config will be used. Typically, the config shall be something like

config = {
    "local": "gauss/data",
    "name": "my_data.parquet"
}

If the base folder is base_folder=/tmp, the config will become

config = {
    "local": "/tmp/gauss/data",
    "name": "my_data.parquet",
    "file_path": "/tmp/gauss/data/my_data.parquet"
}

Parameters:

Name Type Description Default
config dict

the config dictionary that contains a local key

required
base_folder str

base folder that will be prepended to the path in config["local"]

required
Source code in haferml/blend/config.py
def construct_paths(config, base_folder):
    """
    construct_paths reconstructs the path based on base folder.

    The `local` key in `config` will be used. Typically, the config shall be something like

    ```
    config = {
        "local": "gauss/data",
        "name": "my_data.parquet"
    }
    ```

    If the base folder is `base_folder=/tmp`, the returned config will be

    ```
    config = {
        "local": "/tmp/gauss/data",
        "name": "my_data.parquet",
        "file_path": "/tmp/gauss/data/my_data.parquet"
    }
    ```

    The input dictionary is not modified; a new dictionary is returned. If
    `config` has no (or an empty) `local` value, a warning is logged and the
    input `config` is returned unchanged.

    :param dict config: the config dictionary that contains a `local` key
    :param str base_folder: base folder that will be prepended to the path in
        `config["local"]`; if `None`, the local path is used as it is
    :return: a copy of `config` with `local` rewritten and, when `name` is
        present, an extra `file_path` key
    :rtype: dict
    """

    if not config.get("local"):
        logger.warning(f"{config} does not contain local key ")
        return config

    local_path = config["local"]
    # A missing base folder means "use the local path as it is", matching how
    # `load_config` and `_enhance_local_paths` treat `base_folder=None`;
    # `os.path.join(None, ...)` would raise a TypeError otherwise.
    if base_folder is not None:
        local_path = os.path.join(base_folder, local_path)

    reconstructed = {"local": local_path}

    name = config.get("name")
    if name:
        reconstructed["file_path"] = os.path.join(local_path, name)

    # Merge into a new dict so that the caller's config is left untouched.
    return {**config, **reconstructed}

get_config(configs, path) ¤

Get value of the configs under specified path

>>> get_config({'etl':{'raw':{'local':'data/raw', 'remote': 's3://haferml-tutorials/rideindego/marshall/data/raw'}}},['etl','raw'])
{'local':'data/raw', 'remote': 's3://haferml-tutorials/rideindego/marshall/data/raw'}

Parameters:

Name Type Description Default
configs dict

input dictionary

required
path list

path to the value to be obtained

required
Source code in haferml/blend/config.py
def get_config(configs, path):
    """
    Walk down nested configs along a sequence of keys and return what is found.

    ```
    >>> get_config({'etl':{'raw':{'local':'data/raw', 'remote': 's3://haferml-tutorials/rideindego/marshall/data/raw'}}},['etl','raw'])
    {'local':'data/raw', 'remote': 's3://haferml-tutorials/rideindego/marshall/data/raw'}
    ```

    A `path` that is neither a list nor a tuple is treated as a single key
    (a warning is logged). An empty `path` yields a shallow copy of `configs`.

    :param dict configs: input dictionary
    :param list path: path to the value to be obtained

    """

    # Normalise the path into a sequence of keys
    if isinstance(path, (list, tuple)):
        keys = path
    else:
        logger.warning(f"path is not list nor tuple, converting to list: {path}")
        keys = [path]

    # Descend one key at a time, starting from a shallow copy of the input
    node = configs.copy()
    for key in keys:
        node = node[key]

    return node

load_config(config_path, base_folder=None) ¤

load_config loads the config file of the project from a path and generates a dictionary.

If base_folder is not specified, the path will be treated as it is. For example, config_path=a/b/c.json will be a file relative to the python work directory; while config_path=/a/b/c.json will be the absolute path.

Parameters:

Name Type Description Default
config_path str

path to the config file

required
base_folder str

the base folder of the whole project

None
Source code in haferml/blend/config.py
def load_config(config_path, base_folder=None):
    """
    load_config loads the config file of the project from a path and generates a dictionary.

    If `base_folder` is not specified, the path will be treated as it is. For example,
    `config_path=a/b/c.json` will be a file relative to the python work directory; while
    `config_path=/a/b/c.json` will be the absolute path. If `base_folder` is specified,
    it is joined in front of `config_path`.

    :param str config_path: path to the config file
    :param base_folder: the base folder of the whole project, defaults to None
    :type base_folder: str, optional
    :return: the parsed content of the JSON config file
    :raises ValueError: if `config_path` is None
    :raises FileNotFoundError: if the resolved config path does not exist
    """
    # Both errors below are subclasses of Exception, so callers that catch
    # the generic Exception raised previously keep working.
    if config_path is None:
        raise ValueError("config_path has not been specified...")

    if base_folder is not None:
        config_path = os.path.join(base_folder, config_path)

    if not os.path.exists(config_path):
        raise FileNotFoundError(
            f"config file path {config_path} does not exist! Beware of the relative path."
        )

    logger.debug(f"Loading config from {config_path}")
    # JSON is UTF-8 by specification; do not depend on the locale's default
    # encoding, which differs between platforms.
    with open(config_path, "r", encoding="utf-8") as fp:
        config = json.load(fp)

    # An empty config is suspicious but not an error: warn and return it.
    if not config:
        logger.warning(f"The config is empty: {config}")

    return config