Skip to content

data.wrangle.misc

Data - Wrangling - MISC¤

convert_str_repr_to_list(inp) ¤

convert_str_repr_to_list converts a string representation of a list into a list

Source code in haferml/data/wrangle/misc.py
def convert_str_repr_to_list(inp):
    """
    convert_str_repr_to_list converts string representation of list to list

    A string input is parsed with `literal_eval`. If the parsed value is a
    tuple or a set it is converted to a list, so that string input behaves
    like the equivalent non-string input; any other parsed literal is
    returned unchanged. A list, tuple or set input is copied into a new list.
    Every other input type yields an empty list.

    :param inp: string representation of a list, or a list/tuple/set
    :return: the converted list
    :rtype: list
    :raises Exception: if the string can not be parsed as a python literal;
        the parsing error is attached as the cause
    """

    res = []
    if isinstance(inp, str):
        try:
            res = literal_eval(inp)
        except Exception as e:
            # keep the generic exception type callers already catch, but
            # chain the parser error so the root cause is not lost
            raise Exception(f"Could not convert {inp} to list") from e
        if isinstance(res, (tuple, set)):
            res = list(res)
    elif isinstance(inp, (list, tuple, set)):
        res = list(inp)

    return res

convert_str_repr_to_tuple(inp) ¤

convert_str_repr_to_tuple converts string representation of tuple to tuple

Source code in haferml/data/wrangle/misc.py
def convert_str_repr_to_tuple(inp):
    """
    convert_str_repr_to_tuple converts string representation of tuple to tuple

    A string input is parsed with `literal_eval`. If the parsed value is a
    list or a set it is converted to a tuple; any other parsed literal is
    returned unchanged. A list, tuple or set input is converted to a tuple.
    Every other input type yields an empty tuple.

    :param inp: string representation of a tuple, or a list/tuple/set
    :return: the converted tuple
    :rtype: tuple
    :raises Exception: if the string can not be parsed as a python literal;
        the parsing error is attached as the cause
    """

    res = ()
    if isinstance(inp, str):
        try:
            res = literal_eval(inp)
        except Exception as e:
            raise Exception(f"Could not convert {inp} to tuple") from e
        # the parsed value (not the original string) decides the conversion
        if isinstance(res, (list, set)):
            res = tuple(res)
    elif isinstance(inp, (list, tuple, set)):
        res = tuple(inp)

    return res

convert_to_bool(data) ¤

convert_to_bool converts input to bool type in python.

The following values are converted to True:

  1. 'true'
  2. 'yes'
  3. '1'
  4. 'y'
  5. 1

The following values are converted to False:

  1. 'false'
  2. 'no'
  3. '0'
  4. 'n'
  5. 0

Parameters:

Name Type Description Default
data

input data

required

Returns:

Type Description
bool

boolean value of the input data

Source code in haferml/data/wrangle/misc.py
def convert_to_bool(data):
    """
    Map a loosely typed truth value onto a python bool.

    Strings are compared case-insensitively and with surrounding whitespace
    removed.

    Interpreted as True:

    1. 'true'
    2. 'yes'
    3. '1'
    4. 'y'
    5. 1

    Interpreted as False:

    1. 'false'
    2. 'no'
    3. '0'
    4. 'n'
    5. 0

    Other numbers follow python truthiness. None, unrecognised strings and
    unsupported types give None.

    :param data: input data
    :return: boolean value of the input data, or None if it can not be decided
    :rtype: bool
    """
    if data is None:
        return None

    if isinstance(data, bool):
        return data

    if isinstance(data, str):
        token = data.lower().strip()
        if token in ["true", "yes", "1", "y"]:
            return True
        if token in ["false", "no", "0", "n"]:
            return False
        return None

    if isinstance(data, (float, int)):
        return bool(data)

    return None

eu_float_string_to_float(data) ¤

eu_float_string_to_float converts strings in EU format to floats

Parameters:

Name Type Description Default
data str

string of the float in EU conventions

required

Returns:

Type Description
float

converted float from the string

Source code in haferml/data/wrangle/misc.py
def eu_float_string_to_float(data):
    """
    Parse a number written with EU conventions into a float.

    Dots are treated as thousands separators and dropped, then the decimal
    comma is turned into a dot before the text is handed to `float`.

    :param data: string of the float in EU conventions
    :type data: str
    :return: converted float from the string
    :rtype: float
    :raises TypeError: if the input is not a string
    :raises Exception: if the normalised string is not a valid float
    """
    if not isinstance(data, str):
        raise TypeError("Input data should be string")

    normalised = data.replace(".", "").replace(",", ".")
    try:
        return float(normalised)
    except Exception as e:
        raise Exception(f"Could not convert string {data} to float: {e}")

get_all_paths_in_dict(dic, path=None) ¤

Retrieve all the possible paths in a nested dictionary.

Warning

Lists of dictionaries under keys are not supported.

test_dict_small = {
    "etl": {
        "local": "this/is/local",
        "name": "my_data.parquet",
        "remote": "s3://my/remote"
    },
    "model": {
        "artifacts": {
            "predict": {
                "local": "this/is/local/predict",
                "remote": "s3://my/remote/predict"
            }
        }
    }
}

all_paths = get_all_paths_in_dict(test_dict_small, [])
print(all_paths)

We get

[['etl', 'local'], ['etl', 'name'], ['etl', 'remote'], ['model', 'artifacts', 'predict', 'local'], ['model', 'artifacts', 'predict', 'remote']]

Parameters:

Name Type Description Default
dic dict

dictionary to extract the key paths from

required
path list

path of keys to extract value

None
Source code in haferml/data/wrangle/misc.py
def get_all_paths_in_dict(dic, path=None):
    """
    List every key path that leads to a non-dict value in a nested dictionary.

    !!! warning
        Only dictionaries are descended into. Any other value, including a
        list of dictionaries, is treated as the end of a path.


    ```
    nested = {
        "etl": {
            "local": "this/is/local",
            "name": "my_data.parquet",
            "remote": "s3://my/remote"
        },
        "model": {
            "artifacts": {
                "predict": {
                    "local": "this/is/local/predict",
                    "remote": "s3://my/remote/predict"
                }
            }
        }
    }

    print(get_all_paths_in_dict(nested, []))
    ```

    prints

    ```
    [['etl', 'local'], ['etl', 'name'], ['etl', 'remote'], ['model', 'artifacts', 'predict', 'local'], ['model', 'artifacts', 'predict', 'remote']]
    ```

    :param dic: dictionary to collect the key paths from
    :type dic: dict
    :param path: keys to prepend to every returned path
    :type path: list
    :return: list of key paths, each path being a list of keys
    """
    prefix = [] if path is None else path

    collected = []
    # explicit depth-first stack of (value, keys leading to that value)
    pending = [(dic, prefix)]
    while pending:
        node, trail = pending.pop()
        if not isinstance(node, dict):
            collected.append(trail)
            continue
        children = [(child, trail + [key]) for key, child in node.items()]
        # reversed so the first key is popped, and fully explored, first
        pending.extend(reversed(children))

    return collected

get_value_in_dict_recursively(dictionary, path, ignore_path_fail=None) ¤

Get value of a dictionary according to specified path (names)

Parameters:

Name Type Description Default
dictionary dict

input dictionary

required
path list

path to the value to be obtained. This function always returns the value or None. >>> get_value_in_dict_recursively({'lvl_1':{'lvl_2':{'lvl_3':'lvl_3_value'}}},['lvl_1','lvl_2']) {'lvl_3': 'lvl_3_value'} >>> get_value_in_dict_recursively({1:{2:{3:'hi'}}},[1,'2',3]) 'hi'

required
Source code in haferml/data/wrangle/misc.py
def get_value_in_dict_recursively(dictionary, path, ignore_path_fail=None):
    """
    Get value of a dictionary according to specified path (names)

    Every path element is converted to `int` whenever that is possible
    before it is used as a key, so `'2'` looks up the key `2`.

    :param dict dictionary: input dictionary
    :param list path: path to the value to be obtained; a tuple is accepted
        as well, any other value is treated as a single-element path
    :param bool ignore_path_fail: defaults to True. When False, an exception
        is raised if the first element of a multi-element path can not be
        converted to int. The flag is not forwarded to the recursive calls
        for the remaining path elements.
    :return: the value found under the path, or None if it can not be found
    :raises Exception: only when `ignore_path_fail` is False, see above

    This function always returns the value or None.

    ```
    >>> get_value_in_dict_recursively({'lvl_1':{'lvl_2':{'lvl_3':'lvl_3_value'}}},['lvl_1','lvl_2'])
    {'lvl_3': 'lvl_3_value'}
    >>> get_value_in_dict_recursively({1:{2:{3:'hi'}}},[1,'2',3])
    'hi'
    ```
    """
    if ignore_path_fail is None:
        ignore_path_fail = True

    # work on a copy so the caller's path is never consumed
    if isinstance(path, list):
        path_temp = path.copy()
    elif isinstance(path, tuple):
        path_temp = list(path)
    else:
        logger.warning(f"path is not list or tuple, converting to list: {path}")
        path_temp = [path]

    if len(path_temp) > 1:
        pop = path_temp.pop(0)
        try:
            pop = int(pop)
        except (ValueError, TypeError) as err:
            # TypeError covers keys such as tuples or None, which int()
            # rejects differently from non-numeric strings
            if ignore_path_fail:
                logger.warning("can not get path")
            else:
                raise Exception(
                    f"specified path ({path}) is not acceptable"
                ) from err

        try:
            return get_value_in_dict_recursively(dictionary[pop], path_temp)
        except Exception:
            # any lookup failure further down means "no value";
            # KeyboardInterrupt and SystemExit are deliberately not caught
            logger.debug(f"did not get values for {pop}")
            return None
    elif len(path_temp) == 0:
        return None
    else:
        try:
            val = int(path_temp[0])
        except Exception:
            val = path_temp[0]
        try:
            return dictionary[val]
        except KeyError:
            logger.error(f"KeyError: Could not find {path_temp[0]}")
            return None
        except IndexError:
            # sequence index out of range, treated like a missing key
            logger.error(f"IndexError: Could not find {path_temp[0]}")
            return None
        except TypeError:
            logger.error(f"TypeError: Could not find {path_temp[0]}")
            return None

update_dict_recursively(dictionary, key_path, value) ¤

update or insert values to a dictionary recursively.

>>> update_dict_recursively({}, ['a', 'b', 1, 2], 'this_value')
{'a': {'b': {1: {2: 'this_value'}}}}

Parameters:

Name Type Description Default
dictionary dict

the dictionary to be inserted into

required
key_path list

the path for the insertion value

required
value

value to be inserted

required

Returns:

Type Description

a dictionary with the inserted value

Source code in haferml/data/wrangle/misc.py
def update_dict_recursively(dictionary, key_path, value):
    """
    Set a value deep inside a nested dictionary, creating levels on the way.

    The input dictionary is modified in place and also returned.

    ```
    >>> update_dict_recursively({}, ['a', 'b', 1, 2], 'this_value')
    {'a': {'b': {1: {2: 'this_value'}}}}
    ```

    :param dict dictionary: the dictionary to be inserted into
    :param list key_path: the path for the insertion value
    :param value: value to be inserted
    :returns: a dictionary with the inserted value
    """
    node = dictionary
    # walk every key except the last one, adding empty levels where needed
    for name in key_path[:-1]:
        if name in node:
            node = node[name]
            continue
        node[name] = {}
        node = node[name]

    node[key_path[-1]] = value
    return dictionary