Sync - Local¤

isoencode(obj) ¤

isoencode converts objects that the standard json encoder cannot handle into JSON-serializable values, e.g. np.bool_ -> plain bool and datetime -> ISO-format string. Pass it as the default hook of json.dump:

with open(log_file_path, "a+") as fp:
    json.dump(self.report, fp, default=isoencode)
Source code in haferml/sync/local.py
def isoencode(obj):
    """
    isoencode converts objects that the standard json encoder cannot handle into JSON-serializable values, e.g. np.bool_ -> plain bool and datetime -> ISO-format string.

    ```python
    with open(log_file_path, "a+") as fp:
        json.dump(self.report, fp, default=isoencode)
    ```
    """
    if isinstance(obj, datetime.datetime):
        return obj.isoformat()
    if isinstance(obj, datetime.date):
        return obj.isoformat()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.int64):
        return int(obj)
    if isinstance(obj, np.float64):
        return float(obj)
    if isinstance(obj, np.bool_):
        return bool(obj)
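
A minimal usage sketch (the report dict and its field names are illustrative): passing isoencode as the default hook lets json serialize numpy and datetime values.

import datetime
import json

import numpy as np

from haferml.sync.local import isoencode

report = {
    "created_at": datetime.datetime(2021, 1, 1, 12, 0),
    "scores": np.array([0.1, 0.2]),
    "passed": np.bool_(True),
}

# json calls isoencode for every value it cannot serialize natively
print(json.dumps(report, default=isoencode))
# {"created_at": "2021-01-01T12:00:00", "scores": [0.1, 0.2], "passed": true}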

load_records(data_path_inp) ¤

Load data from a line-delimited JSON file. Instead of loading pandas for such a simple job, this function does the work in most cases.

Parameters:

data_path_inp (required): data file path

Returns:

list of dicts

Source code in haferml/sync/local.py
def load_records(data_path_inp):
    """Load data from a line deliminated json file. Instead of loading pandas for such a simple job, this function does the work in most cases.

    :param data_path_inp: data file path
    :return: list of dicts
    """

    data = []

    with open(data_path_inp, "r") as fp:
        for line in fp:
            # map JSON null to the string "None" before parsing
            line = line.replace("null", ' "None" ')
            try:
                line_data = json.loads(line.strip())
            except Exception as ee:
                logger.warning(f"could not load {line}\n{ee}")
                continue
            data.append(line_data)

    return data
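
A small round-trip sketch, assuming a throwaway temporary file: write a few JSON lines by hand, then read them back with load_records.

import json
import tempfile

from haferml.sync.local import load_records

rows = [{"id": 1, "name": "a"}, {"id": 2, "name": "b"}]

# write a line-delimited JSON file, one object per line
with tempfile.NamedTemporaryFile("w", suffix=".jsonl", delete=False) as fp:
    for row in rows:
        fp.write(json.dumps(row) + "\n")
    path = fp.name

print(load_records(path))  # [{'id': 1, 'name': 'a'}, {'id': 2, 'name': 'b'}]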

prepare_folders(folder_list=None, base_folder=None) ¤

prepare_folders creates each folder in folder_list under base_folder if it does not already exist.

Parameters:

base_folder (str, default None): base folder of the whole project
folder_list (list, default None): list of folders to create, relative to base_folder
Source code in haferml/sync/local.py
def prepare_folders(folder_list=None, base_folder=None):
    """
    prepare_folders creates each folder in folder_list under base_folder if it does not already exist.

    :param base_folder: base folder of the whole project
    :type base_folder: str
    :param folder_list: list of folders to create, relative to base_folder
    :type folder_list: list
    """
    if folder_list is None:
        raise Exception("Please specify the list of folder using fodler_list")

    if os.path.exists(base_folder):
        logger.info(f"Using base folder {base_folder}!")

    # prepare the model folder
    if isinstance(folder_list, (tuple, list, set)):
        pass
    elif isinstance(folder_list, str):
        logger.warning(f"Converting to list: {folder_list} to a list")
        folder_list = [folder_list]

    for folder in folder_list:
        folder = os.path.join(base_folder, folder)
        if not os.path.exists(folder):
            os.makedirs(folder)
            logger.info(f"created {folder}")

save_records(data_inp, output, is_flush=None, write_mode=None) ¤

Save list of dicts to file. Instead of loading pandas for such a simple job, this function does the work in most cases.

Parameters:

data_inp (required): dict or list of dicts to be saved
output (required): path to output file
is_flush (default None): whether to flush data to file after each row is written; False is used when None
write_mode (default None): file open mode passed to open; "a+" is used when None

Returns:

None

Source code in haferml/sync/local.py
def save_records(data_inp, output, is_flush=None, write_mode=None):
    """Save list of dicts to file. Instead of loading pandas for such a simple job, this function does the work in most cases.

    :param data_inp: dict or list of dict to be saved
    :param output: path to output file
    :param is_flush: whether to flush data to file for each row written to file
    :param write_mode: file open mode, defaults to "a+"
    :return: None
    """

    if write_mode is None:
        write_mode = "a+"

    if is_flush is None:
        is_flush = False

    if isinstance(data_inp, list):
        data = data_inp
    elif isinstance(data_inp, dict):
        data = [data_inp]
    else:
        raise Exception("Input data is neither list nor dict: {}".format(data_inp))

    try:
        with open(output, write_mode) as fp:
            for i in data:
                json.dump(i, fp)
                fp.write("\n")
                if is_flush:
                    fp.flush()
    except Exception as ee:
        raise Exception("Could not load data to file: {}".format(ee))