Skip to content

preprocess.ingredients

Preprocess - Ingredients¤

OrderedProcessor ¤

Go through the ordered methods of OrderedProcessor to transform a dataframe.

Beware of the exceptions.

Add member functions to transform the dataframe

@attributes(order=1)
def _transformer_created_at(self, dataframe):
    pass

_get_transforms(self) private ¤

_get_transforms extracts the list of transformers.

This method can be replaced by the decorator with_transforms.

Source code in haferml/preprocess/ingredients.py
def _get_transforms(self):
    """
    Collect the ordered transformers of this instance into `self.transforms`.

    Every member of `self` that carries an `order` attribute is treated as a
    transformer. The transformers are sorted by that attribute and stored in
    `self.transforms`, a dictionary mapping the member name to the member.

    This method can be replaced by the decorator `with_transforms`.
    """

    members = dict(inspect.getmembers(self))
    ranked = []

    for method_name in members:
        candidate = members[method_name]
        # Members without an `order` attribute are not transformers.
        if not hasattr(candidate, "order"):
            continue

        method_order = candidate.order
        logger.info(f"{method_name} has order {method_order}")
        ranked.append((method_order, method_name, candidate))

    # Sort on the order value only; the sort is stable, so ties keep the
    # order in which the members were discovered.
    ranked.sort(key=lambda entry: entry[0])
    self.transforms = {name: member for _, name, member in ranked}

    logger.debug("All methods: {}".format(members))
    logger.info("Ordered predefined transformers: {}".format(self.transforms))

attributes(**attrs) ¤

A decorator to set attributes of member functions in a class.

class AGoodClass:
    def __init__(self):
        self.size = 0

    @attributes(order=1)
    def first_good_member(self, new):
        return "first good member"

    @attributes(order=2)
    def second_good_member(self, new):
        return "second good member"

References: 1. https://stackoverflow.com/a/48146924/1477359

Source code in haferml/preprocess/ingredients.py
def attributes(**attrs):
    """
    Build a decorator that attaches the given attributes to a function.

    The decorated function is wrapped in a thin pass-through wrapper, and
    every keyword argument given to `attributes` is set as an attribute on
    that wrapper.

    ```python
    class AGoodClass:
        def __init__(self):
            self.size = 0

        @attributes(order=1)
        def first_good_member(self, new):
            return "first good member"

        @attributes(order=2)
        def second_good_member(self, new):
            return "second good member"
    ```

    References:
    1. https://stackoverflow.com/a/48146924/1477359

    """

    def decorator(f):
        def wrapper(*args, **kwargs):
            return f(*args, **kwargs)

        # Copy the metadata of `f` (name, docstring, ...) onto the wrapper
        # first, then attach the requested attributes.
        tagged = wraps(f)(wrapper)
        for key in attrs:
            setattr(tagged, key, attrs[key])

        return tagged

    return decorator

order(ord) ¤

order is a decorator for ordering the member functions of pipeline classes. It attaches an attribute named "order" to the member function so that the attribute can be used to sort the member functions.

This order function can be combined with the decorator with_transforms which orders the member functions.

class AGoodClass:
    def __init__(self):
        self.size = 0

    @order(1)
    def first_good_member(self, new):
        return "first good member"

    @order(2)
    def second_good_member(self, new):
        return "second good member"
Source code in haferml/preprocess/ingredients.py
def order(ord):
    """
    Return a decorator that tags a member function with an `order` attribute.

    This is shorthand for `attributes(order=ord)`. The `order` attribute can
    then be used to sort the member functions, for example by the decorator
    `with_transforms`.

    ```python
    class AGoodClass:
        def __init__(self):
            self.size = 0

        @order(1)
        def first_good_member(self, new):
            return "first good member"

        @order(2)
        def second_good_member(self, new):
            return "second good member"
    ```
    """
    order_decorator = attributes(order=ord)
    return order_decorator

with_transforms(attr=None) ¤

with_transforms is a decorator that builds the ordered transformations and assigns them to the attribute self.transforms. self.transforms is a dictionary whose keys are the names of the transformation functions.

@with_transforms()
def run(self, dataframe):
    for t in self.transforms:
        dataframe = self.transforms[t](dataframe)
        logger.info(f"transformation {t} is done.")
    return dataframe

Here is a full example of the decorator.

class AGoodClass:
    def __init__(self):
        pass

    @order(1)
    def first_good_member(self, new):
        return f"{new} - appended first good member"

    @order(2)
    def second_good_member(self, new):
        return f"{new} - appended second good member"

    @with_transforms()
    def bench(self, name):
        logger.info(name)
        logger.info(self.transforms)
        for t in self.transforms:
            name = self.transforms[t](name)
            logger.info(f"transformation {t} is done. Got strings: {name}")

a = AGoodClass()

a.bench("a name")

If you would rather use a different attribute name such as "rank", the with_transforms decorator can also be customized.

class AGoodClass:
    def __init__(self):
        pass

    @attributes(rank=1)
    def first_good_member(self, new):
        return f"{new} - appended first good member"

    @attributes(rank=2)
    def second_good_member(self, new):
        return f"{new} - appended second good member"

    @with_transforms(attr="rank")
    def bench(self, name):
        logger.info(name)
        logger.info(self.transforms)
        for t in self.transforms:
            name = self.transforms[t](name)
            logger.info(f"transformation {t} is done. Got strings: {name}")

a = AGoodClass()

a.bench("a name")
Source code in haferml/preprocess/ingredients.py
def with_transforms(attr=None):
    """
    Build the ordered transformations right before the decorated method runs.

    `with_transforms` is a decorator. Each time the decorated method is
    called, it collects every member of `self` that carries the attribute
    named by `attr`, sorts those members by the value of that attribute, and
    assigns them to `self.transforms`. `self.transforms` is a dictionary
    whose keys are the names of the transformation functions, in sorted
    order. The decorated method is then called with the original positional
    and keyword arguments, and its return value is passed through.

    The decorator can be applied with or without parentheses.

    ```python
    @with_transforms
    def run(self, dataframe):
        for t in self.transforms:
            dataframe = self.transforms[t](dataframe)
            logger.info(f"transformation {t} is done.")
        return dataframe
    ```

    Here is a full example of the decorator.

    ```python
    class AGoodClass:
        def __init__(self):
            pass

        @order(1)
        def first_good_member(self, new):
            return f"{new} - appended first good member"

        @order(2)
        def second_good_member(self, new):
            return f"{new} - appended second good member"

        @with_transforms()
        def bench(self, name):
            logger.info(name)
            logger.info(self.transforms)
            for t in self.transforms:
                name = self.transforms[t](name)
                logger.info(f"transformation {t} is done. Got strings: {name}")

    a = AGoodClass()

    a.bench("a name")
    ```

    If you would rather use a different attribute name such as "rank", the
    `with_transforms` decorator can also be customized.

    ```python
    class AGoodClass:
        def __init__(self):
            pass

        @attributes(rank=1)
        def first_good_member(self, new):
            return f"{new} - appended first good member"

        @attributes(rank=2)
        def second_good_member(self, new):
            return f"{new} - appended second good member"

        @with_transforms(attr="rank")
        def bench(self, name):
            logger.info(name)
            logger.info(self.transforms)
            for t in self.transforms:
                name = self.transforms[t](name)
                logger.info(f"transformation {t} is done. Got strings: {name}")

    a = AGoodClass()

    a.bench("a name")
    ```

    :param attr: name of the attribute used to find and sort the
        transformers; defaults to "order". When the decorator is applied
        without parentheses, the decorated function arrives here instead
        and "order" is used.
    :return: the decorator, or the wrapped function when applied without
        parentheses.
    """
    # Bare usage (`@with_transforms`): Python passes the decorated function
    # as `attr`. Attribute names are strings and therefore never callable.
    decorated = None
    if callable(attr):
        decorated, attr = attr, None

    if attr is None:
        attr = "order"

    def _with_transforms(f):
        # `updated=()` keeps the metadata (name, docstring) of `f` without
        # copying its `__dict__`, so the wrapper never inherits the ordering
        # attribute and cannot be picked up as one of its own transformers.
        @wraps(f, updated=())
        def _get_transforms(self, *args, **kwargs):
            all_methods = dict(inspect.getmembers(self))
            transforms = []

            for method_name, method_func in all_methods.items():
                if hasattr(method_func, attr):
                    method_order = getattr(method_func, attr)
                    logger.debug(f"{method_name} has order {method_order}")

                    # Tuples instead of a dict keyed by `attr`: a dict record
                    # would collide when `attr` is "name" or "method".
                    transforms.append((method_order, method_name, method_func))

            # Sort on the order value only; the sort is stable, so ties keep
            # the order in which the members were discovered.
            transforms.sort(key=lambda item: item[0])
            self.transforms = {name: method for _, name, method in transforms}

            logger.debug("All methods: {}".format(all_methods))
            logger.debug("Ordered predefined transformers: {}".format(self.transforms))

            return f(self, *args, **kwargs)

        return _get_transforms

    if decorated is not None:
        return _with_transforms(decorated)

    return _with_transforms