# Source code for ssbio.core.object

import logging
from copy import deepcopy

import pandas as pd

import ssbio.io
import ssbio.utils

# Module-level logger, named after this module via __name__
log = logging.getLogger(__name__)

class Object(object):
    """Cobra-style core object with additional methods to update and get attributes.

    Attributes:
        id: Identifier of the object, used by ``__str__`` and ``__repr__``.
        description: Free-form description of the object.
        notes (dict): Container for arbitrary notes, initialised to an empty dictionary.

    """

    def __init__(self, id=None, description=None, *args, **kwargs):
        """Initialise the object.

        Args:
            id: Identifier of the object
            description: Description of the object
            *args: Accepted for signature compatibility and ignored
            **kwargs: Accepted for signature compatibility and ignored

        """
        self.id = id
        self.description = description
        self.notes = {}

    def __str__(self):
        return str(self.id)

    def __repr__(self):
        return "<%s %s at 0x%x>" % (self.__class__.__name__, self.id, id(self))

    def update(self, newdata, overwrite=False, only_keys=None):
        """Add/update any attributes from a dictionary.

        Without ``overwrite``, an existing attribute is only replaced when its current value is
        falsy (``None``, ``''``, ``0``, empty containers, ...); attributes that do not exist yet
        are always added.

        Args:
            newdata: Dictionary of attributes
            overwrite: If existing attributes should be overwritten if provided in newdata
            only_keys: Key or list of keys to update; all other keys in newdata are ignored

        Examples:
            >>> myobj = Object(id='hi', description='blankname')
            >>> myobj.update({'description':'withname'}, overwrite=False)
            >>> myobj.get_dict() == {'id': 'hi', 'description': 'blankname', 'notes': {}}
            True

            >>> myobj = Object(id='hi', description='blankname')
            >>> myobj.update({'description':'withname'}, overwrite=True)
            >>> myobj.get_dict() == {'id': 'hi', 'description': 'withname', 'notes': {}}
            True

            >>> myobj = Object(id='hi', description='blankname')
            >>> myobj.update({'description':'withname', 'randomkey':'randomval'}, overwrite=True)
            >>> myobj.get_dict() == {'id': 'hi', 'description': 'withname', 'notes': {}, 'randomkey': 'randomval'}
            True

            >>> myobj = Object(id='hi', description='blankname')
            >>> myobj.update({'description':'withname', 'randomkey':'randomval'}, overwrite=True, only_keys='description')
            >>> myobj.get_dict() == {'id': 'hi', 'description': 'withname', 'notes': {}}
            True

            >>> myobj = Object(id='hi', description='blankname')
            >>> myobj.update({'description':'withname', 'randomkey':'randomval'}, overwrite=True, only_keys='randomkey')
            >>> myobj.get_dict() == {'id': 'hi', 'description': 'blankname', 'notes': {}, 'randomkey': 'randomval'}
            True

            >>> myobj = Object(id='hi', description='blankname')
            >>> myobj.update({'description':'withname', 'randomkey':'randomval'}, overwrite=False)
            >>> myobj.get_dict() == {'id': 'hi', 'description': 'blankname', 'notes': {}, 'randomkey': 'randomval'}
            True

            >>> myobj = Object(id='hi', description='blankname')
            >>> myobj.update({'description':'withname', 'randomkey':'randomval'}, overwrite=False, only_keys='randomkey')
            >>> myobj.get_dict() == {'id': 'hi', 'description': 'blankname', 'notes': {}, 'randomkey': 'randomval'}
            True

        """
        # Restrict the incoming data to the requested keys, if any were given
        if only_keys:
            wanted = ssbio.utils.force_list(only_keys)
            newdata = {k: v for k, v in newdata.items() if k in wanted}

        for key, value in newdata.items():
            # Set the attribute when overwriting is requested, when the attribute does not
            # exist yet, or when its current value is falsy; otherwise keep the existing value.
            if overwrite or not getattr(self, key, None):
                setattr(self, key, value)

    def get_dict(self, only_attributes=None, exclude_attributes=None, df_format=False):
        """Get a dictionary of this object's attributes. Optional format for storage in a Pandas DataFrame.

        Only attributes stored in the instance ``__dict__`` are returned, each as a deep copy, so
        modifying the result never affects the object. Names in ``only_attributes`` that are not
        instance attributes are silently skipped. The arguments themselves are never modified.

        Args:
            only_attributes (str, list): Attributes that should be returned. If not provided, all are returned.
            exclude_attributes (str, list): Attributes that should be excluded.
            df_format (bool): If dictionary values should be formatted for a dataframe
                (everything possible is transformed into strings, int, or float -
                if something can't be transformed it is excluded). Falsy values that are not
                numbers (``None``, ``''``, empty containers) are stored as ``None``.

        Returns:
            dict: Dictionary of attributes

        """
        # Build a private list of keys so a list passed by the caller is never mutated
        if only_attributes:
            keys = list(ssbio.utils.force_list(only_attributes))
        else:
            keys = list(self.__dict__)

        # Drop every occurrence of the keys that should not be returned
        if exclude_attributes:
            excluded = ssbio.utils.force_list(exclude_attributes)
            keys = [k for k in keys if k not in excluded]

        df_dict = {}
        for k, orig_v in self.__dict__.items():
            if k not in keys:
                continue

            # A single deep copy is enough to decouple the result from the object
            v = deepcopy(orig_v)

            if not df_format:
                df_dict[k] = v
            elif isinstance(v, (int, float)):
                # Numbers (bool is a subclass of int) are kept as-is, including 0 and False
                df_dict[k] = v
            elif not v:
                # Remaining falsy values (None, '', empty containers) become None
                df_dict[k] = None
            elif isinstance(v, str):
                df_dict[k] = v
            else:
                # Everything else is converted to a string, or left out if that fails
                try:
                    df_dict[k] = ssbio.utils.force_string(v)
                except TypeError:
                    log.warning('{}: excluding attribute from dict, cannot transform into string'.format(k))
        return df_dict

    def save_dataframes(self, outdir, prefix='df_'):
        """Save all attributes that start with "df" into a specified directory.

        Empty dataframes are skipped. Output paths are built with ``ssbio.utils.outfile_maker``
        from the attribute name and a ``.csv`` extension.

        Args:
            outdir (str): Path to output directory
            prefix (str): Prefix that dataframe attributes start with

        Raises:
            TypeError: If an attribute starting with ``prefix`` is not a Pandas DataFrame

        """
        # dir() is used so that properties and class-level attributes are found as well
        df_names = [name for name in dir(self) if name.startswith(prefix)]

        saved = 0
        for df in df_names:
            outpath = ssbio.utils.outfile_maker(inname=df, outext='.csv', outdir=outdir)
            my_df = getattr(self, df)
            if not isinstance(my_df, pd.DataFrame):
                raise TypeError('{}: object is not a Pandas DataFrame'.format(df))

            if my_df.empty:
                log.debug('{}: empty dataframe, not saving'.format(df))
                continue

            my_df.to_csv(outpath)
            log.debug('{}: saved dataframe'.format(outpath))
            saved += 1

        log.debug('Saved {} dataframes at {}'.format(saved, outdir))

    def save_pickle(self, outfile, protocol=2):
        """Save the object as a pickle file

        Args:
            outfile (str): Filename
            protocol (int): Pickle protocol to use. Default is 2 to remain compatible with Python 2

        Returns:
            str: Path to pickle file

        """
        ssbio.io.save_pickle(self, outfile, protocol)
        # Return the path as documented, independent of what the helper returns
        return outfile

    def __json_encode__(self):
        """Return the attributes to store when this object is encoded as JSON.

        Dunder names, name-mangled private attributes of this class, properties and callables
        (methods) are left out.

        Returns:
            dict: Attribute names mapped to their current values

        """
        mangled_prefix = '_{}__'.format(type(self).__name__)

        to_return = {}
        for name in dir(self):
            if name.startswith('__') or name.startswith(mangled_prefix):
                continue
            # Check the class first so that properties are skipped without being evaluated
            if isinstance(getattr(type(self), name, None), property):
                continue
            value = getattr(self, name)
            if callable(value):
                continue
            to_return[name] = value
        return to_return

    def save_json(self, outfile, compression=False):
        """Save the object as a JSON file using json_tricks

        Args:
            outfile (str): Filename
            compression (bool): Passed through to ``ssbio.io.save_json``

        """
        ssbio.io.save_json(self, outfile, compression=compression)