Source code for law.contrib.pandas.formatter

# coding: utf-8

"""
Pandas target formatters.
"""

from __future__ import annotations

__all__ = ["DataFrameFormatter"]

import pathlib

from law.target.formatter import Formatter
from law.target.file import get_path, FileSystemFileTarget
from law.logger import get_logger
from law.util import no_value
from law._types import Any


logger = get_logger(__name__)


class DataFrameFormatter(Formatter):
    """
    Formatter that reads and writes objects through pandas, selecting the pandas I/O routine from
    the suffix of the target path.
    """

    name = "pandas"

    @classmethod
    def accepts(cls, path: str | pathlib.Path | FileSystemFileTarget, mode: str) -> bool:
        """
        Returns *True* when the path extracted from *path* ends with one of the suffixes handled by
        this formatter, and *False* otherwise. *mode* is not taken into account.
        """
        # still missing: excel, html, xml, latex, feather, orc, sql, stata, markdown, ...
        known_suffixes = (".csv", ".json", ".parquet", ".h5", ".hdf5", ".pickle", ".pkl")
        resolved = get_path(path)
        return resolved.endswith(known_suffixes)

    @classmethod
    def load(cls, path: str | pathlib.Path | FileSystemFileTarget, *args, **kwargs) -> Any:
        """
        Reads the file at *path* with the pandas reader that matches its suffix and returns the
        result. All *args* and *kwargs* are forwarded to that reader. Raises a
        *NotImplementedError* when the suffix is not supported.
        """
        # imported lazily, and before the suffix check, so that pandas is only needed on use
        import pandas  # type: ignore[import-untyped]

        # suffix(es) -> name of the pandas reader function
        # NOTE: unpickling can execute arbitrary code, so only load pickle files that are trusted
        reader_names = (
            (".csv", "read_csv"),
            (".json", "read_json"),
            (".parquet", "read_parquet"),
            ((".h5", ".hdf5"), "read_hdf"),
            ((".pickle", ".pkl"), "read_pickle"),
        )

        file_path = get_path(path)
        for suffixes, reader_name in reader_names:
            if file_path.endswith(suffixes):
                reader = getattr(pandas, reader_name)
                return reader(file_path, *args, **kwargs)

        suffix = pathlib.Path(file_path).suffix
        raise NotImplementedError(f"suffix \"{suffix}\" not implemented in DataFrameFormatter")

    @classmethod
    def dump(
        cls,
        path: str | pathlib.Path | FileSystemFileTarget,
        obj: Any,
        *args,
        **kwargs,
    ) -> Any:
        """
        Writes *obj* to *path* using the ``to_*`` method of *obj* that matches the file suffix and
        returns whatever that method returns. A *perm* keyword argument is removed from *kwargs*
        before they are forwarded and, when given, is applied through :py:meth:`chmod` after
        writing. Raises a *NotImplementedError* when the suffix is not supported.
        """
        # "perm" is meant for chmod and must not reach the writer method
        perm = kwargs.pop("perm", no_value)

        # suffix(es) -> name of the writer method looked up on obj
        writer_names = (
            (".csv", "to_csv"),
            (".json", "to_json"),
            (".parquet", "to_parquet"),
            ((".h5", ".hdf5"), "to_hdf"),
            ((".pickle", ".pkl"), "to_pickle"),
        )

        _path = get_path(path)
        for suffixes, writer_name in writer_names:
            if _path.endswith(suffixes):
                writer = getattr(obj, writer_name)
                ret = writer(_path, *args, **kwargs)
                break
        else:
            # no writer matched
            suffix = pathlib.Path(_path).suffix
            raise NotImplementedError(f"suffix \"{suffix}\" not implemented in DataFrameFormatter")

        # permissions are set on the path as passed by the caller, not on the extracted one
        if perm != no_value:
            cls.chmod(path, perm)

        return ret