Source code for law.contrib.pyarrow.formatter
# coding: utf-8
"""
PyArrow target formatters.
"""
from __future__ import annotations
__all__ = ["ParquetFormatter", "ParquetTableFormatter"]
import pathlib
from law.target.formatter import Formatter
from law.target.file import FileSystemFileTarget, get_path
from law.logger import get_logger
from law.util import no_value
from law._types import Any
logger = get_logger(__name__)
[docs]
class ParquetFormatter(Formatter):
    """
    Formatter registered under the name ``"parquet"`` that opens parquet files as
    ``pyarrow.parquet.ParquetFile`` objects. Only :py:meth:`accepts` and :py:meth:`load` are
    defined in this class.
    """

    name = "parquet"

    @classmethod
    def accepts(cls, path: str | pathlib.Path | FileSystemFileTarget, mode: str) -> bool:
        """
        Returns *True* when the path obtained from ``get_path(path)`` ends with ``.parquet`` or
        ``.parq``. The *mode* argument is not inspected.
        """
        resolved = get_path(path)
        return resolved.endswith((".parquet", ".parq"))

    @classmethod
    def load(cls, path: str | pathlib.Path | FileSystemFileTarget, *args, **kwargs) -> Any:
        """
        Creates and returns a ``pyarrow.parquet.ParquetFile`` for *path*. All additional *args*
        and *kwargs* are forwarded unchanged to its constructor.
        """
        # pyarrow is imported inside the method rather than at module level
        import pyarrow.parquet as pq  # type: ignore[import-untyped, import-not-found]

        source = get_path(path)
        return pq.ParquetFile(source, *args, **kwargs)
[docs]
class ParquetTableFormatter(Formatter):
    """
    Formatter registered under the name ``"parquet_table"`` that reads parquet files through
    ``pyarrow.parquet.read_table`` and writes objects through ``pyarrow.parquet.write_table``.
    """

    name = "parquet_table"

    @classmethod
    def accepts(cls, path: str | pathlib.Path | FileSystemFileTarget, mode: str) -> bool:
        """
        Returns *True* when the path obtained from ``get_path(path)`` ends with ``.parquet`` or
        ``.parq``. The *mode* argument is not inspected.
        """
        resolved = get_path(path)
        return resolved.endswith((".parquet", ".parq"))

    @classmethod
    def load(cls, path: str | pathlib.Path | FileSystemFileTarget, *args, **kwargs) -> Any:
        """
        Reads *path* with ``pyarrow.parquet.read_table`` and returns the result. All additional
        *args* and *kwargs* are forwarded unchanged.
        """
        # pyarrow is imported inside the method rather than at module level
        import pyarrow.parquet as pq  # type: ignore[import-untyped, import-not-found]

        source = get_path(path)
        return pq.read_table(source, *args, **kwargs)

    @classmethod
    def dump(
        cls,
        path: str | pathlib.Path | FileSystemFileTarget,
        obj: Any,
        *args,
        **kwargs,
    ) -> Any:
        """
        Writes *obj* to *path* with ``pyarrow.parquet.write_table`` and returns that call's
        return value. An optional ``perm`` keyword argument is removed from *kwargs* before the
        write; when it was given, ``cls.chmod(path, perm)`` is invoked after the write. All
        remaining *args* and *kwargs* are forwarded to ``write_table``.
        """
        # pyarrow is imported inside the method rather than at module level
        import pyarrow.parquet as pq  # type: ignore[import-untyped, import-not-found]

        # "perm" must be taken out first so that it is not passed on to write_table
        permissions = kwargs.pop("perm", no_value)

        destination = get_path(path)
        result = pq.write_table(obj, destination, *args, **kwargs)

        # adjust permissions only when the caller explicitly passed them
        # (chmod is presumably provided by the Formatter base class -- not visible here)
        if permissions != no_value:
            cls.chmod(path, permissions)

        return result