Source code for pandas_streaming.df.dataframe_io
import io
import os
import zipfile
import pandas
import numpy
[docs]
def to_zip(df, zipfilename, zname="df.csv", **kwargs):
    """
    Saves a :epkg:`Dataframe` into a :epkg:`zip` file.
    It can be read by :meth:`read_zip`.

    :param df: :epkg:`dataframe` or :class:`numpy.ndarray`
    :param zipfilename: a :class:`zipfile.ZipFile` or a filename,
        a filename must end with ``.zip`` and the file is
        created (mode ``"w"``) and closed by this function,
        a :class:`zipfile.ZipFile` is left open for the caller
    :param zname: a filename in the zipfile, it must end with ``.npy``
        for an array and must not end with ``.npy`` for a dataframe
    :param kwargs: parameters for :meth:`pandas.DataFrame.to_csv` or
        :func:`numpy.save`
    :return: zipfilename
    :raises ValueError: if the extension of *zname* does not match
        the type of *df*
    :raises TypeError: if *df* is neither a dataframe nor an array or
        if *zipfilename* is neither a string nor a
        :class:`zipfile.ZipFile`
    :raises NotImplementedError: if *zipfilename* is a string
        which does not end with ``.zip``

    .. exref::
        :title: Saves and reads a dataframe in a zip file
        :tag: dataframe

        This shows an example on how to save and read a
        :class:`pandas.DataFrame` directly into a zip file.

        .. runpython::
            :showcode:

            import pandas
            from pandas_streaming.df import to_zip, read_zip

            df = pandas.DataFrame([dict(a=1, b="e"),
                                   dict(b="f", a=5.7)])

            name = "dfs.zip"
            to_zip(df, name, encoding="utf-8", index=False)
            df2 = read_zip(name, encoding="utf-8")
            print(df2)

    .. exref::
        :title: Saves and reads a numpy array in a zip file
        :tag: array

        This shows an example on how to save and read a
        :class:`numpy.ndarray` directly into a zip file.

        .. runpython::
            :showcode:

            import numpy
            from pandas_streaming.df import to_zip, read_zip

            arr = numpy.array([[0.5, 1.5], [0.4, 1.6]])

            name = "dfsa.zip"
            to_zip(arr, name, 'arr.npy')
            arr2 = read_zip(name, 'arr.npy')
            print(arr2)
    """
    # The payload is serialized in memory first so that nothing is
    # created on disk when the input is rejected.
    if isinstance(df, pandas.DataFrame):
        stb = io.StringIO()
        ext = os.path.splitext(zname)[-1]
        if ext == ".npy":
            raise ValueError(  # pragma: no cover
                "Extension '.npy' cannot be used to save a dataframe."
            )
        df.to_csv(stb, **kwargs)
    elif isinstance(df, numpy.ndarray):
        stb = io.BytesIO()
        ext = os.path.splitext(zname)[-1]
        if ext != ".npy":
            raise ValueError(  # pragma: no cover
                "Extension '.npy' is required when saving a numpy array."
            )
        numpy.save(stb, df, **kwargs)
    else:
        raise TypeError(f"Type not handled {type(df)}")  # pragma: no cover
    text = stb.getvalue()

    if isinstance(zipfilename, str):
        ext = os.path.splitext(zipfilename)[-1]
        if ext != ".zip":
            raise NotImplementedError(  # pragma: no cover
                f"Only zip file are implemented not '{ext}'."
            )
        # The archive is owned by this function: the context manager
        # closes it even if writestr raises.
        with zipfile.ZipFile(zipfilename, "w") as zf:
            zf.writestr(zname, text)
    elif isinstance(zipfilename, zipfile.ZipFile):
        # The archive is owned by the caller: it is left open.
        zipfilename.writestr(zname, text)
    else:
        raise TypeError(  # pragma: no cover
            f"No implementation for type '{type(zipfilename)}'"
        )
    return zipfilename
[docs]
def read_zip(zipfilename, zname=None, **kwargs):
    """
    Reads a :epkg:`dataframe` from a :epkg:`zip` file.
    It can be saved by :meth:`to_zip`.

    :param zipfilename: a :class:`zipfile.ZipFile` or a filename,
        a filename must end with ``.zip`` and the file is opened
        and closed by this function,
        a :class:`zipfile.ZipFile` is left open for the caller
    :param zname: a filename in zipfile, if None, takes the first one
    :param kwargs: parameters for :func:`pandas.read_csv`, or for
        :func:`numpy.load` if *zname* ends with ``.npy``
    :return: :class:`pandas.DataFrame` or :class:`numpy.ndarray`
    :raises NotImplementedError: if *zipfilename* is a string
        which does not end with ``.zip``
    :raises TypeError: if *zipfilename* is neither a string nor a
        :class:`zipfile.ZipFile`
    :raises IndexError: if *zname* is None and the archive is empty
    :raises KeyError: if *zname* is not in the archive
    """
    if isinstance(zipfilename, str):
        ext = os.path.splitext(zipfilename)[-1]
        if ext != ".zip":
            raise NotImplementedError(  # pragma: no cover
                f"Only zip files are supported not '{ext}'."
            )
        zf = zipfile.ZipFile(zipfilename, "r")  # pylint: disable=R1732
        close = True
    elif isinstance(zipfilename, zipfile.ZipFile):
        zf = zipfilename
        close = False
    else:
        raise TypeError(  # pragma: no cover
            f"No implementation for type '{type(zipfilename)}'"
        )

    try:
        if zname is None:
            zname = zf.namelist()[0]
        # The member is fully loaded in memory, the parsers below do not
        # need the archive anymore.
        content = zf.read(zname)
        stb = io.BytesIO(content)
        ext = os.path.splitext(zname)[-1]
        if ext == ".npy":
            df = numpy.load(stb, **kwargs)
        else:
            df = pandas.read_csv(stb, **kwargs)
        return df
    finally:
        # Only an archive opened here is closed, and it is closed
        # even when reading or parsing fails.
        if close:
            zf.close()