from typing import Tuple, Union
[docs]def vhistograms(
df: "pandas.DataFrame", # noqa: F821
metric: str = "time",
name: str = "name",
batch_size: str = "batch_size",
voc_size: str = "voc_size",
sup_title: str = "Compares Implementations of TfIdfVectorizer",
):
"""
Histograms with error bars.
:param df: data
:param metric: metric to show
:param name: experiment name
:param batch_size: first column for the variations
:param voc_size: second column for the variations
:param sup_title: figure title
:return: axes
.. runpython::
import pandas
from onnx_extended.plotting.data import vhistograms_data
df = pandas.DataFrame(vhistograms_data())
print(df.head())
.. plot::
import pandas
from onnx_extended.plotting.data import vhistograms_data
from onnx_extended.plotting.benchmark import vhistograms
df = pandas.DataFrame(vhistograms_data())
vhistograms(df)
"""
import matplotlib.pyplot as plt
batch_sizes = list(sorted(set(df[batch_size])))
voc_sizes = list(sorted(set(df[voc_size])))
B = len(batch_sizes)
V = len(voc_sizes)
fig, ax = plt.subplots(V, B, figsize=(B * 2, V * 2), sharex=True, sharey=True)
fig.suptitle(sup_title)
for b in range(B):
for v in range(V):
aa = ax[v, b]
sub = df[
(df[batch_size] == batch_sizes[b]) & (df[voc_size] == voc_sizes[v])
][[name, metric]].set_index(name)
if 0 in sub.shape:
continue
sub[metric].plot.bar(
ax=aa, logy=True, rot=0, color=["blue", "orange", "green"]
)
if b == 0:
aa.set_ylabel(f"vocabulary={voc_sizes[v]}")
if v == V - 1:
aa.set_xlabel(f"batch_size={batch_sizes[b]}")
aa.grid(True)
if ax is None:
fig.tight_layout()
return ax
[docs]def hhistograms(
df: "pandas.DataFrame", # noqa: F821
keys: Union[str, Tuple[str, ...]] = "name",
metric: str = "average",
baseline: str = "baseline",
title: str = "Benchmark",
limit: int = 50,
ax=None,
):
"""
Histograms with error bars.
Shows the first best performances.
:param df: data
:param keys: columns to graph by
:param metric: metric to display
:param baseline: column `keys[-1]`, no matter what it should be displayed
:param title: graph title
:param limit: number of performances to display
:param ax: existing axes
:return: axes
.. runpython::
import pandas
from onnx_extended.plotting.data import hhistograms_data
df = pandas.DataFrame(hhistograms_data())
print(df.head())
.. plot::
import pandas
from onnx_extended.plotting.data import hhistograms_data
from onnx_extended.plotting.benchmark import hhistograms
df = pandas.DataFrame(hhistograms_data())
hhistograms(df, keys=("input", "name"))
"""
import pandas
if not isinstance(keys, (tuple, list)):
keys = (keys,)
dfm = (
df[list(keys) + [metric]]
.groupby(list(keys), as_index=False)
.agg(["mean", "min", "max"])
.copy()
)
if dfm.shape[1] == 3:
dfm = dfm.reset_index(drop=False)
dfm.columns = list(keys) + [metric] + ["min", "max"]
dfi = dfm.sort_values(metric).reset_index(drop=True)
base = dfi[dfi[keys[-1]].str.contains(baseline)]
not_base = dfi[~dfi[keys[-1]].str.contains(baseline)].reset_index(drop=True)
if not_base.shape[0] > limit:
not_base = not_base[:limit]
merged = pandas.concat([base, not_base], axis=0)
merged = merged.sort_values(metric).reset_index(drop=True).set_index(list(keys))
if ax is None:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1, figsize=(10, merged.shape[0] / 2))
err_min = merged[metric] - merged["min"]
err_max = merged["max"] - merged[metric]
merged[[metric]].plot.barh(
ax=ax,
title=title,
xerr=[err_min, err_max],
)
b = df.loc[df[keys[-1]] == baseline, metric].mean()
ax.plot([b, b], [0, df.shape[0]], "r--")
ax.set_xlim(
[
(df["min_exec"].min() + df[metric].min()) / 2,
(df[metric].max() + df[metric].max()) / 2,
]
)
# ax.set_xscale("log")
fig.tight_layout()
return ax