Measuring CPU performance with a vector sum

This example compares the time spent computing the sum of all the coefficients of a matrix when the function walks through the coefficients by rows or by columns.

Vector Sum

from tqdm import tqdm
import numpy
import matplotlib.pyplot as plt
from pandas import DataFrame
from teachcompute.ext_test_case import measure_time, unit_test_going
from teachcompute.validation.cpu._validation import vector_sum_array as vector_sum

# Benchmark ``vector_sum`` on square float32 matrices of ones, once per
# traversal direction, and collect one observation per (dim, direction).
obs = []

# Matrix dimensions to benchmark; a much smaller set is used under unit tests.
dims = [500, 700, 800, 900, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 2000]
if unit_test_going():
    dims = [10, 20, 30]

for dim in tqdm(dims):
    # Flattened dim x dim matrix filled with ones: the exact sum is dim**2.
    values = numpy.ones((dim, dim), dtype=numpy.float32).ravel()

    # The boolean flag is the third argument of ``vector_sum``; True is
    # reported as "rows" and False as "cols".
    for by_rows, direction in ((True, "rows"), (False, "cols")):
        # Absolute error of the computed sum against the exact value.
        diff = abs(vector_sum(dim, values, by_rows) - dim**2)

        # The lambda is consumed immediately by ``measure_time``, so it sees
        # the current values of ``dim``, ``values`` and ``by_rows``.
        res = measure_time(lambda: vector_sum(dim, values, by_rows), max_time=0.5)

        obs.append(
            {
                "dim": dim,
                "size": values.size,
                "time": res["average"],
                "direction": direction,
                "time_per_element": res["average"] / dim**2,
                "diff": diff,
            }
        )


# One row per dimension, one column per direction, average time per element.
df = DataFrame(obs)
piv = df.pivot(index="dim", columns="direction", values="time_per_element")
print(piv)
  0%|          | 0/14 [00:00<?, ?it/s]
  7%|▋         | 1/14 [00:01<00:14,  1.08s/it]
 14%|█▍        | 2/14 [00:02<00:14,  1.23s/it]
 21%|██▏       | 3/14 [00:03<00:14,  1.28s/it]
 29%|██▊       | 4/14 [00:04<00:12,  1.26s/it]
 36%|███▌      | 5/14 [00:06<00:11,  1.25s/it]
 43%|████▎     | 6/14 [00:07<00:10,  1.26s/it]
 50%|█████     | 7/14 [00:08<00:09,  1.29s/it]
 57%|█████▋    | 8/14 [00:10<00:07,  1.27s/it]
 64%|██████▍   | 9/14 [00:11<00:06,  1.29s/it]
 71%|███████▏  | 10/14 [00:12<00:05,  1.28s/it]
 79%|███████▊  | 11/14 [00:13<00:03,  1.24s/it]
 86%|████████▌ | 12/14 [00:15<00:02,  1.25s/it]
 93%|█████████▎| 13/14 [00:16<00:01,  1.21s/it]
100%|██████████| 14/14 [00:17<00:00,  1.21s/it]
100%|██████████| 14/14 [00:17<00:00,  1.24s/it]
direction          cols          rows
dim
500        1.125726e-09  1.278846e-09
700        1.488420e-09  1.148282e-09
800        1.140735e-09  1.351736e-09
900        1.458899e-09  1.121927e-09
1000       1.511043e-09  1.473863e-09
1100       1.953898e-09  1.377475e-09
1200       1.528615e-09  1.157154e-09
1300       2.381678e-09  1.219445e-09
1400       1.898999e-09  1.525794e-09
1500       2.736938e-09  1.757406e-09
1600       4.575156e-09  1.142311e-09
1700       6.412007e-09  1.280492e-09
1800       5.874604e-09  1.197260e-09
2000       6.639695e-09  1.258228e-09

Plots

# Reshape the observations so that each direction ("rows" / "cols") becomes a
# column indexed by the matrix dimension.
piv_diff = df.pivot(index="dim", columns="direction", values="diff")
piv_time = df.pivot(index="dim", columns="direction", values="time")

# A logarithmic y-axis cannot represent zero errors: when every summation is
# exact, matplotlib emits "Data has no positive values, and therefore cannot
# be log-scaled". Only request the log scale when there is something to show.
has_positive_error = bool((piv_diff > 0).any().any())

# Three side-by-side panels: time per element, summation error, total time.
fig, ax = plt.subplots(1, 3, figsize=(12, 6))
piv.plot(ax=ax[0], logx=True, title="Comparison between two summation")
piv_diff.plot(
    ax=ax[1], logx=True, logy=has_positive_error, title="Summation errors"
)
piv_time.plot(ax=ax[2], logx=True, logy=True, title="Total time")
fig.tight_layout()
fig.savefig("plot_bench_cpu_vector_sum.png")
Comparison between two summation, Summation errors, Total time
/home/xadupre/.local/lib/python3.10/site-packages/pandas/plotting/_matplotlib/core.py:822: UserWarning: Data has no positive values, and therefore cannot be log-scaled.
  labels = axis.get_majorticklabels() + axis.get_minorticklabels()

The summation by rows is much faster for the larger matrices, as expected; for the smallest dimensions both directions take a comparable time. That explains why it is usually more efficient to transpose the first matrix before a matrix multiplication.

Total running time of the script: (0 minutes 19.234 seconds)

Gallery generated by Sphinx-Gallery