Simple parallelization in Jupyter Notebooks

Published August 13, 2021

The standard-library modules concurrent.futures and multiprocessing do not work reliably inside notebooks on all platforms (notably on macOS, where the default process start method means that worker processes cannot use functions defined in the notebook). What does work is joblib, which also offers a simple way to run embarrassingly parallel jobs.
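
For illustration, here is a minimal sketch of the kind of call that tends to fail inside a notebook on macOS (the function square is just a placeholder; the exact failure mode depends on the Python version and process start method):

from concurrent.futures import ProcessPoolExecutor

def square(x):
    return x * x

# on macOS the default "spawn" start method re-imports __main__ in each
# worker; a function defined only in the notebook is then not importable,
# so this often errors out or hangs instead of returning [0, 1, 4, 9]
with ProcessPoolExecutor(max_workers=2) as ex:
    result = list(ex.map(square, range(4)))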

from joblib import Parallel, delayed
import numpy as np

# silly function that simulates a numerical workload
def run(n):
    r = 0
    rng = np.random.default_rng(1)
    for itry in range(1000):
        x = rng.normal(size=n)
        r += np.sum(x)
    return r

# run the function four times in parallel with up to 8 worker processes
n = np.full((4,), 100000)
y = Parallel(8)(delayed(run)(ni) for ni in n)
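
When the same pool is needed for several batches of calls, Parallel can also be used as a context manager so that the workers are reused instead of being set up again for every call. A minimal sketch, reusing the run function and n from above:

# reuse one pool of 8 workers across several batches of tasks
with Parallel(n_jobs=8) as parallel:
    y1 = parallel(delayed(run)(ni) for ni in n)
    y2 = parallel(delayed(run)(ni) for ni in n)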

import time

# measure the wall-clock time for 1 to 8 jobs and a range of problem sizes,
# splitting the total work nj evenly across the njobi jobs
njob = np.arange(1, 9)
n = (10 ** np.arange(2, 6, 0.5)).astype(int)
data = np.empty((len(njob), len(n)))
for i, njobi in enumerate(njob):
    for j, nj in enumerate(n):
        t = time.perf_counter()
        n_per_job = nj // njobi
        Parallel(njobi)(delayed(run)(ni) for ni in np.full(njobi, n_per_job))
        t = time.perf_counter() - t
        data[i, j] = t
        print(njobi, nj, t)
(printed output: one line per combination of njob = 1…8 and n = 100, 316, …, 316227; the measured times are omitted)
import matplotlib.pyplot as plt

# speed-up relative to a single job as a function of problem size
for i, njobi in enumerate(njob):
    plt.plot(n, data[0] / data[i], label=f"{njobi} jobs")
plt.loglog()
plt.legend();
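
Dividing the speed-up by the number of jobs gives the parallel efficiency, which makes the overhead at small n easier to see. A minimal sketch using the same data, njob, and n arrays as above:

# parallel efficiency: speed-up divided by the number of jobs
speedup = data[0] / data            # shape (len(njob), len(n))
efficiency = speedup / njob[:, None]
for i, njobi in enumerate(njob):
    plt.plot(n, efficiency[i], label=f"{njobi} jobs")
plt.xscale("log")
plt.legend();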