import numpy as np
import numba as nb
import matplotlib.pyplot as plt
from iminuit import Minuit
from scipy.stats import norm
import boost_histogram as bh

Bohm and Zech, NIMA 748 (2014) 1-6, discuss the scaled Poisson distribution (SPD), an approximate distribution for a sum of weights. This distribution is used in fits to histograms that are filled with weighted samples.
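To make the construction concrete, here is a minimal single-bin sketch with arbitrary values of my choosing: the SPD maps the weight sum w and its variance wvar to an equivalent Poisson count n' = w²/wvar with expectation mu' = mu · w/wvar, i.e. both are rescaled by the same factor s⁻¹ = w/wvar.

# SPD for a single bin: map (w, wvar) to an equivalent Poisson problem
w, wvar, mu = 18.5, 21.3, 20.0  # weight sum, its variance, model expectation
s_inv = w / wvar                # common inverse scale factor
n_prime = w * s_inv             # equivalent Poisson count n' = w**2 / wvar
mu_prime = mu * s_inv           # rescaled expectation
# the SPD treats n_prime as Poisson-distributed around mu_prime
print(n_prime, mu_prime)        # approx. 16.07 and 17.37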
I demonstrate here that a fit based on the SPD performs better than a least-squares fit, which effectively uses a normal distribution instead of the SPD to describe random fluctuations. I run repeated simulations in which weighted samples are generated. They are filled into a generalized histogram that keeps track of the variance of the weights. The cost function for the fit is simple: all bins in the histogram have the same constant expectation. The cost is computed with the least-squares method and the SPD method, respectively.
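The generalized histogram is a boost_histogram with Weight() storage: each bin then tracks the sum of weights as its value and the sum of squared weights as its variance. A quick illustration:

# with Weight() storage, each bin stores the sum of weights (value)
# and the sum of squared weights (variance)
h_demo = bh.Histogram(bh.axis.Regular(2, 0, 1), storage=bh.storage.Weight())
h_demo.fill([0.2, 0.3, 0.7], weight=[1.5, 0.5, 2.0])
print(h_demo.values())     # [2.0, 2.0] -> sums of weights per bin
print(h_demo.variances())  # [2.5, 4.0] -> sums of squared weights per bin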
I measure the bias and the variance of the fit results in repeated toy experiments. The SPD-based cost function yields a smaller bias, and its estimated uncertainty tracks the actual spread of the fit results more closely.
@nb.njit
def cost_lsq(w, wvar, mu):
    # least-squares cost: treat each bin content as normally distributed
    # around mu, with the variance estimated from the squared weights
    r = 0.0
    for i in range(len(w)):
        if wvar[i] > 0:
            r += (w[i] - mu) ** 2 / wvar[i]
    return r
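To see where the bias of the least-squares fit comes from (an illustration of mine, not part of the original study): setting the derivative of cost_lsq to zero gives mu as the 1/wvar-weighted mean of the bin contents. Since wvar is estimated from the observed weights, bins that fluctuate low also get a small variance estimate and hence a large weight, which pulls mu down.

# the minimizer of cost_lsq is the 1/wvar-weighted mean of the bin contents;
# for unit weights (wvar == w) this is the harmonic mean, which lies below
# the arithmetic mean whenever the contents fluctuate
w = np.array([15.0, 20.0, 25.0])  # bin contents fluctuating around 20
wvar = w.copy()                   # unit weights: variance estimate equals content
mu_lsq = np.sum(w / wvar) / np.sum(1.0 / wvar)
print(mu_lsq)  # approx. 19.15 < 20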
@nb.njit
def cost_ml(w, wvar, mu):
    # SPD-based negative log-likelihood, see Bohm and Zech, NIMA 748 (2014) 1-6
    r = 0.0
    for i in range(len(w)):
        if wvar[i] > 0 and w[i] * mu > 0:
            s_inv = w[i] / wvar[i]  # inverse scale factor of the SPD
            mu_prime = mu * s_inv   # rescaled expectation
            n_prime = w[i] * s_inv  # equivalent Poisson count
            # -log Poisson(n'; mu') up to terms constant in mu
            r += mu_prime - n_prime * np.log(mu_prime)
    return 2 * r

nmc = 1000
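As a quick sanity check of my own: for unit weights, wvar equals w, so n' equals w and the SPD cost reduces to the ordinary Poisson -2 log L up to mu-independent constants. Its minimum is then at the arithmetic mean of the bin contents.

# for unit weights the SPD cost is minimized at the arithmetic mean
w = np.array([15.0, 20.0, 25.0])
mus = np.linspace(15, 25, 1001)
costs = [cost_ml(w, w.copy(), mu) for mu in mus]
print(mus[np.argmin(costs)])  # 20.0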
def run(cost, w_mu, w_sigma_rel, mu_pts, bins):
    expected_mu = w_mu * mu_pts / bins
    rng = np.random.default_rng(seed=1)
    fitted_mu = []
    fitted_mu_var = []
    h = bh.Histogram(bh.axis.Regular(bins, 0, 1), storage=bh.storage.Weight())
    for imc in range(nmc):
        h.reset()
        # draw a Poisson-fluctuating number of points with normally
        # distributed weights and fill them into the histogram
        n = rng.poisson(mu_pts)
        x = rng.uniform(size=n)
        wi = rng.normal(w_mu, w_sigma_rel * w_mu, size=n)
        h.fill(x, weight=wi)
        # fit a constant expectation mu to all bins
        m = Minuit(lambda mu: cost(h.values(), h.variances(), mu), mu=expected_mu)
        m.limits["mu"] = (0, None)
        m.migrad()
        if m.valid:
            fitted_mu.append(m.values[0])
            fitted_mu_var.append(m.errors[0] ** 2)
        else:
            print("error", imc)
    bias = np.mean(fitted_mu) - expected_mu
    sample_var = np.var(fitted_mu, ddof=1)
    mean_fitted_var = np.mean(fitted_mu_var)
    return expected_mu, bias, mean_fitted_var, sample_var
results = np.array([run(cost, 1.0, 0.1, 100, 5) for cost in (cost_lsq, cost_ml)])
labels = "LSQ", "ML"
for label, (expected_mu, bias, mean_fitted_var, sample_var) in zip(labels, results):
    print(label)
    print(f"  relative bias = {bias / expected_mu:.3f}")
    print(f"  average estimated std. dev. {mean_fitted_var ** 0.5:.3f}, true std. dev. {sample_var ** 0.5:.3f}")

LSQ
  relative bias = -0.042
  average estimated std. dev. 1.966, true std. dev. 2.158
ML
  relative bias = -0.001
  average estimated std. dev. 2.008, true std. dev. 2.035