import numpy as np
import numba as nb
import matplotlib.pyplot as plt
from iminuit import Minuit
from scipy.stats import norm
import boost_histogram as bh
Bohm and Zech, NIMA 748 (2014) 1-6, discuss the scaled Poisson distribution (SPD), an approximate distribution for a sum of weights. It is useful in fits to histograms that are filled with weighted samples.
I demonstrate here that a fit based on the SPD performs better than a least-squares fit, which effectively uses a normal distribution instead of the SPD to describe the random fluctuations. I run repeated simulations in which weighted samples are generated and filled into a generalized histogram that keeps track of the variance of the weights per bin. The fitted model is deliberately simple: all bins in the histogram have the same constant expectation. The cost is computed with the least-squares method and with the SPD method, respectively.
I measure the bias and the variance of the fit results over the repeated toy experiments. The SPD-based cost function yields a smaller bias, and its estimated uncertainty better reflects the observed scatter of the fit results.
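For reference, the SPD maps each bin onto an equivalent Poisson problem. A minimal sketch of the mapping for a single bin, with w the sum of weights and wvar the sum of squared weights (the function and variable names are mine, not from the paper):
def spd_transform(w, wvar, mu):
    # scale of the weights in this bin; equal to 1 for unit weights
    s = wvar / w
    # equivalent count n' = w**2 / wvar and scaled expectation mu' = mu / s;
    # the SPD treats n' as approximately Poisson-distributed with mean mu'
    return w / s, mu / s
The cost function cost_ml below accumulates mu' - n' * log(mu'), which is the negative log of this Poisson likelihood up to terms that do not depend on mu.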
@nb.njit
def cost_lsq(w, wvar, mu):
    # least-squares: model the bin contents as normally distributed
    r = 0.0
    for i in range(len(w)):
        if wvar[i] > 0:
            r += (w[i] - mu) ** 2 / wvar[i]
    return r
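As an aside (my addition, not part of the original study), the minimum of cost_lsq has a closed form, the inverse-variance weighted mean. This makes the origin of the least-squares bias easy to see: a bin that fluctuates downwards also gets a smaller variance estimate wvar[i] and therefore a larger weight in the mean, which pulls the estimate down.
def lsq_minimum(w, wvar):
    # setting the derivative of cost_lsq with respect to mu to zero
    # yields the inverse-variance weighted mean
    inv_var = 1.0 / wvar
    return np.sum(w * inv_var) / np.sum(inv_var)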
@nb.njit
def cost_ml(w, wvar, mu):
    # scaled Poisson distribution: Bohm and Zech, NIMA 748 (2014) 1-6
    r = 0.0
    for i in range(len(w)):
        if wvar[i] > 0 and w[i] * mu > 0:
            s_inv = w[i] / wvar[i]  # inverse scale of the weights
            mu_prime = mu * s_inv   # scaled expectation
            n_prime = w[i] * s_inv  # equivalent Poisson count
            # negative log-likelihood of n_prime, dropping mu-independent terms
            r += mu_prime - n_prime * np.log(mu_prime)
    return 2 * r
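A quick sanity check (again my addition): for unit weights, the sum of squared weights equals the sum of weights in each bin, so s_inv = 1 and cost_ml reduces to an ordinary Poisson deviance, up to constants that do not depend on mu.
w = np.array([3.0, 5.0, 4.0])
wvar = w.copy()  # unit weights: sum of squared weights equals sum of weights
assert np.isclose(cost_ml(w, wvar, 4.0), 2 * np.sum(4.0 - w * np.log(4.0)))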
nmc = 1000  # number of toy experiments per cost function
def run(cost, w_mu, w_sigma_rel, mu_pts, bins):
    expected_mu = w_mu * mu_pts / bins  # true expectation per bin
    rng = np.random.default_rng(seed=1)
    fitted_mu = []
    fitted_mu_var = []
    h = bh.Histogram(bh.axis.Regular(bins, 0, 1), storage=bh.storage.Weight())
    for imc in range(nmc):
        h.reset()
        # draw a Poisson-fluctuated number of uniformly distributed points
        n = rng.poisson(mu_pts)
        x = rng.uniform(size=n)
        # draw a normally distributed weight for each point
        wi = rng.normal(w_mu, w_sigma_rel * w_mu, size=n)
        h.fill(x, weight=wi)
        m = Minuit(lambda mu: cost(h.values(), h.variances(), mu), mu=expected_mu)
        m.limits["mu"] = (0, None)
        m.migrad()
        if m.valid:
            fitted_mu.append(m.values[0])
            fitted_mu_var.append(m.errors[0] ** 2)
        else:
            print("error", imc)
    bias = np.mean(fitted_mu) - expected_mu
    sample_var = np.var(fitted_mu, ddof=1)
    mean_fitted_var = np.mean(fitted_mu_var)
    return expected_mu, bias, mean_fitted_var, sample_var
results = tuple(map(lambda cost: run(cost, 1.0, 0.1, 100, 5), (cost_lsq, cost_ml)))
results = np.array(results)
= "LSQ", "ML"
labels for label, (expected_mu, bias, mean_fitted_var, sample_var ) in zip(labels, results):
print(label)
print(f" relative bias = {bias / expected_mu:.3f}")
print(f" average estimated variance {mean_fitted_var ** 0.5:.3f}, true variance {sample_var ** 0.5:.3f}")
LSQ
  relative bias = -0.042
  average estimated std. dev. 1.966, true std. dev. 2.158
ML
  relative bias = -0.001
  average estimated std. dev. 2.008, true std. dev. 2.035