import numpy as np
import ROOT
from pyamptools.utility.general import remove_all_whitespace
label_size = 0.06
title_size = 0.07
[docs]
def calculate_chi_squared(data_hist, mc_hist):
chi2 = 0
ndf = 0
for bin in range(1, data_hist.GetNbinsX() + 1):
observed = data_hist.GetBinContent(bin)
expected = mc_hist.GetBinContent(bin)
error = data_hist.GetBinError(bin)
if error > 0:
chi2 += ((observed - expected) ** 2) / (error ** 2)
ndf += 1
# Reduce by the number of parameters (usually the number of bins)
reduced_chi2 = chi2 / ndf if ndf > 0 else float('inf')
return reduced_chi2
[docs]
def define_if_not_observed(df, function, OBSERVED_FORMULAE, name, columns):
"""
If requested function has not observed, define it and return new name
If requested function has been observed, return the name associated with the observed function
Args:
df: RDataFrame
function: function string
OBSERVED_FORMULAE: dict of observed {function: variable_name} pairs
name: new variable name to define if not observed
columns: list of column names in RDataFrame to check if we need to Redefine
Returns:
df: RDataFrame
name: string
"""
if function in OBSERVED_FORMULAE: # check if function observed
old_name = OBSERVED_FORMULAE[function]
return df, old_name
else:
OBSERVED_FORMULAE[function] = name
df = df.Redefine(name, function) if name in columns else df.Define(name, function) # check if name already exists
return df, name
[docs]
def book_histogram(df, HISTS_TO_BOOK, columns):
"""
Book histograms for a given type (data, bkgnd, accMC, genMC)
Args:
HISTS_TO_BOOK: dict of histograms to book, with associated function call, binning scheme, draw options
columns: list of column names in RDataFrame to check if we need to Redefine
Returns:
BOOKED_HISTOGRAMS: list of booked Histos
DRAW_OPTIONS: list of draw options
"""
## ALL HISTOGRAMS FOR A SOURCE FILES
BOOKED_HISTOGRAMS = []
OBSERVED_FORMULAE = {} # Track observed functions matching to defined variable names: {function: variable_name}
DRAW_OPTIONS = []
# Booking hists are lazy! Book all of them first, then run filling/Drawing
for hname, booked_hist in HISTS_TO_BOOK.items():
histIs1D = len(booked_hist) == 7
if histIs1D:
xname, xfunction, title, nx_bins, x_min, x_max, drawOptions = booked_hist
xfunction = remove_all_whitespace(xfunction)
df, xname = define_if_not_observed(df, xfunction, OBSERVED_FORMULAE, xname, columns)
BOOKED_HISTOGRAMS.append(df.Histo1D((hname, title, nx_bins, x_min, x_max), xname, "weight"))
DRAW_OPTIONS.append(drawOptions)
else: # assume 2D
xname, xfunction, title, nx_bins, x_min, x_max, yname, yfunction, ny_bins, y_min, y_max, drawOptions = booked_hist
xfunction, yfunction = remove_all_whitespace(xfunction), remove_all_whitespace(yfunction)
df, xname = define_if_not_observed(df, xfunction, OBSERVED_FORMULAE, xname, columns)
df, yname = define_if_not_observed(df, yfunction, OBSERVED_FORMULAE, yname, columns)
BOOKED_HISTOGRAMS.append(df.Histo2D((hname, title, nx_bins, x_min, x_max, ny_bins, y_min, y_max), xname, yname, "weight"))
DRAW_OPTIONS.append(drawOptions)
return BOOKED_HISTOGRAMS, DRAW_OPTIONS
[docs]
def turn_on_specifc_waveset(plotGen, results, waveset, verbose=True):
"""
Turn on a specific waveset which is an semicolon separated list of amplitude names.
If waveset = all, then turn on all amplitudes
Example:
wavesets = 'resAmp1;resAmp2' # Will turn on only resAmp1 and resAmp2.
AmpTools uses Reaction::Sum::Amp naming structure for amplitudes.
The example will turn on all amplitudes with 'resAmp1' or 'resAmp2' anywhere in their name
Args:
plotGen: PlotGenerator
results: FitResults
waveset: string
verbose: bool
"""
keepAllAmps = waveset == "all"
reactionList = results.reactionList() # vector<string>
sums = plotGen.uniqueSums() # vector<string>
amps = plotGen.uniqueAmplitudes() # vector<string>
# Initialize a map of matching amplitude names to their index
amp_map = {}
if verbose:
print(f" >> Plotting waveset: {waveset}")
waves = waveset.split(";")
if keepAllAmps:
print(f" >> Keeping all amplitudes: {amps}")
_waves = amps if keepAllAmps else waves
for wave in _waves:
amp_map[wave] = -1 # Placeholder
# Re-enable all amplitudes in all reactions
for reaction in reactionList:
plotGen.enableReaction(reaction)
# Re-enable all sums
for i in range(len(sums)):
plotGen.enableSum(i)
# Map index of your requested amplitudes
for i, amp in enumerate(amps):
if amp in amp_map or keepAllAmps:
amp_map[amp] = i
for k, v in amp_map.items():
if v == -1:
print(f" >> WARNING: Amplitude {k} not found in fit results. Exiting...")
exit()
# Turn on only the requested amplitudes
if keepAllAmps:
for i in range(len(amps)):
plotGen.enableAmp(i)
else:
for i in range(len(amps)):
plotGen.disableAmp(i)
for i in amp_map.values():
plotGen.enableAmp(i)
[docs]
def draw_histograms(
results,
hist_output_name,
particles,
HISTS_TO_BOOK,
wavesets="all",
stack_background=False,
plot_acc_corrected=False,
plot_chi2=False,
output_format="pdf"
):
"""
Draw histograms from a FitResults object. Histograms are booked using the book_histogram() function that uses macros to compute
kinematic quantities to plot. Booked histograms are lazily evaluated / filled with RDataFrame.
Args:
results (FitResults): FitResults object
hist_output_name (str): Output file name, do not include file type
particles (List[str]): List of particles in reaction
HISTS_TO_BOOK (Dict[str, List]): Dictionary of histograms to book. See book_histogram() for details
wavesets (str): Space separated list of wavesets to turn on. Wavesets are semi-colon ; separated list of amplitudes. "all" turns on all waves.
stack_background (bool): Stack backgrounds instead of subtracting them
plot_acc_corrected (bool): Also plot acceptance corrected data, compare to weighted genmc
plot_chi2 (bool): Plot chi2 values on the plot
output_format (str): Output file format, "pdf" or "png"
Returns:
None, dumps a pdf file based on hist_output_name
"""
output_format = output_format.lower()
assert output_format in ["pdf", "png"], "Output format must be 'pdf' or 'png'"
THStack = ROOT.THStack
TCanvas = ROOT.TCanvas
plotGen = ROOT.PlotGenerator(results)
cfgInfo = plotGen.cfgInfo()
print("Loaded data into PlotGenerator...")
assert "." not in hist_output_name, "Do not include file type in the output name ( -o flag )"
N_BOOKED_HISTS = len(HISTS_TO_BOOK)
N_PARTICLES = len(particles)
kData, kBkgnd, kGenMC, kAccMC = plotGen.kData, plotGen.kBkgnd, plotGen.kGenMC, plotGen.kAccMC
# kNumTypes = plotGen.kNumTypes
kColors = {kData: ROOT.kBlack, kBkgnd: ROOT.kRed - 9, kAccMC: ROOT.kGreen - 8, kGenMC: ROOT.kAzure - 4} # for the 4 data sources
reactionNames = list(results.reactionList())
zlm_polAngles = [float(cfgInfo.amplitudeList(reactionName, "", "").at(0).factors().at(0).at(5)) for reactionName in reactionNames]
### FOR EACH WAVESET, PLOT THE HISTOGRAMS ###
wavesets = wavesets.split(" ")
for waveset in wavesets:
turn_on_specifc_waveset(plotGen, results, waveset)
HISTOGRAM_STORAGE = {} # {type: [hist1, hist2, ...]}
DRAW_OPT_STORAGE = {}
for srctype in [kData, kBkgnd, kGenMC, kAccMC]:
########### LOAD THE DATA ###########
# Reaction: { Variable: [Values] } }
value_map = {}
for reactionName, zlm_polAngle in zip(reactionNames, zlm_polAngles):
_value_map = plotGen.projected_values([reactionName], srctype, N_PARTICLES)
_value_map = dict(_value_map[srctype])
n_entries = len(_value_map['EnP0'])
_value_map["POLANGLE"] = np.full(n_entries, zlm_polAngle, dtype=np.float32)
for k, v in _value_map.items():
if k not in value_map:
value_map[k] = []
value_map[k].extend(v)
value_map = {k: np.array(v, dtype=np.float32) for k, v in value_map.items()}
df = ROOT.RDF.FromNumpy(value_map) # NOTE: This is a zero-copy operation
columns = df.GetColumnNames()
######### ADD NEW COLUMNS DATA FOR NICER CALCULATIONS #########
df = df.Define("GLUEXTARGET", "std::vector<float> p{0.0, 0.0, 0.0, 0.938272}; return p;")
for i, particle in enumerate(particles):
cmd = f"std::vector<float> p{{ PxP{i}, PyP{i}, PzP{i}, EnP{i} }}; return p;"
df = df.Define(f"{particle}", cmd)
# print(df.Describe())
######### BOOK HISTOGRAMS #########
BOOKED_HISTOGRAMS, DRAW_OPTIONS = book_histogram(df, HISTS_TO_BOOK, columns)
HISTOGRAM_STORAGE[srctype] = BOOKED_HISTOGRAMS
DRAW_OPT_STORAGE[srctype] = DRAW_OPTIONS
###########################################################
### NOW CONFIGURE HOW YOU WANT TO DRAW THE HISTOGRAMS ####
### HERE IS AN EXAMPLE... BUT IMPOSSIBLE TO MAKE GENERIC #
nrows = int(np.floor(np.sqrt(len(HISTS_TO_BOOK))))
ncols = int(np.ceil(len(HISTS_TO_BOOK) / nrows))
# ncols = int(np.ceil(np.sqrt(len(HISTS_TO_BOOK))))
# nrows = ncols
canvas = TCanvas("canvas", "canvas", ncols*400, nrows*400)
canvas.Clear()
canvas.Divide(ncols, nrows)
if plot_acc_corrected:
canvas_gen = TCanvas("canvas_gen", "canvas_gen", ncols*400, nrows*400)
canvas_gen.Clear()
canvas_gen.Divide(ncols, nrows)
corrected_data_hists = []
efficiency_hists = []
# Turn off some axes if unused
# for ihist in range(N_BOOKED_HISTS, ncols*nrows):
# canvas.cd(ihist + 1)
# ROOT.gPad.SetFrameLineWidth(0)
# ROOT.gPad.SetBorderSize(0)
# ROOT.gPad.SetTickx(0)
# ROOT.gPad.SetTicky(0)
# ROOT.gPad.SetFillColor(0)
latex = ROOT.TLatex()
latex.SetNDC()
latex.SetTextSize(0.07)
latex.SetTextColor(ROOT.kRed)
_waveset = "_".join(waveset.split(";"))
output_name = hist_output_name + f"_{_waveset}"
# canvas.Print(f"{output_name}.pdf[")
stacks = []
for ihist in range(N_BOOKED_HISTS):
data_hist = HISTOGRAM_STORAGE[kData][ihist]
data_hist.SetMarkerStyle(ROOT.kFullCircle)
data_hist.SetMarkerSize(1.0)
data_hist.GetXaxis().SetNdivisions(505) # 5 primary divisions, 0 secondary, 5 tertiary
data_hist.GetYaxis().SetNdivisions(505)
# Set font sizes for data histogram
data_hist.GetXaxis().SetLabelSize(label_size)
data_hist.GetXaxis().SetTitleSize(title_size)
data_hist.GetYaxis().SetLabelSize(label_size)
data_hist.GetYaxis().SetTitleSize(title_size)
bkgnd_hist = HISTOGRAM_STORAGE[kBkgnd][ihist]
accmc_hist = HISTOGRAM_STORAGE[kAccMC][ihist]
accmc_hist.SetFillColorAlpha(kColors[kAccMC], 0.9)
accmc_hist.SetLineWidth(0)
accmc_hist.GetXaxis().SetNdivisions(505) # 5 primary divisions, 0 secondary, 5 tertiary
accmc_hist.GetYaxis().SetNdivisions(505)
# Set font sizes for accmc histogram
accmc_hist.GetXaxis().SetLabelSize(label_size)
accmc_hist.GetXaxis().SetTitleSize(title_size)
accmc_hist.GetYaxis().SetLabelSize(label_size)
accmc_hist.GetYaxis().SetTitleSize(title_size)
data_hist.Sumw2()
bkgnd_hist.Sumw2()
accmc_hist.Sumw2()
if plot_acc_corrected:
genmc_hist = HISTOGRAM_STORAGE[kGenMC][ihist]
genmc_hist.Sumw2()
genmc_hist.SetFillColorAlpha(kColors[kGenMC], 0.9)
genmc_hist.SetLineWidth(0)
genmc_hist.GetXaxis().SetNdivisions(505) # 5 primary divisions
genmc_hist.GetYaxis().SetNdivisions(505)
# Set font sizes for genmc histogram
genmc_hist.GetXaxis().SetLabelSize(label_size)
genmc_hist.GetXaxis().SetTitleSize(title_size)
genmc_hist.GetYaxis().SetLabelSize(label_size)
genmc_hist.GetYaxis().SetTitleSize(title_size)
corrected_data_hists.append(data_hist.Clone())
corrected_data_hists[-1].SetMarkerStyle(ROOT.kFullCircle)
corrected_data_hists[-1].SetMarkerSize(1.0)
corrected_data_hists[-1].GetXaxis().SetNdivisions(505) # 5 primary divisions
corrected_data_hists[-1].GetYaxis().SetNdivisions(505)
# Set font sizes for corrected data histogram
corrected_data_hists[-1].GetXaxis().SetLabelSize(label_size)
corrected_data_hists[-1].GetXaxis().SetTitleSize(title_size)
corrected_data_hists[-1].GetYaxis().SetLabelSize(label_size)
corrected_data_hists[-1].GetYaxis().SetTitleSize(title_size)
corrected_data_hists[-1].Sumw2()
canvas.cd(ihist + 1)
if not stack_background: # SUBTRACT BACKGROUND
data_hist.Add(bkgnd_hist.GetPtr(), -1)
if plot_acc_corrected:
corrected_data_hists[-1].Add(bkgnd_hist.GetPtr(), -1)
# data_hist.Draw("E") # Draw first to set labels and y-limits
# data_hist.GetYaxis().SetLabelOffset(0.01)
# data_hist.GetYaxis().SetTitleOffset(1.70)
# accmc_hist.Draw("HIST SAME")
accmc_hist.Draw("HIST")
data_hist.Draw("E SAME")
accmc_hist.GetYaxis().SetLabelOffset(0.01)
accmc_hist.GetYaxis().SetTitleOffset(1.70)
accmc_hist.SetMinimum(0)
max_y = 1.2 * max(data_hist.GetMaximum(), accmc_hist.GetMaximum())
accmc_hist.SetMaximum(max_y)
# chi2 = data_hist.Chi2Test(accmc_hist.GetPtr(), "CHI2/NDF")
chi2 = calculate_chi_squared(data_hist, accmc_hist)
if plot_chi2:
latex.DrawLatex(0.25, 0.87, f"#chi^{{2}}/bin = {chi2:.1f}")
if plot_acc_corrected: # Overlay acceptance corrected data only if bkgnd subtracted
canvas_gen.cd(ihist + 1)
efficiency_hists.append(accmc_hist.Clone())
efficiency_hists[-1].Divide(genmc_hist.GetPtr())
# set bins where there is very little accmc data as it will lead to large errs in efficiency
for bin in range(1, efficiency_hists[-1].GetNbinsX() + 1):
if accmc_hist.GetBinContent(bin) < 20:
efficiency_hists[-1].SetBinContent(bin, 0)
corrected_data_hists[-1].Divide(efficiency_hists[-1])
genmc_hist.Draw("HIST")
genmc_hist.GetYaxis().SetLabelOffset(0.01)
genmc_hist.GetYaxis().SetTitleOffset(1.70)
corrected_data_hists[-1].Draw("E SAME")
genmc_hist.SetMinimum(0)
max_y = 1.2 * max(corrected_data_hists[-1].GetMaximum(), genmc_hist.GetMaximum())
genmc_hist.SetMaximum(max_y)
chi2_gen = calculate_chi_squared(corrected_data_hists[-1], genmc_hist)
if plot_chi2:
latex.DrawLatex(0.25, 0.87, f"#chi^{{2}}/bin = {chi2_gen:.1f}")
else: # STACK BACKGROUND
stacks.append(THStack("stack", ""))
for booked_hist, srctype in zip([bkgnd_hist, accmc_hist], [kBkgnd, kAccMC]):
booked_hist.SetFillColorAlpha(kColors[srctype], 0.9)
booked_hist.SetLineWidth(0)
hist_ptr = booked_hist.GetPtr()
stacks[-1].Add(hist_ptr)
stacks[-1].Draw("HIST")
data_hist.Draw("E SAME")
stacks[-1].SetMinimum(0)
# Set font sizes for stacked histogram
stacks[-1].GetXaxis().SetLabelSize(label_size)
stacks[-1].GetXaxis().SetTitleSize(title_size)
stacks[-1].GetXaxis().SetNdivisions(505)
stacks[-1].GetYaxis().SetLabelSize(label_size)
stacks[-1].GetYaxis().SetTitleSize(title_size)
stacks[-1].GetYaxis().SetNdivisions(505)
stacks[-1].GetYaxis().SetLabelOffset(0.01)
stacks[-1].GetYaxis().SetTitleOffset(1.70)
max_y = 1.2 * max(data_hist.GetMaximum(), stacks[-1].GetStack().Last().GetMaximum())
stacks[-1].SetMaximum(max_y)
chi2_stack = calculate_chi_squared(data_hist, stacks[-1].GetStack().Last())
if plot_chi2:
latex.DrawLatex(0.25, 0.87, f"#chi^{{2}}/bin = {chi2_stack:.1f}")
canvas.Print(f"{output_name}.{output_format}")
if plot_acc_corrected:
canvas_gen.Print(f"{output_name}_acc_corrected.{output_format}")
# canvas.Print(f"{output_name}.pdf")
# canvas.Print(f"{output_name}.pdf]")
# THStack is drawn on TCanvas. Deleting TCanvas (which normally happens when it goes out of scope)
# before THStack will lead to improper deallocation. Also deleting elements of stacks in a for loop
# does not work. The entire object needs to be deleted
del stacks
del canvas