Source code for pyamptools.utility.plotgen_utils

import numpy as np
import ROOT
from pyamptools.utility.general import remove_all_whitespace

label_size = 0.06
title_size = 0.07

[docs] def calculate_chi_squared(data_hist, mc_hist): chi2 = 0 ndf = 0 for bin in range(1, data_hist.GetNbinsX() + 1): observed = data_hist.GetBinContent(bin) expected = mc_hist.GetBinContent(bin) error = data_hist.GetBinError(bin) if error > 0: chi2 += ((observed - expected) ** 2) / (error ** 2) ndf += 1 # Reduce by the number of parameters (usually the number of bins) reduced_chi2 = chi2 / ndf if ndf > 0 else float('inf') return reduced_chi2
[docs] def define_if_not_observed(df, function, OBSERVED_FORMULAE, name, columns): """ If requested function has not observed, define it and return new name If requested function has been observed, return the name associated with the observed function Args: df: RDataFrame function: function string OBSERVED_FORMULAE: dict of observed {function: variable_name} pairs name: new variable name to define if not observed columns: list of column names in RDataFrame to check if we need to Redefine Returns: df: RDataFrame name: string """ if function in OBSERVED_FORMULAE: # check if function observed old_name = OBSERVED_FORMULAE[function] return df, old_name else: OBSERVED_FORMULAE[function] = name df = df.Redefine(name, function) if name in columns else df.Define(name, function) # check if name already exists return df, name
[docs] def book_histogram(df, HISTS_TO_BOOK, columns): """ Book histograms for a given type (data, bkgnd, accMC, genMC) Args: HISTS_TO_BOOK: dict of histograms to book, with associated function call, binning scheme, draw options columns: list of column names in RDataFrame to check if we need to Redefine Returns: BOOKED_HISTOGRAMS: list of booked Histos DRAW_OPTIONS: list of draw options """ ## ALL HISTOGRAMS FOR A SOURCE FILES BOOKED_HISTOGRAMS = [] OBSERVED_FORMULAE = {} # Track observed functions matching to defined variable names: {function: variable_name} DRAW_OPTIONS = [] # Booking hists are lazy! Book all of them first, then run filling/Drawing for hname, booked_hist in HISTS_TO_BOOK.items(): histIs1D = len(booked_hist) == 7 if histIs1D: xname, xfunction, title, nx_bins, x_min, x_max, drawOptions = booked_hist xfunction = remove_all_whitespace(xfunction) df, xname = define_if_not_observed(df, xfunction, OBSERVED_FORMULAE, xname, columns) BOOKED_HISTOGRAMS.append(df.Histo1D((hname, title, nx_bins, x_min, x_max), xname, "weight")) DRAW_OPTIONS.append(drawOptions) else: # assume 2D xname, xfunction, title, nx_bins, x_min, x_max, yname, yfunction, ny_bins, y_min, y_max, drawOptions = booked_hist xfunction, yfunction = remove_all_whitespace(xfunction), remove_all_whitespace(yfunction) df, xname = define_if_not_observed(df, xfunction, OBSERVED_FORMULAE, xname, columns) df, yname = define_if_not_observed(df, yfunction, OBSERVED_FORMULAE, yname, columns) BOOKED_HISTOGRAMS.append(df.Histo2D((hname, title, nx_bins, x_min, x_max, ny_bins, y_min, y_max), xname, yname, "weight")) DRAW_OPTIONS.append(drawOptions) return BOOKED_HISTOGRAMS, DRAW_OPTIONS
[docs] def turn_on_specifc_waveset(plotGen, results, waveset, verbose=True): """ Turn on a specific waveset which is an semicolon separated list of amplitude names. If waveset = all, then turn on all amplitudes Example: wavesets = 'resAmp1;resAmp2' # Will turn on only resAmp1 and resAmp2. AmpTools uses Reaction::Sum::Amp naming structure for amplitudes. The example will turn on all amplitudes with 'resAmp1' or 'resAmp2' anywhere in their name Args: plotGen: PlotGenerator results: FitResults waveset: string verbose: bool """ keepAllAmps = waveset == "all" reactionList = results.reactionList() # vector<string> sums = plotGen.uniqueSums() # vector<string> amps = plotGen.uniqueAmplitudes() # vector<string> # Initialize a map of matching amplitude names to their index amp_map = {} if verbose: print(f" >> Plotting waveset: {waveset}") waves = waveset.split(";") if keepAllAmps: print(f" >> Keeping all amplitudes: {amps}") _waves = amps if keepAllAmps else waves for wave in _waves: amp_map[wave] = -1 # Placeholder # Re-enable all amplitudes in all reactions for reaction in reactionList: plotGen.enableReaction(reaction) # Re-enable all sums for i in range(len(sums)): plotGen.enableSum(i) # Map index of your requested amplitudes for i, amp in enumerate(amps): if amp in amp_map or keepAllAmps: amp_map[amp] = i for k, v in amp_map.items(): if v == -1: print(f" >> WARNING: Amplitude {k} not found in fit results. Exiting...") exit() # Turn on only the requested amplitudes if keepAllAmps: for i in range(len(amps)): plotGen.enableAmp(i) else: for i in range(len(amps)): plotGen.disableAmp(i) for i in amp_map.values(): plotGen.enableAmp(i)
[docs] def draw_histograms( results, hist_output_name, particles, HISTS_TO_BOOK, wavesets="all", stack_background=False, plot_acc_corrected=False, plot_chi2=False, output_format="pdf" ): """ Draw histograms from a FitResults object. Histograms are booked using the book_histogram() function that uses macros to compute kinematic quantities to plot. Booked histograms are lazily evaluated / filled with RDataFrame. Args: results (FitResults): FitResults object hist_output_name (str): Output file name, do not include file type particles (List[str]): List of particles in reaction HISTS_TO_BOOK (Dict[str, List]): Dictionary of histograms to book. See book_histogram() for details wavesets (str): Space separated list of wavesets to turn on. Wavesets are semi-colon ; separated list of amplitudes. "all" turns on all waves. stack_background (bool): Stack backgrounds instead of subtracting them plot_acc_corrected (bool): Also plot acceptance corrected data, compare to weighted genmc plot_chi2 (bool): Plot chi2 values on the plot output_format (str): Output file format, "pdf" or "png" Returns: None, dumps a pdf file based on hist_output_name """ output_format = output_format.lower() assert output_format in ["pdf", "png"], "Output format must be 'pdf' or 'png'" THStack = ROOT.THStack TCanvas = ROOT.TCanvas plotGen = ROOT.PlotGenerator(results) cfgInfo = plotGen.cfgInfo() print("Loaded data into PlotGenerator...") assert "." not in hist_output_name, "Do not include file type in the output name ( -o flag )" N_BOOKED_HISTS = len(HISTS_TO_BOOK) N_PARTICLES = len(particles) kData, kBkgnd, kGenMC, kAccMC = plotGen.kData, plotGen.kBkgnd, plotGen.kGenMC, plotGen.kAccMC # kNumTypes = plotGen.kNumTypes kColors = {kData: ROOT.kBlack, kBkgnd: ROOT.kRed - 9, kAccMC: ROOT.kGreen - 8, kGenMC: ROOT.kAzure - 4} # for the 4 data sources reactionNames = list(results.reactionList()) zlm_polAngles = [float(cfgInfo.amplitudeList(reactionName, "", "").at(0).factors().at(0).at(5)) for reactionName in reactionNames] ### FOR EACH WAVESET, PLOT THE HISTOGRAMS ### wavesets = wavesets.split(" ") for waveset in wavesets: turn_on_specifc_waveset(plotGen, results, waveset) HISTOGRAM_STORAGE = {} # {type: [hist1, hist2, ...]} DRAW_OPT_STORAGE = {} for srctype in [kData, kBkgnd, kGenMC, kAccMC]: ########### LOAD THE DATA ########### # Reaction: { Variable: [Values] } } value_map = {} for reactionName, zlm_polAngle in zip(reactionNames, zlm_polAngles): _value_map = plotGen.projected_values([reactionName], srctype, N_PARTICLES) _value_map = dict(_value_map[srctype]) n_entries = len(_value_map['EnP0']) _value_map["POLANGLE"] = np.full(n_entries, zlm_polAngle, dtype=np.float32) for k, v in _value_map.items(): if k not in value_map: value_map[k] = [] value_map[k].extend(v) value_map = {k: np.array(v, dtype=np.float32) for k, v in value_map.items()} df = ROOT.RDF.FromNumpy(value_map) # NOTE: This is a zero-copy operation columns = df.GetColumnNames() ######### ADD NEW COLUMNS DATA FOR NICER CALCULATIONS ######### df = df.Define("GLUEXTARGET", "std::vector<float> p{0.0, 0.0, 0.0, 0.938272}; return p;") for i, particle in enumerate(particles): cmd = f"std::vector<float> p{{ PxP{i}, PyP{i}, PzP{i}, EnP{i} }}; return p;" df = df.Define(f"{particle}", cmd) # print(df.Describe()) ######### BOOK HISTOGRAMS ######### BOOKED_HISTOGRAMS, DRAW_OPTIONS = book_histogram(df, HISTS_TO_BOOK, columns) HISTOGRAM_STORAGE[srctype] = BOOKED_HISTOGRAMS DRAW_OPT_STORAGE[srctype] = DRAW_OPTIONS ########################################################### ### NOW CONFIGURE HOW YOU WANT TO DRAW THE HISTOGRAMS #### ### HERE IS AN EXAMPLE... BUT IMPOSSIBLE TO MAKE GENERIC # nrows = int(np.floor(np.sqrt(len(HISTS_TO_BOOK)))) ncols = int(np.ceil(len(HISTS_TO_BOOK) / nrows)) # ncols = int(np.ceil(np.sqrt(len(HISTS_TO_BOOK)))) # nrows = ncols canvas = TCanvas("canvas", "canvas", ncols*400, nrows*400) canvas.Clear() canvas.Divide(ncols, nrows) if plot_acc_corrected: canvas_gen = TCanvas("canvas_gen", "canvas_gen", ncols*400, nrows*400) canvas_gen.Clear() canvas_gen.Divide(ncols, nrows) corrected_data_hists = [] efficiency_hists = [] # Turn off some axes if unused # for ihist in range(N_BOOKED_HISTS, ncols*nrows): # canvas.cd(ihist + 1) # ROOT.gPad.SetFrameLineWidth(0) # ROOT.gPad.SetBorderSize(0) # ROOT.gPad.SetTickx(0) # ROOT.gPad.SetTicky(0) # ROOT.gPad.SetFillColor(0) latex = ROOT.TLatex() latex.SetNDC() latex.SetTextSize(0.07) latex.SetTextColor(ROOT.kRed) _waveset = "_".join(waveset.split(";")) output_name = hist_output_name + f"_{_waveset}" # canvas.Print(f"{output_name}.pdf[") stacks = [] for ihist in range(N_BOOKED_HISTS): data_hist = HISTOGRAM_STORAGE[kData][ihist] data_hist.SetMarkerStyle(ROOT.kFullCircle) data_hist.SetMarkerSize(1.0) data_hist.GetXaxis().SetNdivisions(505) # 5 primary divisions, 0 secondary, 5 tertiary data_hist.GetYaxis().SetNdivisions(505) # Set font sizes for data histogram data_hist.GetXaxis().SetLabelSize(label_size) data_hist.GetXaxis().SetTitleSize(title_size) data_hist.GetYaxis().SetLabelSize(label_size) data_hist.GetYaxis().SetTitleSize(title_size) bkgnd_hist = HISTOGRAM_STORAGE[kBkgnd][ihist] accmc_hist = HISTOGRAM_STORAGE[kAccMC][ihist] accmc_hist.SetFillColorAlpha(kColors[kAccMC], 0.9) accmc_hist.SetLineWidth(0) accmc_hist.GetXaxis().SetNdivisions(505) # 5 primary divisions, 0 secondary, 5 tertiary accmc_hist.GetYaxis().SetNdivisions(505) # Set font sizes for accmc histogram accmc_hist.GetXaxis().SetLabelSize(label_size) accmc_hist.GetXaxis().SetTitleSize(title_size) accmc_hist.GetYaxis().SetLabelSize(label_size) accmc_hist.GetYaxis().SetTitleSize(title_size) data_hist.Sumw2() bkgnd_hist.Sumw2() accmc_hist.Sumw2() if plot_acc_corrected: genmc_hist = HISTOGRAM_STORAGE[kGenMC][ihist] genmc_hist.Sumw2() genmc_hist.SetFillColorAlpha(kColors[kGenMC], 0.9) genmc_hist.SetLineWidth(0) genmc_hist.GetXaxis().SetNdivisions(505) # 5 primary divisions genmc_hist.GetYaxis().SetNdivisions(505) # Set font sizes for genmc histogram genmc_hist.GetXaxis().SetLabelSize(label_size) genmc_hist.GetXaxis().SetTitleSize(title_size) genmc_hist.GetYaxis().SetLabelSize(label_size) genmc_hist.GetYaxis().SetTitleSize(title_size) corrected_data_hists.append(data_hist.Clone()) corrected_data_hists[-1].SetMarkerStyle(ROOT.kFullCircle) corrected_data_hists[-1].SetMarkerSize(1.0) corrected_data_hists[-1].GetXaxis().SetNdivisions(505) # 5 primary divisions corrected_data_hists[-1].GetYaxis().SetNdivisions(505) # Set font sizes for corrected data histogram corrected_data_hists[-1].GetXaxis().SetLabelSize(label_size) corrected_data_hists[-1].GetXaxis().SetTitleSize(title_size) corrected_data_hists[-1].GetYaxis().SetLabelSize(label_size) corrected_data_hists[-1].GetYaxis().SetTitleSize(title_size) corrected_data_hists[-1].Sumw2() canvas.cd(ihist + 1) if not stack_background: # SUBTRACT BACKGROUND data_hist.Add(bkgnd_hist.GetPtr(), -1) if plot_acc_corrected: corrected_data_hists[-1].Add(bkgnd_hist.GetPtr(), -1) # data_hist.Draw("E") # Draw first to set labels and y-limits # data_hist.GetYaxis().SetLabelOffset(0.01) # data_hist.GetYaxis().SetTitleOffset(1.70) # accmc_hist.Draw("HIST SAME") accmc_hist.Draw("HIST") data_hist.Draw("E SAME") accmc_hist.GetYaxis().SetLabelOffset(0.01) accmc_hist.GetYaxis().SetTitleOffset(1.70) accmc_hist.SetMinimum(0) max_y = 1.2 * max(data_hist.GetMaximum(), accmc_hist.GetMaximum()) accmc_hist.SetMaximum(max_y) # chi2 = data_hist.Chi2Test(accmc_hist.GetPtr(), "CHI2/NDF") chi2 = calculate_chi_squared(data_hist, accmc_hist) if plot_chi2: latex.DrawLatex(0.25, 0.87, f"#chi^{{2}}/bin = {chi2:.1f}") if plot_acc_corrected: # Overlay acceptance corrected data only if bkgnd subtracted canvas_gen.cd(ihist + 1) efficiency_hists.append(accmc_hist.Clone()) efficiency_hists[-1].Divide(genmc_hist.GetPtr()) # set bins where there is very little accmc data as it will lead to large errs in efficiency for bin in range(1, efficiency_hists[-1].GetNbinsX() + 1): if accmc_hist.GetBinContent(bin) < 20: efficiency_hists[-1].SetBinContent(bin, 0) corrected_data_hists[-1].Divide(efficiency_hists[-1]) genmc_hist.Draw("HIST") genmc_hist.GetYaxis().SetLabelOffset(0.01) genmc_hist.GetYaxis().SetTitleOffset(1.70) corrected_data_hists[-1].Draw("E SAME") genmc_hist.SetMinimum(0) max_y = 1.2 * max(corrected_data_hists[-1].GetMaximum(), genmc_hist.GetMaximum()) genmc_hist.SetMaximum(max_y) chi2_gen = calculate_chi_squared(corrected_data_hists[-1], genmc_hist) if plot_chi2: latex.DrawLatex(0.25, 0.87, f"#chi^{{2}}/bin = {chi2_gen:.1f}") else: # STACK BACKGROUND stacks.append(THStack("stack", "")) for booked_hist, srctype in zip([bkgnd_hist, accmc_hist], [kBkgnd, kAccMC]): booked_hist.SetFillColorAlpha(kColors[srctype], 0.9) booked_hist.SetLineWidth(0) hist_ptr = booked_hist.GetPtr() stacks[-1].Add(hist_ptr) stacks[-1].Draw("HIST") data_hist.Draw("E SAME") stacks[-1].SetMinimum(0) # Set font sizes for stacked histogram stacks[-1].GetXaxis().SetLabelSize(label_size) stacks[-1].GetXaxis().SetTitleSize(title_size) stacks[-1].GetXaxis().SetNdivisions(505) stacks[-1].GetYaxis().SetLabelSize(label_size) stacks[-1].GetYaxis().SetTitleSize(title_size) stacks[-1].GetYaxis().SetNdivisions(505) stacks[-1].GetYaxis().SetLabelOffset(0.01) stacks[-1].GetYaxis().SetTitleOffset(1.70) max_y = 1.2 * max(data_hist.GetMaximum(), stacks[-1].GetStack().Last().GetMaximum()) stacks[-1].SetMaximum(max_y) chi2_stack = calculate_chi_squared(data_hist, stacks[-1].GetStack().Last()) if plot_chi2: latex.DrawLatex(0.25, 0.87, f"#chi^{{2}}/bin = {chi2_stack:.1f}") canvas.Print(f"{output_name}.{output_format}") if plot_acc_corrected: canvas_gen.Print(f"{output_name}_acc_corrected.{output_format}") # canvas.Print(f"{output_name}.pdf") # canvas.Print(f"{output_name}.pdf]") # THStack is drawn on TCanvas. Deleting TCanvas (which normally happens when it goes out of scope) # before THStack will lead to improper deallocation. Also deleting elements of stacks in a for loop # does not work. The entire object needs to be deleted del stacks del canvas