Jpp 20.0.0-rc.7
the software that should make you happy
JTrialFactorsFromCondNLLR.cc
#include <string>
#include <iostream>
#include <fstream>
#include <unistd.h> // getopt
#include <filesystem>
#include <vector>
#include <map>
#include <exception>
#include <algorithm>

#include "TROOT.h"
#include "TFile.h"
#include "TF1.h"
#include "TH1D.h"
#include "TH2D.h"
#include "TGraph.h"
#include "TMath.h"

#include "JTools/JAbstractHistogram.hh"
#include "Jeep/JMessage.hh"
#include "Jeep/JParser.hh"

/**
 * \file
 * Application that takes the TH2Ds of NLLR values of multiple candidates, generated by
 * JGen2_condNLLR, and computes the trial factor for their joint analysis.
 * Candidates can also be duplicated via duplicatesPerCandidate (for testing).
 *
 * \author fvazquez
 */
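// Hypothetical usage sketch (not from the source; the executable name is assumed to match the
// source file and the file names are placeholders). The application reads a text file listing
// one TH2D NLLR file per candidate (-i), optionally duplicates each candidate (-n), and writes
// the p-value and trial-factor histograms to an output ROOT file (-o):
//
//   JTrialFactorsFromCondNLLR -i candidate_nllr_files.txt -o trial_factors.root -n 1 -d 1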
int main(int argc, char **argv)
{
  using namespace std;
  using namespace JPP;
  namespace fs = std::filesystem;

  typedef JAbstractHistogram<double> histogram_type;

  fs::path outputFilepath;
  fs::path candidateNllrListFilepath;
  size_t duplicatesPerCandidate;
  histogram_type X;
  int debug;
  try {

    JParser<> zap;

    zap['o'] = make_field(outputFilepath, "output file with histograms");
    zap['i'] = make_field(candidateNllrListFilepath, "file with list of TH2D NLLR files to use, each one an output from JPseudoExperimentCondNLLR");
    zap['n'] = make_field(duplicatesPerCandidate, "number of times to duplicate candidates") = 1;
    zap['x'] = make_field(X, "x-axis of -log(p) histogram") = histogram_type(80, 0., +8);
    zap['d'] = make_field(debug) = 1;

    zap(argc, argv);
  }
  catch(const exception& error) {
    FATAL(error.what() << endl);
  }


  // Read the list of files to open
  vector<fs::path> candidateNllrFilepath_vec;
  ifstream file( candidateNllrListFilepath );
  string line;
  if (file.is_open()) {
    while (getline(file, line)) {
      if ( fs::is_regular_file(line) ) {
        candidateNllrFilepath_vec.push_back(line);
      } else {
        cout << " Skipping file '" << line << "': it does not exist" << endl;
      }
    }
    file.close();
  }
  else {
    cerr << "Unable to open file: " << candidateNllrListFilepath << endl;
  }
  size_t numberOfCandidates     = candidateNllrFilepath_vec.size();
  size_t numberOfCandidates_eff = numberOfCandidates * duplicatesPerCandidate;

  // Prepare output objects
  size_t nbins_pT = X.getNumberOfBins(); // Note that these are actually -log10(pT) values, to help with histogram presentation
  double min_pT   = X.getLowerLimit();
  double max_pT   = X.getUpperLimit();
  TFile outfile(outputFilepath.c_str(), "recreate");
  TH1D h1d_pT_rcdf_bestof( "h1d_pT_rcdf_bestofcandidate",
                           "Distribution of p_{local} over complete dataset (best 1st period candidate); -log_{10}(p_{local}); Reverse CDF",
                           nbins_pT, min_pT, max_pT);
  TH1D h1d_pT_rcdf_constr( "h1d_pT_rcdf_constrcandidate",
                           "Distribution of p_{local} over complete dataset (best filtered candidate); -log_{10}(p_{local}); Reverse CDF",
                           nbins_pT, min_pT, max_pT);
  TH1D h1d_pT_trialfactor_bestof( "h1d_pT_trialfactor_bestofcandidate",
                                  "Trial factor for p_{local} over complete dataset (best 1st period candidate); -log_{10}(p_{local}); Trial factor",
                                  nbins_pT, min_pT, max_pT);
  TH1D h1d_pT_trialfactor_constr( "h1d_pT_trialfactor_constrcandidate",
                                  "Trial factor for p_{local} over complete dataset (best filtered candidate); -log_{10}(p_{local}); Trial factor",
                                  nbins_pT, min_pT, max_pT);

  for (size_t pT_bin = 1; pT_bin != nbins_pT+1; ++pT_bin) {
    h1d_pT_rcdf_bestof.SetBinContent(pT_bin, 0.); // Will be filled with sum terms
    h1d_pT_rcdf_constr.SetBinContent(pT_bin, 1.); // Will be filled with product terms
  }
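
  // Accumulation scheme for the candidate loop below: h1d_pT_rcdf_bestof starts at 0 and collects
  // additive per-candidate contributions (via AddBinContent), while h1d_pT_rcdf_constr starts at 1
  // and collects multiplicative per-candidate CDF factors; the product is converted into a
  // reverse CDF (1 - product) only after the loop over candidates.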


  // Loop over candidates
  double numberOfCandidates_constr_eff = 0;
  cout << "Looping over " << numberOfCandidates << " candidates, duplicated " << duplicatesPerCandidate << " times" << endl;
  for (size_t i_cand = 0; i_cand != numberOfCandidates; ++i_cand) {
    // Load inputs
    TFile infile( ( candidateNllrFilepath_vec[i_cand] ).c_str(), "read");
    TH2D* h2d_nllr        = infile.Get<TH2D>("h2d_nllr");
    TH2D* h2d_nllr_constr = infile.Get<TH2D>("h2d_nllr_constr");

    size_t nbins_L1 = h2d_nllr->GetNbinsX();
    size_t nbins_LT = h2d_nllr->GetNbinsY();
    TH1D* h1d_LT = h2d_nllr->ProjectionY();
    h1d_LT->GetXaxis()->SetRange(0, nbins_LT+1);          // So the cumulative includes under/overflow
    TH1D* h1d_LT_rcdf = (TH1D*) h1d_LT->GetCumulative(false);
    h1d_LT->Delete();


    // *** Fill out the "Selected" distribution contribution ***
    // Assume that the h2d_nllr axes have binning starting at zero and that it represents a 2D pdf (i.e. its integral == 1).
    // This would work differently if these were actual functions, or if the histograms did not start at zero.

    TH1D* h1d_L1 = h2d_nllr->ProjectionX();
    h1d_L1->GetXaxis()->SetRange(0, nbins_L1+1);           // So the cumulative includes under/overflow
    TH1D* h1d_L1_cdf = (TH1D*) h1d_L1->GetCumulative();    // We use the fact that hist_cumul.Bin(n) = hist.Bin(1) + ... + hist.Bin(n)
    h1d_L1->Delete();

    // Prepare the projections of NLLR_all (LT) into histograms, per value of NLLR_1st (L1)
    vector<TH1D*> h1d_LT_cdf_perL1_vec = {};
    for (size_t L1_bin = 0; L1_bin != nbins_L1+1; ++L1_bin) {
      TH1D* h1d_LT_L1slice = h2d_nllr->ProjectionY( ("_py_" + to_string(L1_bin)).c_str(), L1_bin, L1_bin );
      h1d_LT_cdf_perL1_vec.push_back( (TH1D*) h1d_LT_L1slice->GetCumulative() );
      h1d_LT_L1slice->Delete();
      if (h1d_LT_cdf_perL1_vec.back()->GetBinContent(nbins_LT) > 0) {
        h1d_LT_cdf_perL1_vec.back()->Scale( 1. / h1d_LT_cdf_perL1_vec.back()->GetBinContent(nbins_LT) );
      } // else { what? Replace all bins by 0.5? }
    }

    // Compute the contribution to the final CDF from this candidate (boosted by Duplicates term)
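    // Sketch of the term accumulated below (added interpretation): writing F_L1 for the CDF of the
    // 1st-period NLLR and F_LT|b for the conditional CDF of the total NLLR given L1 in bin b,
    //   rcdf_sumcontr(x) = (1/N) * ( 1 - sum_b [ F_L1(b)^Neff - F_L1(b-1)^Neff ] * F_LT|b(x) ),
    // with N the number of candidates and Neff = N * duplicatesPerCandidate; the bracketed term
    // is the probability that the largest of Neff independent L1 values falls in bin b.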
    TGraph gpT_rcdf_bestof_sumcontr;
    for (size_t LT_bin = 1; LT_bin != nbins_LT+1; ++LT_bin) {
      double rcdf_sumcontr = 1./numberOfCandidates
                           - TMath::Power( h1d_L1_cdf->GetBinContent(0), int(numberOfCandidates_eff) ) / numberOfCandidates
                             * h1d_LT_cdf_perL1_vec[0]->GetBinContent(LT_bin-1);
      for (size_t L1_bin = 1; L1_bin != nbins_L1+1; ++L1_bin) {
        rcdf_sumcontr -= ( TMath::Power( h1d_L1_cdf->GetBinContent(L1_bin),   int(numberOfCandidates_eff) )
                         - TMath::Power( h1d_L1_cdf->GetBinContent(L1_bin-1), int(numberOfCandidates_eff) ) ) / numberOfCandidates
                         * h1d_LT_cdf_perL1_vec[L1_bin]->GetBinContent(LT_bin-1);
      }
      double pT = h1d_LT_rcdf->GetBinContent(LT_bin);
      double neglog10pT = ( pT > 0 ? -TMath::Log10(pT) : 100 );
      gpT_rcdf_bestof_sumcontr.AddPoint( neglog10pT, rcdf_sumcontr );
    }
    // TODO: Sanitize TGraph against multiple points in a row with same X value

    // Fill final histogram with reverse CDF contribution
    for (size_t pT_bin = 1; pT_bin != nbins_pT+1; ++pT_bin) {
      double rcdf_sumcontr = 0;
      double neglog10pT = h1d_pT_rcdf_bestof.GetBinLowEdge(pT_bin);
      if (pT_bin == 1) {
        rcdf_sumcontr = 1./numberOfCandidates;
      } else if ( neglog10pT <= gpT_rcdf_bestof_sumcontr.GetPointX(nbins_LT-1) ) {
        rcdf_sumcontr = gpT_rcdf_bestof_sumcontr.Eval( neglog10pT ); // TODO: Do better than Eval
      }
      h1d_pT_rcdf_bestof.AddBinContent(pT_bin, rcdf_sumcontr);
    }

    // Clean-up
    h1d_L1_cdf->Delete();
    for (size_t L1_bin = 0; L1_bin != nbins_L1+1; ++L1_bin) {
      h1d_LT_cdf_perL1_vec[L1_bin]->Delete();
    }


    // *** Fill out the "Filtered" distribution contribution ***
    // Assume h2d_nllr_constr has the same weighting as h2d_nllr, where h2d_nllr->Integral() == 1.
    // TODO: Double-check calculations for the last bin...

    TH1D* h1d_LT_constr = h2d_nllr_constr->ProjectionY();
    h1d_LT_constr->GetXaxis()->SetRange(0, nbins_LT+1);    // So the cumulative includes under/overflow
    double P_passfilter = h1d_LT_constr->Integral(0, nbins_LT+1);
    TH1D* h1d_LT_cdf_constr = (TH1D*) h1d_LT_constr->GetCumulative();
    h1d_LT_cdf_constr->Scale( 1./P_passfilter );
    h1d_LT_constr->Delete();
    numberOfCandidates_constr_eff += P_passfilter * duplicatesPerCandidate;

    // Compute the contribution to the final CDF from this candidate (boosted by Duplicates term)
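    // Sketch of the factor accumulated below (added interpretation): with P_passfilter the
    // probability that this candidate survives the filter and F_LT,constr the conditional CDF of
    // its total NLLR given that it passes, each threshold x gets the factor
    //   cdf_prodcontr(x) = ( (1 - P_passfilter) + P_passfilter * F_LT,constr(x) )^duplicates,
    // i.e. the probability that none of the duplicates of this candidate both passes the filter
    // and exceeds the threshold.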
    TGraph gpT_cdf_constr_prodcontr;
    for (size_t LT_bin = 1; LT_bin != nbins_LT+1; ++LT_bin) {
      double cdf_prodcontr = TMath::Power( (1. - P_passfilter) + P_passfilter * h1d_LT_cdf_constr->GetBinContent(LT_bin-1), int(duplicatesPerCandidate) );
      double pT = h1d_LT_rcdf->GetBinContent(LT_bin);
      double neglog10pT = ( pT > 0 ? -TMath::Log10(pT) : 100 );
      gpT_cdf_constr_prodcontr.AddPoint( neglog10pT, cdf_prodcontr );
    }
    // TODO: Sanitize TGraph against multiple points in a row with same X value

    // Fill final histogram with CDF contribution (to be turned into a reverse CDF at the end)
    for (size_t pT_bin = 1; pT_bin != nbins_pT+1; ++pT_bin) {
      double cdf_prodcontr = 1.;
      double neglog10pT = h1d_pT_rcdf_constr.GetBinLowEdge(pT_bin);
      if (pT_bin == 1) {
        cdf_prodcontr = 0;
      } else if ( neglog10pT <= gpT_cdf_constr_prodcontr.GetPointX(nbins_LT-1) ) {
        cdf_prodcontr = gpT_cdf_constr_prodcontr.Eval( neglog10pT ); // TODO: Do better than Eval
      }
      h1d_pT_rcdf_constr.SetBinContent(pT_bin, h1d_pT_rcdf_constr.GetBinContent(pT_bin) * cdf_prodcontr);
    }

    // Clean-up
    h1d_LT_rcdf->Delete();
    h1d_LT_cdf_constr->Delete();

    h2d_nllr->Delete();
    h2d_nllr_constr->Delete();

  } // End loop on candidates


  // Turn the "filtered" output histogram from a CDF into a reverse CDF (p-values),
  // unset the bin errors, and fill out the trial factors
  for (size_t pT_bin = 1; pT_bin != nbins_pT+1; ++pT_bin) {
    h1d_pT_rcdf_bestof.SetBinError(pT_bin, 0);
    h1d_pT_rcdf_constr.SetBinContent(pT_bin, 1. - h1d_pT_rcdf_constr.GetBinContent(pT_bin) );
    h1d_pT_rcdf_constr.SetBinError(pT_bin, 0);

    h1d_pT_trialfactor_bestof.SetBinContent(pT_bin, h1d_pT_rcdf_bestof.GetBinContent(pT_bin) / TMath::Power(10, -h1d_pT_rcdf_bestof.GetBinLowEdge(pT_bin)) );
    h1d_pT_trialfactor_bestof.SetBinError(pT_bin, 0);
    h1d_pT_trialfactor_constr.SetBinContent(pT_bin, h1d_pT_rcdf_constr.GetBinContent(pT_bin) / TMath::Power(10, -h1d_pT_rcdf_constr.GetBinLowEdge(pT_bin)) );
    h1d_pT_trialfactor_constr.SetBinError(pT_bin, 0);
  }
  h1d_pT_rcdf_bestof.SetStats(0);
  h1d_pT_rcdf_constr.SetStats(0);
  h1d_pT_trialfactor_bestof.SetStats(0);
  h1d_pT_trialfactor_constr.SetStats(0);

  // Make naive trial factor functions
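  // Added explanatory note, read off the TF1 expressions below: for n effective independent
  // candidates, p_global = 1 - (1 - p_local)^n, so the naive trial factor is
  // p_global / p_local, evaluated at p_local = 10^-x.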
  TF1 f1_pT_trialfactor_all(    "f1_pT_trialfactor_allcandidates",
                                ( "(1 - TMath::Power(1-TMath::Power(10,-x), " + to_string(numberOfCandidates_eff)        + " ) ) / TMath::Power(10,-x)" ).c_str(),
                                min_pT, max_pT);
  TF1 f1_pT_trialfactor_constr( "f1_pT_trialfactor_constrcandidates",
                                ( "(1 - TMath::Power(1-TMath::Power(10,-x), " + to_string(numberOfCandidates_constr_eff) + " ) ) / TMath::Power(10,-x)" ).c_str(),
                                min_pT, max_pT);
  f1_pT_trialfactor_all.SetLineWidth(1);
  f1_pT_trialfactor_all.SetLineColor(1);
  f1_pT_trialfactor_constr.SetLineWidth(1);
  f1_pT_trialfactor_constr.SetLineColor(1);

  // Save results
  outfile.cd();
  h1d_pT_rcdf_bestof.Write();
  h1d_pT_rcdf_constr.Write();
  h1d_pT_trialfactor_bestof.Write();
  h1d_pT_trialfactor_constr.Write();
  f1_pT_trialfactor_all.Write();
  f1_pT_trialfactor_constr.Write();
  outfile.Write();
  outfile.Close();


}
Symbol references:
  #define FATAL(A) (Definition: JMessage.hh:67)
  int debug: debug level (Definition: JSirene.cc:72)
  #define make_field(A, ...): macro to convert parameter to JParserTemplateElement object (Definition: JParser.hh:2142)
  JParser: utility class to parse command line options (Definition: JParser.hh:1698)
  JPP: this name space includes all other name spaces (except KM3NETDAQ, KM3NET and ANTARES)
  JAbstractHistogram: simple data structure for histogram binning