~ajf/TriggerApplication/library__daq_8h_source.html

 #ifndef LIBRARY_DAQ_H

 #define LIBRARY_DAQ_H


 #pragma once


 #include "build.h"


 #include <unistd.h>

 #include <stdlib.h>

 #include <stdio.h>

 #include <string.h>

 #include <thrust/extrema.h>

 #include <limits>

 #include <limits.h>

 #include <thrust/sort.h>

 #include <thrust/device_ptr.h>


 typedef unsigned short offset_t;


 // define variable types

 #if defined __HISTOGRAM_UCHAR__

 typedef unsigned char histogram_t;

 #elif defined __HISTOGRAM_USHORT__

 typedef unsigned short histogram_t;

 #elif defined __HISTOGRAM_UINT__

 typedef unsigned int histogram_t;

 #endif


 #if defined __TIME_OF_FLIGHT_UCHAR__

 typedef unsigned char time_of_flight_t;

 #elif defined __TIME_OF_FLIGHT_USHORT__

 typedef unsigned short time_of_flight_t;

 #elif defined __TIME_OF_FLIGHT_UINT__

 typedef unsigned int time_of_flight_t;

 #endif


 // define global variables //

 double distance_between_vertices; // linear distance between test vertices

 double wall_like_distance; // distance from wall (in units of distance_between_vertices) to define wall-like events

 unsigned int time_step_size; // time binning for the trigger

 __constant__ unsigned int constant_time_step_size;

 unsigned int water_like_threshold_number_of_pmts; // number of pmts above which a trigger is possible for water-like events

 unsigned int wall_like_threshold_number_of_pmts; // number of pmts above which a trigger is possible for wall-like events

 unsigned int nhits_threshold_min, nhits_threshold_max;

 double coalesce_time; // time such that if two triggers are closer than this they are coalesced into a single trigger

 double trigger_gate_up; // duration to be saved after the trigger time

 double trigger_gate_down; // duration to be saved before the trigger time

 unsigned int max_n_hits_per_job; // max n of pmt hits per job

 double dark_rate;

 float costheta_cone_cut; // max distance between measured costheta and cerenkov costheta

 __constant__ float constant_costheta_cone_cut;

 bool select_based_on_cone; // for mode == 10, set to 0 to select based on vertices, to 1 to select based on cone

 __constant__ bool constant_select_based_on_cone;

 bool return_vertex; // return vertex position

 bool return_direction; // return cone direction

 __constant__ bool constant_return_direction;

 double detector_height; // detector height

 double detector_radius; // detector radius

 unsigned int n_PMTs; // number of pmts in the detector

 __constant__ unsigned int constant_n_PMTs;

 double * PMT_x, *PMT_y, *PMT_z; // coordinates of the pmts in the detector

 unsigned int n_test_vertices; // number of test vertices

 unsigned int n_water_like_test_vertices; // number of test vertices

 __constant__ unsigned int constant_n_test_vertices;

 __constant__ unsigned int constant_n_water_like_test_vertices;

 double * vertex_x, * vertex_y, * vertex_z; // coordinates of test vertices

 unsigned int number_of_kernel_blocks;  // number of cores to be used

 dim3 number_of_kernel_blocks_3d;

 unsigned int number_of_threads_per_block; // number of threads per core to be used

 dim3 number_of_threads_per_block_3d;

 unsigned int grid_size;  // grid = (n cores) X (n threads / core)

 offset_t time_offset;  // ns, offset to make times positive

 __constant__ offset_t constant_time_offset;

 unsigned int n_time_bins; // number of time bins

 __constant__ unsigned int constant_n_time_bins;

 unsigned int n_direction_bins_theta; // number of direction bins

 __constant__ unsigned int constant_n_direction_bins_theta;

 unsigned int n_direction_bins_phi; // number of direction bins

 __constant__ unsigned int constant_n_direction_bins_phi;

 unsigned int n_direction_bins; // number of direction bins

 __constant__ unsigned int constant_n_direction_bins;

 unsigned int n_hits; // number of input hits from the detector

 __constant__ unsigned int constant_n_hits;

 unsigned int * host_ids; // pmt id of a hit

 unsigned int *device_ids;

 texture<unsigned int, 1, cudaReadModeElementType> tex_ids;

 unsigned int * host_times;  // time of a hit

 unsigned int *device_times;

 texture<unsigned int, 1, cudaReadModeElementType> tex_times;

 // corrected tim bin of each hit (for each vertex)

 unsigned int * host_time_bin_of_hit;

 unsigned int * device_time_bin_of_hit;

 // npmts per time bin

 histogram_t * device_n_pmts_per_time_bin; // number of active pmts in a time bin

 unsigned int * host_n_pmts_per_time_bin;

 unsigned int * device_n_pmts_nhits; // number of active pmts

 unsigned int * host_n_pmts_nhits;

 unsigned int * device_n_pmts_per_time_bin_and_direction_bin; // number of active pmts in a time bin and direction bin

 float * device_dx_per_time_bin; // dx from vertex to hit in a time bin

 float * device_dy_per_time_bin; // dy from vertex to hit in a time bin

 float * device_dz_per_time_bin; // dz from vertex to hit in a time bin

 float * device_meanx_per_time_bin; // sum of x positions of chosen PMTs

 float * device_meany_per_time_bin; // sum of y positions of chosen PMTs

 float * device_meanz_per_time_bin; // sum of z positions of chosen PMTs

 //unsigned int * device_time_nhits; // trigger time

 //unsigned int * host_time_nhits;

 // tof

 double speed_light_water;

 double cerenkov_angle_water;

 float cerenkov_costheta;

 __constant__ float constant_cerenkov_costheta;

 double twopi;

 bool cylindrical_grid;

 time_of_flight_t *device_times_of_flight; // time of flight between a vertex and a pmt

 time_of_flight_t *host_times_of_flight;

 float *device_light_dx; // x distance between a vertex and a pmt

 float *host_light_dx;

 float *device_light_dy; // y distance between a vertex and a pmt

 float *host_light_dy;

 float *device_light_dz; // z distance between a vertex and a pmt

 float *host_light_dz;

 float *device_light_dr; // distance between a vertex and a pmt

 float *host_light_dr;

 float *device_PMTx; // PMT x

 float *host_PMTx;

 float *device_PMTy; // PMT y

 float *host_PMTy;

 float *device_PMTz; // PMT z

 float *host_PMTz;

 bool *device_directions_for_vertex_and_pmt; // test directions for vertex and pmt

 bool *host_directions_for_vertex_and_pmt;

 texture<time_of_flight_t, 1, cudaReadModeElementType> tex_times_of_flight;

 texture<float, 1, cudaReadModeElementType> tex_light_dx;

 texture<float, 1, cudaReadModeElementType> tex_light_dy;

 texture<float, 1, cudaReadModeElementType> tex_light_dz;

 texture<float, 1, cudaReadModeElementType> tex_light_dr;

 texture<float, 1, cudaReadModeElementType> tex_PMTx;

 texture<float, 1, cudaReadModeElementType> tex_PMTy;

 texture<float, 1, cudaReadModeElementType> tex_PMTz;

 //texture<bool, 1, cudaReadModeElementType> tex_directions_for_vertex_and_pmt;

 // triggers

 std::vector<std::pair<unsigned int,unsigned int> > candidate_trigger_pair_vertex_time;  // pair = (v, t) = (a vertex, a time at the end of the 2nd of two coalesced bins)

 std::vector<unsigned int> candidate_trigger_npmts_in_time_bin; // npmts in time bin

 std::vector<unsigned int> candidate_trigger_npmts_in_cone_in_time_bin;

 std::vector<std::pair<unsigned int,unsigned int> > trigger_pair_vertex_time;

 std::vector<unsigned int> trigger_npmts_in_time_bin;

 std::vector<unsigned int> trigger_npmts_in_cone_in_time_bin;

 std::vector<std::pair<unsigned int,unsigned int> > final_trigger_pair_vertex_time;

 std::vector<double> output_trigger_information;

 std::vector<float> candidate_trigger_meanx_per_time_bin; // sum of pmts x in time bin

 std::vector<float> candidate_trigger_meany_per_time_bin; // sum of pmts y in time bin

 std::vector<float> candidate_trigger_meanz_per_time_bin; // sum of pmts z in time bin

 std::vector<float> trigger_meanx_per_time_bin;

 std::vector<float> trigger_meany_per_time_bin;

 std::vector<float> trigger_meanz_per_time_bin;

 // C timing

 struct timeval t0;

 struct timeval t1;

 // CUDA timing

 cudaEvent_t start, stop, total_start, total_stop;

 // make output txt file for plotting?

 bool output_txt;

 unsigned int correct_mode;

 unsigned int write_output_mode;

 // find candidates

 histogram_t * host_max_number_of_pmts_in_time_bin;

 histogram_t * device_max_number_of_pmts_in_time_bin;

 unsigned int *  host_vertex_with_max_n_pmts;

 unsigned int *  device_vertex_with_max_n_pmts;

 unsigned int * device_number_of_pmts_in_cone_in_time_bin;

 unsigned int * host_max_number_of_pmts_in_cone_in_time_bin;

 unsigned int * device_max_number_of_pmts_in_cone_in_time_bin;

 float * device_meanx_for_vertex_with_max_n_pmts_per_time_bin;

 float * device_meany_for_vertex_with_max_n_pmts_per_time_bin;

 float * device_meanz_for_vertex_with_max_n_pmts_per_time_bin;

 float * host_meanx_for_vertex_with_max_n_pmts_per_time_bin;

 float * host_meany_for_vertex_with_max_n_pmts_per_time_bin;

 float * host_meanz_for_vertex_with_max_n_pmts_per_time_bin;

 // gpu properties

 int max_n_threads_per_block;

 int max_n_blocks;

 // verbosity level

 bool use_verbose;

 bool use_timing;

 // files

 std::string event_file;

 std::string detector_file;

 std::string pmts_file;

 std::string output_file;

 std::string event_file_base;

 std::string event_file_suffix;

 std::string output_file_base;

 float elapsed_parameters, elapsed_pmts, elapsed_detector, elapsed_vertices,

   elapsed_threads, elapsed_tof, elapsed_directions, elapsed_memory_tofs_dev, elapsed_memory_directions_dev, elapsed_memory_candidates_host, elapsed_tofs_copy_dev,  elapsed_directions_copy_dev,

   elapsed_input, elapsed_memory_dev, elapsed_copy_dev, elapsed_kernel_correct_times_and_get_n_pmts_per_time_bin,

   elapsed_threads_candidates, elapsed_candidates_memory_dev, elapsed_candidates_kernel,

   elapsed_candidates_copy_host, choose_candidates, elapsed_coalesce, elapsed_gates, elapsed_free, elapsed_total,

   elapsed_tofs_free, elapsed_reset, elapsed_write_output;

 unsigned int greatest_divisor;

 unsigned int the_max_time;

 unsigned int nhits_window;

 int n_events;


 __global__ void kernel_find_vertex_with_max_npmts_in_timebin(histogram_t * np, histogram_t * mnp, unsigned int * vmnp);

 __global__ void kernel_find_vertex_with_max_npmts_in_timebin(histogram_t * np, histogram_t * mnp, unsigned int * vmnp, float * meanxin, float * meanyin, float * meanzin, float * meanxout, float * meanyout, float * meanzout);

 __global__ void kernel_find_vertex_with_max_npmts_and_center_of_mass_in_timebin(histogram_t * np, histogram_t * mnp, unsigned int * vmnp, unsigned int *nc, unsigned int *mnc);

 __global__ void kernel_find_vertex_with_max_npmts_in_timebin_and_directionbin(unsigned int * np, histogram_t * mnp, unsigned int * vmnp);


 unsigned int read_number_of_input_hits();

 bool read_input();

 void print_parameters();

 void print_parameters_2d();

 void print_input();

 void print_times_of_flight();

 void print_directions();

 void print_pmts();

 bool read_detector();

 bool read_the_pmts();

 bool read_the_detector();

 void make_test_vertices();

 bool setup_threads_for_tof();

 bool setup_threads_for_tof_biparallel();

 bool setup_threads_for_tof_2d(unsigned int A, unsigned int B);

 bool setup_threads_to_find_candidates();

 bool setup_threads_nhits();

 bool read_the_input();

 bool read_the_input_ToolDAQ(std::vector<int> PMTid, std::vector<int> time);

 void allocate_tofs_memory_on_device();

 void allocate_directions_memory_on_device();

 void allocate_correct_memory_on_device();

 void allocate_correct_memory_on_device_nhits();

 void allocate_candidates_memory_on_host();

 void allocate_candidates_memory_on_device();

 void make_table_of_tofs();

 void make_table_of_directions();

 void fill_correct_memory_on_device();

 void fill_tofs_memory_on_device();

 void fill_directions_memory_on_device();

 void fill_tofs_memory_on_device_nhits();

 void coalesce_triggers();

 void separate_triggers_into_gates();

 void separate_triggers_into_gates(std::vector<int> * trigger_ns, std::vector<int> * trigger_ts, std::vector<double> * trigger_vtx_xs, std::vector<double> * trigger_vtx_ys, std::vector<double> * trigger_vtx_zs, std::vector<double> * trigger_dir_xs, std::vector<double> * trigger_dir_ys, std::vector<double> * trigger_dir_zs);

 float timedifference_msec(struct timeval t0, struct timeval t1);

 void start_c_clock();

 double stop_c_clock();

 void start_cuda_clock();

 double stop_cuda_clock();

 void start_total_cuda_clock();

 double stop_total_cuda_clock();

 unsigned int get_distance_index(unsigned int pmt_id, unsigned int vertex_block);

 unsigned int get_time_index(unsigned int hit_index, unsigned int vertex_block);

 unsigned int get_direction_index_at_pmt(unsigned int pmt_id, unsigned int vertex_index, unsigned int direction_index);

 unsigned int get_direction_index_at_angles(unsigned int iphi, unsigned int itheta);

 unsigned int get_direction_index_at_time(unsigned int time_bin, unsigned int vertex_index, unsigned int direction_index);

 __device__ unsigned int device_get_distance_index(unsigned int pmt_id, unsigned int vertex_block);

 __device__ unsigned int device_get_time_index(unsigned int hit_index, unsigned int vertex_block);

 __device__ unsigned int device_get_direction_index_at_pmt(unsigned int pmt_id, unsigned int vertex_index, unsigned int direction_index);

 __device__ unsigned int device_get_direction_index_at_angles(unsigned int iphi, unsigned int itheta);

 __device__ unsigned int device_get_direction_index_at_time(unsigned int time_bin, unsigned int vertex_index, unsigned int direction_index);

 void print_gpu_properties();

 unsigned int read_number_of_pmts();

 bool read_pmts();

 void free_event_memories();

 void free_event_memories_nhits();

 void free_global_memories();

 void copy_candidates_from_device_to_host();

 void choose_candidates_above_threshold();

 bool set_input_file_for_event(int n);

 void set_output_file();

 void set_output_file_nhits(unsigned int threshold);

 void write_output();

 void write_output_nhits(unsigned int n);

 void initialize_output();

 void initialize_output_nhits();

 float read_value_from_file(std::string paramname, std::string filename);

 void read_user_parameters();

 void read_user_parameters_nhits();

 void check_cudamalloc_float(unsigned int size);

 void check_cudamalloc_int(unsigned int size);

 void check_cudamalloc_unsigned_short(unsigned int size);

 void check_cudamalloc_unsigned_char(unsigned int size);

 void check_cudamalloc_unsigned_int(unsigned int size);

 void check_cudamalloc_bool(unsigned int size);

 void setup_threads_for_histo(unsigned int n);

 unsigned int find_greatest_divisor(unsigned int n, unsigned int max);

 void setup_threads_for_histo();

 void setup_threads_for_histo_iterated(bool last);

 void setup_threads_for_histo_per(unsigned int n);


 unsigned int read_number_of_input_hits(){


   FILE *f=fopen(event_file.c_str(), "r");

   if (f == NULL){

     printf(" [2] cannot read input file \n");

     fclose(f);

     return 0;

   }


   unsigned int n_hits = 0;


   for (char c = getc(f); c != EOF; c = getc(f))

     if (c == '\n')

       n_hits ++;


   fclose(f);

   return n_hits;


 }


 bool read_input(){


   FILE *f=fopen(event_file.c_str(), "r");


   int time;

   double timef;

   unsigned int id;

   int min = INT_MAX;

   int max = INT_MIN;

   for( unsigned int i=0; i<n_hits; i++){

     if( fscanf(f, "%d %lf", &id, &timef) != 2 ){

       printf(" [2] problem scanning hit %d from input \n", i);

       fclose(f);

       return false;

     }

     time = int(floor(timef));

     host_times[i] = time;

     host_ids[i] = id;

     if( time > max ) max = time;

     if( time < min ) min = time;

   }


   if( min < 0 ){

     for(int i=0; i<n_hits; i++){

       host_times[i] -= min;

     }

     max -= min;

   }


   the_max_time = max;


   fclose(f);

   return true;


 }


 bool read_detector(){


   FILE *f=fopen(detector_file.c_str(), "r");

   double pmt_radius;

   if( fscanf(f, "%lf %lf %lf", &detector_height, &detector_radius, &pmt_radius) != 3 ){

     printf(" [2] problem scanning detector \n");

     fclose(f);

     return false;

   }


   fclose(f);

   return true;


 }


 void print_parameters(){


   printf(" [2] n_test_vertices = %d \n", n_test_vertices);

   printf(" [2] n_water_like_test_vertices = %d \n", n_water_like_test_vertices);

   printf(" [2] n_PMTs = %d \n", n_PMTs);

   printf(" [2] number_of_kernel_blocks = %d \n", number_of_kernel_blocks);

   printf(" [2] number_of_threads_per_block = %d \n", number_of_threads_per_block);

   printf(" [2] grid size = %d -> %d \n", number_of_kernel_blocks*number_of_threads_per_block, grid_size);


 }


 void print_parameters_2d(){


   printf(" [2] n_test_vertices = %d \n", n_test_vertices);

   printf(" [2] n_water_like_test_vertices = %d \n", n_water_like_test_vertices);

   printf(" [2] n_PMTs = %d \n", n_PMTs);

   printf(" [2] number_of_kernel_blocks = (%d, %d) = %d \n", number_of_kernel_blocks_3d.x, number_of_kernel_blocks_3d.y, number_of_kernel_blocks_3d.x * number_of_kernel_blocks_3d.y);

   printf(" [2] number_of_threads_per_block = (%d, %d) = %d \n", number_of_threads_per_block_3d.x, number_of_threads_per_block_3d.y, number_of_threads_per_block_3d.x * number_of_threads_per_block_3d.y);

   printf(" [2] grid size = %d -> %d \n", number_of_kernel_blocks_3d.x*number_of_kernel_blocks_3d.y*number_of_threads_per_block_3d.x*number_of_threads_per_block_3d.y, grid_size);


 }


 void print_input(){


   for(unsigned int i=0; i<n_hits; i++)

     printf(" [2] hit %d time %d id %d \n", i, host_times[i], host_ids[i]);


 }


 void print_pmts(){


   for(unsigned int i=0; i<n_PMTs; i++)

     printf(" [2] pmt %d x %f y %f z %f  \n", i, PMT_x[i], PMT_y[i], PMT_z[i]);


 }


 void print_times_of_flight(){


   printf(" [2] times_of_flight: (vertex, PMT) \n");

   unsigned int distance_index;

   for(unsigned int iv=0; iv<n_test_vertices; iv++){

     printf(" ( ");

     for(unsigned int ip=0; ip<n_PMTs; ip++){

       distance_index = get_distance_index(ip + 1, n_PMTs*iv);

       printf(" %d ", host_times_of_flight[distance_index]);

     }

     printf(" ) \n");

   }

 }


 void print_directions(){


   printf(" [2] directions: (vertex, PMT) \n");

   for(unsigned int iv=0; iv<n_test_vertices; iv++){

     printf(" [ ");

     for(unsigned int ip=0; ip<n_PMTs; ip++){

       printf(" ( ");

       for(unsigned int id=0; id<n_direction_bins; id++){

     printf("%d ", host_directions_for_vertex_and_pmt[get_direction_index_at_pmt(ip, iv, id)]);

       }

       printf(" ) ");

     }

     printf(" ] \n");

   }

 }


 bool read_the_pmts(){


   printf(" [2] --- read pmts \n");

   n_PMTs = read_number_of_pmts();

   if( !n_PMTs ) return false;

   printf(" [2] detector contains %d PMTs \n", n_PMTs);

   PMT_x = (double *)malloc(n_PMTs*sizeof(double));

   PMT_y = (double *)malloc(n_PMTs*sizeof(double));

   PMT_z = (double *)malloc(n_PMTs*sizeof(double));

   if( !read_pmts() ) return false;

   //print_pmts();

   return true;


 }


 bool read_the_detector(){


   printf(" [2] --- read detector \n");

   if( !read_detector() ) return false;

   printf(" [2] detector height %f cm, radius %f cm \n", detector_height, detector_radius);

   return true;


 }


 void make_test_vertices(){


   printf(" [2] --- make test vertices \n");

   float semiheight = detector_height/2.;

   n_test_vertices = 0;


   if( !cylindrical_grid ){


     // 1: count number of test vertices

     for(int i=-1*semiheight; i <= semiheight; i+=distance_between_vertices) {

       for(int j=-1*detector_radius; j<=detector_radius; j+=distance_between_vertices) {

     for(int k=-1*detector_radius; k<=detector_radius; k+=distance_between_vertices) {

       if(pow(j,2)+pow(k,2) > pow(detector_radius,2))

         continue;

       n_test_vertices++;

     }

       }

     }

     vertex_x = (double *)malloc(n_test_vertices*sizeof(double));

     vertex_y = (double *)malloc(n_test_vertices*sizeof(double));

     vertex_z = (double *)malloc(n_test_vertices*sizeof(double));


     // 2: assign coordinates to test vertices

     // water-like events

     n_test_vertices = 0;

     for(int i=-1*semiheight; i <= semiheight; i+=distance_between_vertices) {

       for(int j=-1*detector_radius; j<=detector_radius; j+=distance_between_vertices) {

     for(int k=-1*detector_radius; k<=detector_radius; k+=distance_between_vertices) {


       if(

          // skip endcap region

          abs(i) > semiheight - wall_like_distance*distance_between_vertices ||

          // skip sidewall region

          pow(j,2)+pow(k,2) > pow(detector_radius - wall_like_distance*distance_between_vertices,2)

           ) continue;


       vertex_x[n_test_vertices] = j*1.;

       vertex_y[n_test_vertices] = k*1.;

       vertex_z[n_test_vertices] = i*1.;

       n_test_vertices++;

     }

       }

     }

     n_water_like_test_vertices = n_test_vertices;


     // wall-like events

     for(int i=-1*semiheight; i <= semiheight; i+=distance_between_vertices) {

       for(int j=-1*detector_radius; j<=detector_radius; j+=distance_between_vertices) {

     for(int k=-1*detector_radius; k<=detector_radius; k+=distance_between_vertices) {


       if(

          abs(i) > semiheight - wall_like_distance*distance_between_vertices ||

          pow(j,2)+pow(k,2) > pow(detector_radius - wall_like_distance*distance_between_vertices,2)

           ){


         if(pow(j,2)+pow(k,2) > pow(detector_radius,2)) continue;


         vertex_x[n_test_vertices] = j*1.;

         vertex_y[n_test_vertices] = k*1.;

         vertex_z[n_test_vertices] = i*1.;

         n_test_vertices++;

       }

     }

       }

     }


   }else{ // cylindrical grid


     int n_vertical = detector_height/distance_between_vertices;

     double distance_vertical = detector_height/n_vertical;

     int n_radial = 2.*detector_radius/distance_between_vertices;

     double distance_radial = 2.*detector_radius/n_radial;

     int n_angular;

     double distance_angular;


     printf(" [2] distance_between_vertices %f, distance_vertical %f, distance_radial %f \n",

        distance_between_vertices, distance_vertical, distance_radial);


     double the_r, the_z, the_phi;

     bool first = false; // true: add extra layer near wall

                        // false: regular spacing


     bool add_extra_layer = first;


     // 1: count number of test vertices

     the_r = detector_radius;

     while( the_r >= 0. ){

       n_angular = twopi*the_r / distance_between_vertices;

       distance_angular = twopi/n_angular;


       the_z = -semiheight;


       while( the_z <= semiheight){


     the_phi = 0.;

     while( the_phi < twopi - distance_angular/2. ){


       n_test_vertices ++;


       if( the_r == 0. ) break;

       the_phi += distance_angular;

     }


     if( add_extra_layer ){

       if( the_z + semiheight < 0.3*distance_vertical ) // only true at bottom endcap

         the_z += distance_vertical/2.;

       else if( semiheight - the_z < 0.7*distance_vertical ) // only true near top endcap

         the_z += distance_vertical/2.;

       else

         the_z += distance_vertical;

     }else{

       the_z += distance_vertical;

     }

       }

       if( first ){

     the_r -= distance_radial/2.;

     first = false;

       }

       else{

     the_r -= distance_radial;

       }

     }


     vertex_x = (double *)malloc(n_test_vertices*sizeof(double));

     vertex_y = (double *)malloc(n_test_vertices*sizeof(double));

     vertex_z = (double *)malloc(n_test_vertices*sizeof(double));


     first = add_extra_layer;

     // 2: assign coordinates to test vertices

     // water-like events

     n_test_vertices = 0;


     the_r = detector_radius;

     while( the_r >= 0. ){


       // skip sidewall region

       if(the_r <= detector_radius - wall_like_distance*distance_between_vertices ){


     n_angular = twopi*the_r / distance_between_vertices;

     distance_angular = twopi/n_angular;


     the_z = -semiheight;


     while( the_z <= semiheight){


       // skip endcap region

       if( fabs(the_z) <= semiheight - wall_like_distance*distance_between_vertices ){


         the_phi = 0.;

         while( the_phi < twopi - distance_angular/2. ){


           vertex_x[n_test_vertices] = the_r*cos(the_phi);

           vertex_y[n_test_vertices] = the_r*sin(the_phi);

           vertex_z[n_test_vertices] = the_z;

           n_test_vertices ++;


           if( the_r == 0. ) break;

           the_phi += distance_angular;

         }

       }


       if( add_extra_layer ){

         if( the_z + semiheight < 0.3*distance_vertical ) // only true at bottom endcap

           the_z += distance_vertical/2.;

         else if( semiheight - the_z < 0.7*distance_vertical ) // only true near top endcap

           the_z += distance_vertical/2.;

         else

           the_z += distance_vertical;

       }else{

         the_z += distance_vertical;

       }

     }


       }

       if( first ){

     the_r -= distance_radial/2.;

     first = false;

       }

       else{

     the_r -= distance_radial;

       }

     }


     n_water_like_test_vertices = n_test_vertices;


     first = add_extra_layer;

     // wall-like events

     the_r = detector_radius;

     while( the_r >= 0. ){


       n_angular = twopi*the_r / distance_between_vertices;

       distance_angular = twopi/n_angular;


       the_z = -semiheight;


       while( the_z <= semiheight){


     if( fabs(the_z) > semiheight - wall_like_distance*distance_between_vertices ||

         the_r > detector_radius - wall_like_distance*distance_between_vertices ){


       the_phi = 0.;

       while( the_phi < twopi - distance_angular/2. ){


         vertex_x[n_test_vertices] = the_r*cos(the_phi);

         vertex_y[n_test_vertices] = the_r*sin(the_phi);

         vertex_z[n_test_vertices] = the_z;

         n_test_vertices ++;


         if( the_r == 0. ) break;

         the_phi += distance_angular;

       }

     }

     if( add_extra_layer ){

       if( the_z + semiheight < 0.3*distance_vertical ) // only true at bottom endcap

         the_z += distance_vertical/2.;

       else if( semiheight - the_z < 0.7*distance_vertical ) // only true near top endcap

         the_z += distance_vertical/2.;

       else

         the_z += distance_vertical;

     }else{

       the_z += distance_vertical;

     }

       }

       if( first ){

     the_r -= distance_radial/2.;

     first = false;

       }

       else{

     the_r -= distance_radial;

       }

     }


   }


   printf(" [2] made %d test vertices \n", n_test_vertices);


   return;


 }


 bool setup_threads_for_tof(){


   grid_size = n_test_vertices;


   number_of_kernel_blocks = grid_size / max_n_threads_per_block + 1;

   number_of_threads_per_block = ( number_of_kernel_blocks > 1 ? max_n_threads_per_block : grid_size);


   print_parameters();


   if( number_of_threads_per_block > max_n_threads_per_block ){

     printf(" [2] warning: number_of_threads_per_block = %d cannot exceed max value %d \n", number_of_threads_per_block, max_n_threads_per_block );

     return false;

   }


   if( number_of_kernel_blocks > max_n_blocks ){

     printf(" [2] warning: number_of_kernel_blocks = %d cannot exceed max value %d \n", number_of_kernel_blocks, max_n_blocks );

     return false;

   }


   return true;

 }


 bool setup_threads_for_tof_biparallel(){


   grid_size = n_test_vertices * n_hits;


   number_of_kernel_blocks = grid_size / max_n_threads_per_block + 1;

   number_of_threads_per_block = ( number_of_kernel_blocks > 1 ? max_n_threads_per_block : grid_size);


   print_parameters();


   if( number_of_threads_per_block > max_n_threads_per_block ){

     printf(" [2] --------------------- warning: number_of_threads_per_block = %d cannot exceed max value %d \n", number_of_threads_per_block, max_n_threads_per_block );

     return false;

   }


   if( number_of_kernel_blocks > max_n_blocks ){

     printf(" [2] warning: number_of_kernel_blocks = %d cannot exceed max value %d \n", number_of_kernel_blocks, max_n_blocks );

     return false;

   }


   return true;


 }


 bool setup_threads_for_tof_2d(unsigned int A, unsigned int B){


   if( std::numeric_limits<unsigned int>::max() / B  < A ){

     printf(" [2] --------------------- warning: B = %d times A = %d cannot exceed max value %u \n", B, A, std::numeric_limits<unsigned int>::max() );

     return false;

   }


   grid_size = A * B;

   unsigned int max_n_threads_per_block_2d = sqrt(max_n_threads_per_block);


   number_of_kernel_blocks_3d.x = A / max_n_threads_per_block_2d + 1;

   number_of_kernel_blocks_3d.y = B / max_n_threads_per_block_2d + 1;


   number_of_threads_per_block_3d.x = ( number_of_kernel_blocks_3d.x > 1 ? max_n_threads_per_block_2d : A);

   number_of_threads_per_block_3d.y = ( number_of_kernel_blocks_3d.y > 1 ? max_n_threads_per_block_2d : B);


   print_parameters_2d();


   if( number_of_threads_per_block_3d.x > max_n_threads_per_block_2d ){

     printf(" [2] --------------------- warning: number_of_threads_per_block x = %d cannot exceed max value %d \n", number_of_threads_per_block_3d.x, max_n_threads_per_block_2d );

     return false;

   }


   if( number_of_threads_per_block_3d.y > max_n_threads_per_block_2d ){

     printf(" [2] --------------------- warning: number_of_threads_per_block y = %d cannot exceed max value %d \n", number_of_threads_per_block_3d.y, max_n_threads_per_block_2d );

     return false;

   }


   if( number_of_kernel_blocks_3d.x > max_n_blocks ){

     printf(" [2] warning: number_of_kernel_blocks x = %d cannot exceed max value %d \n", number_of_kernel_blocks_3d.x, max_n_blocks );

     return false;

   }


   if( number_of_kernel_blocks_3d.y > max_n_blocks ){

     printf(" [2] warning: number_of_kernel_blocks y = %d cannot exceed max value %d \n", number_of_kernel_blocks_3d.y, max_n_blocks );

     return false;

   }


   if( std::numeric_limits<int>::max() / (number_of_kernel_blocks_3d.x*number_of_kernel_blocks_3d.y)  < number_of_threads_per_block_3d.x*number_of_threads_per_block_3d.y ){

     printf(" [2] --------------------- warning: grid size cannot exceed max value %u \n", std::numeric_limits<int>::max() );

     return false;

   }


   return true;


 }


 bool setup_threads_to_find_candidates(){


   number_of_kernel_blocks = n_time_bins / max_n_threads_per_block + 1;

   number_of_threads_per_block = ( number_of_kernel_blocks > 1 ? max_n_threads_per_block : n_time_bins);


   if( number_of_threads_per_block > max_n_threads_per_block ){

     printf(" [2] warning: number_of_threads_per_block = %d cannot exceed max value %d \n", number_of_threads_per_block, max_n_threads_per_block );

     return false;

   }


   return true;

 }


 bool setup_threads_nhits(){


   number_of_kernel_blocks_3d.x = 100;

   number_of_kernel_blocks_3d.y = 1;


   number_of_threads_per_block_3d.x = 1024;

   number_of_threads_per_block_3d.y = 1;


   print_parameters_2d();


   return true;


 }


 bool read_the_input(){


   printf(" [2] --- read input \n");

   n_hits = read_number_of_input_hits();

   if( !n_hits ) return false;

   printf(" [2] input contains %d hits \n", n_hits);

   host_ids = (unsigned int *)malloc(n_hits*sizeof(unsigned int));

   host_times = (unsigned int *)malloc(n_hits*sizeof(unsigned int));

   if( !read_input() ) return false;

   //time_offset = 600.; // set to constant to match trevor running

   n_time_bins = int(floor((the_max_time + time_offset)/time_step_size))+1; // floor returns the integer below

   printf(" [2] input max_time %d, n_time_bins %d \n", the_max_time, n_time_bins);

   printf(" [2] time_offset = %d ns \n", time_offset);

   //print_input();


   checkCudaErrors( cudaMemcpyToSymbol(constant_n_time_bins, &n_time_bins, sizeof(n_time_bins)) );

   checkCudaErrors( cudaMemcpyToSymbol(constant_n_hits, &n_hits, sizeof(n_hits)) );


   return true;

 }


 bool read_the_input_ToolDAQ(std::vector<int> PMTids, std::vector<int> times, int * earliest_time){


   printf(" [2] --- read input \n");

   n_hits = PMTids.size();

   if( !n_hits ) return false;

   if( n_hits != times.size() ){

     printf(" [2] n PMT ids %d but n times %d \n", n_hits, times.size());

     return false;

   }

   printf(" [2] input contains %d hits \n", n_hits);

   host_ids = (unsigned int *)malloc(n_hits*sizeof(unsigned int));

   host_times = (unsigned int *)malloc(n_hits*sizeof(unsigned int));


   //  if( !read_input() ) return false;

   // read_input()

   {

     int min = INT_MAX;

     int max = INT_MIN;

     int time;

     for(int i=0; i<PMTids.size(); i++){

       time = int(floor(times[i]));

       host_times[i] = time;

       host_ids[i] = PMTids[i];

       //      printf(" [2] input %d PMT %d time %d \n", i, host_ids[i], host_times[i]);

       if( time > max ) max = time;

       if( time < min ) min = time;

     }

     if( min < 0 ){

       for(int i=0; i<PMTids.size(); i++){

     host_times[i] -= min;

       }

       max -= min;

       min -= min;

     }

     the_max_time = max;

     *earliest_time = min - min % time_step_size;

   }


   //time_offset = 600.; // set to constant to match trevor running

   n_time_bins = int(floor((the_max_time + time_offset)/time_step_size))+1; // floor returns the integer below

   printf(" [2] input max_time %d, n_time_bins %d \n", the_max_time, n_time_bins);

   printf(" [2] time_offset = %d ns \n", time_offset);

   //print_input();


   checkCudaErrors( cudaMemcpyToSymbol(constant_n_time_bins, &n_time_bins, sizeof(n_time_bins)) );

   checkCudaErrors( cudaMemcpyToSymbol(constant_n_hits, &n_hits, sizeof(n_hits)) );


   return true;

 }


 void allocate_tofs_memory_on_device(){


   printf(" [2] --- allocate memory tofs \n");

 #if defined __TIME_OF_FLIGHT_UCHAR__

   check_cudamalloc_unsigned_char(n_test_vertices*n_PMTs);

 #elif defined __TIME_OF_FLIGHT_USHORT__

   check_cudamalloc_unsigned_short(n_test_vertices*n_PMTs);

 #elif defined __TIME_OF_FLIGHT_UINT__

   check_cudamalloc_unsigned_int(n_test_vertices*n_PMTs);

 #endif

   checkCudaErrors(cudaMalloc((void **)&device_times_of_flight, n_test_vertices*n_PMTs*sizeof(time_of_flight_t)));


   if( correct_mode == 8 && return_direction ){

     check_cudamalloc_float(3*n_PMTs);

     checkCudaErrors(cudaMalloc((void **)&device_PMTx, n_PMTs*sizeof(float)));

     checkCudaErrors(cudaMalloc((void **)&device_PMTy, n_PMTs*sizeof(float)));

     checkCudaErrors(cudaMalloc((void **)&device_PMTz, n_PMTs*sizeof(float)));

   }

   if( correct_mode == 10 ){


     check_cudamalloc_float(3*n_test_vertices*n_PMTs);

     checkCudaErrors(cudaMalloc((void **)&device_light_dx, n_test_vertices*n_PMTs*sizeof(float)));

     checkCudaErrors(cudaMalloc((void **)&device_light_dy, n_test_vertices*n_PMTs*sizeof(float)));

     checkCudaErrors(cudaMalloc((void **)&device_light_dz, n_test_vertices*n_PMTs*sizeof(float)));

     checkCudaErrors(cudaMalloc((void **)&device_light_dr, n_test_vertices*n_PMTs*sizeof(float)));


   }

   /*

   if( n_hits*n_test_vertices > available_memory ){

     printf(" [2] cannot allocate vector of %d, available_memory %d \n", n_hits*n_test_vertices, available_memory);

     return 0;

   }

   */


   if( correct_mode == 1 ){


     unsigned int max = max_n_threads_per_block;

     greatest_divisor = find_greatest_divisor ( n_test_vertices , max);

     printf(" [2] greatest divisor of %d below %d is %d \n", n_test_vertices, max, greatest_divisor);


   }


   return;


 }


 void allocate_directions_memory_on_device(){


   printf(" [2] --- allocate memory directions \n");

   check_cudamalloc_bool( n_test_vertices*n_direction_bins*n_PMTs);

   checkCudaErrors(cudaMalloc((void **)&device_directions_for_vertex_and_pmt, n_test_vertices*n_direction_bins*n_PMTs*sizeof(bool)));

   /*

   if( n_hits*n_test_vertices > available_memory ){

     printf(" [2] cannot allocate vector of %d, available_memory %d \n", n_hits*n_test_vertices, available_memory);

     return 0;

   }

   */


   return;


 }


 void allocate_correct_memory_on_device(){


   printf(" [2] --- allocate memory \n");

   /*

   if( n_hits > available_memory ){

     printf(" [2] cannot allocate vector of %d, available_memory %d \n", n_hits, available_memory);

     return 0;

   }

   */

   check_cudamalloc_unsigned_int(n_hits);

   checkCudaErrors(cudaMalloc((void **)&device_ids, n_hits*sizeof(unsigned int)));


   check_cudamalloc_unsigned_int(n_hits);

   checkCudaErrors(cudaMalloc((void **)&device_times, n_hits*sizeof(unsigned int)));

   /*

   if( n_test_vertices*n_PMTs > available_memory ){

     printf(" [2] cannot allocate vector of %d, available_memory %d \n", n_test_vertices*n_PMTs, available_memory);

     return 0;

   }

   */


   if( correct_mode != 9 ){

     check_cudamalloc_unsigned_int(n_time_bins*n_test_vertices);

     checkCudaErrors(cudaMalloc((void **)&device_n_pmts_per_time_bin, n_time_bins*n_test_vertices*sizeof(unsigned int)));

     if( correct_mode == 8 && return_direction ){

       checkCudaErrors(cudaMalloc((void **)&device_meanx_per_time_bin, n_time_bins*n_test_vertices*sizeof(float)));

       checkCudaErrors(cudaMalloc((void **)&device_meany_per_time_bin, n_time_bins*n_test_vertices*sizeof(float)));

       checkCudaErrors(cudaMalloc((void **)&device_meanz_per_time_bin, n_time_bins*n_test_vertices*sizeof(float)));

     }

     if( correct_mode == 10 ){

       check_cudamalloc_float(3*n_time_bins*n_test_vertices);

       checkCudaErrors(cudaMalloc((void **)&device_dx_per_time_bin, n_time_bins*n_test_vertices*sizeof(float)));

       checkCudaErrors(cudaMalloc((void **)&device_dy_per_time_bin, n_time_bins*n_test_vertices*sizeof(float)));

       checkCudaErrors(cudaMalloc((void **)&device_dz_per_time_bin, n_time_bins*n_test_vertices*sizeof(float)));

       check_cudamalloc_unsigned_int(n_time_bins*n_test_vertices);

       checkCudaErrors(cudaMalloc((void **)&device_number_of_pmts_in_cone_in_time_bin, n_time_bins*n_test_vertices*sizeof(unsigned int)));

     }

   }


   if( correct_mode == 0 ){

     checkCudaErrors(cudaMemset(device_n_pmts_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(unsigned int)));

   } else if( correct_mode == 1 ){

     checkCudaErrors(cudaMemset(device_n_pmts_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(unsigned int)));


     check_cudamalloc_unsigned_int(n_hits*n_test_vertices);

     checkCudaErrors(cudaMalloc((void **)&device_time_bin_of_hit, n_hits*n_test_vertices*sizeof(unsigned int)));

     //    checkCudaErrors(cudaMemset(device_time_bin_of_hit, 0, n_hits*n_test_vertices*sizeof(unsigned int)));

   }

   else if( correct_mode == 2 ){

     check_cudamalloc_unsigned_int(n_hits*n_test_vertices);

     checkCudaErrors(cudaMalloc((void **)&device_time_bin_of_hit, n_hits*n_test_vertices*sizeof(unsigned int)));

     //checkCudaErrors(cudaMemset(device_time_bin_of_hit, 0, n_hits*n_test_vertices*sizeof(unsigned int)));


     host_time_bin_of_hit = (unsigned int *)malloc(n_hits*n_test_vertices*sizeof(unsigned int));


     host_n_pmts_per_time_bin = (unsigned int *)malloc(n_time_bins*n_test_vertices*sizeof(unsigned int));

     memset(host_n_pmts_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(unsigned int));

   } else if( correct_mode == 3 ){

     checkCudaErrors(cudaMemset(device_n_pmts_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(unsigned int)));


     check_cudamalloc_unsigned_int(n_hits*n_test_vertices);

     checkCudaErrors(cudaMalloc((void **)&device_time_bin_of_hit, n_hits*n_test_vertices*sizeof(unsigned int)));

     //checkCudaErrors(cudaMemset(device_time_bin_of_hit, 0, n_hits*n_test_vertices*sizeof(unsigned int)));

   } else if( correct_mode == 4 ){

     checkCudaErrors(cudaMemset(device_n_pmts_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(unsigned int)));


     check_cudamalloc_unsigned_int(n_hits*n_test_vertices);

     checkCudaErrors(cudaMalloc((void **)&device_time_bin_of_hit, n_hits*n_test_vertices*sizeof(unsigned int)));

   } else if( correct_mode == 5 ){

     checkCudaErrors(cudaMemset(device_n_pmts_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(unsigned int)));


     check_cudamalloc_unsigned_int(n_hits*n_test_vertices);

     checkCudaErrors(cudaMalloc((void **)&device_time_bin_of_hit, n_hits*n_test_vertices*sizeof(unsigned int)));

   } else if( correct_mode == 6 ){

     checkCudaErrors(cudaMemset(device_n_pmts_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(unsigned int)));


     check_cudamalloc_unsigned_int(n_hits*n_test_vertices);

     checkCudaErrors(cudaMalloc((void **)&device_time_bin_of_hit, n_hits*n_test_vertices*sizeof(unsigned int)));

   } else if( correct_mode == 7 ){

     checkCudaErrors(cudaMemset(device_n_pmts_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(unsigned int)));


     check_cudamalloc_unsigned_int(n_hits*n_test_vertices);

     checkCudaErrors(cudaMalloc((void **)&device_time_bin_of_hit, n_hits*n_test_vertices*sizeof(unsigned int)));

   } else if( correct_mode == 8 ){

     checkCudaErrors(cudaMemset(device_n_pmts_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(unsigned int)));

     if( return_direction ){

       checkCudaErrors(cudaMemset(device_meanx_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(float)));

       checkCudaErrors(cudaMemset(device_meany_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(float)));

       checkCudaErrors(cudaMemset(device_meanz_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(float)));

     }

   } else if( correct_mode == 9 ){


     check_cudamalloc_unsigned_int(n_time_bins*n_direction_bins*n_test_vertices);

     checkCudaErrors(cudaMalloc((void **)&device_n_pmts_per_time_bin_and_direction_bin, n_time_bins*n_direction_bins*n_test_vertices*sizeof(unsigned int)));

     checkCudaErrors(cudaMemset(device_n_pmts_per_time_bin_and_direction_bin, 0, n_time_bins*n_direction_bins*n_test_vertices*sizeof(unsigned int)));

   } else if( correct_mode == 10 ){

     checkCudaErrors(cudaMemset(device_n_pmts_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(unsigned int)));

     checkCudaErrors(cudaMemset(device_dx_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(float)));

     checkCudaErrors(cudaMemset(device_dy_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(float)));

     checkCudaErrors(cudaMemset(device_dz_per_time_bin, 0, n_time_bins*n_test_vertices*sizeof(float)));

   }


   return;


 }


 void allocate_correct_memory_on_device_nhits(){


   printf(" [2] --- allocate memory \n");

   /*

   if( n_hits > available_memory ){

     printf(" [2] cannot allocate vector of %d, available_memory %d \n", n_hits, available_memory);

     return 0;

   }

   */

   check_cudamalloc_unsigned_int(n_hits);

   checkCudaErrors(cudaMalloc((void **)&device_ids, n_hits*sizeof(unsigned int)));


   check_cudamalloc_unsigned_int(n_hits);

   checkCudaErrors(cudaMalloc((void **)&device_times, n_hits*sizeof(unsigned int)));

   /*

   if( n_test_vertices*n_PMTs > available_memory ){

     printf(" [2] cannot allocate vector of %d, available_memory %d \n", n_test_vertices*n_PMTs, available_memory);

     return 0;

   }

   */


   check_cudamalloc_unsigned_int(1);

   checkCudaErrors(cudaMalloc((void **)&device_n_pmts_nhits, 1*sizeof(unsigned int)));

   //  checkCudaErrors(cudaMalloc((void **)&device_time_nhits, (nhits_window/time_step_size + 1)*sizeof(unsigned int)));


   host_n_pmts_nhits = (unsigned int *)malloc(1*sizeof(unsigned int));

   //  host_time_nhits = (unsigned int *)malloc((nhits_window/time_step_size + 1)*sizeof(unsigned int));


   return;


 }


 void allocate_candidates_memory_on_host(){


   printf(" [2] --- allocate candidates memory on host \n");


   host_max_number_of_pmts_in_time_bin = (histogram_t *)malloc(n_time_bins*sizeof(histogram_t));

   host_vertex_with_max_n_pmts = (unsigned int *)malloc(n_time_bins*sizeof(unsigned int));


   if( correct_mode == 8 && return_direction ){

     host_meanx_for_vertex_with_max_n_pmts_per_time_bin = (float *)malloc(n_time_bins*sizeof(float));

     host_meany_for_vertex_with_max_n_pmts_per_time_bin = (float *)malloc(n_time_bins*sizeof(float));

     host_meanz_for_vertex_with_max_n_pmts_per_time_bin = (float *)malloc(n_time_bins*sizeof(float));

   }


   if( correct_mode == 10 ){

     host_max_number_of_pmts_in_cone_in_time_bin = (unsigned int *)malloc(n_time_bins*sizeof(unsigned int));

   }


   return;


 }


 void allocate_candidates_memory_on_device(){


   printf(" [2] --- allocate candidates memory on device \n");


 #if defined __HISTOGRAM_UCHAR__

   check_cudamalloc_unsigned_char(n_time_bins);

 #elif defined __HISTOGRAM_USHORT__

   check_cudamalloc_unsigned_short(n_time_bins);

 #elif defined __HISTOGRAM_UINT__

   check_cudamalloc_unsigned_int(n_time_bins);

 #endif

   checkCudaErrors(cudaMalloc((void **)&device_max_number_of_pmts_in_time_bin, n_time_bins*sizeof(histogram_t)));


   if( correct_mode == 8 && return_direction ){

     checkCudaErrors(cudaMalloc((void **)&device_meanx_for_vertex_with_max_n_pmts_per_time_bin, n_time_bins*sizeof(float)));

     checkCudaErrors(cudaMalloc((void **)&device_meany_for_vertex_with_max_n_pmts_per_time_bin, n_time_bins*sizeof(float)));

     checkCudaErrors(cudaMalloc((void **)&device_meanz_for_vertex_with_max_n_pmts_per_time_bin, n_time_bins*sizeof(float)));

   }


   check_cudamalloc_unsigned_int(n_time_bins);

   checkCudaErrors(cudaMalloc((void **)&device_vertex_with_max_n_pmts, n_time_bins*sizeof(unsigned int)));


   if( correct_mode == 10 ){

     check_cudamalloc_unsigned_int(n_time_bins);

     checkCudaErrors(cudaMalloc((void **)&device_max_number_of_pmts_in_cone_in_time_bin, n_time_bins*sizeof(unsigned int)));

   }


   return;


 }


 void make_table_of_tofs(){


   printf(" [2] --- fill times_of_flight \n");

   host_times_of_flight = (time_of_flight_t*)malloc(n_test_vertices*n_PMTs * sizeof(time_of_flight_t));

   printf(" [2] speed_light_water %f \n", speed_light_water);

   if( correct_mode == 8 && return_direction ){

     host_PMTx = (float*)malloc(n_PMTs * sizeof(double));

     host_PMTy = (float*)malloc(n_PMTs * sizeof(double));

     host_PMTz = (float*)malloc(n_PMTs * sizeof(double));

   }

   if( correct_mode == 10 ){

     host_light_dx = (float*)malloc(n_test_vertices*n_PMTs * sizeof(double));

     host_light_dy = (float*)malloc(n_test_vertices*n_PMTs * sizeof(double));

     host_light_dz = (float*)malloc(n_test_vertices*n_PMTs * sizeof(double));

     host_light_dr = (float*)malloc(n_test_vertices*n_PMTs * sizeof(double));

   }

   unsigned int distance_index;

   time_offset = 0.;

   for(unsigned int ip=0; ip<n_PMTs; ip++){

     if( correct_mode == 8 && return_direction ){

       host_PMTx[ip] = PMT_x[ip];

       host_PMTy[ip] = PMT_y[ip];

       host_PMTz[ip] = PMT_z[ip];

     }

     for(unsigned int iv=0; iv<n_test_vertices; iv++){

       distance_index = get_distance_index(ip + 1, n_PMTs*iv);

       host_times_of_flight[distance_index] = sqrt(pow(vertex_x[iv] - PMT_x[ip],2) + pow(vertex_y[iv] - PMT_y[ip],2) + pow(vertex_z[iv] - PMT_z[ip],2))/speed_light_water;

       if( correct_mode == 10 ){

     host_light_dx[distance_index] = PMT_x[ip] - vertex_x[iv];

     host_light_dy[distance_index] = PMT_y[ip] - vertex_y[iv];

     host_light_dz[distance_index] = PMT_z[ip] - vertex_z[iv];

     host_light_dr[distance_index] = sqrt(pow(host_light_dx[distance_index],2) + pow(host_light_dy[distance_index],2) + pow(host_light_dz[distance_index],2));

       }

       if( host_times_of_flight[distance_index] > time_offset )

     time_offset = host_times_of_flight[distance_index];


     }

   }

   //print_times_of_flight();


   return;

 }


 void make_table_of_directions(){


   printf(" [2] --- fill directions \n");

   printf(" [2] cerenkov_angle_water %f \n", cerenkov_angle_water);

   host_directions_for_vertex_and_pmt = (bool*)malloc(n_test_vertices*n_PMTs*n_direction_bins * sizeof(bool));

   float dx, dy, dz, dr, phi, cos_theta, sin_theta;

   float phi2, cos_theta2, angle;

   unsigned int dir_index_at_angles;

   unsigned int dir_index_at_pmt;

   for(unsigned int ip=0; ip<n_PMTs; ip++){

     for(unsigned int iv=0; iv<n_test_vertices; iv++){

       dx = PMT_x[ip] - vertex_x[iv];

       dy = PMT_y[ip] - vertex_y[iv];

       dz = PMT_z[ip] - vertex_z[iv];

       dr = sqrt(pow(dx,2) + pow(dy,2) + pow(dz,2));

       phi = atan2(dy,dx);

       // light direction

       cos_theta = dz/dr;

       sin_theta = sqrt(1. - pow(cos_theta,2));

       // particle direction

       for(unsigned int itheta = 0; itheta < n_direction_bins_theta; itheta++){

     cos_theta2 = -1. + 2.*itheta/(n_direction_bins_theta - 1);

     for(unsigned int iphi = 0; iphi < n_direction_bins_phi; iphi++){

       phi2 = 0. + twopi*iphi/n_direction_bins_phi;


       if( (itheta == 0 || itheta + 1 == n_direction_bins_theta ) && iphi != 0 ) break;


       // angle between light direction and particle direction

       angle = acos( sin_theta*sqrt(1 - pow(cos_theta2,2)) * cos(phi - phi2) + cos_theta*cos_theta2 );


       dir_index_at_angles = get_direction_index_at_angles(iphi, itheta);

       dir_index_at_pmt = get_direction_index_at_pmt(ip, iv, dir_index_at_angles);


       //printf(" [2] phi %f ctheta %f phi' %f ctheta' %f angle %f dir_index_at_angles %d dir_index_at_pmt %d \n",

       //     phi, cos_theta, phi2, cos_theta2, angle, dir_index_at_angles, dir_index_at_pmt);


       host_directions_for_vertex_and_pmt[dir_index_at_pmt]

         = (bool)(fabs(angle - cerenkov_angle_water) < twopi/(2.*n_direction_bins_phi));

     }

       }

     }

   }

   //print_directions();


   return;

 }


 void fill_tofs_memory_on_device(){


   printf(" [2] --- copy tofs from host to device \n");

   checkCudaErrors(cudaMemcpy(device_times_of_flight,

                  host_times_of_flight,

                  n_test_vertices*n_PMTs*sizeof(time_of_flight_t),

                  cudaMemcpyHostToDevice));

   if( correct_mode == 8 && return_direction ){

     checkCudaErrors(cudaMemcpy(device_PMTx,host_PMTx,n_PMTs*sizeof(float),cudaMemcpyHostToDevice));

     checkCudaErrors(cudaMemcpy(device_PMTy,host_PMTy,n_PMTs*sizeof(float),cudaMemcpyHostToDevice));

     checkCudaErrors(cudaMemcpy(device_PMTz,host_PMTz,n_PMTs*sizeof(float),cudaMemcpyHostToDevice));

   }

   if( correct_mode == 10 ){

     checkCudaErrors(cudaMemcpy(device_light_dx,host_light_dx,n_test_vertices*n_PMTs*sizeof(float),cudaMemcpyHostToDevice));

     checkCudaErrors(cudaMemcpy(device_light_dy,host_light_dy,n_test_vertices*n_PMTs*sizeof(float),cudaMemcpyHostToDevice));

     checkCudaErrors(cudaMemcpy(device_light_dz,host_light_dz,n_test_vertices*n_PMTs*sizeof(float),cudaMemcpyHostToDevice));

     checkCudaErrors(cudaMemcpy(device_light_dr,host_light_dr,n_test_vertices*n_PMTs*sizeof(float),cudaMemcpyHostToDevice));

   }

   checkCudaErrors( cudaMemcpyToSymbol(constant_time_step_size, &time_step_size, sizeof(time_step_size)) );

   checkCudaErrors( cudaMemcpyToSymbol(constant_n_test_vertices, &n_test_vertices, sizeof(n_test_vertices)) );

   checkCudaErrors( cudaMemcpyToSymbol(constant_n_water_like_test_vertices, &n_water_like_test_vertices, sizeof(n_water_like_test_vertices)) );

   checkCudaErrors( cudaMemcpyToSymbol(constant_n_PMTs, &n_PMTs, sizeof(n_PMTs)) );


   // Bind the array to the texture

   checkCudaErrors(cudaBindTexture(0,tex_times_of_flight, device_times_of_flight, n_test_vertices*n_PMTs*sizeof(time_of_flight_t)));

   if( correct_mode == 8 && return_direction ){

     checkCudaErrors(cudaBindTexture(0,tex_PMTx, device_PMTx, n_PMTs*sizeof(float)));

     checkCudaErrors(cudaBindTexture(0,tex_PMTy, device_PMTy, n_PMTs*sizeof(float)));

     checkCudaErrors(cudaBindTexture(0,tex_PMTz, device_PMTz, n_PMTs*sizeof(float)));

   }

   if( correct_mode == 10 ){

     checkCudaErrors(cudaBindTexture(0,tex_light_dx, device_light_dx, n_test_vertices*n_PMTs*sizeof(float)));

     checkCudaErrors(cudaBindTexture(0,tex_light_dy, device_light_dy, n_test_vertices*n_PMTs*sizeof(float)));

     checkCudaErrors(cudaBindTexture(0,tex_light_dz, device_light_dz, n_test_vertices*n_PMTs*sizeof(float)));

     checkCudaErrors(cudaBindTexture(0,tex_light_dr, device_light_dr, n_test_vertices*n_PMTs*sizeof(float)));

     checkCudaErrors( cudaMemcpyToSymbol(constant_cerenkov_costheta, &cerenkov_costheta, sizeof(float)) );

     checkCudaErrors( cudaMemcpyToSymbol(constant_costheta_cone_cut, &costheta_cone_cut, sizeof(float)) );

     checkCudaErrors( cudaMemcpyToSymbol(constant_select_based_on_cone, &select_based_on_cone, sizeof(bool)) );

     checkCudaErrors( cudaMemcpyToSymbol(constant_return_direction, &return_direction, sizeof(bool)) );

   }


   return;

 }


 void fill_directions_memory_on_device(){


   printf(" [2] --- copy directions from host to device \n");

   checkCudaErrors(cudaMemcpy(device_directions_for_vertex_and_pmt,

                  host_directions_for_vertex_and_pmt,

                  n_test_vertices*n_PMTs*n_direction_bins*sizeof(bool),

                  cudaMemcpyHostToDevice));

   checkCudaErrors( cudaMemcpyToSymbol(constant_n_direction_bins_theta, &n_direction_bins_theta, sizeof(n_direction_bins_theta)) );

   checkCudaErrors( cudaMemcpyToSymbol(constant_n_direction_bins_phi, &n_direction_bins_phi, sizeof(n_direction_bins_phi)) );

   checkCudaErrors( cudaMemcpyToSymbol(constant_n_direction_bins, &n_direction_bins, sizeof(n_direction_bins)) );


   // Bind the array to the texture

   //  checkCudaErrors(cudaBindTexture(0,tex_directions_for_vertex_and_pmt, device_directions_for_vertex_and_pmt, n_test_vertices*n_PMTs*n_direction_bins_theta*n_direction_bins_theta*sizeof(bool)));


   return;

 }


 void fill_tofs_memory_on_device_nhits(){


   printf(" [2] --- copy tofs from host to device \n");

   checkCudaErrors( cudaMemcpyToSymbol(constant_time_step_size, &time_step_size, sizeof(time_step_size)) );

   checkCudaErrors( cudaMemcpyToSymbol(constant_n_PMTs, &n_PMTs, sizeof(n_PMTs)) );


   return;

 }


 void fill_correct_memory_on_device(){


   printf(" [2] --- copy from host to device \n");

   checkCudaErrors(cudaMemcpy(device_ids,

                  host_ids,

                  n_hits*sizeof(unsigned int),

                  cudaMemcpyHostToDevice));

   checkCudaErrors(cudaMemcpy(device_times,

                  host_times,

                  n_hits*sizeof(unsigned int),

                  cudaMemcpyHostToDevice));

   checkCudaErrors( cudaMemcpyToSymbol(constant_time_offset, &time_offset, sizeof(time_offset)) );


   checkCudaErrors(cudaBindTexture(0,tex_ids, device_ids, n_hits*sizeof(unsigned int)));

   checkCudaErrors(cudaBindTexture(0,tex_times, device_times, n_hits*sizeof(unsigned int)));


   return;

 }


 unsigned int read_number_of_pmts(){


   FILE *f=fopen(pmts_file.c_str(), "r");

   if (f == NULL){

     printf(" [2] cannot read pmts file %s \n", pmts_file.c_str());

     fclose(f);

     return 0;

   }


   unsigned int n_pmts = 0;


   for (char c = getc(f); c != EOF; c = getc(f))

     if (c == '\n')

       n_pmts ++;


   fclose(f);

   return n_pmts;


 }


 bool read_pmts(){


   FILE *f=fopen(pmts_file.c_str(), "r");


   double x, y, z;

   unsigned int id;

   for( unsigned int i=0; i<n_PMTs; i++){

     if( fscanf(f, "%d %lf %lf %lf", &id, &x, &y, &z) != 4 ){

       printf(" [2] problem scanning pmt %d \n", i);

       fclose(f);

       return false;

     }

     PMT_x[id-1] = x;

     PMT_y[id-1] = y;

     PMT_z[id-1] = z;

   }


   fclose(f);


   return true;


 }


 void coalesce_triggers(){


   trigger_pair_vertex_time.clear();

   trigger_npmts_in_time_bin.clear();

   if( correct_mode == 8 && return_direction ){

     trigger_meanx_per_time_bin.clear();

     trigger_meany_per_time_bin.clear();

     trigger_meanz_per_time_bin.clear();

   }

   if( correct_mode == 10 ){

     trigger_npmts_in_cone_in_time_bin.clear();

   }


   unsigned int vertex_index, time_upper, number_of_pmts_in_time_bin, number_of_pmts_in_cone_in_time_bin;

   unsigned int max_pmt=0,max_vertex_index=0,max_time=0,max_pmt_in_cone=0;

   bool first_trigger, last_trigger, coalesce_triggers;

   unsigned int trigger_index;

   float local_trigger_meanx_per_time_bin, local_trigger_meany_per_time_bin, local_trigger_meanz_per_time_bin;

   float max_trigger_meanx_per_time_bin = 0, max_trigger_meany_per_time_bin = 0, max_trigger_meanz_per_time_bin = 0;

   for(std::vector<std::pair<unsigned int,unsigned int> >::const_iterator itrigger=candidate_trigger_pair_vertex_time.begin(); itrigger != candidate_trigger_pair_vertex_time.end(); ++itrigger){


     vertex_index =      itrigger->first;

     time_upper = itrigger->second;

     trigger_index = itrigger - candidate_trigger_pair_vertex_time.begin();

     number_of_pmts_in_time_bin = candidate_trigger_npmts_in_time_bin.at(trigger_index);

     if( correct_mode == 8 && return_direction ){

       local_trigger_meanx_per_time_bin = candidate_trigger_meanx_per_time_bin.at(trigger_index);

       local_trigger_meany_per_time_bin = candidate_trigger_meany_per_time_bin.at(trigger_index);

       local_trigger_meanz_per_time_bin = candidate_trigger_meanz_per_time_bin.at(trigger_index);

     }

     if( correct_mode == 10 ){

       number_of_pmts_in_cone_in_time_bin = candidate_trigger_npmts_in_cone_in_time_bin.at(trigger_index);

     }


     first_trigger = (trigger_index == 0);

     last_trigger = (trigger_index == candidate_trigger_pair_vertex_time.size()-1);


     if( first_trigger ){

       max_pmt = number_of_pmts_in_time_bin;

       max_vertex_index = vertex_index;

       max_time = time_upper;

       if( correct_mode == 8 && return_direction ){

     max_trigger_meanx_per_time_bin = local_trigger_meanx_per_time_bin;

     max_trigger_meany_per_time_bin = local_trigger_meany_per_time_bin;

     max_trigger_meanz_per_time_bin = local_trigger_meanz_per_time_bin;

       }

       if( correct_mode == 10 ){

     max_pmt_in_cone = number_of_pmts_in_cone_in_time_bin;

       }

     }

     else{

       coalesce_triggers = (std::abs((int)(max_time - time_upper)) < coalesce_time/time_step_size);


       if( coalesce_triggers ){

     if( number_of_pmts_in_time_bin >= max_pmt) {

       max_pmt = number_of_pmts_in_time_bin;

       max_vertex_index = vertex_index;

       max_time = time_upper;

       if( correct_mode == 8 && return_direction ){

         max_trigger_meanx_per_time_bin = local_trigger_meanx_per_time_bin;

         max_trigger_meany_per_time_bin = local_trigger_meany_per_time_bin;

         max_trigger_meanz_per_time_bin = local_trigger_meanz_per_time_bin;

       }

       if( correct_mode == 10 ){

         max_pmt_in_cone = number_of_pmts_in_cone_in_time_bin;

       }

     }

       }else{

     trigger_pair_vertex_time.push_back(std::make_pair(max_vertex_index,max_time));

     trigger_npmts_in_time_bin.push_back(max_pmt);

     max_pmt = number_of_pmts_in_time_bin;

     max_vertex_index = vertex_index;

     max_time = time_upper;

     if( correct_mode == 8 && return_direction ){

       trigger_meanx_per_time_bin.push_back(max_trigger_meanx_per_time_bin);

       trigger_meany_per_time_bin.push_back(max_trigger_meany_per_time_bin);

       trigger_meanz_per_time_bin.push_back(max_trigger_meanz_per_time_bin);

       max_trigger_meanx_per_time_bin = local_trigger_meanx_per_time_bin;

       max_trigger_meany_per_time_bin = local_trigger_meany_per_time_bin;

       max_trigger_meanz_per_time_bin = local_trigger_meanz_per_time_bin;

     }

     if( correct_mode == 10 ){

       trigger_npmts_in_cone_in_time_bin.push_back(max_pmt_in_cone);

       max_pmt_in_cone = number_of_pmts_in_cone_in_time_bin;

     }

       }

     }


     if(last_trigger){

       trigger_pair_vertex_time.push_back(std::make_pair(max_vertex_index,max_time));

       trigger_npmts_in_time_bin.push_back(max_pmt);

       if( correct_mode == 8 && return_direction ){

     trigger_meanx_per_time_bin.push_back(max_trigger_meanx_per_time_bin);

     trigger_meany_per_time_bin.push_back(max_trigger_meany_per_time_bin);

     trigger_meanz_per_time_bin.push_back(max_trigger_meanz_per_time_bin);

       }

       if( correct_mode == 10 ){

     trigger_npmts_in_cone_in_time_bin.push_back(max_pmt_in_cone);

       }

     }


   }


   for(std::vector<std::pair<unsigned int,unsigned int> >::const_iterator itrigger=trigger_pair_vertex_time.begin(); itrigger != trigger_pair_vertex_time.end(); ++itrigger)

     printf(" [2] coalesced vertex index %d trigger timebin %d \n", itrigger->first, itrigger->second);


   return;


 }


 void separate_triggers_into_gates(){


   final_trigger_pair_vertex_time.clear();

   unsigned int trigger_index;


   unsigned int time_start=0;

   for(std::vector<std::pair<unsigned int,unsigned int> >::const_iterator itrigger=trigger_pair_vertex_time.begin(); itrigger != trigger_pair_vertex_time.end(); ++itrigger){

     //once a trigger is found, we must jump in the future before searching for the next

     if(itrigger->second > time_start) {

       unsigned int triggertime = itrigger->second*time_step_size - time_offset;

       final_trigger_pair_vertex_time.push_back(std::make_pair(itrigger->first,triggertime));

       time_start = itrigger->second + trigger_gate_up/time_step_size;

       trigger_index = itrigger - trigger_pair_vertex_time.begin();

       output_trigger_information.clear();

       output_trigger_information.push_back(vertex_x[itrigger->first]);

       output_trigger_information.push_back(vertex_y[itrigger->first]);

       output_trigger_information.push_back(vertex_z[itrigger->first]);

       output_trigger_information.push_back(trigger_npmts_in_time_bin.at(trigger_index));

       output_trigger_information.push_back(triggertime);


       printf(" [2] triggertime: %d, vertex index: %d, npmts: %d, x: %f, y: %f, z: %f \n", triggertime, itrigger->first, trigger_npmts_in_time_bin.at(trigger_index), vertex_x[itrigger->first], vertex_y[itrigger->first], vertex_z[itrigger->first]);


       /* if( output_txt ){ */

       /*    FILE *of=fopen(output_file.c_str(), "w"); */


       /*    unsigned int distance_index; */

       /*    double tof; */

       /*    double corrected_time; */


       /*    for(unsigned int i=0; i<n_hits; i++){ */


       /*      distance_index = get_distance_index(host_ids[i], n_PMTs*(itrigger->first)); */

       /*      tof = host_times_of_flight[distance_index]; */


       /*      corrected_time = host_times[i]-tof; */


       /*      //fprintf(of, " %d %d %f \n", host_ids[i], host_times[i], corrected_time); */

       /*      fprintf(of, " %d %f \n", host_ids[i], corrected_time); */

       /*    } */


       /*    fclose(of); */

       /* } */


     }

   }


   return;

 }


 void separate_triggers_into_gates(std::vector<int> * trigger_ns, std::vector<int> * trigger_ts, std::vector<double> * trigger_vtx_xs, std::vector<double> * trigger_vtx_ys, std::vector<double> * trigger_vtx_zs, std::vector<double> * trigger_dir_xs, std::vector<double> * trigger_dir_ys, std::vector<double> * trigger_dir_zs){


   final_trigger_pair_vertex_time.clear();

   unsigned int trigger_index;


   unsigned int time_start=0;

   float sum_x, mean_x, sum_y, mean_y, sum_z, mean_z;

   float dir_x, dir_y, dir_z;

   float dir;

   for(std::vector<std::pair<unsigned int,unsigned int> >::const_iterator itrigger=trigger_pair_vertex_time.begin(); itrigger != trigger_pair_vertex_time.end(); ++itrigger){

     //once a trigger is found, we must jump in the future before searching for the next

     if(itrigger->second > time_start) {

       unsigned int triggertime = itrigger->second*time_step_size - time_offset;

       final_trigger_pair_vertex_time.push_back(std::make_pair(itrigger->first,triggertime));

       time_start = itrigger->second + trigger_gate_up/time_step_size;

       trigger_index = itrigger - trigger_pair_vertex_time.begin();

       output_trigger_information.clear();

       output_trigger_information.push_back(vertex_x[itrigger->first]);

       output_trigger_information.push_back(vertex_y[itrigger->first]);

       output_trigger_information.push_back(vertex_z[itrigger->first]);

       output_trigger_information.push_back(trigger_npmts_in_time_bin.at(trigger_index));

       output_trigger_information.push_back(triggertime);


       trigger_ns->push_back(trigger_npmts_in_time_bin.at(trigger_index));

       trigger_ts->push_back(triggertime);

       if( return_vertex ){

     trigger_vtx_xs->push_back(vertex_x[itrigger->first]);

     trigger_vtx_ys->push_back(vertex_y[itrigger->first]);

     trigger_vtx_zs->push_back(vertex_z[itrigger->first]);

       }

       if( correct_mode == 8 && return_direction ){

     sum_x = trigger_meanx_per_time_bin.at(trigger_index);

     mean_x = sum_x/trigger_npmts_in_time_bin.at(trigger_index);

     dir_x = mean_x - vertex_x[itrigger->first];

     sum_y = trigger_meany_per_time_bin.at(trigger_index);

     mean_y = sum_y/trigger_npmts_in_time_bin.at(trigger_index);

     dir_y = mean_y - vertex_y[itrigger->first];

     sum_z = trigger_meanz_per_time_bin.at(trigger_index);

     mean_z = sum_z/trigger_npmts_in_time_bin.at(trigger_index);

     dir_z = mean_z - vertex_z[itrigger->first];

     dir = sqrt(pow(dir_x,2) + pow(dir_y,2) + pow(dir_z,2));

     trigger_dir_xs->push_back(dir_x/dir);

     trigger_dir_ys->push_back(dir_y/dir);

     trigger_dir_zs->push_back(dir_z/dir);

       }


       printf(" [2] triggertime: %d, vertex index: %d, npmts: %d, x: %f, y: %f, z: %f \n", triggertime, itrigger->first, trigger_npmts_in_time_bin.at(trigger_index), vertex_x[itrigger->first], vertex_y[itrigger->first], vertex_z[itrigger->first]);


       /* if( output_txt ){ */

       /*    FILE *of=fopen(output_file.c_str(), "w"); */


       /*    unsigned int distance_index; */

       /*    double tof; */

       /*    double corrected_time; */


       /*    for(unsigned int i=0; i<n_hits; i++){ */


       /*      distance_index = get_distance_index(host_ids[i], n_PMTs*(itrigger->first)); */

       /*      tof = host_times_of_flight[distance_index]; */


       /*      corrected_time = host_times[i]-tof; */


       /*      //fprintf(of, " %d %d %f \n", host_ids[i], host_times[i], corrected_time); */

       /*      fprintf(of, " %d %f \n", host_ids[i], corrected_time); */

       /*    } */


       /*    fclose(of); */

       /* } */


     }

   }


   return;

 }


 float timedifference_msec(struct timeval t0, struct timeval t1){

     return (t1.tv_sec - t0.tv_sec) * 1000.0f + (t1.tv_usec - t0.tv_usec) / 1000.0f;

 }


 void start_c_clock(){

   gettimeofday(&t0,0);


 }

 double stop_c_clock(){

   gettimeofday(&t1,0);

   return timedifference_msec(t0, t1);

 }

 void start_cuda_clock(){

   cudaEventRecord(start);


 }

 double stop_cuda_clock(){

   float milli;

   cudaEventRecord(stop);

   cudaEventSynchronize(stop);

   cudaEventElapsedTime(&milli, start, stop);

   return milli;

 }

 void start_total_cuda_clock(){

   cudaEventRecord(total_start);


 }

 double stop_total_cuda_clock(){

   float milli;

   cudaEventRecord(total_stop);

   cudaEventSynchronize(total_stop);

   cudaEventElapsedTime(&milli, total_start, total_stop);

   return milli;

 }


 unsigned int get_distance_index(unsigned int pmt_id, unsigned int vertex_block){

   // block = (npmts) * (vertex index)


   return pmt_id - 1 + vertex_block;


 }


 unsigned int get_time_index(unsigned int hit_index, unsigned int vertex_block){

   // block = (n time bins) * (vertex index)


   return hit_index + vertex_block;


 }


  unsigned int get_direction_index_at_angles(unsigned int iphi, unsigned int itheta){


    if( itheta == 0 ) return 0;

    if( itheta + 1 == n_direction_bins_theta ) return n_direction_bins - 1;


    return 1 + (itheta - 1) * n_direction_bins_phi + iphi;


 }


 unsigned int get_direction_index_at_pmt(unsigned int pmt_id, unsigned int vertex_index, unsigned int direction_index){


   //                                                     pmt id 1                        ...        pmt id p

   // [                      vertex 1                              vertex 2 ... vertex m] ... [vertex 1 ... vertex m]

   // [(dir 1 ... dir n) (dir 1 ... dir n) ... (dir 1 ... dir n)] ...


   return n_direction_bins * (pmt_id * n_test_vertices  + vertex_index) + direction_index ;


 }


 unsigned int get_direction_index_at_time(unsigned int time_bin, unsigned int vertex_index, unsigned int direction_index){


   //                                                     time 1                        ...        time p

   // [                      vertex 1                              vertex 2 ... vertex m] ... [vertex 1 ... vertex m]

   // [(dir 1 ... dir n) (dir 1 ... dir n) ... (dir 1 ... dir n)] ...


   return n_direction_bins* (time_bin * n_test_vertices  + vertex_index ) + direction_index ;


 }


 __device__ unsigned int device_get_distance_index(unsigned int pmt_id, unsigned int vertex_block){

   // block = (npmts) * (vertex index)


   return pmt_id - 1 + vertex_block;


 }


 __device__ unsigned int device_get_time_index(unsigned int hit_index, unsigned int vertex_block){

   // block = (n time bins) * (vertex index)


   return hit_index + vertex_block;


 }


 __device__ unsigned int device_get_direction_index_at_pmt(unsigned int pmt_id, unsigned int vertex_index, unsigned int direction_index){


   //                                                     pmt id 1                        ...        pmt id p

   // [                      vertex 1                              vertex 2 ... vertex m] ... [vertex 1 ... vertex m]

   // [(dir 1 ... dir n) (dir 1 ... dir n) ... (dir 1 ... dir n)] ...


   return constant_n_direction_bins * (pmt_id * constant_n_test_vertices  + vertex_index) + direction_index ;


 }


 __device__ unsigned int device_get_direction_index_at_angles(unsigned int iphi, unsigned int itheta){


    if( itheta == 0 ) return 0;

    if( itheta + 1 == constant_n_direction_bins_theta ) return constant_n_direction_bins - 1;


    return 1 + (itheta - 1) * constant_n_direction_bins_phi + iphi;


 }


 __device__ unsigned int device_get_direction_index_at_time(unsigned int time_bin, unsigned int vertex_index, unsigned int direction_index){


   //                                                     time 1                        ...        time p

   // [                      vertex 1                              vertex 2 ... vertex m] ... [vertex 1 ... vertex m]

   // [(dir 1 ... dir n) (dir 1 ... dir n) ... (dir 1 ... dir n)] ...


   return constant_n_direction_bins* (time_bin * constant_n_test_vertices  + vertex_index ) + direction_index ;


 }


 // Print device properties

 void print_gpu_properties(){


   int devCount;

   cudaGetDeviceCount(&devCount);

   printf(" [2] CUDA Device Query...\n");

   printf(" [2] There are %d CUDA devices.\n", devCount);

   cudaDeviceProp devProp;

   for (int i = 0; i < devCount; ++i){

     // Get device properties

     printf(" [2] CUDA Device #%d\n", i);

     cudaGetDeviceProperties(&devProp, i);

     printf("Major revision number:          %d\n",  devProp.major);

     printf("Minor revision number:          %d\n",  devProp.minor);

     printf("Name:                           %s\n",  devProp.name);

     printf("Total global memory:            %lu\n",  devProp.totalGlobalMem);

     printf("Total shared memory per block:  %lu\n",  devProp.sharedMemPerBlock);

     printf("Total registers per block:      %d\n",  devProp.regsPerBlock);

     printf("Warp size:                      %d\n",  devProp.warpSize);

     printf("Maximum memory pitch:           %lu\n",  devProp.memPitch);

     max_n_threads_per_block = devProp.maxThreadsPerBlock;

     printf("Maximum threads per block:      %d\n",  max_n_threads_per_block);

     for (int i = 0; i < 3; ++i)

       printf("Maximum dimension %d of block:   %d\n", i, devProp.maxThreadsDim[i]);

     max_n_blocks = devProp.maxGridSize[0];

     for (int i = 0; i < 3; ++i)

       printf("Maximum dimension %d of grid:    %d\n", i, devProp.maxGridSize[i]);

     printf("Clock rate:                     %d\n",  devProp.clockRate);

     printf("Total constant memory:          %lu\n",  devProp.totalConstMem);

     printf("Texture alignment:              %lu\n",  devProp.textureAlignment);

     printf("Concurrent copy and execution:  %s\n",  (devProp.deviceOverlap ? "Yes" : "No"));

     printf("Number of multiprocessors:      %d\n",  devProp.multiProcessorCount);

     printf("Kernel execution timeout:       %s\n",  (devProp.kernelExecTimeoutEnabled ?"Yes" : "No"));

     printf("Memory Clock Rate (KHz):        %d\n", devProp.memoryClockRate);

     printf("Memory Bus Width (bits):        %d\n", devProp.memoryBusWidth);

     printf("Peak Memory Bandwidth (GB/s):   %f\n", 2.0*devProp.memoryClockRate*(devProp.memoryBusWidth/8)/1.0e6);

     printf("Concurrent kernels:             %d\n",  devProp.concurrentKernels);

   }

   size_t available_memory, total_memory;

   cudaMemGetInfo(&available_memory, &total_memory);

   size_t stack_memory;

   cudaDeviceGetLimit(&stack_memory, cudaLimitStackSize);

   size_t fifo_memory;

   cudaDeviceGetLimit(&fifo_memory, cudaLimitPrintfFifoSize);

   size_t heap_memory;

   cudaDeviceGetLimit(&heap_memory, cudaLimitMallocHeapSize);

   printf(" [2] memgetinfo: available_memory %f MB, total_memory %f MB, stack_memory %f MB, fifo_memory %f MB, heap_memory %f MB \n", (double)available_memory/1.e6, (double)total_memory/1.e6, (double)stack_memory/1.e6, (double)fifo_memory/1.e6, (double)heap_memory/1.e6);


   return;

 }


 __global__ void kernel_find_vertex_with_max_npmts_in_timebin(histogram_t * np, histogram_t * mnp, unsigned int * vmnp){


   // get unique id for each thread in each block == time bin

   unsigned int time_bin_index = threadIdx.x + blockDim.x*blockIdx.x;


   // skip if thread is assigned to nonexistent time bin

   if( time_bin_index >= constant_n_time_bins ) return;


   unsigned int number_of_pmts_in_time_bin = 0;

   unsigned int time_index;

   histogram_t max_number_of_pmts_in_time_bin=0;

   unsigned int vertex_with_max_n_pmts = 0;


   for(unsigned int iv=0;iv<constant_n_test_vertices;iv++) { // loop over test vertices

     // sum the number of hit PMTs in this time window and the next


     time_index = time_bin_index + constant_n_time_bins*iv;

     if( time_index >= constant_n_time_bins*constant_n_test_vertices - 1 ) continue;

     number_of_pmts_in_time_bin = np[time_index] + np[time_index+1];

     if( number_of_pmts_in_time_bin >= max_number_of_pmts_in_time_bin ){

       max_number_of_pmts_in_time_bin = number_of_pmts_in_time_bin;

       vertex_with_max_n_pmts = iv;

     }

   }


   mnp[time_bin_index] = max_number_of_pmts_in_time_bin;

   vmnp[time_bin_index] = vertex_with_max_n_pmts;


   return;


 }


 __global__ void kernel_find_vertex_with_max_npmts_in_timebin(histogram_t * np, histogram_t * mnp, unsigned int * vmnp, float * meanxin, float * meanyin, float * meanzin, float * meanxout, float * meanyout, float * meanzout){


   // get unique id for each thread in each block == time bin

   unsigned int time_bin_index = threadIdx.x + blockDim.x*blockIdx.x;


   // skip if thread is assigned to nonexistent time bin

   if( time_bin_index >= constant_n_time_bins ) return;


   unsigned int number_of_pmts_in_time_bin = 0;

   unsigned int time_index;

   histogram_t max_number_of_pmts_in_time_bin=0;

   unsigned int vertex_with_max_n_pmts = 0;

   float meanx_for_vertex_with_max_n_pmts = 0., meany_for_vertex_with_max_n_pmts = 0., meanz_for_vertex_with_max_n_pmts = 0.;


   for(unsigned int iv=0;iv<constant_n_test_vertices;iv++) { // loop over test vertices

     // sum the number of hit PMTs in this time window and the next


     time_index = time_bin_index + constant_n_time_bins*iv;

     if( time_index >= constant_n_time_bins*constant_n_test_vertices - 1 ) continue;

     number_of_pmts_in_time_bin = np[time_index] + np[time_index+1];

     if( number_of_pmts_in_time_bin >= max_number_of_pmts_in_time_bin ){

       max_number_of_pmts_in_time_bin = number_of_pmts_in_time_bin;

       meanx_for_vertex_with_max_n_pmts = meanxin[time_index];

       meany_for_vertex_with_max_n_pmts = meanyin[time_index];

       meanz_for_vertex_with_max_n_pmts = meanzin[time_index];

       vertex_with_max_n_pmts = iv;

     }

   }


   mnp[time_bin_index] = max_number_of_pmts_in_time_bin;

   vmnp[time_bin_index] = vertex_with_max_n_pmts;

   meanxout[time_bin_index] = meanx_for_vertex_with_max_n_pmts;

   meanyout[time_bin_index] = meany_for_vertex_with_max_n_pmts;

   meanzout[time_bin_index] = meanz_for_vertex_with_max_n_pmts;


   return;


 }


 __global__ void kernel_find_vertex_with_max_npmts_and_center_of_mass_in_timebin(histogram_t * np, histogram_t * mnp, unsigned int * vmnp, unsigned int *nc, unsigned int *mnc){


   // get unique id for each thread in each block == time bin

   unsigned int time_bin_index = threadIdx.x + blockDim.x*blockIdx.x;


   // skip if thread is assigned to nonexistent time bin

   if( time_bin_index >= constant_n_time_bins ) return;


   unsigned int number_of_pmts_in_time_bin = 0;

   unsigned int number_of_pmts_in_cone_in_time_bin = 0;

   unsigned int time_index;

   unsigned int max_number_of_pmts_in_time_bin=0;

   unsigned int max_number_of_pmts_in_cone_in_time_bin=0;

   unsigned int vertex_with_max_n_pmts = 0;


   for(unsigned int iv=0;iv<constant_n_test_vertices;iv++) { // loop over test vertices

     // sum the number of hit PMTs in this time window and the next


     time_index = time_bin_index + constant_n_time_bins*iv;

     if( time_index >= constant_n_time_bins*constant_n_test_vertices - 1 ) continue;

     number_of_pmts_in_time_bin = np[time_index] + np[time_index+1];

     number_of_pmts_in_cone_in_time_bin = nc[time_index] + nc[time_index+1];

     if( !constant_select_based_on_cone ){

     // use this part to select events based on test vertices

       if( number_of_pmts_in_time_bin >= max_number_of_pmts_in_time_bin ){

     max_number_of_pmts_in_time_bin = number_of_pmts_in_time_bin;

     vertex_with_max_n_pmts = iv;

     max_number_of_pmts_in_cone_in_time_bin = number_of_pmts_in_cone_in_time_bin;

       }

     }else{

       // use this part to select events based on test vertices and cone

       if( number_of_pmts_in_cone_in_time_bin >= max_number_of_pmts_in_cone_in_time_bin ){

     max_number_of_pmts_in_cone_in_time_bin = number_of_pmts_in_cone_in_time_bin;

     vertex_with_max_n_pmts = iv;

     max_number_of_pmts_in_time_bin = number_of_pmts_in_time_bin;

       }

     }

   }


   mnp[time_bin_index] = max_number_of_pmts_in_time_bin;

   vmnp[time_bin_index] = vertex_with_max_n_pmts;

   mnc[time_bin_index] = max_number_of_pmts_in_cone_in_time_bin;


   return;


 }


 __global__ void kernel_find_vertex_with_max_npmts_in_timebin_and_directionbin(unsigned int * np, histogram_t * mnp, unsigned int * vmnp){


   // get unique id for each thread in each block == time bin

   unsigned int time_bin_index = threadIdx.x + blockDim.x*blockIdx.x;


   // skip if thread is assigned to nonexistent time bin

   if( time_bin_index >= constant_n_time_bins - 1 ) return;


   unsigned int number_of_pmts_in_time_bin = 0;

   unsigned int max_number_of_pmts_in_time_bin=0;

   unsigned int vertex_with_max_n_pmts = 0;

   unsigned int dir_index_1, dir_index_2;


   for(unsigned int iv=0;iv<constant_n_test_vertices;iv++) { // loop over test vertices

     // sum the number of hit PMTs in this time window


     for(unsigned int idir = 0; idir < constant_n_direction_bins; idir++){


       dir_index_1 = device_get_direction_index_at_time(time_bin_index, iv, idir);

       dir_index_2 = device_get_direction_index_at_time(time_bin_index + 1, iv, idir);


       number_of_pmts_in_time_bin = np[dir_index_1]

     + np[dir_index_2];

       if( number_of_pmts_in_time_bin > max_number_of_pmts_in_time_bin ){

     max_number_of_pmts_in_time_bin = number_of_pmts_in_time_bin;

     vertex_with_max_n_pmts = iv;

       }

     }

   }


   mnp[time_bin_index] = max_number_of_pmts_in_time_bin;

   vmnp[time_bin_index] = vertex_with_max_n_pmts;


   return;


 }


 void free_event_memories(){


   checkCudaErrors(cudaUnbindTexture(tex_ids));

   checkCudaErrors(cudaUnbindTexture(tex_times));

   free(host_ids);

   free(host_times);

   checkCudaErrors(cudaFree(device_ids));

   checkCudaErrors(cudaFree(device_times));

   if( correct_mode == 1 ){

     checkCudaErrors(cudaFree(device_time_bin_of_hit));

   } else if( correct_mode == 2 ){

     checkCudaErrors(cudaFree(device_time_bin_of_hit));

     free(host_time_bin_of_hit);

     free(host_n_pmts_per_time_bin);

   } else if( correct_mode == 3 ){

     checkCudaErrors(cudaFree(device_time_bin_of_hit));

   } else if( correct_mode == 4 ){

     checkCudaErrors(cudaFree(device_time_bin_of_hit));

   } else if( correct_mode == 5 ){

     checkCudaErrors(cudaFree(device_time_bin_of_hit));

   } else if( correct_mode == 6 ){

     checkCudaErrors(cudaFree(device_time_bin_of_hit));

   } else if( correct_mode == 7 ){

     checkCudaErrors(cudaFree(device_time_bin_of_hit));

   }

   if( correct_mode != 9 ){

     checkCudaErrors(cudaFree(device_n_pmts_per_time_bin));

     if( correct_mode == 8 && return_direction ){

       checkCudaErrors(cudaFree(device_meanx_per_time_bin));

       checkCudaErrors(cudaFree(device_meany_per_time_bin));

       checkCudaErrors(cudaFree(device_meanz_per_time_bin));

     }

     if( correct_mode == 10 ){

       checkCudaErrors(cudaFree(device_dx_per_time_bin));

       checkCudaErrors(cudaFree(device_dy_per_time_bin));

       checkCudaErrors(cudaFree(device_dz_per_time_bin));

     }

   }else{

     checkCudaErrors(cudaFree(device_n_pmts_per_time_bin_and_direction_bin));

   }

   free(host_max_number_of_pmts_in_time_bin);

   free(host_vertex_with_max_n_pmts);

   checkCudaErrors(cudaFree(device_max_number_of_pmts_in_time_bin));

   checkCudaErrors(cudaFree(device_vertex_with_max_n_pmts));


   if( correct_mode == 8 && return_direction ){

     free(host_meanx_for_vertex_with_max_n_pmts_per_time_bin);

     free(host_meany_for_vertex_with_max_n_pmts_per_time_bin);

     free(host_meanz_for_vertex_with_max_n_pmts_per_time_bin);

     checkCudaErrors(cudaFree(device_meanx_for_vertex_with_max_n_pmts_per_time_bin));

     checkCudaErrors(cudaFree(device_meany_for_vertex_with_max_n_pmts_per_time_bin));

     checkCudaErrors(cudaFree(device_meanz_for_vertex_with_max_n_pmts_per_time_bin));

   }

   if( correct_mode == 10 ){

     free(host_max_number_of_pmts_in_cone_in_time_bin);

     checkCudaErrors(cudaFree(device_max_number_of_pmts_in_cone_in_time_bin));

     checkCudaErrors(cudaFree(device_number_of_pmts_in_cone_in_time_bin));

   }


   return;

 }


 void free_event_memories_nhits(){


   checkCudaErrors(cudaUnbindTexture(tex_ids));

   checkCudaErrors(cudaUnbindTexture(tex_times));

   free(host_ids);

   free(host_times);

   checkCudaErrors(cudaFree(device_ids));

   checkCudaErrors(cudaFree(device_times));

   checkCudaErrors(cudaFree(device_n_pmts_nhits));

   //  checkCudaErrors(cudaFree(device_time_nhits));

   free(host_n_pmts_nhits);

   //  free(host_time_nhits);


   return;

 }


 void free_global_memories(){


   //unbind texture reference to free resource

   checkCudaErrors(cudaUnbindTexture(tex_times_of_flight));

   if( correct_mode == 8 && return_direction ){

     checkCudaErrors(cudaUnbindTexture(tex_PMTx));

     checkCudaErrors(cudaUnbindTexture(tex_PMTy));

     checkCudaErrors(cudaUnbindTexture(tex_PMTz));

   }

   if( correct_mode == 10 ){

     checkCudaErrors(cudaUnbindTexture(tex_light_dx));

     checkCudaErrors(cudaUnbindTexture(tex_light_dy));

     checkCudaErrors(cudaUnbindTexture(tex_light_dz));

     checkCudaErrors(cudaUnbindTexture(tex_light_dr));

   }


   if( correct_mode == 9 ){

     //    checkCudaErrors(cudaUnbindTexture(tex_directions_for_vertex_and_pmt));

     checkCudaErrors(cudaFree(device_directions_for_vertex_and_pmt));

     free(host_directions_for_vertex_and_pmt);

   }


   free(PMT_x);

   free(PMT_y);

   free(PMT_z);

   free(vertex_x);

   free(vertex_y);

   free(vertex_z);

   checkCudaErrors(cudaFree(device_times_of_flight));

   free(host_times_of_flight);

   if( correct_mode == 8 && return_direction ){

     checkCudaErrors(cudaFree(device_PMTx));

     free(host_PMTx);

     checkCudaErrors(cudaFree(device_PMTy));

     free(host_PMTy);

     checkCudaErrors(cudaFree(device_PMTz));

     free(host_PMTz);

   }

   if( correct_mode == 10 ){

     checkCudaErrors(cudaFree(device_light_dx));

     free(host_light_dx);

     checkCudaErrors(cudaFree(device_light_dy));

     free(host_light_dy);

     checkCudaErrors(cudaFree(device_light_dz));

     free(host_light_dz);

     checkCudaErrors(cudaFree(device_light_dr));

     free(host_light_dr);

   }


   return;

 }


 void copy_candidates_from_device_to_host(){


   checkCudaErrors(cudaMemcpy(host_max_number_of_pmts_in_time_bin,

                  device_max_number_of_pmts_in_time_bin,

                  n_time_bins*sizeof(histogram_t),

                  cudaMemcpyDeviceToHost));

   checkCudaErrors(cudaMemcpy(host_vertex_with_max_n_pmts,

                  device_vertex_with_max_n_pmts,

                  n_time_bins*sizeof(unsigned int),

                  cudaMemcpyDeviceToHost));

   if( correct_mode == 8 && return_direction ){

     checkCudaErrors(cudaMemcpy(host_meanx_for_vertex_with_max_n_pmts_per_time_bin,

                    device_meanx_for_vertex_with_max_n_pmts_per_time_bin,

                    n_time_bins*sizeof(float),

                    cudaMemcpyDeviceToHost));

     checkCudaErrors(cudaMemcpy(host_meany_for_vertex_with_max_n_pmts_per_time_bin,

                    device_meany_for_vertex_with_max_n_pmts_per_time_bin,

                    n_time_bins*sizeof(float),

                    cudaMemcpyDeviceToHost));

     checkCudaErrors(cudaMemcpy(host_meanz_for_vertex_with_max_n_pmts_per_time_bin,

                    device_meanz_for_vertex_with_max_n_pmts_per_time_bin,

                    n_time_bins*sizeof(float),

                    cudaMemcpyDeviceToHost));

   }

   if( correct_mode == 10 ){

     checkCudaErrors(cudaMemcpy(host_max_number_of_pmts_in_cone_in_time_bin,

                    device_max_number_of_pmts_in_cone_in_time_bin,

                    n_time_bins*sizeof(unsigned int),

                    cudaMemcpyDeviceToHost));


   }


 }


 void choose_candidates_above_threshold(){


   candidate_trigger_pair_vertex_time.clear();

   candidate_trigger_npmts_in_time_bin.clear();

   if( correct_mode == 8 && return_direction ){

     candidate_trigger_meanx_per_time_bin.clear();

     candidate_trigger_meany_per_time_bin.clear();

     candidate_trigger_meanz_per_time_bin.clear();

   }

   if( correct_mode == 10 ){

     candidate_trigger_npmts_in_cone_in_time_bin.clear();

   }


   unsigned int the_threshold;

   unsigned int number_of_pmts_to_cut_on;


   for(unsigned int time_bin = 0; time_bin<n_time_bins - 1; time_bin++){ // loop over time bins

     // n_time_bins - 1 as we are checking the i and i+1 at the same time


     if(host_vertex_with_max_n_pmts[time_bin] < n_water_like_test_vertices )

       the_threshold = water_like_threshold_number_of_pmts;

     else

       the_threshold = wall_like_threshold_number_of_pmts;


     number_of_pmts_to_cut_on = host_max_number_of_pmts_in_time_bin[time_bin];

     if( correct_mode == 10 ){

       if( select_based_on_cone ){

     number_of_pmts_to_cut_on = host_max_number_of_pmts_in_cone_in_time_bin[time_bin];

       }

     }


     if(number_of_pmts_to_cut_on > the_threshold) {


       if( use_verbose ){

     printf(" [2] time %f vertex (%f, %f, %f) npmts %d \n", (time_bin + 2)*time_step_size - time_offset, vertex_x[host_vertex_with_max_n_pmts[time_bin]], vertex_y[host_vertex_with_max_n_pmts[time_bin]], vertex_z[host_vertex_with_max_n_pmts[time_bin]], number_of_pmts_to_cut_on);

       }


       candidate_trigger_pair_vertex_time.push_back(std::make_pair(host_vertex_with_max_n_pmts[time_bin],time_bin+1));

       candidate_trigger_npmts_in_time_bin.push_back(host_max_number_of_pmts_in_time_bin[time_bin]);

       if( correct_mode == 8 && return_direction ){

     candidate_trigger_meanx_per_time_bin.push_back(host_meanx_for_vertex_with_max_n_pmts_per_time_bin[time_bin]);

     candidate_trigger_meany_per_time_bin.push_back(host_meany_for_vertex_with_max_n_pmts_per_time_bin[time_bin]);

     candidate_trigger_meanz_per_time_bin.push_back(host_meanz_for_vertex_with_max_n_pmts_per_time_bin[time_bin]);

       }

       if( correct_mode == 10 ){

     candidate_trigger_npmts_in_cone_in_time_bin.push_back(host_max_number_of_pmts_in_cone_in_time_bin[time_bin]);

       }

     }


   }


   if( use_verbose )

     printf(" [2] n candidates: %d \n", candidate_trigger_pair_vertex_time.size());

 }


 bool set_input_file_for_event(int n){


   int nchar = (ceil(log10(n+1))+1);

   char * num =  (char*)malloc(sizeof(char)*nchar);

   sprintf(num, "%d", n+1);

   event_file = event_file_base + num + event_file_suffix;


   bool file_exists = (access( event_file.c_str(), F_OK ) != -1);


   free(num);


   return file_exists;


 }


 void set_output_file(){


   int nchar = (ceil(log10(water_like_threshold_number_of_pmts))+1);

   char * num =  (char*)malloc(sizeof(char)*nchar);

   sprintf(num, "%d", water_like_threshold_number_of_pmts);

   output_file = output_file_base + num + event_file_suffix;


   free(num);


   return ;


 }


 void set_output_file_nhits(unsigned int threshold){


   int nchar = (ceil(log10(threshold))+1);

   char * num =  (char*)malloc(sizeof(char)*nchar);

   sprintf(num, "%d", threshold);

   output_file = output_file_base + num + event_file_suffix;


   free(num);


   return ;


 }


 void write_output_nhits(unsigned int * ntriggers){


   if( output_txt ){


     for(unsigned int u=nhits_threshold_min; u<=nhits_threshold_max; u++){

       set_output_file_nhits(u);

       FILE *of=fopen(output_file.c_str(), "a");


       //    int trigger = (ntriggers[u - nhits_threshold_min] > 0 ? 1 : 0);

       int trigger = ntriggers[u - nhits_threshold_min];

       fprintf(of, " %d \n", trigger);


       fclose(of);

     }


   }


 }


 void write_output(){


   if( output_txt ){

     FILE *of=fopen(output_file.c_str(), "a");


     int trigger;

     if( write_output_mode == 0 ){

       // output 1 if there is a trigger, 0 otherwise

       trigger = (trigger_pair_vertex_time.size() > 0 ? 1 : 0);

     }


     if( write_output_mode == 1 ){

       // output the n of triggers

       trigger = trigger_pair_vertex_time.size();

     }


     if( write_output_mode == 2 ){

       // output the n of water-like triggers

       int trigger = 0;

       for(std::vector<std::pair<unsigned int,unsigned int> >::const_iterator itrigger=trigger_pair_vertex_time.begin(); itrigger != trigger_pair_vertex_time.end(); ++itrigger){

         if( itrigger->first  < n_water_like_test_vertices )

         trigger ++;

       }

     }


     if( write_output_mode == 0 || write_output_mode == 1 || write_output_mode == 2 ){

       fprintf(of, " %d \n", trigger);

     }


     if( write_output_mode == 3 ){

       unsigned int triggertime, trigger_index;

       // output reconstructed vertices

       for(std::vector<std::pair<unsigned int,unsigned int> >::const_iterator itrigger=trigger_pair_vertex_time.begin(); itrigger != trigger_pair_vertex_time.end(); ++itrigger){

     triggertime = itrigger->second*time_step_size - time_offset;

     if( correct_mode == 10 ){

       trigger_index = itrigger - trigger_pair_vertex_time.begin();

       fprintf(of, " %d %f %f %f %d %d %d \n", n_events, vertex_x[itrigger->first], vertex_y[itrigger->first], vertex_z[itrigger->first], triggertime, trigger_npmts_in_time_bin.at(trigger_index), trigger_npmts_in_cone_in_time_bin.at(trigger_index));

     }else{

       fprintf(of, " %d %f %f %f %d \n", n_events, vertex_x[itrigger->first], vertex_y[itrigger->first], vertex_z[itrigger->first], triggertime);

     }

       }

     }


     if( write_output_mode == 4 ){

       // output non-corrected and corrected times for best vertex

       int max_n_pmts = 0;

       unsigned int best_vertex;

       for(std::vector<std::pair<unsigned int,unsigned int> >::const_iterator itrigger=trigger_pair_vertex_time.begin(); itrigger != trigger_pair_vertex_time.end(); ++itrigger){

     unsigned int vertex_index = itrigger->first;

     unsigned int time_index = itrigger->second;

     unsigned int local_n_pmts = host_max_number_of_pmts_in_time_bin[itrigger->second];

     if( local_n_pmts > max_n_pmts ){

       max_n_pmts = local_n_pmts;

       best_vertex = vertex_index;

     }

       }

       unsigned int distance_index;

       double tof;

       double corrected_time;


       for(unsigned int i=0; i<n_hits; i++){


     distance_index = get_distance_index(host_ids[i], n_PMTs*best_vertex);

     tof = host_times_of_flight[distance_index];

     corrected_time = host_times[i]-tof;


     fprintf(of, " %d %d %f \n", host_ids[i], host_times[i], corrected_time);

     //fprintf(of, " %d %f \n", host_ids[i], corrected_time);

       }

     }


     fclose(of);

   }


 }


 void initialize_output(){


   if( output_txt )

     remove( output_file.c_str() );


 }


 void initialize_output_nhits(){


   if( output_txt )

     for(unsigned int u=nhits_threshold_min; u<=nhits_threshold_max; u++){

       set_output_file_nhits(u);

       remove( output_file.c_str() );

     }

 }


 float read_value_from_file(std::string paramname, std::string filename){


   FILE * pFile = fopen (filename.c_str(),"r");

   if(pFile == NULL) {

     printf("Error: file %s could not be opened\n", filename.c_str());

   }


   char name[256];

   float value;


   while( EOF != fscanf(pFile, "%s %e", name, &value) ){

     if( paramname.compare(name) == 0 ){

       fclose(pFile);

       return value;

     }

   }


   printf(" [2] warning: could not find parameter %s in file %s \n", paramname.c_str(), filename.c_str());


   fclose(pFile);

   exit(0);


   return 0.;


 }


 void read_user_parameters(){


   std::string parameter_file = "parameters.txt";


   twopi = 2.*acos(-1.);

   speed_light_water = 29.9792/1.3330; // speed of light in water, cm/ns

   //double speed_light_water = 22.490023;


   cerenkov_costheta =1./1.3330;

   cerenkov_angle_water = acos(cerenkov_costheta);

   costheta_cone_cut = read_value_from_file("costheta_cone_cut", parameter_file);

   select_based_on_cone = (bool)read_value_from_file("select_based_on_cone", parameter_file);

   return_vertex = (bool)read_value_from_file("return_vertex", parameter_file);

   return_direction = (bool)read_value_from_file("return_direction", parameter_file);


   dark_rate = read_value_from_file("dark_rate", parameter_file); // Hz

   cylindrical_grid = (bool)read_value_from_file("cylindrical_grid", parameter_file);

   distance_between_vertices = read_value_from_file("distance_between_vertices", parameter_file); // cm

   wall_like_distance = read_value_from_file("wall_like_distance", parameter_file); // units of distance between vertices

   time_step_size = (unsigned int)(sqrt(3.)*distance_between_vertices/(4.*speed_light_water)); // ns

   int extra_threshold = (int)(dark_rate*n_PMTs*2.*time_step_size*1.e-9); // to account for dark current occupancy

   water_like_threshold_number_of_pmts = read_value_from_file("water_like_threshold_number_of_pmts", parameter_file) + extra_threshold;

   wall_like_threshold_number_of_pmts = read_value_from_file("wall_like_threshold_number_of_pmts", parameter_file) + extra_threshold;

   coalesce_time = read_value_from_file("coalesce_time", parameter_file); // ns

   trigger_gate_up = read_value_from_file("trigger_gate_up", parameter_file); // ns

   trigger_gate_down = read_value_from_file("trigger_gate_down", parameter_file); // ns

   max_n_hits_per_job = read_value_from_file("max_n_hits_per_job", parameter_file);

   output_txt = (bool)read_value_from_file("output_txt", parameter_file);

   correct_mode = read_value_from_file("correct_mode", parameter_file);

   write_output_mode = read_value_from_file("write_output_mode", parameter_file);

   number_of_kernel_blocks_3d.y = read_value_from_file("num_blocks_y", parameter_file);

   number_of_threads_per_block_3d.y = read_value_from_file("num_threads_per_block_y", parameter_file);

   number_of_threads_per_block_3d.x = read_value_from_file("num_threads_per_block_x", parameter_file);


   n_direction_bins_theta = read_value_from_file("n_direction_bins_theta", parameter_file);

   n_direction_bins_phi = 2*(n_direction_bins_theta - 1);

   n_direction_bins = n_direction_bins_phi*n_direction_bins_theta - 2*(n_direction_bins_phi - 1);


 }


 void read_user_parameters_nhits(){


   std::string parameter_file = "parameters.txt";


   twopi = 2.*acos(-1.);

   speed_light_water = 29.9792/1.3330; // speed of light in water, cm/ns

   //double speed_light_water = 22.490023;


   double dark_rate = read_value_from_file("dark_rate", parameter_file); // Hz

   distance_between_vertices = read_value_from_file("distance_between_vertices", parameter_file); // cm

   wall_like_distance = read_value_from_file("wall_like_distance", parameter_file); // units of distance between vertices

   time_step_size = read_value_from_file("nhits_step_size", parameter_file); // ns

   nhits_window = read_value_from_file("nhits_window", parameter_file); // ns

   int extra_threshold = (int)(dark_rate*n_PMTs*nhits_window*1.e-9); // to account for dark current occupancy

   extra_threshold = 0;

   water_like_threshold_number_of_pmts = read_value_from_file("water_like_threshold_number_of_pmts", parameter_file) + extra_threshold;

   wall_like_threshold_number_of_pmts = read_value_from_file("wall_like_threshold_number_of_pmts", parameter_file) + extra_threshold;

   coalesce_time = read_value_from_file("coalesce_time", parameter_file); // ns

   trigger_gate_up = read_value_from_file("trigger_gate_up", parameter_file); // ns

   trigger_gate_down = read_value_from_file("trigger_gate_down", parameter_file); // ns

   max_n_hits_per_job = read_value_from_file("max_n_hits_per_job", parameter_file);

   output_txt = (bool)read_value_from_file("output_txt", parameter_file);

   correct_mode = read_value_from_file("correct_mode", parameter_file);

   number_of_kernel_blocks_3d.y = read_value_from_file("num_blocks_y", parameter_file);

   number_of_threads_per_block_3d.y = read_value_from_file("num_threads_per_block_y", parameter_file);

   number_of_threads_per_block_3d.x = read_value_from_file("num_threads_per_block_x", parameter_file);

   nhits_threshold_min = read_value_from_file("nhits_threshold_min", parameter_file);

   nhits_threshold_max = read_value_from_file("nhits_threshold_max", parameter_file);


 }


 void check_cudamalloc_float(unsigned int size){


   unsigned int bytes_per_float = 4;

   size_t available_memory, total_memory;

   cudaMemGetInfo(&available_memory, &total_memory);

   if( size*bytes_per_float > available_memory*1000/1024 ){

     printf(" [2] cannot allocate %d floats, or %d B, available %d B \n",

        size, size*bytes_per_float, available_memory*1000/1024);

   }


 }


 void check_cudamalloc_int(unsigned int size){


   unsigned int bytes_per_int = 4;

   size_t available_memory, total_memory;

   cudaMemGetInfo(&available_memory, &total_memory);

   if( size*bytes_per_int > available_memory*1000/1024 ){

     printf(" [2] cannot allocate %d ints, or %d B, available %d B \n",

        size, size*bytes_per_int, available_memory*1000/1024);

   }


 }


 void check_cudamalloc_unsigned_int(unsigned int size){


   unsigned int bytes_per_unsigned_int = 4;

   size_t available_memory, total_memory;

   cudaMemGetInfo(&available_memory, &total_memory);

   if( size*bytes_per_unsigned_int > available_memory*1000/1024 ){

     printf(" [2] cannot allocate %d unsigned_ints, or %d B, available %d B \n",

        size, size*bytes_per_unsigned_int, available_memory*1000/1024);

   }


 }


 void check_cudamalloc_unsigned_short(unsigned int size){


   unsigned int bytes_per_unsigned_short = 2;

   size_t available_memory, total_memory;

   cudaMemGetInfo(&available_memory, &total_memory);

   if( size*bytes_per_unsigned_short > available_memory*1000/1024 ){

     printf(" cannot allocate %d unsigned_shorts, or %d B, available %d B \n",

        size, size*bytes_per_unsigned_short, available_memory*1000/1024);

   }


 }


 void check_cudamalloc_unsigned_char(unsigned int size){


   unsigned int bytes_per_unsigned_char = 1;

   size_t available_memory, total_memory;

   cudaMemGetInfo(&available_memory, &total_memory);

   if( size*bytes_per_unsigned_char > available_memory*1000/1024 ){

     printf(" cannot allocate %d unsigned_chars, or %d B, available %d B \n",

        size, size*bytes_per_unsigned_char, available_memory*1000/1024);

   }


 }


 void check_cudamalloc_bool(unsigned int size){


   unsigned int bytes_per_bool = 1;

   size_t available_memory, total_memory;

   cudaMemGetInfo(&available_memory, &total_memory);

   if( size*bytes_per_bool > available_memory*1000/1024 ){

     printf(" [2] cannot allocate %d unsigned_ints, or %d B, available %d B \n",

        size, size*bytes_per_bool, available_memory*1000/1024);

   }


 }


 unsigned int find_greatest_divisor(unsigned int n, unsigned int max){


   if( n == 1 ){

     return 1;

   }


   if (n % 2 == 0){

     if( n <= 2*max ){

       return n / 2;

     }

     else{

       float sqrtN = sqrt(n); // square root of n in float precision.

       unsigned int start = ceil(std::max((double)2,(double)n/(double)max));

       for(unsigned int d = start; d <= n; d += 1)

     if (n % d == 0)

       return n/d;

       return 1;

     }

   }


   // Now, the least prime divisor of n is odd.

   // So, we increment the counter by 2 in the loop, by starting in 3.


   float sqrtN = sqrt(n); // square root of n in float precision.

   unsigned int start = ceil(std::max((double)3,(double)n/(double)max));

   for(unsigned int d = start; d <= n; d += 2)

     if (n % d == 0)

       return n/d;


   // If the loop has reached its end normally,

   // it means that N is prime.


   return 1;


 }


 void setup_threads_for_histo(unsigned int n){


   number_of_kernel_blocks_3d.x = n/greatest_divisor;

   number_of_kernel_blocks_3d.y = 1;


   number_of_threads_per_block_3d.x = greatest_divisor;

   number_of_threads_per_block_3d.y = 1;


 }


 void setup_threads_for_histo(){


   number_of_kernel_blocks_3d.x = 1000;

   number_of_kernel_blocks_3d.y = 1;


   number_of_threads_per_block_3d.x = max_n_threads_per_block;

   number_of_threads_per_block_3d.y = 1;


 }


 void setup_threads_for_histo_iterated(bool last){


   number_of_kernel_blocks_3d.x = 1000;

   number_of_kernel_blocks_3d.y = 1;


   unsigned int size = n_time_bins*n_test_vertices;

   number_of_threads_per_block_3d.x = (last ? size - size/max_n_threads_per_block*max_n_threads_per_block : max_n_threads_per_block);

   number_of_threads_per_block_3d.y = 1;


 }


 void setup_threads_for_histo_per(unsigned int n){


   number_of_kernel_blocks_3d.x = n;


   print_parameters_2d();


   if( number_of_threads_per_block_3d.x * number_of_threads_per_block_3d.y > max_n_threads_per_block ){

     printf(" [2] --------------------- warning: number_of_threads_per_block (x*y) = %d cannot exceed max value %d \n", number_of_threads_per_block_3d.x * number_of_threads_per_block_3d.y, max_n_threads_per_block );

   }


   if( number_of_kernel_blocks_3d.x > max_n_blocks ){

     printf(" [2] warning: number_of_kernel_blocks x = %d cannot exceed max value %d \n", number_of_kernel_blocks_3d.x, max_n_blocks );

   }


   if( number_of_kernel_blocks_3d.y > max_n_blocks ){

     printf(" [2] warning: number_of_kernel_blocks y = %d cannot exceed max value %d \n", number_of_kernel_blocks_3d.y, max_n_blocks );

   }


   if( std::numeric_limits<int>::max() / (number_of_kernel_blocks_3d.x*number_of_kernel_blocks_3d.y)  < number_of_threads_per_block_3d.x*number_of_threads_per_block_3d.y ){

     printf(" [2] --------------------- warning: grid size cannot exceed max value %u \n", std::numeric_limits<int>::max() );

   }


 }


 #endif


candidate_trigger_npmts_in_time_bin
std::vector< unsigned int > candidate_trigger_npmts_in_time_bin
Definition: library_daq.h:154

constant_n_hits
__constant__ unsigned int constant_n_hits
Definition: library_daq.h:94

set_output_file_nhits
void set_output_file_nhits(unsigned int threshold)
Definition: library_daq.h:2232

device_PMTz
float * device_PMTz
Definition: library_daq.h:139

device_meanx_for_vertex_with_max_n_pmts_per_time_bin
float * device_meanx_for_vertex_with_max_n_pmts_per_time_bin
Definition: library_daq.h:184

allocate_candidates_memory_on_device
void allocate_candidates_memory_on_device()
Definition: library_daq.h:1134

free_global_memories
void free_global_memories()
Definition: library_daq.h:2062

device_directions_for_vertex_and_pmt
bool * device_directions_for_vertex_and_pmt
Definition: library_daq.h:141

the_max_time
unsigned int the_max_time
Definition: library_daq.h:211

setup_threads_for_histo
void setup_threads_for_histo(unsigned int n)
Definition: library_daq.h:2570

set_output_file
void set_output_file()
Definition: library_daq.h:2219

tex_PMTx
texture< float, 1, cudaReadModeElementType > tex_PMTx
Definition: library_daq.h:148

read_user_parameters_nhits
void read_user_parameters_nhits()
Definition: library_daq.h:2428

device_get_direction_index_at_time
__device__ unsigned int device_get_direction_index_at_time(unsigned int time_bin, unsigned int vertex_index, unsigned int direction_index)
Definition: library_daq.h:1755

allocate_directions_memory_on_device
void allocate_directions_memory_on_device()
Definition: library_daq.h:957

histogram_t
unsigned short histogram_t
Definition: library_daq.h:27

elapsed_candidates_copy_host
float elapsed_candidates_copy_host
Definition: library_daq.h:204

allocate_candidates_memory_on_host
void allocate_candidates_memory_on_host()
Definition: library_daq.h:1113

nhits_threshold_max
unsigned int nhits_threshold_max
Definition: library_daq.h:50

stop_total_cuda_clock
double stop_total_cuda_clock()
Definition: library_daq.h:1670

n_test_vertices
unsigned int n_test_vertices
vertices
Definition: library_daq.h:71

distance_between_vertices
double distance_between_vertices
parameters
Definition: library_daq.h:44

nhits_window
unsigned int nhits_window
Definition: library_daq.h:212

device_dy_per_time_bin
float * device_dy_per_time_bin
Definition: library_daq.h:111

PMT_x
double * PMT_x
Definition: library_daq.h:69

wall_like_threshold_number_of_pmts
unsigned int wall_like_threshold_number_of_pmts
Definition: library_daq.h:49

device_times_of_flight
time_of_flight_t * device_times_of_flight
Definition: library_daq.h:125

setup_threads_for_tof_biparallel
bool setup_threads_for_tof_biparallel()
Definition: library_daq.h:738

tex_times_of_flight
texture< time_of_flight_t, 1, cudaReadModeElementType > tex_times_of_flight
Definition: library_daq.h:143

constant_n_direction_bins_phi
__constant__ unsigned int constant_n_direction_bins_phi
Definition: library_daq.h:90

device_dz_per_time_bin
float * device_dz_per_time_bin
Definition: library_daq.h:112

setup_threads_nhits
bool setup_threads_nhits()
Definition: library_daq.h:822

setup_threads_for_tof
bool setup_threads_for_tof()
Definition: library_daq.h:715

kernel_find_vertex_with_max_npmts_in_timebin_and_directionbin
__global__ void kernel_find_vertex_with_max_npmts_in_timebin_and_directionbin(unsigned int *np, histogram_t *mnp, unsigned int *vmnp)
Definition: library_daq.h:1944

tex_PMTz
texture< float, 1, cudaReadModeElementType > tex_PMTz
Definition: library_daq.h:150

vertex_z
double * vertex_z
Definition: library_daq.h:75

constant_n_direction_bins_theta
__constant__ unsigned int constant_n_direction_bins_theta
Definition: library_daq.h:88

host_max_number_of_pmts_in_cone_in_time_bin
unsigned int * host_max_number_of_pmts_in_cone_in_time_bin
Definition: library_daq.h:182

coalesce_triggers
void coalesce_triggers()
Definition: library_daq.h:1403

device_number_of_pmts_in_cone_in_time_bin
unsigned int * device_number_of_pmts_in_cone_in_time_bin
Definition: library_daq.h:181

dark_rate
double dark_rate
Definition: library_daq.h:55

setup_threads_for_histo_iterated
void setup_threads_for_histo_iterated(bool last)
Definition: library_daq.h:2590

elapsed_tofs_copy_dev
float elapsed_tofs_copy_dev
Definition: library_daq.h:204

check_cudamalloc_unsigned_short
void check_cudamalloc_unsigned_short(unsigned int size)
Definition: library_daq.h:2497

elapsed_detector
float elapsed_detector
Definition: library_daq.h:204

constant_time_step_size
__constant__ unsigned int constant_time_step_size
Definition: library_daq.h:47

total_start
cudaEvent_t total_start
Definition: library_daq.h:171

constant_n_PMTs
__constant__ unsigned int constant_n_PMTs
Definition: library_daq.h:68

device_light_dx
float * device_light_dx
Definition: library_daq.h:127

candidate_trigger_meany_per_time_bin
std::vector< float > candidate_trigger_meany_per_time_bin
Definition: library_daq.h:162

fill_tofs_memory_on_device_nhits
void fill_tofs_memory_on_device_nhits()
Definition: library_daq.h:1323

tex_light_dz
texture< float, 1, cudaReadModeElementType > tex_light_dz
Definition: library_daq.h:146

set_input_file_for_event
bool set_input_file_for_event(int n)
Definition: library_daq.h:2204

write_output_nhits
void write_output_nhits(unsigned int n)

read_the_detector
bool read_the_detector()
Definition: library_daq.h:461

elapsed_parameters
float elapsed_parameters
Definition: library_daq.h:204

free_event_memories_nhits
void free_event_memories_nhits()
Definition: library_daq.h:2046

stop_cuda_clock
double stop_cuda_clock()
Definition: library_daq.h:1659

setup_threads_for_histo_per
void setup_threads_for_histo_per(unsigned int n)
Definition: library_daq.h:2601

number_of_kernel_blocks
unsigned int number_of_kernel_blocks
threads
Definition: library_daq.h:77

n_PMTs
unsigned int n_PMTs
pmts
Definition: library_daq.h:67

cerenkov_angle_water
double cerenkov_angle_water
Definition: library_daq.h:120

trigger_meanz_per_time_bin
std::vector< float > trigger_meanz_per_time_bin
Definition: library_daq.h:166

choose_candidates_above_threshold
void choose_candidates_above_threshold()
Definition: library_daq.h:2149

device_PMTy
float * device_PMTy
Definition: library_daq.h:137

elapsed_memory_tofs_dev
float elapsed_memory_tofs_dev
Definition: library_daq.h:204

read_value_from_file
float read_value_from_file(std::string paramname, std::string filename)
Definition: library_daq.h:2360

device_meany_for_vertex_with_max_n_pmts_per_time_bin
float * device_meany_for_vertex_with_max_n_pmts_per_time_bin
Definition: library_daq.h:185

print_parameters_2d
void print_parameters_2d()
Definition: library_daq.h:389

elapsed_copy_dev
float elapsed_copy_dev
Definition: library_daq.h:204

device_meanx_per_time_bin
float * device_meanx_per_time_bin
Definition: library_daq.h:113

elapsed_memory_candidates_host
float elapsed_memory_candidates_host
Definition: library_daq.h:204

correct_mode
unsigned int correct_mode
Definition: library_daq.h:174

print_times_of_flight
void print_times_of_flight()
Definition: library_daq.h:414

host_light_dx
float * host_light_dx
Definition: library_daq.h:128

free_event_memories
void free_event_memories()
Definition: library_daq.h:1983

elapsed_reset
float elapsed_reset
Definition: library_daq.h:204

device_max_number_of_pmts_in_cone_in_time_bin
unsigned int * device_max_number_of_pmts_in_cone_in_time_bin
Definition: library_daq.h:183

get_distance_index
unsigned int get_distance_index(unsigned int pmt_id, unsigned int vertex_block)
Definition: library_daq.h:1678

device_light_dr
float * device_light_dr
Definition: library_daq.h:133

vertex_y
double * vertex_y
Definition: library_daq.h:75

speed_light_water
double speed_light_water
Definition: library_daq.h:119

t0
struct timeval t0
Definition: library_daq.h:168

number_of_threads_per_block
unsigned int number_of_threads_per_block
Definition: library_daq.h:79

elapsed_directions
float elapsed_directions
Definition: library_daq.h:204

output_file
std::string output_file
Definition: library_daq.h:200

n_time_bins
unsigned int n_time_bins
Definition: library_daq.h:85

output_txt
bool output_txt
Definition: library_daq.h:173

stop_c_clock
double stop_c_clock()
Definition: library_daq.h:1651

elapsed_candidates_memory_dev
float elapsed_candidates_memory_dev
Definition: library_daq.h:204

t1
struct timeval t1
Definition: library_daq.h:169

setup_threads_to_find_candidates
bool setup_threads_to_find_candidates()
Definition: library_daq.h:809

make_table_of_directions
void make_table_of_directions()
Definition: library_daq.h:1209

host_n_pmts_nhits
unsigned int * host_n_pmts_nhits
Definition: library_daq.h:108

max_n_threads_per_block
int max_n_threads_per_block
Definition: library_daq.h:191

tex_times
texture< unsigned int, 1, cudaReadModeElementType > tex_times
Definition: library_daq.h:100

select_based_on_cone
bool select_based_on_cone
Definition: library_daq.h:58

twopi
double twopi
Definition: library_daq.h:123

timedifference_msec
float timedifference_msec(struct timeval t0, struct timeval t1)
Definition: library_daq.h:1641

read_number_of_input_hits
unsigned int read_number_of_input_hits()
Definition: library_daq.h:303

device_max_number_of_pmts_in_time_bin
histogram_t * device_max_number_of_pmts_in_time_bin
Definition: library_daq.h:178

constant_n_water_like_test_vertices
__constant__ unsigned int constant_n_water_like_test_vertices
Definition: library_daq.h:74

device_light_dz
float * device_light_dz
Definition: library_daq.h:131

time_step_size
unsigned int time_step_size
Definition: library_daq.h:46

trigger_meany_per_time_bin
std::vector< float > trigger_meany_per_time_bin
Definition: library_daq.h:165

allocate_correct_memory_on_device
void allocate_correct_memory_on_device()
Definition: library_daq.h:973

tex_light_dy
texture< float, 1, cudaReadModeElementType > tex_light_dy
Definition: library_daq.h:145

host_n_pmts_per_time_bin
unsigned int * host_n_pmts_per_time_bin
Definition: library_daq.h:106

elapsed_memory_directions_dev
float elapsed_memory_directions_dev
Definition: library_daq.h:204

max_n_hits_per_job
unsigned int max_n_hits_per_job
Definition: library_daq.h:54

setup_threads_for_tof_2d
bool setup_threads_for_tof_2d(unsigned int A, unsigned int B)
Definition: library_daq.h:761

read_input
bool read_input()
Definition: library_daq.h:323

read_the_pmts
bool read_the_pmts()
Definition: library_daq.h:446

host_vertex_with_max_n_pmts
unsigned int * host_vertex_with_max_n_pmts
Definition: library_daq.h:179

trigger_pair_vertex_time
std::vector< std::pair< unsigned int, unsigned int > > trigger_pair_vertex_time
Definition: library_daq.h:156

allocate_correct_memory_on_device_nhits
void allocate_correct_memory_on_device_nhits()
Definition: library_daq.h:1081

separate_triggers_into_gates
void separate_triggers_into_gates()
Definition: library_daq.h:1514

candidate_trigger_meanz_per_time_bin
std::vector< float > candidate_trigger_meanz_per_time_bin
Definition: library_daq.h:163

device_light_dy
float * device_light_dy
Definition: library_daq.h:129

elapsed_gates
float elapsed_gates
Definition: library_daq.h:204

host_light_dz
float * host_light_dz
Definition: library_daq.h:132

device_n_pmts_per_time_bin_and_direction_bin
unsigned int * device_n_pmts_per_time_bin_and_direction_bin
Definition: library_daq.h:109

read_the_input_ToolDAQ
bool read_the_input_ToolDAQ(std::vector< int > PMTid, std::vector< int > time)

trigger_npmts_in_time_bin
std::vector< unsigned int > trigger_npmts_in_time_bin
Definition: library_daq.h:157

candidate_trigger_meanx_per_time_bin
std::vector< float > candidate_trigger_meanx_per_time_bin
Definition: library_daq.h:161

elapsed_pmts
float elapsed_pmts
Definition: library_daq.h:204

host_meany_for_vertex_with_max_n_pmts_per_time_bin
float * host_meany_for_vertex_with_max_n_pmts_per_time_bin
Definition: library_daq.h:188

print_input
void print_input()
Definition: library_daq.h:400

cylindrical_grid
bool cylindrical_grid
Definition: library_daq.h:124

initialize_output_nhits
void initialize_output_nhits()
Definition: library_daq.h:2351

get_direction_index_at_time
unsigned int get_direction_index_at_time(unsigned int time_bin, unsigned int vertex_index, unsigned int direction_index)
Definition: library_daq.h:1711

costheta_cone_cut
float costheta_cone_cut
Definition: library_daq.h:56

constant_cerenkov_costheta
__constant__ float constant_cerenkov_costheta
Definition: library_daq.h:122

number_of_threads_per_block_3d
dim3 number_of_threads_per_block_3d
Definition: library_daq.h:80

device_n_pmts_per_time_bin
histogram_t * device_n_pmts_per_time_bin
Definition: library_daq.h:105

host_meanx_for_vertex_with_max_n_pmts_per_time_bin
float * host_meanx_for_vertex_with_max_n_pmts_per_time_bin
Definition: library_daq.h:187

offset_t
unsigned short offset_t
Definition: library_daq.h:19

build.h

choose_candidates
float choose_candidates
Definition: library_daq.h:204

detector_radius
double detector_radius
Definition: library_daq.h:65

water_like_threshold_number_of_pmts
unsigned int water_like_threshold_number_of_pmts
Definition: library_daq.h:48

candidate_trigger_pair_vertex_time
std::vector< std::pair< unsigned int, unsigned int > > candidate_trigger_pair_vertex_time
Definition: library_daq.h:153

nhits_threshold_min
unsigned int nhits_threshold_min
Definition: library_daq.h:50

read_the_input
bool read_the_input()
Definition: library_daq.h:838

constant_return_direction
__constant__ bool constant_return_direction
Definition: library_daq.h:62

tex_ids
texture< unsigned int, 1, cudaReadModeElementType > tex_ids
Definition: library_daq.h:97

event_file_base
std::string event_file_base
Definition: library_daq.h:201

fill_tofs_memory_on_device
void fill_tofs_memory_on_device()
Definition: library_daq.h:1257

return_vertex
bool return_vertex
Definition: library_daq.h:60

trigger_gate_down
double trigger_gate_down
Definition: library_daq.h:53

get_time_index
unsigned int get_time_index(unsigned int hit_index, unsigned int vertex_block)
Definition: library_daq.h:1685

n_direction_bins_theta
unsigned int n_direction_bins_theta
Definition: library_daq.h:87

constant_n_time_bins
__constant__ unsigned int constant_n_time_bins
Definition: library_daq.h:86

start_c_clock
void start_c_clock()
Definition: library_daq.h:1647

device_ids
unsigned int * device_ids
Definition: library_daq.h:96

greatest_divisor
unsigned int greatest_divisor
Definition: library_daq.h:210

trigger_gate_up
double trigger_gate_up
Definition: library_daq.h:52

device_time_bin_of_hit
unsigned int * device_time_bin_of_hit
Definition: library_daq.h:103

event_file
std::string event_file
Definition: library_daq.h:197

device_get_direction_index_at_angles
__device__ unsigned int device_get_direction_index_at_angles(unsigned int iphi, unsigned int itheta)
Definition: library_daq.h:1746

check_cudamalloc_float
void check_cudamalloc_float(unsigned int size)
Definition: library_daq.h:2460

detector_height
double detector_height
detector
Definition: library_daq.h:64

n_hits
unsigned int n_hits
Definition: library_daq.h:93

copy_candidates_from_device_to_host
void copy_candidates_from_device_to_host()
Definition: library_daq.h:2114

return_direction
bool return_direction
Definition: library_daq.h:61

number_of_kernel_blocks_3d
dim3 number_of_kernel_blocks_3d
Definition: library_daq.h:78

host_times_of_flight
time_of_flight_t * host_times_of_flight
Definition: library_daq.h:126

output_trigger_information
std::vector< double > output_trigger_information
Definition: library_daq.h:160

elapsed_tofs_free
float elapsed_tofs_free
Definition: library_daq.h:204

stop
cudaEvent_t stop
Definition: library_daq.h:171

host_ids
unsigned int * host_ids
Definition: library_daq.h:95

host_max_number_of_pmts_in_time_bin
histogram_t * host_max_number_of_pmts_in_time_bin
Definition: library_daq.h:177

device_times
unsigned int * device_times
Definition: library_daq.h:99

start_total_cuda_clock
void start_total_cuda_clock()
Definition: library_daq.h:1666

host_light_dy
float * host_light_dy
Definition: library_daq.h:130

constant_n_direction_bins
__constant__ unsigned int constant_n_direction_bins
Definition: library_daq.h:92

event_file_suffix
std::string event_file_suffix
Definition: library_daq.h:202

fill_correct_memory_on_device
void fill_correct_memory_on_device()
Definition: library_daq.h:1334

use_timing
bool use_timing
Definition: library_daq.h:195

get_direction_index_at_pmt
unsigned int get_direction_index_at_pmt(unsigned int pmt_id, unsigned int vertex_index, unsigned int direction_index)
Definition: library_daq.h:1701

output_file_base
std::string output_file_base
Definition: library_daq.h:203

start
cudaEvent_t start
Definition: library_daq.h:171

check_cudamalloc_unsigned_char
void check_cudamalloc_unsigned_char(unsigned int size)
Definition: library_daq.h:2509

host_time_bin_of_hit
unsigned int * host_time_bin_of_hit
Definition: library_daq.h:102

time_offset
offset_t time_offset
hits
Definition: library_daq.h:83

print_pmts
void print_pmts()
Definition: library_daq.h:407

elapsed_input
float elapsed_input
Definition: library_daq.h:204

elapsed_vertices
float elapsed_vertices
Definition: library_daq.h:204

allocate_tofs_memory_on_device
void allocate_tofs_memory_on_device()
Definition: library_daq.h:910

final_trigger_pair_vertex_time
std::vector< std::pair< unsigned int, unsigned int > > final_trigger_pair_vertex_time
Definition: library_daq.h:159

start_cuda_clock
void start_cuda_clock()
Definition: library_daq.h:1655

cerenkov_costheta
float cerenkov_costheta
Definition: library_daq.h:121

read_user_parameters
void read_user_parameters()
Definition: library_daq.h:2386

PMT_z
double * PMT_z
Definition: library_daq.h:69

read_detector
bool read_detector()
Definition: library_daq.h:361

device_n_pmts_nhits
unsigned int * device_n_pmts_nhits
Definition: library_daq.h:107

host_PMTy
float * host_PMTy
Definition: library_daq.h:138

device_get_time_index
__device__ unsigned int device_get_time_index(unsigned int hit_index, unsigned int vertex_block)
Definition: library_daq.h:1729

elapsed_coalesce
float elapsed_coalesce
Definition: library_daq.h:204

device_vertex_with_max_n_pmts
unsigned int * device_vertex_with_max_n_pmts
Definition: library_daq.h:180

host_times
unsigned int * host_times
Definition: library_daq.h:98

host_PMTz
float * host_PMTz
Definition: library_daq.h:140

kernel_find_vertex_with_max_npmts_and_center_of_mass_in_timebin
__global__ void kernel_find_vertex_with_max_npmts_and_center_of_mass_in_timebin(histogram_t *np, histogram_t *mnp, unsigned int *vmnp, unsigned int *nc, unsigned int *mnc)
Definition: library_daq.h:1895

n_direction_bins
unsigned int n_direction_bins
Definition: library_daq.h:91

constant_costheta_cone_cut
__constant__ float constant_costheta_cone_cut
Definition: library_daq.h:57

check_cudamalloc_bool
void check_cudamalloc_bool(unsigned int size)
Definition: library_daq.h:2521

elapsed_threads
float elapsed_threads
Definition: library_daq.h:204

check_cudamalloc_unsigned_int
void check_cudamalloc_unsigned_int(unsigned int size)
Definition: library_daq.h:2484

make_test_vertices
void make_test_vertices()
Definition: library_daq.h:470

make_table_of_tofs
void make_table_of_tofs()
Definition: library_daq.h:1165

tex_light_dx
texture< float, 1, cudaReadModeElementType > tex_light_dx
Definition: library_daq.h:144

find_greatest_divisor
unsigned int find_greatest_divisor(unsigned int n, unsigned int max)
Definition: library_daq.h:2533

host_meanz_for_vertex_with_max_n_pmts_per_time_bin
float * host_meanz_for_vertex_with_max_n_pmts_per_time_bin
Definition: library_daq.h:189

PMT_y
double * PMT_y
Definition: library_daq.h:69

device_meanz_for_vertex_with_max_n_pmts_per_time_bin
float * device_meanz_for_vertex_with_max_n_pmts_per_time_bin
Definition: library_daq.h:186

read_pmts
bool read_pmts()
Definition: library_daq.h:1378

n_direction_bins_phi
unsigned int n_direction_bins_phi
Definition: library_daq.h:89

trigger_meanx_per_time_bin
std::vector< float > trigger_meanx_per_time_bin
Definition: library_daq.h:164

write_output_mode
unsigned int write_output_mode
Definition: library_daq.h:175

host_PMTx
float * host_PMTx
Definition: library_daq.h:136

initialize_output
void initialize_output()
Definition: library_daq.h:2344

trigger_npmts_in_cone_in_time_bin
std::vector< unsigned int > trigger_npmts_in_cone_in_time_bin
Definition: library_daq.h:158

elapsed_memory_dev
float elapsed_memory_dev
Definition: library_daq.h:204

vertex_x
double * vertex_x
Definition: library_daq.h:75

elapsed_kernel_correct_times_and_get_n_pmts_per_time_bin
float elapsed_kernel_correct_times_and_get_n_pmts_per_time_bin
Definition: library_daq.h:204

device_PMTx
float * device_PMTx
Definition: library_daq.h:135

print_directions
void print_directions()
Definition: library_daq.h:429

device_meanz_per_time_bin
float * device_meanz_per_time_bin
Definition: library_daq.h:115

grid_size
unsigned int grid_size
Definition: library_daq.h:81

fill_directions_memory_on_device
void fill_directions_memory_on_device()
Definition: library_daq.h:1303

device_meany_per_time_bin
float * device_meany_per_time_bin
Definition: library_daq.h:114

use_verbose
bool use_verbose
Definition: library_daq.h:194

host_light_dr
float * host_light_dr
Definition: library_daq.h:134

get_direction_index_at_angles
unsigned int get_direction_index_at_angles(unsigned int iphi, unsigned int itheta)
Definition: library_daq.h:1692

wall_like_distance
double wall_like_distance
Definition: library_daq.h:45

pmts_file
std::string pmts_file
Definition: library_daq.h:199

detector_file
std::string detector_file
Definition: library_daq.h:198

time_of_flight_t
unsigned short time_of_flight_t
Definition: library_daq.h:35

constant_select_based_on_cone
__constant__ bool constant_select_based_on_cone
Definition: library_daq.h:59

n_water_like_test_vertices
unsigned int n_water_like_test_vertices
Definition: library_daq.h:72

device_get_direction_index_at_pmt
__device__ unsigned int device_get_direction_index_at_pmt(unsigned int pmt_id, unsigned int vertex_index, unsigned int direction_index)
Definition: library_daq.h:1736

host_directions_for_vertex_and_pmt
bool * host_directions_for_vertex_and_pmt
Definition: library_daq.h:142

device_get_distance_index
__device__ unsigned int device_get_distance_index(unsigned int pmt_id, unsigned int vertex_block)
Definition: library_daq.h:1722

coalesce_time
double coalesce_time
Definition: library_daq.h:51

total_stop
cudaEvent_t total_stop
Definition: library_daq.h:171

max_n_blocks
int max_n_blocks
Definition: library_daq.h:192

device_dx_per_time_bin
float * device_dx_per_time_bin
Definition: library_daq.h:110

elapsed_directions_copy_dev
float elapsed_directions_copy_dev
Definition: library_daq.h:204

tex_PMTy
texture< float, 1, cudaReadModeElementType > tex_PMTy
Definition: library_daq.h:149

n_events
int n_events
Definition: library_daq.h:213

elapsed_tof
float elapsed_tof
Definition: library_daq.h:204

read_number_of_pmts
unsigned int read_number_of_pmts()
Definition: library_daq.h:1358

constant_n_test_vertices
__constant__ unsigned int constant_n_test_vertices
Definition: library_daq.h:73

kernel_find_vertex_with_max_npmts_in_timebin
__global__ void kernel_find_vertex_with_max_npmts_in_timebin(histogram_t *np, histogram_t *mnp, unsigned int *vmnp)
Definition: library_daq.h:1818

elapsed_total
float elapsed_total
Definition: library_daq.h:204

constant_time_offset
__constant__ offset_t constant_time_offset
Definition: library_daq.h:84

tex_light_dr
texture< float, 1, cudaReadModeElementType > tex_light_dr
Definition: library_daq.h:147

print_gpu_properties
void print_gpu_properties()
Definition: library_daq.h:1766

elapsed_write_output
float elapsed_write_output
Definition: library_daq.h:204

print_parameters
void print_parameters()
Definition: library_daq.h:378

elapsed_free
float elapsed_free
Definition: library_daq.h:204

write_output
void write_output()
Definition: library_daq.h:2266

check_cudamalloc_int
void check_cudamalloc_int(unsigned int size)
Definition: library_daq.h:2472

candidate_trigger_npmts_in_cone_in_time_bin
std::vector< unsigned int > candidate_trigger_npmts_in_cone_in_time_bin
Definition: library_daq.h:155

elapsed_candidates_kernel
float elapsed_candidates_kernel
Definition: library_daq.h:204

elapsed_threads_candidates
float elapsed_threads_candidates
Definition: library_daq.h:204