13 #include <thrust/extrema.h>
16 #include <thrust/sort.h>
17 #include <thrust/device_ptr.h>
24 #if defined __HISTOGRAM_UCHAR__
26 #elif defined __HISTOGRAM_USHORT__
28 #elif defined __HISTOGRAM_UINT__
32 #if defined __TIME_OF_FLIGHT_UCHAR__
34 #elif defined __TIME_OF_FLIGHT_USHORT__
36 #elif defined __TIME_OF_FLIGHT_UINT__
97 texture<unsigned int, 1, cudaReadModeElementType>
tex_ids;
100 texture<unsigned int, 1, cudaReadModeElementType>
tex_times;
148 texture<float, 1, cudaReadModeElementType>
tex_PMTx;
149 texture<float, 1, cudaReadModeElementType>
tex_PMTy;
150 texture<float, 1, cudaReadModeElementType>
tex_PMTz;
254 void separate_triggers_into_gates(std::vector<int> * trigger_ns, std::vector<int> * trigger_ts, std::vector<double> * trigger_vtx_xs, std::vector<double> * trigger_vtx_ys, std::vector<double> * trigger_vtx_zs, std::vector<double> * trigger_dir_xs, std::vector<double> * trigger_dir_ys, std::vector<double> * trigger_dir_zs);
263 unsigned int get_time_index(
unsigned int hit_index,
unsigned int vertex_block);
307 printf(
" [2] cannot read input file \n");
314 for (
char c = getc(f); c != EOF; c = getc(f))
332 for(
unsigned int i=0; i<
n_hits; i++){
333 if( fscanf(f,
"%d %lf", &
id, &timef) != 2 ){
334 printf(
" [2] problem scanning hit %d from input \n", i);
338 time = int(floor(timef));
341 if( time > max ) max = time;
342 if( time < min ) min = time;
346 for(
int i=0; i<
n_hits; i++){
366 printf(
" [2] problem scanning detector \n");
382 printf(
" [2] n_PMTs = %d \n",
n_PMTs);
393 printf(
" [2] n_PMTs = %d \n",
n_PMTs);
402 for(
unsigned int i=0; i<
n_hits; i++)
409 for(
unsigned int i=0; i<
n_PMTs; i++)
410 printf(
" [2] pmt %d x %f y %f z %f \n", i,
PMT_x[i],
PMT_y[i],
PMT_z[i]);
416 printf(
" [2] times_of_flight: (vertex, PMT) \n");
417 unsigned int distance_index;
420 for(
unsigned int ip=0; ip<
n_PMTs; ip++){
431 printf(
" [2] directions: (vertex, PMT) \n");
434 for(
unsigned int ip=0; ip<
n_PMTs; ip++){
448 printf(
" [2] --- read pmts \n");
450 if( !
n_PMTs )
return false;
451 printf(
" [2] detector contains %d PMTs \n",
n_PMTs);
463 printf(
" [2] --- read detector \n");
472 printf(
" [2] --- make test vertices \n");
545 double distance_angular;
547 printf(
" [2] distance_between_vertices %f, distance_vertical %f, distance_radial %f \n",
550 double the_r, the_z, the_phi;
554 bool add_extra_layer = first;
558 while( the_r >= 0. ){
560 distance_angular =
twopi/n_angular;
564 while( the_z <= semiheight){
567 while( the_phi <
twopi - distance_angular/2. ){
571 if( the_r == 0. )
break;
572 the_phi += distance_angular;
576 if( add_extra_layer ){
577 if( the_z + semiheight < 0.3*distance_vertical )
578 the_z += distance_vertical/2.;
579 else if( semiheight - the_z < 0.7*distance_vertical )
580 the_z += distance_vertical/2.;
582 the_z += distance_vertical;
584 the_z += distance_vertical;
588 the_r -= distance_radial/2.;
592 the_r -= distance_radial;
600 first = add_extra_layer;
606 while( the_r >= 0. ){
612 distance_angular =
twopi/n_angular;
616 while( the_z <= semiheight){
622 while( the_phi <
twopi - distance_angular/2. ){
629 if( the_r == 0. )
break;
630 the_phi += distance_angular;
634 if( add_extra_layer ){
635 if( the_z + semiheight < 0.3*distance_vertical )
636 the_z += distance_vertical/2.;
637 else if( semiheight - the_z < 0.7*distance_vertical )
638 the_z += distance_vertical/2.;
640 the_z += distance_vertical;
642 the_z += distance_vertical;
648 the_r -= distance_radial/2.;
652 the_r -= distance_radial;
659 first = add_extra_layer;
662 while( the_r >= 0. ){
665 distance_angular =
twopi/n_angular;
669 while( the_z <= semiheight){
675 while( the_phi <
twopi - distance_angular/2. ){
682 if( the_r == 0. )
break;
683 the_phi += distance_angular;
686 if( add_extra_layer ){
687 if( the_z + semiheight < 0.3*distance_vertical )
688 the_z += distance_vertical/2.;
689 else if( semiheight - the_z < 0.7*distance_vertical )
690 the_z += distance_vertical/2.;
692 the_z += distance_vertical;
694 the_z += distance_vertical;
698 the_r -= distance_radial/2.;
702 the_r -= distance_radial;
763 if( std::numeric_limits<unsigned int>::max() / B < A ){
764 printf(
" [2] --------------------- warning: B = %d times A = %d cannot exceed max value %u \n", B, A, std::numeric_limits<unsigned int>::max() );
780 printf(
" [2] --------------------- warning: number_of_threads_per_block x = %d cannot exceed max value %d \n",
number_of_threads_per_block_3d.x, max_n_threads_per_block_2d );
785 printf(
" [2] --------------------- warning: number_of_threads_per_block y = %d cannot exceed max value %d \n",
number_of_threads_per_block_3d.y, max_n_threads_per_block_2d );
800 printf(
" [2] --------------------- warning: grid size cannot exceed max value %u \n", std::numeric_limits<int>::max() );
840 printf(
" [2] --- read input \n");
842 if( !
n_hits )
return false;
843 printf(
" [2] input contains %d hits \n",
n_hits);
850 printf(
" [2] time_offset = %d ns \n",
time_offset);
861 printf(
" [2] --- read input \n");
863 if( !
n_hits )
return false;
864 if(
n_hits != times.size() ){
865 printf(
" [2] n PMT ids %d but n times %d \n",
n_hits, times.size());
868 printf(
" [2] input contains %d hits \n",
n_hits);
878 for(
int i=0; i<PMTids.size(); i++){
879 time = int(floor(times[i]));
883 if( time > max ) max = time;
884 if( time < min ) min = time;
887 for(
int i=0; i<PMTids.size(); i++){
901 printf(
" [2] time_offset = %d ns \n",
time_offset);
912 printf(
" [2] --- allocate memory tofs \n");
913 #if defined __TIME_OF_FLIGHT_UCHAR__
915 #elif defined __TIME_OF_FLIGHT_USHORT__
917 #elif defined __TIME_OF_FLIGHT_UINT__
924 checkCudaErrors(cudaMalloc((
void **)&
device_PMTx, n_PMTs*
sizeof(
float)));
925 checkCudaErrors(cudaMalloc((
void **)&
device_PMTy, n_PMTs*
sizeof(
float)));
926 checkCudaErrors(cudaMalloc((
void **)&
device_PMTz, n_PMTs*
sizeof(
float)));
959 printf(
" [2] --- allocate memory directions \n");
975 printf(
" [2] --- allocate memory \n");
983 checkCudaErrors(cudaMalloc((
void **)&
device_ids,
n_hits*
sizeof(
unsigned int)));
1083 printf(
" [2] --- allocate memory \n");
1091 checkCudaErrors(cudaMalloc((
void **)&
device_ids,
n_hits*
sizeof(
unsigned int)));
1115 printf(
" [2] --- allocate candidates memory on host \n");
1136 printf(
" [2] --- allocate candidates memory on device \n");
1138 #if defined __HISTOGRAM_UCHAR__
1140 #elif defined __HISTOGRAM_USHORT__
1142 #elif defined __HISTOGRAM_UINT__
1167 printf(
" [2] --- fill times_of_flight \n");
1181 unsigned int distance_index;
1183 for(
unsigned int ip=0; ip<
n_PMTs; ip++){
1211 printf(
" [2] --- fill directions \n");
1214 float dx, dy, dz, dr, phi, cos_theta, sin_theta;
1215 float phi2, cos_theta2, angle;
1216 unsigned int dir_index_at_angles;
1217 unsigned int dir_index_at_pmt;
1218 for(
unsigned int ip=0; ip<
n_PMTs; ip++){
1223 dr = sqrt(pow(dx,2) + pow(dy,2) + pow(dz,2));
1227 sin_theta = sqrt(1. - pow(cos_theta,2));
1230 cos_theta2 = -1. + 2.*itheta/(n_direction_bins_theta - 1);
1234 if( (itheta == 0 || itheta + 1 == n_direction_bins_theta ) && iphi != 0 )
break;
1237 angle = acos( sin_theta*sqrt(1 - pow(cos_theta2,2)) * cos(phi - phi2) + cos_theta*cos_theta2 );
1259 printf(
" [2] --- copy tofs from host to device \n");
1263 cudaMemcpyHostToDevice));
1305 printf(
" [2] --- copy directions from host to device \n");
1309 cudaMemcpyHostToDevice));
1325 printf(
" [2] --- copy tofs from host to device \n");
1336 printf(
" [2] --- copy from host to device \n");
1339 n_hits*
sizeof(
unsigned int),
1340 cudaMemcpyHostToDevice));
1343 n_hits*
sizeof(
unsigned int),
1344 cudaMemcpyHostToDevice));
1362 printf(
" [2] cannot read pmts file %s \n",
pmts_file.c_str());
1367 unsigned int n_pmts = 0;
1369 for (
char c = getc(f); c != EOF; c = getc(f))
1384 for(
unsigned int i=0; i<
n_PMTs; i++){
1385 if( fscanf(f,
"%d %lf %lf %lf", &
id, &x, &y, &z) != 4 ){
1386 printf(
" [2] problem scanning pmt %d \n", i);
1416 unsigned int vertex_index, time_upper, number_of_pmts_in_time_bin, number_of_pmts_in_cone_in_time_bin;
1417 unsigned int max_pmt=0,max_vertex_index=0,max_time=0,max_pmt_in_cone=0;
1419 unsigned int trigger_index;
1420 float local_trigger_meanx_per_time_bin, local_trigger_meany_per_time_bin, local_trigger_meanz_per_time_bin;
1421 float max_trigger_meanx_per_time_bin = 0, max_trigger_meany_per_time_bin = 0, max_trigger_meanz_per_time_bin = 0;
1424 vertex_index = itrigger->first;
1425 time_upper = itrigger->second;
1437 first_trigger = (trigger_index == 0);
1440 if( first_trigger ){
1441 max_pmt = number_of_pmts_in_time_bin;
1442 max_vertex_index = vertex_index;
1443 max_time = time_upper;
1445 max_trigger_meanx_per_time_bin = local_trigger_meanx_per_time_bin;
1446 max_trigger_meany_per_time_bin = local_trigger_meany_per_time_bin;
1447 max_trigger_meanz_per_time_bin = local_trigger_meanz_per_time_bin;
1450 max_pmt_in_cone = number_of_pmts_in_cone_in_time_bin;
1456 if( coalesce_triggers ){
1457 if( number_of_pmts_in_time_bin >= max_pmt) {
1458 max_pmt = number_of_pmts_in_time_bin;
1459 max_vertex_index = vertex_index;
1460 max_time = time_upper;
1462 max_trigger_meanx_per_time_bin = local_trigger_meanx_per_time_bin;
1463 max_trigger_meany_per_time_bin = local_trigger_meany_per_time_bin;
1464 max_trigger_meanz_per_time_bin = local_trigger_meanz_per_time_bin;
1467 max_pmt_in_cone = number_of_pmts_in_cone_in_time_bin;
1473 max_pmt = number_of_pmts_in_time_bin;
1474 max_vertex_index = vertex_index;
1475 max_time = time_upper;
1480 max_trigger_meanx_per_time_bin = local_trigger_meanx_per_time_bin;
1481 max_trigger_meany_per_time_bin = local_trigger_meany_per_time_bin;
1482 max_trigger_meanz_per_time_bin = local_trigger_meanz_per_time_bin;
1486 max_pmt_in_cone = number_of_pmts_in_cone_in_time_bin;
1507 printf(
" [2] coalesced vertex index %d trigger timebin %d \n", itrigger->first, itrigger->second);
1517 unsigned int trigger_index;
1519 unsigned int time_start=0;
1522 if(itrigger->second > time_start) {
1534 printf(
" [2] triggertime: %d, vertex index: %d, npmts: %d, x: %f, y: %f, z: %f \n", triggertime, itrigger->first,
trigger_npmts_in_time_bin.at(trigger_index),
vertex_x[itrigger->first],
vertex_y[itrigger->first],
vertex_z[itrigger->first]);
1564 void separate_triggers_into_gates(std::vector<int> * trigger_ns, std::vector<int> * trigger_ts, std::vector<double> * trigger_vtx_xs, std::vector<double> * trigger_vtx_ys, std::vector<double> * trigger_vtx_zs, std::vector<double> * trigger_dir_xs, std::vector<double> * trigger_dir_ys, std::vector<double> * trigger_dir_zs){
1567 unsigned int trigger_index;
1569 unsigned int time_start=0;
1570 float sum_x, mean_x, sum_y, mean_y, sum_z, mean_z;
1571 float dir_x, dir_y, dir_z;
1575 if(itrigger->second > time_start) {
1588 trigger_ts->push_back(triggertime);
1590 trigger_vtx_xs->push_back(
vertex_x[itrigger->first]);
1591 trigger_vtx_ys->push_back(
vertex_y[itrigger->first]);
1592 trigger_vtx_zs->push_back(
vertex_z[itrigger->first]);
1597 dir_x = mean_x -
vertex_x[itrigger->first];
1600 dir_y = mean_y -
vertex_y[itrigger->first];
1603 dir_z = mean_z -
vertex_z[itrigger->first];
1604 dir = sqrt(pow(dir_x,2) + pow(dir_y,2) + pow(dir_z,2));
1605 trigger_dir_xs->push_back(dir_x/dir);
1606 trigger_dir_ys->push_back(dir_y/dir);
1607 trigger_dir_zs->push_back(dir_z/dir);
1610 printf(
" [2] triggertime: %d, vertex index: %d, npmts: %d, x: %f, y: %f, z: %f \n", triggertime, itrigger->first,
trigger_npmts_in_time_bin.at(trigger_index),
vertex_x[itrigger->first],
vertex_y[itrigger->first],
vertex_z[itrigger->first]);
1642 return (t1.tv_sec - t0.tv_sec) * 1000.0f + (t1.tv_usec - t0.tv_usec) / 1000.0f;
1648 gettimeofday(&
t0,0);
1652 gettimeofday(&
t1,0);
1656 cudaEventRecord(
start);
1661 cudaEventRecord(
stop);
1662 cudaEventSynchronize(
stop);
1663 cudaEventElapsedTime(&milli,
start,
stop);
1681 return pmt_id - 1 + vertex_block;
1688 return hit_index + vertex_block;
1694 if( itheta == 0 )
return 0;
1725 return pmt_id - 1 + vertex_block;
1732 return hit_index + vertex_block;
1748 if( itheta == 0 )
return 0;
1769 cudaGetDeviceCount(&devCount);
1770 printf(
" [2] CUDA Device Query...\n");
1771 printf(
" [2] There are %d CUDA devices.\n", devCount);
1772 cudaDeviceProp devProp;
1773 for (
int i = 0; i < devCount; ++i){
1775 printf(
" [2] CUDA Device #%d\n", i);
1776 cudaGetDeviceProperties(&devProp, i);
1777 printf(
"Major revision number: %d\n", devProp.major);
1778 printf(
"Minor revision number: %d\n", devProp.minor);
1779 printf(
"Name: %s\n", devProp.name);
1780 printf(
"Total global memory: %lu\n", devProp.totalGlobalMem);
1781 printf(
"Total shared memory per block: %lu\n", devProp.sharedMemPerBlock);
1782 printf(
"Total registers per block: %d\n", devProp.regsPerBlock);
1783 printf(
"Warp size: %d\n", devProp.warpSize);
1784 printf(
"Maximum memory pitch: %lu\n", devProp.memPitch);
1787 for (
int i = 0; i < 3; ++i)
1788 printf(
"Maximum dimension %d of block: %d\n", i, devProp.maxThreadsDim[i]);
1790 for (
int i = 0; i < 3; ++i)
1791 printf(
"Maximum dimension %d of grid: %d\n", i, devProp.maxGridSize[i]);
1792 printf(
"Clock rate: %d\n", devProp.clockRate);
1793 printf(
"Total constant memory: %lu\n", devProp.totalConstMem);
1794 printf(
"Texture alignment: %lu\n", devProp.textureAlignment);
1795 printf(
"Concurrent copy and execution: %s\n", (devProp.deviceOverlap ?
"Yes" :
"No"));
1796 printf(
"Number of multiprocessors: %d\n", devProp.multiProcessorCount);
1797 printf(
"Kernel execution timeout: %s\n", (devProp.kernelExecTimeoutEnabled ?
"Yes" :
"No"));
1798 printf(
"Memory Clock Rate (KHz): %d\n", devProp.memoryClockRate);
1799 printf(
"Memory Bus Width (bits): %d\n", devProp.memoryBusWidth);
1800 printf(
"Peak Memory Bandwidth (GB/s): %f\n", 2.0*devProp.memoryClockRate*(devProp.memoryBusWidth/8)/1.0e6);
1801 printf(
"Concurrent kernels: %d\n", devProp.concurrentKernels);
1803 size_t available_memory, total_memory;
1804 cudaMemGetInfo(&available_memory, &total_memory);
1805 size_t stack_memory;
1806 cudaDeviceGetLimit(&stack_memory, cudaLimitStackSize);
1808 cudaDeviceGetLimit(&fifo_memory, cudaLimitPrintfFifoSize);
1810 cudaDeviceGetLimit(&heap_memory, cudaLimitMallocHeapSize);
1811 printf(
" [2] memgetinfo: available_memory %f MB, total_memory %f MB, stack_memory %f MB, fifo_memory %f MB, heap_memory %f MB \n", (
double)available_memory/1.e6, (
double)total_memory/1.e6, (
double)stack_memory/1.e6, (
double)fifo_memory/1.e6, (
double)heap_memory/1.e6);
1822 unsigned int time_bin_index = threadIdx.x + blockDim.x*blockIdx.x;
1828 unsigned int number_of_pmts_in_time_bin = 0;
1829 unsigned int time_index;
1831 unsigned int vertex_with_max_n_pmts = 0;
1838 number_of_pmts_in_time_bin = np[time_index] + np[time_index+1];
1839 if( number_of_pmts_in_time_bin >= max_number_of_pmts_in_time_bin ){
1840 max_number_of_pmts_in_time_bin = number_of_pmts_in_time_bin;
1841 vertex_with_max_n_pmts = iv;
1845 mnp[time_bin_index] = max_number_of_pmts_in_time_bin;
1846 vmnp[time_bin_index] = vertex_with_max_n_pmts;
1857 unsigned int time_bin_index = threadIdx.x + blockDim.x*blockIdx.x;
1863 unsigned int number_of_pmts_in_time_bin = 0;
1864 unsigned int time_index;
1866 unsigned int vertex_with_max_n_pmts = 0;
1867 float meanx_for_vertex_with_max_n_pmts = 0., meany_for_vertex_with_max_n_pmts = 0., meanz_for_vertex_with_max_n_pmts = 0.;
1874 number_of_pmts_in_time_bin = np[time_index] + np[time_index+1];
1875 if( number_of_pmts_in_time_bin >= max_number_of_pmts_in_time_bin ){
1876 max_number_of_pmts_in_time_bin = number_of_pmts_in_time_bin;
1877 meanx_for_vertex_with_max_n_pmts = meanxin[time_index];
1878 meany_for_vertex_with_max_n_pmts = meanyin[time_index];
1879 meanz_for_vertex_with_max_n_pmts = meanzin[time_index];
1880 vertex_with_max_n_pmts = iv;
1884 mnp[time_bin_index] = max_number_of_pmts_in_time_bin;
1885 vmnp[time_bin_index] = vertex_with_max_n_pmts;
1886 meanxout[time_bin_index] = meanx_for_vertex_with_max_n_pmts;
1887 meanyout[time_bin_index] = meany_for_vertex_with_max_n_pmts;
1888 meanzout[time_bin_index] = meanz_for_vertex_with_max_n_pmts;
1899 unsigned int time_bin_index = threadIdx.x + blockDim.x*blockIdx.x;
1905 unsigned int number_of_pmts_in_time_bin = 0;
1906 unsigned int number_of_pmts_in_cone_in_time_bin = 0;
1907 unsigned int time_index;
1908 unsigned int max_number_of_pmts_in_time_bin=0;
1909 unsigned int max_number_of_pmts_in_cone_in_time_bin=0;
1910 unsigned int vertex_with_max_n_pmts = 0;
1917 number_of_pmts_in_time_bin = np[time_index] + np[time_index+1];
1918 number_of_pmts_in_cone_in_time_bin = nc[time_index] + nc[time_index+1];
1921 if( number_of_pmts_in_time_bin >= max_number_of_pmts_in_time_bin ){
1922 max_number_of_pmts_in_time_bin = number_of_pmts_in_time_bin;
1923 vertex_with_max_n_pmts = iv;
1924 max_number_of_pmts_in_cone_in_time_bin = number_of_pmts_in_cone_in_time_bin;
1928 if( number_of_pmts_in_cone_in_time_bin >= max_number_of_pmts_in_cone_in_time_bin ){
1929 max_number_of_pmts_in_cone_in_time_bin = number_of_pmts_in_cone_in_time_bin;
1930 vertex_with_max_n_pmts = iv;
1931 max_number_of_pmts_in_time_bin = number_of_pmts_in_time_bin;
1936 mnp[time_bin_index] = max_number_of_pmts_in_time_bin;
1937 vmnp[time_bin_index] = vertex_with_max_n_pmts;
1938 mnc[time_bin_index] = max_number_of_pmts_in_cone_in_time_bin;
1948 unsigned int time_bin_index = threadIdx.x + blockDim.x*blockIdx.x;
1954 unsigned int number_of_pmts_in_time_bin = 0;
1955 unsigned int max_number_of_pmts_in_time_bin=0;
1956 unsigned int vertex_with_max_n_pmts = 0;
1957 unsigned int dir_index_1, dir_index_2;
1967 number_of_pmts_in_time_bin = np[dir_index_1]
1969 if( number_of_pmts_in_time_bin > max_number_of_pmts_in_time_bin ){
1970 max_number_of_pmts_in_time_bin = number_of_pmts_in_time_bin;
1971 vertex_with_max_n_pmts = iv;
1976 mnp[time_bin_index] = max_number_of_pmts_in_time_bin;
1977 vmnp[time_bin_index] = vertex_with_max_n_pmts;
1985 checkCudaErrors(cudaUnbindTexture(
tex_ids));
1986 checkCudaErrors(cudaUnbindTexture(
tex_times));
2048 checkCudaErrors(cudaUnbindTexture(
tex_ids));
2049 checkCudaErrors(cudaUnbindTexture(
tex_times));
2067 checkCudaErrors(cudaUnbindTexture(
tex_PMTx));
2068 checkCudaErrors(cudaUnbindTexture(
tex_PMTy));
2069 checkCudaErrors(cudaUnbindTexture(
tex_PMTz));
2119 cudaMemcpyDeviceToHost));
2123 cudaMemcpyDeviceToHost));
2128 cudaMemcpyDeviceToHost));
2132 cudaMemcpyDeviceToHost));
2136 cudaMemcpyDeviceToHost));
2142 cudaMemcpyDeviceToHost));
2162 unsigned int the_threshold;
2163 unsigned int number_of_pmts_to_cut_on;
2165 for(
unsigned int time_bin = 0; time_bin<
n_time_bins - 1; time_bin++){
2180 if(number_of_pmts_to_cut_on > the_threshold) {
2206 int nchar = (ceil(log10(n+1))+1);
2207 char * num = (
char*)malloc(
sizeof(
char)*nchar);
2208 sprintf(num,
"%d", n+1);
2211 bool file_exists = (access(
event_file.c_str(), F_OK ) != -1);
2222 char * num = (
char*)malloc(
sizeof(
char)*nchar);
2234 int nchar = (ceil(log10(threshold))+1);
2235 char * num = (
char*)malloc(
sizeof(
char)*nchar);
2236 sprintf(num,
"%d", threshold);
2255 fprintf(of,
" %d \n", trigger);
2292 fprintf(of,
" %d \n", trigger);
2296 unsigned int triggertime, trigger_index;
2312 unsigned int best_vertex;
2314 unsigned int vertex_index = itrigger->first;
2315 unsigned int time_index = itrigger->second;
2317 if( local_n_pmts > max_n_pmts ){
2318 max_n_pmts = local_n_pmts;
2319 best_vertex = vertex_index;
2322 unsigned int distance_index;
2324 double corrected_time;
2326 for(
unsigned int i=0; i<
n_hits; i++){
2362 FILE * pFile = fopen (filename.c_str(),
"r");
2364 printf(
"Error: file %s could not be opened\n", filename.c_str());
2370 while( EOF != fscanf(pFile,
"%s %e", name, &value) ){
2371 if( paramname.compare(name) == 0 ){
2377 printf(
" [2] warning: could not find parameter %s in file %s \n", paramname.c_str(), filename.c_str());
2388 std::string parameter_file =
"parameters.txt";
2390 twopi = 2.*acos(-1.);
2430 std::string parameter_file =
"parameters.txt";
2432 twopi = 2.*acos(-1.);
2442 extra_threshold = 0;
2462 unsigned int bytes_per_float = 4;
2463 size_t available_memory, total_memory;
2464 cudaMemGetInfo(&available_memory, &total_memory);
2465 if( size*bytes_per_float > available_memory*1000/1024 ){
2466 printf(
" [2] cannot allocate %d floats, or %d B, available %d B \n",
2467 size, size*bytes_per_float, available_memory*1000/1024);
2474 unsigned int bytes_per_int = 4;
2475 size_t available_memory, total_memory;
2476 cudaMemGetInfo(&available_memory, &total_memory);
2477 if( size*bytes_per_int > available_memory*1000/1024 ){
2478 printf(
" [2] cannot allocate %d ints, or %d B, available %d B \n",
2479 size, size*bytes_per_int, available_memory*1000/1024);
2486 unsigned int bytes_per_unsigned_int = 4;
2487 size_t available_memory, total_memory;
2488 cudaMemGetInfo(&available_memory, &total_memory);
2489 if( size*bytes_per_unsigned_int > available_memory*1000/1024 ){
2490 printf(
" [2] cannot allocate %d unsigned_ints, or %d B, available %d B \n",
2491 size, size*bytes_per_unsigned_int, available_memory*1000/1024);
2499 unsigned int bytes_per_unsigned_short = 2;
2500 size_t available_memory, total_memory;
2501 cudaMemGetInfo(&available_memory, &total_memory);
2502 if( size*bytes_per_unsigned_short > available_memory*1000/1024 ){
2503 printf(
" cannot allocate %d unsigned_shorts, or %d B, available %d B \n",
2504 size, size*bytes_per_unsigned_short, available_memory*1000/1024);
2511 unsigned int bytes_per_unsigned_char = 1;
2512 size_t available_memory, total_memory;
2513 cudaMemGetInfo(&available_memory, &total_memory);
2514 if( size*bytes_per_unsigned_char > available_memory*1000/1024 ){
2515 printf(
" cannot allocate %d unsigned_chars, or %d B, available %d B \n",
2516 size, size*bytes_per_unsigned_char, available_memory*1000/1024);
2523 unsigned int bytes_per_bool = 1;
2524 size_t available_memory, total_memory;
2525 cudaMemGetInfo(&available_memory, &total_memory);
2526 if( size*bytes_per_bool > available_memory*1000/1024 ){
2527 printf(
" [2] cannot allocate %d unsigned_ints, or %d B, available %d B \n",
2528 size, size*bytes_per_bool, available_memory*1000/1024);
2544 float sqrtN = sqrt(n);
2545 unsigned int start = ceil(std::max((
double)2,(
double)n/(
double)max));
2546 for(
unsigned int d = start; d <= n; d += 1)
2556 float sqrtN = sqrt(n);
2557 unsigned int start = ceil(std::max((
double)3,(
double)n/(
double)max));
2558 for(
unsigned int d = start; d <= n; d += 2)
2620 printf(
" [2] --------------------- warning: grid size cannot exceed max value %u \n", std::numeric_limits<int>::max() );
std::vector< unsigned int > candidate_trigger_npmts_in_time_bin
__constant__ unsigned int constant_n_hits
void set_output_file_nhits(unsigned int threshold)
float * device_meanx_for_vertex_with_max_n_pmts_per_time_bin
void allocate_candidates_memory_on_device()
void free_global_memories()
bool * device_directions_for_vertex_and_pmt
unsigned int the_max_time
void setup_threads_for_histo(unsigned int n)
texture< float, 1, cudaReadModeElementType > tex_PMTx
void read_user_parameters_nhits()
__device__ unsigned int device_get_direction_index_at_time(unsigned int time_bin, unsigned int vertex_index, unsigned int direction_index)
void allocate_directions_memory_on_device()
unsigned short histogram_t
float elapsed_candidates_copy_host
void allocate_candidates_memory_on_host()
unsigned int nhits_threshold_max
double stop_total_cuda_clock()
unsigned int n_test_vertices
vertices
double distance_between_vertices
parameters
unsigned int nhits_window
float * device_dy_per_time_bin
unsigned int wall_like_threshold_number_of_pmts
time_of_flight_t * device_times_of_flight
bool setup_threads_for_tof_biparallel()
texture< time_of_flight_t, 1, cudaReadModeElementType > tex_times_of_flight
__constant__ unsigned int constant_n_direction_bins_phi
float * device_dz_per_time_bin
bool setup_threads_nhits()
bool setup_threads_for_tof()
__global__ void kernel_find_vertex_with_max_npmts_in_timebin_and_directionbin(unsigned int *np, histogram_t *mnp, unsigned int *vmnp)
texture< float, 1, cudaReadModeElementType > tex_PMTz
__constant__ unsigned int constant_n_direction_bins_theta
unsigned int * host_max_number_of_pmts_in_cone_in_time_bin
unsigned int * device_number_of_pmts_in_cone_in_time_bin
void setup_threads_for_histo_iterated(bool last)
float elapsed_tofs_copy_dev
void check_cudamalloc_unsigned_short(unsigned int size)
__constant__ unsigned int constant_time_step_size
__constant__ unsigned int constant_n_PMTs
std::vector< float > candidate_trigger_meany_per_time_bin
void fill_tofs_memory_on_device_nhits()
texture< float, 1, cudaReadModeElementType > tex_light_dz
bool set_input_file_for_event(int n)
void write_output_nhits(unsigned int n)
void free_event_memories_nhits()
void setup_threads_for_histo_per(unsigned int n)
unsigned int number_of_kernel_blocks
threads
double cerenkov_angle_water
std::vector< float > trigger_meanz_per_time_bin
void choose_candidates_above_threshold()
float elapsed_memory_tofs_dev
float read_value_from_file(std::string paramname, std::string filename)
float * device_meany_for_vertex_with_max_n_pmts_per_time_bin
void print_parameters_2d()
float * device_meanx_per_time_bin
float elapsed_memory_candidates_host
unsigned int correct_mode
void print_times_of_flight()
void free_event_memories()
unsigned int * device_max_number_of_pmts_in_cone_in_time_bin
unsigned int get_distance_index(unsigned int pmt_id, unsigned int vertex_block)
unsigned int number_of_threads_per_block
float elapsed_candidates_memory_dev
bool setup_threads_to_find_candidates()
void make_table_of_directions()
unsigned int * host_n_pmts_nhits
int max_n_threads_per_block
texture< unsigned int, 1, cudaReadModeElementType > tex_times
bool select_based_on_cone
float timedifference_msec(struct timeval t0, struct timeval t1)
unsigned int read_number_of_input_hits()
histogram_t * device_max_number_of_pmts_in_time_bin
__constant__ unsigned int constant_n_water_like_test_vertices
unsigned int time_step_size
std::vector< float > trigger_meany_per_time_bin
void allocate_correct_memory_on_device()
texture< float, 1, cudaReadModeElementType > tex_light_dy
unsigned int * host_n_pmts_per_time_bin
float elapsed_memory_directions_dev
unsigned int max_n_hits_per_job
bool setup_threads_for_tof_2d(unsigned int A, unsigned int B)
unsigned int * host_vertex_with_max_n_pmts
std::vector< std::pair< unsigned int, unsigned int > > trigger_pair_vertex_time
void allocate_correct_memory_on_device_nhits()
void separate_triggers_into_gates()
std::vector< float > candidate_trigger_meanz_per_time_bin
unsigned int * device_n_pmts_per_time_bin_and_direction_bin
bool read_the_input_ToolDAQ(std::vector< int > PMTid, std::vector< int > time)
std::vector< unsigned int > trigger_npmts_in_time_bin
std::vector< float > candidate_trigger_meanx_per_time_bin
float * host_meany_for_vertex_with_max_n_pmts_per_time_bin
void initialize_output_nhits()
unsigned int get_direction_index_at_time(unsigned int time_bin, unsigned int vertex_index, unsigned int direction_index)
__constant__ float constant_cerenkov_costheta
dim3 number_of_threads_per_block_3d
histogram_t * device_n_pmts_per_time_bin
float * host_meanx_for_vertex_with_max_n_pmts_per_time_bin
unsigned int water_like_threshold_number_of_pmts
std::vector< std::pair< unsigned int, unsigned int > > candidate_trigger_pair_vertex_time
unsigned int nhits_threshold_min
__constant__ bool constant_return_direction
texture< unsigned int, 1, cudaReadModeElementType > tex_ids
std::string event_file_base
void fill_tofs_memory_on_device()
unsigned int get_time_index(unsigned int hit_index, unsigned int vertex_block)
unsigned int n_direction_bins_theta
__constant__ unsigned int constant_n_time_bins
unsigned int * device_ids
unsigned int greatest_divisor
unsigned int * device_time_bin_of_hit
__device__ unsigned int device_get_direction_index_at_angles(unsigned int iphi, unsigned int itheta)
void check_cudamalloc_float(unsigned int size)
double detector_height
detector
void copy_candidates_from_device_to_host()
dim3 number_of_kernel_blocks_3d
time_of_flight_t * host_times_of_flight
std::vector< double > output_trigger_information
histogram_t * host_max_number_of_pmts_in_time_bin
unsigned int * device_times
void start_total_cuda_clock()
__constant__ unsigned int constant_n_direction_bins
std::string event_file_suffix
void fill_correct_memory_on_device()
unsigned int get_direction_index_at_pmt(unsigned int pmt_id, unsigned int vertex_index, unsigned int direction_index)
std::string output_file_base
void check_cudamalloc_unsigned_char(unsigned int size)
unsigned int * host_time_bin_of_hit
void allocate_tofs_memory_on_device()
std::vector< std::pair< unsigned int, unsigned int > > final_trigger_pair_vertex_time
void read_user_parameters()
unsigned int * device_n_pmts_nhits
__device__ unsigned int device_get_time_index(unsigned int hit_index, unsigned int vertex_block)
unsigned int * device_vertex_with_max_n_pmts
unsigned int * host_times
__global__ void kernel_find_vertex_with_max_npmts_and_center_of_mass_in_timebin(histogram_t *np, histogram_t *mnp, unsigned int *vmnp, unsigned int *nc, unsigned int *mnc)
unsigned int n_direction_bins
__constant__ float constant_costheta_cone_cut
void check_cudamalloc_bool(unsigned int size)
void check_cudamalloc_unsigned_int(unsigned int size)
void make_test_vertices()
void make_table_of_tofs()
texture< float, 1, cudaReadModeElementType > tex_light_dx
unsigned int find_greatest_divisor(unsigned int n, unsigned int max)
float * host_meanz_for_vertex_with_max_n_pmts_per_time_bin
float * device_meanz_for_vertex_with_max_n_pmts_per_time_bin
unsigned int n_direction_bins_phi
std::vector< float > trigger_meanx_per_time_bin
unsigned int write_output_mode
std::vector< unsigned int > trigger_npmts_in_cone_in_time_bin
float elapsed_kernel_correct_times_and_get_n_pmts_per_time_bin
float * device_meanz_per_time_bin
void fill_directions_memory_on_device()
float * device_meany_per_time_bin
unsigned int get_direction_index_at_angles(unsigned int iphi, unsigned int itheta)
double wall_like_distance
std::string detector_file
unsigned short time_of_flight_t
__constant__ bool constant_select_based_on_cone
unsigned int n_water_like_test_vertices
__device__ unsigned int device_get_direction_index_at_pmt(unsigned int pmt_id, unsigned int vertex_index, unsigned int direction_index)
bool * host_directions_for_vertex_and_pmt
__device__ unsigned int device_get_distance_index(unsigned int pmt_id, unsigned int vertex_block)
float * device_dx_per_time_bin
float elapsed_directions_copy_dev
texture< float, 1, cudaReadModeElementType > tex_PMTy
unsigned int read_number_of_pmts()
__constant__ unsigned int constant_n_test_vertices
__global__ void kernel_find_vertex_with_max_npmts_in_timebin(histogram_t *np, histogram_t *mnp, unsigned int *vmnp)
__constant__ offset_t constant_time_offset
texture< float, 1, cudaReadModeElementType > tex_light_dr
void print_gpu_properties()
float elapsed_write_output
void check_cudamalloc_int(unsigned int size)
std::vector< unsigned int > candidate_trigger_npmts_in_cone_in_time_bin
float elapsed_candidates_kernel
float elapsed_threads_candidates