Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
GeneticPopulation.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Peter Speckmayer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TMVA::GeneticPopulation *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * *
20  * Redistribution and use in source and binary forms, with or without *
21  * modification, are permitted according to the terms listed in LICENSE *
22  * (http://tmva.sourceforge.net/LICENSE) *
23  **********************************************************************************/
24 
25 /*! \class TMVA::GeneticPopulation
26 \ingroup TMVA
27 
28 Population definition for genetic algorithm.
29 
30 */
31 
32 #include <iostream>
33 #include <iomanip>
34 
35 #include "Rstrstream.h"
36 #include "TSystem.h"
37 #include "TRandom3.h"
38 #include "TH1.h"
39 #include <algorithm>
40 
41 #include "TMVA/GeneticPopulation.h"
42 #include "TMVA/GeneticGenes.h"
43 #include "TMVA/MsgLogger.h"
44 
45 ClassImp(TMVA::GeneticPopulation);
46 
47 using namespace std;
48 
49 ////////////////////////////////////////////////////////////////////////////////
50 /// Constructor
51 
52 TMVA::GeneticPopulation::GeneticPopulation(const std::vector<Interval*>& ranges, Int_t size, UInt_t seed)
53  : fGenePool(size),
54  fRanges(ranges.size()),
55  fLogger( new MsgLogger("GeneticPopulation") )
56 {
57  // create a randomGenerator for this population and set a seed
58  // create the genePools
59  //
60  fRandomGenerator = new TRandom3( 100 ); //please check
61  fRandomGenerator->Uniform(0.,1.);
62  fRandomGenerator->SetSeed( seed );
63 
64  for ( unsigned int i = 0; i < ranges.size(); ++i )
65  fRanges[i] = new TMVA::GeneticRange( fRandomGenerator, ranges[i] );
66 
67  vector<Double_t> newEntry( fRanges.size() );
68  for ( int i = 0; i < size; ++i )
69  {
70  for ( unsigned int rIt = 0; rIt < fRanges.size(); ++rIt )
71  newEntry[rIt] = fRanges[rIt]->Random();
72  fGenePool[i] = TMVA::GeneticGenes( newEntry);
73  }
74 
75  fPopulationSizeLimit = size;
76 }
77 
78 ////////////////////////////////////////////////////////////////////////////////
79 /// destructor
80 
81 TMVA::GeneticPopulation::~GeneticPopulation()
82 {
83  if (fRandomGenerator != NULL) delete fRandomGenerator;
84 
85  std::vector<GeneticRange*>::iterator it = fRanges.begin();
86  for (;it!=fRanges.end(); ++it) delete *it;
87 
88  delete fLogger;
89 }
90 
91 
92 
93 ////////////////////////////////////////////////////////////////////////////////
94 /// the random seed of the random generator
95 
96 void TMVA::GeneticPopulation::SetRandomSeed( UInt_t seed )
97 {
98  fRandomGenerator->SetSeed( seed );
99 }
100 
101 ////////////////////////////////////////////////////////////////////////////////
102 /// Produces offspring which is are copies of their parents.
103 ///
104 /// Parameters:
105 /// - int number : the number of the last individual to be copied
106 
107 void TMVA::GeneticPopulation::MakeCopies( int number )
108 {
109  int i=0;
110  for (std::vector<TMVA::GeneticGenes>::iterator it = fGenePool.begin();
111  it != fGenePool.end() && i < number;
112  ++it, ++i ) {
113  GiveHint( it->GetFactors(), it->GetFitness() );
114  }
115 }
116 
117 ////////////////////////////////////////////////////////////////////////////////
118 /// Creates children out of members of the current generation.
119 ///
120 /// Children have a combination of the coefficients of their parents
121 
122 void TMVA::GeneticPopulation::MakeChildren()
123 {
124 #ifdef _GLIBCXX_PARALLEL
125 #pragma omp parallel
126 #pragma omp for
127 #endif
128  for ( int it = 0; it < (int) (fGenePool.size() / 2); ++it )
129  {
130  Int_t pos = (Int_t)fRandomGenerator->Integer( fGenePool.size()/2 );
131  fGenePool[(fGenePool.size() / 2) + it] = MakeSex( fGenePool[it], fGenePool[pos] );
132  }
133 }
134 
135 ////////////////////////////////////////////////////////////////////////////////
136 /// this function takes two individuals and produces offspring by mixing
137 /// (recombining) their coefficients.
138 
139 TMVA::GeneticGenes TMVA::GeneticPopulation::MakeSex( TMVA::GeneticGenes male,
140  TMVA::GeneticGenes female )
141 {
142  vector< Double_t > child(fRanges.size());
143  for (unsigned int i = 0; i < fRanges.size(); ++i) {
144  if (fRandomGenerator->Integer( 2 ) == 0) {
145  child[i] = male.GetFactors()[i];
146  }else{
147  child[i] = female.GetFactors()[i];
148  }
149  }
150  return TMVA::GeneticGenes( child );
151 }
152 
153 ////////////////////////////////////////////////////////////////////////////////
154 /// Mutates the individuals in the genePool.
155 ///
156 /// Parameters:
157 ///
158 /// - double probability : gives the probability (in percent) of a mutation of a coefficient
159 /// - int startIndex : leaves unchanged (without mutation) the individuals which are better ranked
160 /// than indicated by "startIndex". This means: if "startIndex==3", the first (and best)
161 /// three individuals are not mutated. This allows to preserve the best result of the
162 /// current Generation for the next generation.
163 /// - Bool_t near : if true, the mutation will produce a new coefficient which is "near" the old one
164 /// (gaussian around the current value)
165 /// - double spread : if near==true, spread gives the sigma of the gaussian
166 /// - Bool_t mirror : if the new value obtained would be outside of the given constraints
167 /// the value is mapped between the constraints again. This can be done either
168 /// by a kind of periodic boundary conditions or mirrored at the boundary.
169 /// (mirror = true seems more "natural")
170 
171 void TMVA::GeneticPopulation::Mutate( Double_t probability , Int_t startIndex,
172  Bool_t near, Double_t spread, Bool_t mirror )
173 {
174  vector< Double_t>::iterator vec;
175  vector< TMVA::GeneticRange* >::iterator vecRange;
176 
177  //#ifdef _GLIBCXX_PARALLEL
178  // #pragma omp parallel
179  // #pragma omp for
180  //#endif
181  // The range methods are not thread safe!
182  for (int it = startIndex; it < (int) fGenePool.size(); ++it) {
183  vecRange = fRanges.begin();
184  for (vec = (fGenePool[it].GetFactors()).begin(); vec < (fGenePool[it].GetFactors()).end(); ++vec) {
185  if (fRandomGenerator->Uniform( 100 ) <= probability) {
186  (*vec) = (*vecRange)->Random( near, (*vec), spread, mirror );
187  }
188  ++vecRange;
189  }
190  }
191 }
192 
193 
194 ////////////////////////////////////////////////////////////////////////////////
195 /// gives back the "Genes" of the population with the given index.
196 
197 TMVA::GeneticGenes* TMVA::GeneticPopulation::GetGenes( Int_t index )
198 {
199  return &(fGenePool[index]);
200 }
201 
202 ////////////////////////////////////////////////////////////////////////////////
203 /// make a little printout of the individuals up to index "untilIndex"
204 /// this means, .. write out the best "untilIndex" individuals.
205 
206 void TMVA::GeneticPopulation::Print( Int_t untilIndex )
207 {
208  for ( unsigned int it = 0; it < fGenePool.size(); ++it )
209  {
210  Int_t n=0;
211  if (untilIndex >= -1 ) {
212  if (untilIndex == -1 ) return;
213  untilIndex--;
214  }
215  Log() << "fitness: " << fGenePool[it].GetFitness() << " ";
216  for (vector< Double_t >::iterator vec = fGenePool[it].GetFactors().begin();
217  vec < fGenePool[it].GetFactors().end(); ++vec ) {
218  Log() << "f_" << n++ << ": " << (*vec) << " ";
219  }
220  Log() << Endl;
221  }
222 }
223 
224 ////////////////////////////////////////////////////////////////////////////////
225 /// make a little printout to the stream "out" of the individuals up to index "untilIndex"
226 /// this means, .. write out the best "untilIndex" individuals.
227 
228 void TMVA::GeneticPopulation::Print( ostream & out, Int_t untilIndex )
229 {
230  for ( unsigned int it = 0; it < fGenePool.size(); ++it ) {
231  Int_t n=0;
232  if (untilIndex >= -1 ) {
233  if (untilIndex == -1 ) return;
234  untilIndex--;
235  }
236  out << "fitness: " << fGenePool[it].GetFitness() << " ";
237  for (vector< Double_t >::iterator vec = fGenePool[it].GetFactors().begin();
238  vec < fGenePool[it].GetFactors().end(); ++vec ) {
239  out << "f_" << n++ << ": " << (*vec) << " ";
240  }
241  out << std::endl;
242  }
243 }
244 
245 ////////////////////////////////////////////////////////////////////////////////
246 /// give back a histogram with the distribution of the coefficients.
247 ///
248 /// Parameters:
249 ///
250 /// - int bins : number of bins of the histogram
251 /// - int min : histogram minimum
252 /// - int max : maximum value of the histogram
253 
254 TH1F* TMVA::GeneticPopulation::VariableDistribution( Int_t varNumber, Int_t bins,
255  Int_t min, Int_t max )
256 {
257  std::cout << "FAILED! TMVA::GeneticPopulation::VariableDistribution" << std::endl;
258 
259  std::stringstream histName;
260  histName.clear();
261  histName.str("v");
262  histName << varNumber;
263  TH1F *hist = new TH1F( histName.str().c_str(),histName.str().c_str(), bins,min,max );
264 
265  return hist;
266 }
267 
268 ////////////////////////////////////////////////////////////////////////////////
269 /// gives back all the values of coefficient "varNumber" of the current generation
270 
271 vector<Double_t> TMVA::GeneticPopulation::VariableDistribution( Int_t /*varNumber*/ )
272 {
273  std::cout << "FAILED! TMVA::GeneticPopulation::VariableDistribution" << std::endl;
274 
275  vector< Double_t > varDist;
276 
277  return varDist;
278 }
279 
280 ////////////////////////////////////////////////////////////////////////////////
281 /// add another population (strangers) to the one of this GeneticPopulation
282 
283 void TMVA::GeneticPopulation::AddPopulation( GeneticPopulation *strangers )
284 {
285  for (std::vector<TMVA::GeneticGenes>::iterator it = strangers->fGenePool.begin();
286  it != strangers->fGenePool.end(); ++it ) {
287  GiveHint( it->GetFactors(), it->GetFitness() );
288  }
289 }
290 
291 ////////////////////////////////////////////////////////////////////////////////
292 /// add another population (strangers) to the one of this GeneticPopulation
293 
294 void TMVA::GeneticPopulation::AddPopulation( GeneticPopulation &strangers )
295 {
296  AddPopulation(&strangers);
297 }
298 
299 ////////////////////////////////////////////////////////////////////////////////
300 /// trim the population to the predefined size
301 
302 void TMVA::GeneticPopulation::TrimPopulation()
303 {
304  std::sort(fGenePool.begin(), fGenePool.end());
305  while ( fGenePool.size() > (unsigned int) fPopulationSizeLimit )
306  fGenePool.pop_back();
307 }
308 
309 ////////////////////////////////////////////////////////////////////////////////
310 /// add an individual (a set of variables) to the population
311 /// if there is a set of variables which is known to perform good, they can be given as a hint to the population
312 
313 void TMVA::GeneticPopulation::GiveHint( std::vector< Double_t >& hint, Double_t fitness )
314 {
315  TMVA::GeneticGenes g(hint);
316  g.SetFitness(fitness);
317 
318  fGenePool.push_back( g );
319 }
320 
321 ////////////////////////////////////////////////////////////////////////////////
322 /// sort the genepool according to the fitness of the individuals
323 
324 void TMVA::GeneticPopulation::Sort()
325 {
326  std::sort(fGenePool.begin(), fGenePool.end());
327 }
328