HippoDraw: BFGSFitter.cxx Source File

00001 
00012 #ifdef _MSC_VER
00013 #include "msdevstudio/MSconfig.h"
00014 #endif
00015 
00016 #ifdef _MSC_VER
00017 #define isnan _isnan
00018 #endif
00019 
00020 //To have isnan.
00021 #ifdef __APPLE__
00022 #include <cstdlib>
00023 #define _GLIBCPP_USE_C99 1
00024 #endif
00025 
00026 #include "BFGSFitter.h"
00027 
00028 #include "NumLinAlg.h"
00029 #include "StatedFCN.h"
00030 
00031 //  see todo
00032 #include <iostream>
00033 using std::cout;
00034 using std::endl;
00035 
00036 #include <cfloat>
00037 #include <cstdlib>
00038 #include <cassert>
00039 #include <cmath>
00040 #include <ctime>
00041 
00042 #ifdef __APPLE__
00043 using std::isnan;
00044 #endif
00045 
00046 using namespace hippodraw::Numeric;
00047 
00048 using std::pow;
00049 using std::swap;
00050 using std::min;
00051 using std::abs;
00052 using std::vector;
00053 using std::vector;
00054 using std::string;
00055 using std::map;
00056 
00057 BFGSFitter::BFGSFitter(  const char * name )
00058   : Fitter ( name ),
00059    m_xinit( 1 ),
00060    m_grad_cutoff( 1e-6 ),
00061    m_step_cutoff( 1e-6 ),
00062    m_proj_cutoff( 1e-6 ),
00063    m_c1( 1e-4 ),
00064    m_c2( 0.9 ),
00065    m_alpha_max( 4 ),
00066    m_alpha1( 1 )
00067 {
00068   m_iter_params[ "grad_cutoff" ] = & m_grad_cutoff;
00069   m_iter_params[ "step_cutoff" ] = & m_step_cutoff;
00070   m_iter_params[ "proj_cutoff" ] = & m_proj_cutoff;
00071   m_iter_params[ "c1" ] = & m_c1;
00072   m_iter_params[ "c2" ] = & m_c2;
00073   m_iter_params[ "alpha_max" ] = & m_alpha_max;
00074   m_iter_params[ "alpha1" ] = & m_alpha1;
00075 }
00076 
00077 Fitter * BFGSFitter::clone ( ) const
00078 {
00079   return new BFGSFitter ( *this ); // uses compiler generated copy constructor
00080 }
00081 
00082 bool
00083 BFGSFitter::
00084 needsDerivatives () const
00085 {
00086   return false;
00087 }
00088 
00089 bool BFGSFitter::calcBestFit()
00090 {
00091   double Alpha_star;
00092 
00093   // Initialization
00094   vector < double > init_params;
00095   m_fcn -> fillFreeParameters ( init_params );
00096   setInitIter( init_params );
00097   
00098   // Allocate space for various matrices / vectors needed
00099   vector< double > xold = m_xinit;
00100   vector< double > xnew = m_xinit;
00101   vector< double > gk       = gradient( xold );
00102   vector< double > gkPlusUn = gradient( xnew );
00103   
00104   vector< double > pk( m_xinit.size() );
00105   vector< double > s( m_xinit.size() );
00106   vector< double > y( m_xinit.size() );
00107 
00108   // The standard quasi newton initialization 
00109   eye ( m_M, m_xinit.size() );
00110   m_M = ( 1.0 / norm( gk ) ) * m_M;
00111   
00112   vector< vector< double > > t1 , t2;
00113   eye( t1, m_xinit.size() );
00114   eye( t2, m_xinit.size() );
00115 
00116   double fx    = function( xnew );
00117   double fxold = fx;
00118   for( int k = 1; k <= m_max_iterations; k++ )
00119     {
00120       // Update the iterate.
00121       gk   = gkPlusUn;
00122       pk   = m_M * (-gk);
00123       Alpha_star = wolfeStep( xold, pk );
00124       
00125       do
00126         {
00127           xnew = xold + Alpha_star * pk;
00128           fx   = function( xnew );
00129           Alpha_star /= 3.0;
00130         }
00131       while( isnan( fx ) );
00132       
00133       gkPlusUn = gradient( xnew );
00134 
00135       s = xnew - xold;
00136       y = gkPlusUn - gk;
00137 
00138       double ys = innerProduct( y, s );
00139       double yy = norm( y );
00140       double ss = norm( s );
00141       
00142       // Termination criteria
00143       if( (  abs( ys )           < m_proj_cutoff ) ||
00144           (  abs( Alpha_star )   < DBL_EPSILON   ) ||
00145           (  ss                  < m_step_cutoff ) ||
00146           (  yy                  < m_grad_cutoff ) ||
00147           (  fx                  >= fxold         )  )
00148         break;
00149       
00150       // DFP Update of inverse of approximate hessian.
00151       // M = M-(s*y'*M+M*y*s')/(y'*s)+(1+(y'*M*y)/(y'*s))*(s*s')/(y'*s);
00152       double temp = ( 1 + innerProduct( y, m_M * y ) / ys ) / ys;
00153       
00154       t1 = ( outerProduct(s, y) * m_M)/ys + ( m_M * outerProduct(y , s) ) / ys;
00155       t2 =  temp * outerProduct( s, s );
00156       m_M  =  m_M - t1 + t2;
00157       
00158       // one pass of the loop is over so refresh
00159       xold = xnew;
00160       fxold = fx;
00161     }
00162   
00163   m_fcn -> setFreeParameters ( xold );
00164   //write( xold );
00165     
00166   return true;
00167 }
00168 
00171 double BFGSFitter::wolfeStep( const std::vector< double >& x0,
00172                               const std::vector< double >& p ) const
00173 {
00174   double step_fac = sqrt(2.0);          // Geometric step factor; must be > 1
00175   
00176   double phi0  = function( x0 );        // Function value at the initial point
00177   double dphi0 = gradp( x0, p );        // innner product of gradient and p 
00178   
00179   // dphi0 >= 0 means you are ascending. To avoid it set step = 0 and
00180   // terminate the iterations ( maybe after giving out a warning )
00181   // Algorithm ensures that such case will not arise but curse of finite
00182   // precision mathematics lead to some very small positive dphi0.
00183   if ( dphi0 >= 0 )
00184     return 0.0;
00185   
00186   double Alpha0  = 0;
00187   double Alphai  = m_alpha1;
00188   double Alphaim = Alpha0;
00189   double phiim   = phi0;
00190   int i = 1;
00191   int done = 0;
00192   int cnt  = 0;
00193   
00194   double phii, dphii;
00195   
00196   while ( !done )
00197     {
00198       phii = function( x0 + Alphai * p );
00199       
00200       if ( (phii > (phi0 + m_c1 * Alphai * dphi0)) ||
00201            ( (phii >= phiim) && ( i > 1)) )
00202         return zoom( x0, p, phi0, dphi0, Alphaim, Alphai );
00203       
00204       dphii = gradp( x0 + Alphai * p , p );
00205       
00206       if ( abs( dphii ) <= -m_c2 * dphi0 )
00207         return Alphai;
00208       
00209       if (dphii >= 0)
00210         return zoom( x0, p, phi0, dphi0, Alphai, Alphaim );
00211       
00212       // Choose new Alphai in (Alphai, Alpha_max)
00213       Alphaim = Alphai;
00214       phiim = phii;
00215       Alphai = min( step_fac * Alphai, m_alpha_max );
00216       
00217       if ( Alphai == m_alpha_max )
00218         cnt += 1;
00219       
00220       if (cnt > 1)
00221         {
00222 //        cout << "WARNING: Unable to bracket a strong Wolfe pt. in [ "
00223 //             << m_alpha1 << ", " << m_alpha_max << " ]" << endl;
00224           return m_alpha_max;
00225         }
00226       
00227       i += 1;
00228       //cout << "    Wolfe Iteration: " << i << endl;
00229     }
00230 
00231   return 0.0;
00232 }
00233 
00234 double BFGSFitter::zoom( const std::vector< double >& x0,
00235                          const std::vector< double >& p,
00236                          double phi0, double dphi0,
00237                          double Alphalo, double Alphahi ) const
00238 {
00239   int MaxIter = 20; 
00240   int iter = 0;
00241   int done = 0;
00242 
00243   double philo, phij;
00244   double dphij;
00245   double Alphaj;
00246   double Alpha_star = 0.0;
00247   
00248   while ( !done  && iter < MaxIter )
00249     {
00250       iter += 1;
00251       
00252       philo = function( x0 + Alphalo * p );
00253 
00254       // Find a trial step length Alphaj between Alphalo and Alphahi
00255       Alphaj = interpolate( x0, p, Alphalo, Alphahi );
00256      
00257       // Evalaute phi( Alphaj )
00258       phij = function( x0 + Alphaj * p );
00259 
00260       if( (phij > phi0 + m_c1 * Alphaj * dphi0) || (phij >= philo) )
00261         Alphahi = Alphaj;
00262       else
00263         {
00264           dphij = gradp( x0 + Alphaj * p , p );
00265           
00266           if ( abs( dphij ) <= -m_c2 * dphi0 )
00267             Alpha_star = Alphaj;
00268           return Alpha_star;
00269         
00270           if ( dphij * ( Alphahi - Alphalo ) >= 0 )
00271             Alphahi = Alphalo;
00272           
00273           Alphalo = Alphaj;
00274         }
00275       
00276     }
00277 
00278   // If above loop fails take the mid-point
00279   if (iter == MaxIter)
00280     Alpha_star = 0.5 * ( Alphahi + Alphalo );
00281   
00282   return Alpha_star;
00283 
00284 }
00285 
00286 
00287 double BFGSFitter::interpolate( const std::vector< double >& x0,
00288                                 const std::vector< double >& p,
00289                                 double Alphaim,
00290                                 double Alphai ) const
00291 {
00292   
00293   if ( Alphaim > Alphai )
00294     swap( Alphaim, Alphai);
00295   
00296   double phiim  = function( x0 + Alphaim * p );
00297   double phii   = function( x0 + Alphai  * p );
00298   
00299   double dphiim = gradp( x0 + Alphaim * p, p );
00300   double dphii  = gradp( x0 + Alphai  * p, p );
00301   
00302   double d1     = dphiim + dphii - 3 * ( (phiim - phii)/(Alphaim - Alphai) );
00303   double d2     = sqrt( d1 * d1 - dphiim * dphii);
00304   
00305   double Alphaip = Alphai - (Alphai - Alphaim) * 
00306     ( (dphiim + d2 - d1) / (dphii - dphiim + 2 * d2)    );
00307   
00308   double lth = abs(Alphai - Alphaim);
00309   
00310   if( abs(Alphaip - Alphai)  < 0.05 * lth ||
00311       abs(Alphaip - Alphaim) < 0.05 * lth ||
00312       Alphaip < Alphaim ||
00313       Alphaip > Alphai )
00314     Alphaip = 0.5 * (Alphai + Alphaim);
00315   
00316   return Alphaip;
00317 }
00318 
00319 double BFGSFitter::function( const std::vector< double > &  u ) const
00320 {
00321   // Gets the value of the objective function from pvfcn
00322   // first you have to convert the vector to vector
00323   vector< double > x( u.size() );
00324 
00325   for( unsigned int i = 0; i < u.size(); i++ )
00326     x[i] = u[i];
00327   
00328   m_fcn -> setFreeParameters ( x );
00329   
00330   return objectiveValue();
00331 
00332   // Following are a few standard test functions
00333   // which were used to test this minimizer. 
00334 
00335   //double x = u[0]; double y = u[1];
00336  
00337   // Rosenbrock's function
00338   //return 100 * (y - x * x) * (y - x * x)  + (1 - x) * (1 - x);
00339 
00340   // Freudenstein and Roth's Function
00341   //return pow(-13 + x  + ((5 - y)*y - 2 )*y, 2) + 
00342   //  pow(-29 + x  + ((y + 1)*y - 14)*y, 2);
00343 
00344   // Beale Function  
00345   //return pow(1.5   - x*(1 - y   ), 2) + 
00346   //  pow(2.25  - x*(1 - y*y ), 2) + 
00347   //  pow(2.625 - x*(1 - y*y*y ), 2);
00348 }
00349 
00350 vector< double >
00351 BFGSFitter::gradient( const std::vector< double > & u ) const
00352 {
00353   double h = 1e-5;
00354   vector< double > x( u.size(), 0.0 );
00355   vector< double > xph( u.size(), 0.0 );
00356   
00357   vector< double > g( u.size() );
00358   
00359   for( unsigned int i = 0; i < u.size(); i++ )
00360     x[i] = u[i];
00361   
00362   // Calculating the gradient by finite differencing
00363   m_fcn -> setFreeParameters ( x );
00364   double fx   = m_fcn -> objectiveValue ();
00365   double fxph = 0.0;
00366   for( unsigned int i = 0; i < u.size(); i++ )
00367     {
00368       for( unsigned int j = 0; j < u.size(); j++ ) {
00369         xph[j] = ( i == j ) ? ( x[j] + h ) : x[j];
00370       }
00371       m_fcn -> setFreeParameters ( xph );
00372       fxph =  m_fcn -> objectiveValue ();
00373       g[i] = ( fxph - fx ) / h;
00374     }
00375  
00376   // Following are a few gradients of standard test functions
00377   // which were used to test this minimizer.
00378   
00379   /* double x = u[0]; double y = u[1];
00380      vector< double > g(2);*/
00381   
00382   // Gradient of Rosenbrock's function
00383   /*  g[0] = -400 * x * ( y   - x * x )   - 2 * ( 1 - x );
00384       g[1] =  200 *     ( y   - x * x );*/
00385   
00386   // Gradient of Freudenstein and Roth's Function
00387   //g[0] = 2*(y*(y*(y+1)-14)+ x - 29) +
00388   //     2*(y*((5-y)*y-2) + x - 13);
00389   //g[1] = 2*(y*(2*y+1) + y*(y+1)   - 14) * (y*(y*(y+1) - 14) + x - 29) + 
00390   //     2*((5-y)*y   + (5-2*y)*y -  2) * (y*((5-y)*y -  2) + x - 13);
00391 
00392   // Gradient of Beale Function
00393   //g[0]= 2*(2.625 - x * (1 - y*y*y) ) * (y*y*y - 1)+
00394   //  2*(2.25  - x * (1 - y*y) ) * (y*y - 1)+
00395   //  2*(1.5   - x * (1 - y  ) ) * (y   - 1);
00396   //g[1]= 6*x*y*y*(2.625 - x * (1 - y*y*y)) + 
00397   //  4*x*y  *(2.25  - x * (1 - y*y)) + 
00398   //  2*x    *(1.5   - x * (1 - y  ));
00399 
00400   return g;
00401 }
00402 
00403 
00404 double BFGSFitter::gradp( const std::vector< double > & u,
00405                           const std::vector< double > & p ) const
00406 {
00407   double h = 1e-5;
00408   vector< double > x( u.size() );
00409   
00410   // Calculating the gradient in direction of p by finite differencing
00411   for ( unsigned int i = 0; i < u.size(); i++ ) {
00412     x[i]  = u[i];
00413   }
00414 //   double fx   = m_fcn -> operator()( x );
00415   m_fcn -> setFreeParameters ( x );
00416   double fx   = m_fcn -> objectiveValue ( );
00417   
00418   for ( unsigned int i = 0; i < u.size(); i++ ) {
00419     x[i] += h * p[i] ;
00420   }
00421   m_fcn -> setFreeParameters ( x );
00422   double fxph = m_fcn -> objectiveValue ();
00423     
00424   return ( fxph - fx ) / h;
00425     
00426   // Following are a directional derivative of standard test functions
00427   // which were used to test this minimizer.
00428   
00429   /* double x = u[0]; double y = u[1];
00430      vector< double > g(2);*/
00431   
00432   // Gradient of Rosenbrock's function
00433   /*  g[0] = -400 * x * ( y   - x * x )   - 2 * ( 1 - x );
00434       g[1] =  200 *     ( y   - x * x );*/
00435   
00436   // Gradient of Freudenstein and Roth's Function
00437   //g[0] = 2*(y*(y*(y+1)-14)+ x - 29) +
00438   //     2*(y*((5-y)*y-2) + x - 13);
00439   //g[1] = 2*(y*(2*y+1) + y*(y+1)   - 14) * (y*(y*(y+1) - 14) + x - 29) + 
00440   //     2*((5-y)*y   + (5-2*y)*y -  2) * (y*((5-y)*y -  2) + x - 13);
00441 
00442   // Gradient of Beale Function
00443   //g[0]= 2*(2.625 - x * (1 - y*y*y) ) * (y*y*y - 1)+
00444   //  2*(2.25  - x * (1 - y*y) ) * (y*y - 1)+
00445   //  2*(1.5   - x * (1 - y  ) ) * (y   - 1);
00446   //g[1]= 6*x*y*y*(2.625 - x * (1 - y*y*y)) + 
00447   //  4*x*y  *(2.25  - x * (1 - y*y)) + 
00448   //  2*x    *(1.5   - x * (1 - y  ));
00449 
00450   // return g[0] * p[0] + g[1] * p[1];
00451 }
00452 
00453 const vector< double > & BFGSFitter::initIter() const
00454 {
00455   return m_xinit;
00456 }
00457 
00458 int BFGSFitter::setInitIter( const std::vector< double > & xinit )
00459 {
00460   m_xinit.resize( xinit.size() );
00461   
00462   // Provide  a random perturbations to the initial value
00463   //srand( (unsigned) time( NULL ) );
00464   //for( unsigned int i = 0; i < xinit.size(); i++ )
00465   //m_xinit[i] = xinit[i] * (1 + 0.025 * ( 0.5 - rand() / ( RAND_MAX + 1.0 )));
00466 
00467   m_xinit = xinit;
00468   
00469   return EXIT_SUCCESS;
00470 }
00471 
00472 int BFGSFitter::calcCovariance ( std::vector < std::vector < double > >& cov )
00473 {
00474   cov.resize( m_xinit.size() );
00475   for( unsigned int i = 0; i < m_xinit.size(); i++ )
00476     cov[i].resize( m_xinit.size(), 0.0 );
00477 
00478   for( unsigned int i = 0; i < m_xinit.size(); i++ )
00479     for( unsigned int j = 0; j < m_xinit.size(); j++ )
00480       cov[i][j] = m_M[i][j];
00481 
00482   // set return flag as EXIT_SUCCESS if cov is Positive Definite,
00483   // and to EXIT_FAILURE otherwise.
00484   int flag = cholFactor( cov  );
00485   
00486   for( unsigned int i = 0; i < m_xinit.size(); i++ )
00487     for( unsigned int j = 0; j < m_xinit.size(); j++ )
00488       cov[i][j] = m_M[i][j];
00489   
00490  return flag;
00491 }
00492 
00493 
00494 double BFGSFitter::iterParam( string name )
00495 {
00496   // First check if user is attempting to access the max_iterations
00497   // This is a hack, but it was necessary to ensure a uniform interface
00498   if( name == "max_iterations" )
00499     return m_max_iterations;
00500   
00501   // Don't use map::operator[]() to find the name and its
00502   // associated value, as it will create one if it doesn't exist.
00503   map< string, double * >::const_iterator it 
00504     = m_iter_params.find ( name );
00505 
00506   if ( it == m_iter_params.end () )
00507     cout << name << " is not a valid iteration parameter name" << endl;
00508   else
00509     return *m_iter_params[name];
00510 
00511   return 0.0;
00512 }
00513 
00514 int BFGSFitter::setIterParam( string name, double value )
00515 {
00516 
00517   // First check if user is attempting to modify the max_iterations
00518   // This is a hack, but it was necessary to ensure a uniform interface
00519   if( name == "max_iterations" )
00520     {
00521       m_max_iterations = ( int ) value;
00522       return EXIT_SUCCESS;
00523     }
00524 
00525   // Now start worrying about the other parameters. 
00526   // Don't diretly use map::operator[]() to find the name and its
00527   // associated value, as it will create one if it doesn't exist.
00528   map< string, double * >::const_iterator it 
00529     = m_iter_params.find ( name );
00530   
00531   if ( it == m_iter_params.end () )
00532     {
00533       cout << name << " is not a valid iteration parameter name" << endl;
00534       return EXIT_FAILURE;
00535     }
00536   else
00537     {
00538       *m_iter_params[name] = value;
00539       return EXIT_SUCCESS;
00540     }
00541   
00542   return EXIT_FAILURE;
00543 }
00544 
00545 
00547 /*int main()
00548   {
00549   BFGSFitter("Stupid") rosenbrock;
00550   vector< double > xinit(2);
00551   
00552   xinit[0] = -1.2;
00553   xinit[1] =  1;
00554   
00555   //rosenbrock.setInitIter( xinit );
00556   //rosenbrock.calcBestFit();
00557   
00558   return EXIT_SUCCESS;
00559   }*/