RANSAC linear regression in 2D (robust line fit) -


this file includes c++ code ransac linear regression unit test uses opencv. typical use of line fit is

float x[n], float y[n]; bool inliers[n]; float rmse = ransac_line(x, y, npoints, param, niterations, max_er, inliers); 

here file

/*  * ransac.h  *  *  created on: mar 4, 2013  *      author: vivanchenko  *  *      description: line represented equation  *      a*x+b*y=d, a=cos(alpha), b=sin(alpha), alpha - angle  *      between horizontal axis , line normal, ,  d distance  *      origin line;  *  *      representing line in such way suited fitting error  *      evaluation , allows avoid marginal cases arise  *      traditional line representation y=ax+b  */  #ifndef ransac_h_ #define ransac_h_ #include <string.h> #include <assert.h> #include <stdio.h> // null symbol #include "cv.h" #include "highgui.h"  using namespace std; using namespace cv;  #define sqr(a) ((a)*(a)) #define max_float (std::numeric_limits<float>::digits) #define large_number  (max_float/2) #define small_number  (1.0f/large_number) #define sign(x) ((x)>0?1:(-1))  // unit test params const int ndata_unit_test = 100; const float true_param_unit_test[2] = {0.9f, -27.0f}; const float noise_level_unit_test = 1.0f; const float outlier_proportion_unit_test = 0.3f; const float shift_outliers_unit_test = 10*noise_level_unit_test; // shift introduced outliers const int unit_test_img_width = 800; const int unit_test_img_height = 800;  // conversion string many types template <class t> string tostring(t a) {     stringstream ss (stringstream::in | stringstream::out);     ss << a;     return(ss.str()); }  // outputs redctangle includes data cv::rect datarange(float* x, float* y, const int n) {      float minx = x[0];     float miny = y[0];     float maxx = x[0];     float maxy = y[0];      (int i=1; i<n; i++) {         if (minx > x[i])             minx = x[i];         if (miny > y[i])             miny = y[i];         if (maxx < x[i])             maxx = x[i];         if (maxy < y[i])             maxy = y[i];     }     rect_<float> rect(minx, miny, maxx-minx+1, maxy-miny+1);      return rect; }  // simple linear transformation inline float linear(float src, float mult_val, float plus_val) {     return(src*mult_val+plus_val); }  // shows points, marks inliers, draws fit line (y points up); void showpoints(mat& image, float* x, float* y, const int n,         bool* inlrs, float* param) {      bool* inliers = inlrs;     if (inlrs==null) {         inliers = new bool[n];         std::fill_n(inliers, n, true);     }      // image dimensions     int h = image.rows;     int w = image.cols;     const int pad = 10; // border padding      // range of data     rect_<float> range  = datarange(x, y, n);     string str = "x: " + tostring(range.x) + ".." + tostring(range.x+range.width) +             "; y: " + tostring(range.y) + ".." + tostring(range.y+range.height) +             "; data: y = " + tostring(true_param_unit_test[0]) + "x + " +             tostring(true_param_unit_test[1]) + " + noise";     puttext(image, str, point(20, 20), font_hershey_plain, 1, scalar(255) );      // maping data image     float datatoimg = min((h-2*pad)/range.height,             (w-2*pad)/range.width);      // inverse y direction     int x0 = pad;     int y0 = h-pad;      // points     (int i=0; i<n; i++) {          // adjust origin         float datax = x[i]-range.x;         float datay = y[i]-range.y;          // transform image coordiantes         int imgx = linear(datax, datatoimg, x0) + 0.5f;         int imgy = linear(datay, -datatoimg, y0) + 0.5f;          int thickness = inliers[i]?2:1;         circle(image, point(imgx, imgy), 2, scalar(255), thickness);     }      // line points     float datax1 = range.x;     float datax2 = range.x+range.width-1;     float datay1 = linear(datax1, param[0], param[1]);     float datay2 = linear(datax2, param[0], param[1]);      // adjust origin     datax1-=range.x;     datax2-=range.x;     datay1-=range.y;     datay2-=range.y;      // transform image coordiantes     int x1 = linear(datax1, datatoimg, x0) + 0.5f;     int x2 = linear(datax2, datatoimg, x0) + 0.5f;     int y1 = linear(datay1, -datatoimg, y0) + 0.5f; // y points     int y2 = linear(datay2, -datatoimg, y0) + 0.5f;      cv::line(image, point(x1, y1), point(x2, y2), scalar(255), 1);      if (inlrs==null)         delete inliers; }  // root mean sqaure error (rmse) of line fit y=param[0]*x+param[1] float errorline(float* x, float* y, int n, float* param,         const bool* inliers = null) {      if (n<=0 || x==null || y==null || param==null)         return max_float;      int ninliers = (inliers==null?n:0);     double rmse = 0;      // error accumulation loop     (int = 0; < n; i++) {          if (inliers!=null) {             if (inliers[i])                 ninliers++;             else                 continue;         }          float y_predicted = param[0] * x[i] + param[1];         rmse += sqr(y[i] - y_predicted);     }      if (ninliers==0)         return large_number;     else         return sqrt(rmse/ninliers); }  // simple line fit using sum of squared differences // inliers used specify subset of points fit float fitline(float* x, float* y, int n, float* param,         const bool* inliers = null, const bool debug = false) {      if (n<=0 || x==null || y==null || param==null) {         if (debug)             cout<<"error fit line quadratic: n<=0 || x==null || y==null || param==null"<<endl;         return max_float;     }      int ninliers = (inliers==null?n:0);     double sum_x = 0;     double sum_y = 0;     double sum_xy = 0;     double sum_x2 = 0;      // create specific sums of x, y, xy, x^2     (int = 0; < n; i++) {          // use inliers         if (inliers!=null) {             if (!inliers[i])                 continue;             else                 ninliers++;         }          sum_x += x[i];         sum_y += y[i];         sum_xy += x[i] * y[i];         sum_x2 += x[i] * x[i];     }      if(ninliers < 2) {         if (debug)             cout<<"error fit line quadratic: less 2 data points"<<endl;         return max_float;     }      // means     double mean_x = sum_x / ninliers;     double mean_y = sum_y / ninliers;      float varx = sum_x2 - sum_x * mean_x;     float cov = sum_xy - sum_x * mean_y;     // eliminate bias: variance e bit underestimated since df=n-1, see     // http://davidmlane.com/hyperstat/b16616.html) //  if (ninliers>1) //      varx *= (float)ninliers/(ninliers-1);       // quadratic fit     if (abs(varx) < small_number) {         if (debug)             cout<<"error fit line quadratic: 0 variance" <<endl;         return max_float;     }      // see http://easycalculation.com/statistics/learn-regression.php     param[0] = cov / varx;     param[1] = mean_y - param[0] * mean_x;      return errorline(x, y, n, param, inliers); }  // ransac line fit (number of inliers has priority on rmse). float ransac_line(float* x, float* y, const int n, float* param,         const int niter = 10, const float maxerror = 1.0f,         bool* inlrs = null, bool debug = false) {      if (x==null || y==null || n<2) {         if (debug)             cout<<"x==null || y==null || n<2" <<endl;         return max_float;     }     srand (time(null));      // internal stopping criterions     const float rmse_ok = 0.1f;     const float inliers_ratio_ok = 0.7f;      int ninliers, best_ninliers = 0;     float inliers_ratio = 0;     float rmse, bestrmse = max_float;     float cur_param[2];      bool* inliers = inlrs;     if (inlrs==null) {         inliers = new bool[n];         std::fill_n(inliers, n, true);     }      // iterations     int iter;     (iter = 0; iter<niter; iter++) {          // 1. select random set of 2 inliers         int i1 = rand() % n; // [0, n[         int i2 = i1;         while (i2==i1)             i2 = rand() % n;          // 2. select minimum number of points (2)         float x1 = x[i1];         float x2 = x[i2];         float y1 = y[i1];         float y2 = y[i2];          // todo: may parameterize line differently (alpha, d)         if (abs(x1-x2) < small_number)             cur_param[0] = large_number * sign(cur_param[0]);         else             cur_param[0] = (y1-y2)/(x1-x2);         cur_param[1] = y1-cur_param[0]*x1;         if (abs(cur_param[0]) < small_number) // flat line?             cur_param[0] = small_number * sign(cur_param[0]);           // 3. determine inliers using whole set         (int i=0; i<n; i++) {             float y_fit = cur_param[0]*x[i] + cur_param[1];             float x_fit = (y[i] - cur_param[1])/cur_param[0];             if (max(abs(y[i]-y_fit), abs(x[i]-x_fit))  < maxerror) { // block distance                 inliers[i] = true;             } else {                 inliers[i] = false;             }         }          // 4. re-calculate params via quadratic fit on inliers         rmse = fitline(x, y, n, cur_param, (const bool*)inliers);         if (abs(cur_param[0]) < small_number) // flat line?             cur_param[0] = small_number * sign(cur_param[0]);          // 5. re-calculate inliers         ninliers = 0;         (int i=0; i<n; i++) {             float y_fit = cur_param[0]*x[i] + cur_param[1];             float x_fit = (y[i] - cur_param[1])/cur_param[0];             if (max(abs(y[i]-y_fit), abs(x[i]-x_fit))  < maxerror) {                 inliers[i] = true;                 ninliers++;             } else {                 inliers[i] = false;             }         }          // 6. calculate error         rmse = errorline(x, y, n, cur_param, (const bool*)inliers);          // found better solution?         if (best_ninliers < ninliers) {             best_ninliers = ninliers;             param[0] = cur_param[0];             param[1] = cur_param[1];             bestrmse = rmse;         }          // 7. check exit condition         inliers_ratio = (float)best_ninliers/n;         if (rmse < rmse_ok && inliers_ratio > inliers_ratio_ok) {              if (debug)                 cout<<"breaking after "<< iter+1<<" iterations"<<endl;              break;         }     } // iterations      if (debug)         cout<<"inliers ratio = "<< inliers_ratio <<endl;      // 8. recreate inliers best parameters     (int i=0; i<n; i++) {         float y_fit = param[0]*x[i] + param[1];         float x_fit = (y[i] - param[1])/param[0];         if (max(abs(y[i]-y_fit), abs(x[i]-x_fit))   < maxerror) { //  block distance             inliers[i] = true;         } else {             inliers[i] = false;         }     }      if (inlrs==null)         delete inliers;     return bestrmse; }  // generates data give random seed void generatedata(float* x, float* y, unsigned int seed = 0, bool hasoutl = false) {      // initialize pseudo-random generator     srand (seed);      (int i=0; i<ndata_unit_test; i++) {          // uniform noise (negative , positive -50..50)         float noise = (float)(rand() % 100-50) *                 noise_level_unit_test / 100.0f;         //cout<<noise<<"; ";          // shift         int noutlisers = (float)ndata_unit_test * outlier_proportion_unit_test;         int start = ndata_unit_test/2-noutlisers/2; // put outliers in middle         bool outlier = (i > start ) && (i < start + noutlisers);         if (hasoutl && outlier)             noise += shift_outliers_unit_test ;          x[i] = i-ndata_unit_test/2; // center x data around 0         y[i] = true_param_unit_test[0]*x[i]+true_param_unit_test[1]+noise;         //cout<<"x, y = "<<x[i]<<"; "<<y[i]<<endl;      }  }  // generates data pasted numbers void generatedatapaste(float* x, float* y) {      const int n = 63;      // compiler generate error if n inaccurate     float data[n][2] = {             {1, 1},             {2, 1},             {3, 1},             {4, 1},             {5, 1},             {6, 1},             {7, 1},             {8, 1},             {9, 1},             {2, 2},             {3, 2},             {4, 2},             {5, 2},             {6, 2},             {7, 2},             {8, 2},             {9, 2},             {10, 2},             {11, 2},             {12, 2},             {13, 2},             {14, 2},             {15, 2},             {16, 2},             {17, 2},             {8, 3},             {10, 3},             {12, 3},             {13, 3},             {14, 3},             {15, 3},             {16, 3},             {17, 3},             {18, 3},             {9, 4},             {10, 4},             {11, 4},             {12, 4},             {13, 4},             {14, 4},             {15, 4},             {16, 4},             {17, 4},             {18, 4},             {19, 4},             {20, 4},             {21, 4},             {22, 4},             {23, 4},             {24, 4},             {25, 4},             {17, 5},             {18, 5},             {19, 5},             {20, 5},             {21, 5},             {22, 5},             {23, 5},             {24, 5},             {25, 5},             {26, 5},             {27, 5},             {28, 5}};      // crop or repeate data required sample size     (int i=0; i<ndata_unit_test; i++) {         x[i] = data[i%n][0];         y[i] = data[i%n][1];     }  }  // unit test of quadratic line fit bool unitest_fitline_quadratic(int col = 0, int row = 0) {      const bool hasoutliers = false; // cannot handle outliers     const bool datafromparam = false; // either data param or pasted ones      string str = hasoutliers?" outliers":" no outliers";     cout<<"unit-test_fitline()"<<str<<endl;     bool res = false;      // opencv window     mat img(unit_test_img_height, unit_test_img_width, cv_8u);     cv::namedwindow("quadratic", cv_window_autosize);     cv::movewindow("quadratic", col*(img.cols+50), row*(img.rows+50));      // create data     float x[ndata_unit_test], y[ndata_unit_test];     if (datafromparam)         generatedata(x, y, time(null), hasoutliers);     else         generatedatapaste(x, y);      // fit line     float param[2];     float er = fitline(x, y, ndata_unit_test, param) ;     if (er < noise_level_unit_test || !datafromparam)         res = true;      // print result     cout<<"a, b = "<<param[0]<<"; "<<param[1]<<"; rmse = "<<er<<endl;      if (datafromparam) {         cout<<"true = "<<true_param_unit_test[0]<<"; b = "<<                 true_param_unit_test[1]<<endl;         cout<<"combined param error = "<<                 abs(true_param_unit_test[0]-param[0])+                 abs(true_param_unit_test[1]-param[1])<<endl;         if (res)             cout<<"===passed"<<endl;         else             cout<<"===fail!"<<endl;         cout<<endl;     }     // visualize     showpoints(img, x, y, ndata_unit_test, null, param);     imshow("quadratic", img);     cv::waitkey(10);      return res; }  // unit test of ransac line fit bool unitest_fitline_ransac(int col = 0, int row = 0) {      const bool hasoutliers = true; // handles outliers gracefully     const bool datafromparam = false; // either data param or pasted ones      string str = hasoutliers?" outliers":" no outliers";     cout<<"unit-test_ransac()"<<str<<endl;     bool res = false;      // opencv window     mat img(unit_test_img_height, unit_test_img_width, cv_8u);     cv::namedwindow("ransac", cv_window_autosize);     cv::movewindow("ransac", col*(img.cols+50), row*(img.rows+50));      // create data     float x[ndata_unit_test], y[ndata_unit_test];     if (datafromparam)         generatedata(x, y, time(null), hasoutliers);     else         generatedatapaste(x, y);      // parameters     float param[2];     int niter = 20;     float maxerror = 1.0f;     bool inliers[ndata_unit_test];     bool debug = true;     float er;      // function call     er = ransac_line(x, y, ndata_unit_test, param, niter, maxerror,             inliers, debug);     if (er < noise_level_unit_test || !datafromparam)         res = true;      // print result     cout<<"outliers: ";     int noutliers = 0;     (int i=0; i<ndata_unit_test; i++) {         if (!inliers[i]) {             noutliers++;             cout<<i<<"; ";             if (noutliers % 30==0)                 cout<<endl;         }     }     cout<<" overall "<<noutliers<<endl;     cout<<"a, b = "<<param[0]<<"; "<<param[1]<<"; rmse = "<<er<<endl;      if (datafromparam) {         cout<<"true = "<<true_param_unit_test[0]<<"; b = "<<                 true_param_unit_test[1]<<endl;         cout<<"combined param error = "<<                 abs(true_param_unit_test[0]-param[0])+                 abs(true_param_unit_test[1]-param[1])<<endl;         if (res)             cout<<"===passed"<<endl;         else             cout<<"===fail!"<<endl;         cout<<endl;     }      // visualize     showpoints(img, x, y, ndata_unit_test, inliers, param);     imshow("ransac", img);     cv::waitkey(10);      return res; }  #endif /* ransac_h_ */ 

the code provide ability tolerate 90% of outliers while finding solution.

instead of using:
rmse += sqr(y[i] - y_predicted); distance of point calculated line
should rather use actual distance of point line
abs(y_i-cur_m*x_i-cur_b)/sqrt(cur_m*cur_m+1) or
abs(pt.y-cur_m*pt.x-cur_b)* pow(cur_m*cur_m+1,-.5) (less divisions)
or @ least approximately:
abs(y_i-cur_m*x_i-cur_b)/cur_m


Comments

Popular posts from this blog

monitor web browser programmatically in Android? -

Shrink a YouTube video to responsive width -

wpf - PdfWriter.GetInstance throws System.NullReferenceException -