RANSAC linear regression in 2D (robust line fit) -
this file includes c++ code ransac linear regression unit test uses opencv. typical use of line fit is
float x[n], float y[n]; bool inliers[n]; float rmse = ransac_line(x, y, npoints, param, niterations, max_er, inliers);
here file
/* * ransac.h * * created on: mar 4, 2013 * author: vivanchenko * * description: line represented equation * a*x+b*y=d, a=cos(alpha), b=sin(alpha), alpha - angle * between horizontal axis , line normal, , d distance * origin line; * * representing line in such way suited fitting error * evaluation , allows avoid marginal cases arise * traditional line representation y=ax+b */ #ifndef ransac_h_ #define ransac_h_ #include <string.h> #include <assert.h> #include <stdio.h> // null symbol #include "cv.h" #include "highgui.h" using namespace std; using namespace cv; #define sqr(a) ((a)*(a)) #define max_float (std::numeric_limits<float>::digits) #define large_number (max_float/2) #define small_number (1.0f/large_number) #define sign(x) ((x)>0?1:(-1)) // unit test params const int ndata_unit_test = 100; const float true_param_unit_test[2] = {0.9f, -27.0f}; const float noise_level_unit_test = 1.0f; const float outlier_proportion_unit_test = 0.3f; const float shift_outliers_unit_test = 10*noise_level_unit_test; // shift introduced outliers const int unit_test_img_width = 800; const int unit_test_img_height = 800; // conversion string many types template <class t> string tostring(t a) { stringstream ss (stringstream::in | stringstream::out); ss << a; return(ss.str()); } // outputs redctangle includes data cv::rect datarange(float* x, float* y, const int n) { float minx = x[0]; float miny = y[0]; float maxx = x[0]; float maxy = y[0]; (int i=1; i<n; i++) { if (minx > x[i]) minx = x[i]; if (miny > y[i]) miny = y[i]; if (maxx < x[i]) maxx = x[i]; if (maxy < y[i]) maxy = y[i]; } rect_<float> rect(minx, miny, maxx-minx+1, maxy-miny+1); return rect; } // simple linear transformation inline float linear(float src, float mult_val, float plus_val) { return(src*mult_val+plus_val); } // shows points, marks inliers, draws fit line (y points up); void showpoints(mat& image, float* x, float* y, const int n, bool* inlrs, float* param) { bool* inliers = inlrs; if (inlrs==null) { inliers = new bool[n]; std::fill_n(inliers, n, true); } // image dimensions int h = image.rows; int w = image.cols; const int pad = 10; // border padding // range of data rect_<float> range = datarange(x, y, n); string str = "x: " + tostring(range.x) + ".." + tostring(range.x+range.width) + "; y: " + tostring(range.y) + ".." + tostring(range.y+range.height) + "; data: y = " + tostring(true_param_unit_test[0]) + "x + " + tostring(true_param_unit_test[1]) + " + noise"; puttext(image, str, point(20, 20), font_hershey_plain, 1, scalar(255) ); // maping data image float datatoimg = min((h-2*pad)/range.height, (w-2*pad)/range.width); // inverse y direction int x0 = pad; int y0 = h-pad; // points (int i=0; i<n; i++) { // adjust origin float datax = x[i]-range.x; float datay = y[i]-range.y; // transform image coordiantes int imgx = linear(datax, datatoimg, x0) + 0.5f; int imgy = linear(datay, -datatoimg, y0) + 0.5f; int thickness = inliers[i]?2:1; circle(image, point(imgx, imgy), 2, scalar(255), thickness); } // line points float datax1 = range.x; float datax2 = range.x+range.width-1; float datay1 = linear(datax1, param[0], param[1]); float datay2 = linear(datax2, param[0], param[1]); // adjust origin datax1-=range.x; datax2-=range.x; datay1-=range.y; datay2-=range.y; // transform image coordiantes int x1 = linear(datax1, datatoimg, x0) + 0.5f; int x2 = linear(datax2, datatoimg, x0) + 0.5f; int y1 = linear(datay1, -datatoimg, y0) + 0.5f; // y points int y2 = linear(datay2, -datatoimg, y0) + 0.5f; cv::line(image, point(x1, y1), point(x2, y2), scalar(255), 1); if (inlrs==null) delete inliers; } // root mean sqaure error (rmse) of line fit y=param[0]*x+param[1] float errorline(float* x, float* y, int n, float* param, const bool* inliers = null) { if (n<=0 || x==null || y==null || param==null) return max_float; int ninliers = (inliers==null?n:0); double rmse = 0; // error accumulation loop (int = 0; < n; i++) { if (inliers!=null) { if (inliers[i]) ninliers++; else continue; } float y_predicted = param[0] * x[i] + param[1]; rmse += sqr(y[i] - y_predicted); } if (ninliers==0) return large_number; else return sqrt(rmse/ninliers); } // simple line fit using sum of squared differences // inliers used specify subset of points fit float fitline(float* x, float* y, int n, float* param, const bool* inliers = null, const bool debug = false) { if (n<=0 || x==null || y==null || param==null) { if (debug) cout<<"error fit line quadratic: n<=0 || x==null || y==null || param==null"<<endl; return max_float; } int ninliers = (inliers==null?n:0); double sum_x = 0; double sum_y = 0; double sum_xy = 0; double sum_x2 = 0; // create specific sums of x, y, xy, x^2 (int = 0; < n; i++) { // use inliers if (inliers!=null) { if (!inliers[i]) continue; else ninliers++; } sum_x += x[i]; sum_y += y[i]; sum_xy += x[i] * y[i]; sum_x2 += x[i] * x[i]; } if(ninliers < 2) { if (debug) cout<<"error fit line quadratic: less 2 data points"<<endl; return max_float; } // means double mean_x = sum_x / ninliers; double mean_y = sum_y / ninliers; float varx = sum_x2 - sum_x * mean_x; float cov = sum_xy - sum_x * mean_y; // eliminate bias: variance e bit underestimated since df=n-1, see // http://davidmlane.com/hyperstat/b16616.html) // if (ninliers>1) // varx *= (float)ninliers/(ninliers-1); // quadratic fit if (abs(varx) < small_number) { if (debug) cout<<"error fit line quadratic: 0 variance" <<endl; return max_float; } // see http://easycalculation.com/statistics/learn-regression.php param[0] = cov / varx; param[1] = mean_y - param[0] * mean_x; return errorline(x, y, n, param, inliers); } // ransac line fit (number of inliers has priority on rmse). float ransac_line(float* x, float* y, const int n, float* param, const int niter = 10, const float maxerror = 1.0f, bool* inlrs = null, bool debug = false) { if (x==null || y==null || n<2) { if (debug) cout<<"x==null || y==null || n<2" <<endl; return max_float; } srand (time(null)); // internal stopping criterions const float rmse_ok = 0.1f; const float inliers_ratio_ok = 0.7f; int ninliers, best_ninliers = 0; float inliers_ratio = 0; float rmse, bestrmse = max_float; float cur_param[2]; bool* inliers = inlrs; if (inlrs==null) { inliers = new bool[n]; std::fill_n(inliers, n, true); } // iterations int iter; (iter = 0; iter<niter; iter++) { // 1. select random set of 2 inliers int i1 = rand() % n; // [0, n[ int i2 = i1; while (i2==i1) i2 = rand() % n; // 2. select minimum number of points (2) float x1 = x[i1]; float x2 = x[i2]; float y1 = y[i1]; float y2 = y[i2]; // todo: may parameterize line differently (alpha, d) if (abs(x1-x2) < small_number) cur_param[0] = large_number * sign(cur_param[0]); else cur_param[0] = (y1-y2)/(x1-x2); cur_param[1] = y1-cur_param[0]*x1; if (abs(cur_param[0]) < small_number) // flat line? cur_param[0] = small_number * sign(cur_param[0]); // 3. determine inliers using whole set (int i=0; i<n; i++) { float y_fit = cur_param[0]*x[i] + cur_param[1]; float x_fit = (y[i] - cur_param[1])/cur_param[0]; if (max(abs(y[i]-y_fit), abs(x[i]-x_fit)) < maxerror) { // block distance inliers[i] = true; } else { inliers[i] = false; } } // 4. re-calculate params via quadratic fit on inliers rmse = fitline(x, y, n, cur_param, (const bool*)inliers); if (abs(cur_param[0]) < small_number) // flat line? cur_param[0] = small_number * sign(cur_param[0]); // 5. re-calculate inliers ninliers = 0; (int i=0; i<n; i++) { float y_fit = cur_param[0]*x[i] + cur_param[1]; float x_fit = (y[i] - cur_param[1])/cur_param[0]; if (max(abs(y[i]-y_fit), abs(x[i]-x_fit)) < maxerror) { inliers[i] = true; ninliers++; } else { inliers[i] = false; } } // 6. calculate error rmse = errorline(x, y, n, cur_param, (const bool*)inliers); // found better solution? if (best_ninliers < ninliers) { best_ninliers = ninliers; param[0] = cur_param[0]; param[1] = cur_param[1]; bestrmse = rmse; } // 7. check exit condition inliers_ratio = (float)best_ninliers/n; if (rmse < rmse_ok && inliers_ratio > inliers_ratio_ok) { if (debug) cout<<"breaking after "<< iter+1<<" iterations"<<endl; break; } } // iterations if (debug) cout<<"inliers ratio = "<< inliers_ratio <<endl; // 8. recreate inliers best parameters (int i=0; i<n; i++) { float y_fit = param[0]*x[i] + param[1]; float x_fit = (y[i] - param[1])/param[0]; if (max(abs(y[i]-y_fit), abs(x[i]-x_fit)) < maxerror) { // block distance inliers[i] = true; } else { inliers[i] = false; } } if (inlrs==null) delete inliers; return bestrmse; } // generates data give random seed void generatedata(float* x, float* y, unsigned int seed = 0, bool hasoutl = false) { // initialize pseudo-random generator srand (seed); (int i=0; i<ndata_unit_test; i++) { // uniform noise (negative , positive -50..50) float noise = (float)(rand() % 100-50) * noise_level_unit_test / 100.0f; //cout<<noise<<"; "; // shift int noutlisers = (float)ndata_unit_test * outlier_proportion_unit_test; int start = ndata_unit_test/2-noutlisers/2; // put outliers in middle bool outlier = (i > start ) && (i < start + noutlisers); if (hasoutl && outlier) noise += shift_outliers_unit_test ; x[i] = i-ndata_unit_test/2; // center x data around 0 y[i] = true_param_unit_test[0]*x[i]+true_param_unit_test[1]+noise; //cout<<"x, y = "<<x[i]<<"; "<<y[i]<<endl; } } // generates data pasted numbers void generatedatapaste(float* x, float* y) { const int n = 63; // compiler generate error if n inaccurate float data[n][2] = { {1, 1}, {2, 1}, {3, 1}, {4, 1}, {5, 1}, {6, 1}, {7, 1}, {8, 1}, {9, 1}, {2, 2}, {3, 2}, {4, 2}, {5, 2}, {6, 2}, {7, 2}, {8, 2}, {9, 2}, {10, 2}, {11, 2}, {12, 2}, {13, 2}, {14, 2}, {15, 2}, {16, 2}, {17, 2}, {8, 3}, {10, 3}, {12, 3}, {13, 3}, {14, 3}, {15, 3}, {16, 3}, {17, 3}, {18, 3}, {9, 4}, {10, 4}, {11, 4}, {12, 4}, {13, 4}, {14, 4}, {15, 4}, {16, 4}, {17, 4}, {18, 4}, {19, 4}, {20, 4}, {21, 4}, {22, 4}, {23, 4}, {24, 4}, {25, 4}, {17, 5}, {18, 5}, {19, 5}, {20, 5}, {21, 5}, {22, 5}, {23, 5}, {24, 5}, {25, 5}, {26, 5}, {27, 5}, {28, 5}}; // crop or repeate data required sample size (int i=0; i<ndata_unit_test; i++) { x[i] = data[i%n][0]; y[i] = data[i%n][1]; } } // unit test of quadratic line fit bool unitest_fitline_quadratic(int col = 0, int row = 0) { const bool hasoutliers = false; // cannot handle outliers const bool datafromparam = false; // either data param or pasted ones string str = hasoutliers?" outliers":" no outliers"; cout<<"unit-test_fitline()"<<str<<endl; bool res = false; // opencv window mat img(unit_test_img_height, unit_test_img_width, cv_8u); cv::namedwindow("quadratic", cv_window_autosize); cv::movewindow("quadratic", col*(img.cols+50), row*(img.rows+50)); // create data float x[ndata_unit_test], y[ndata_unit_test]; if (datafromparam) generatedata(x, y, time(null), hasoutliers); else generatedatapaste(x, y); // fit line float param[2]; float er = fitline(x, y, ndata_unit_test, param) ; if (er < noise_level_unit_test || !datafromparam) res = true; // print result cout<<"a, b = "<<param[0]<<"; "<<param[1]<<"; rmse = "<<er<<endl; if (datafromparam) { cout<<"true = "<<true_param_unit_test[0]<<"; b = "<< true_param_unit_test[1]<<endl; cout<<"combined param error = "<< abs(true_param_unit_test[0]-param[0])+ abs(true_param_unit_test[1]-param[1])<<endl; if (res) cout<<"===passed"<<endl; else cout<<"===fail!"<<endl; cout<<endl; } // visualize showpoints(img, x, y, ndata_unit_test, null, param); imshow("quadratic", img); cv::waitkey(10); return res; } // unit test of ransac line fit bool unitest_fitline_ransac(int col = 0, int row = 0) { const bool hasoutliers = true; // handles outliers gracefully const bool datafromparam = false; // either data param or pasted ones string str = hasoutliers?" outliers":" no outliers"; cout<<"unit-test_ransac()"<<str<<endl; bool res = false; // opencv window mat img(unit_test_img_height, unit_test_img_width, cv_8u); cv::namedwindow("ransac", cv_window_autosize); cv::movewindow("ransac", col*(img.cols+50), row*(img.rows+50)); // create data float x[ndata_unit_test], y[ndata_unit_test]; if (datafromparam) generatedata(x, y, time(null), hasoutliers); else generatedatapaste(x, y); // parameters float param[2]; int niter = 20; float maxerror = 1.0f; bool inliers[ndata_unit_test]; bool debug = true; float er; // function call er = ransac_line(x, y, ndata_unit_test, param, niter, maxerror, inliers, debug); if (er < noise_level_unit_test || !datafromparam) res = true; // print result cout<<"outliers: "; int noutliers = 0; (int i=0; i<ndata_unit_test; i++) { if (!inliers[i]) { noutliers++; cout<<i<<"; "; if (noutliers % 30==0) cout<<endl; } } cout<<" overall "<<noutliers<<endl; cout<<"a, b = "<<param[0]<<"; "<<param[1]<<"; rmse = "<<er<<endl; if (datafromparam) { cout<<"true = "<<true_param_unit_test[0]<<"; b = "<< true_param_unit_test[1]<<endl; cout<<"combined param error = "<< abs(true_param_unit_test[0]-param[0])+ abs(true_param_unit_test[1]-param[1])<<endl; if (res) cout<<"===passed"<<endl; else cout<<"===fail!"<<endl; cout<<endl; } // visualize showpoints(img, x, y, ndata_unit_test, inliers, param); imshow("ransac", img); cv::waitkey(10); return res; } #endif /* ransac_h_ */
the code provide ability tolerate 90% of outliers while finding solution.
instead of using:
rmse += sqr(y[i] - y_predicted);
distance of point calculated line
should rather use actual distance of point line
abs(y_i-cur_m*x_i-cur_b)/sqrt(cur_m*cur_m+1)
or abs(pt.y-cur_m*pt.x-cur_b)* pow(cur_m*cur_m+1,-.5)
(less divisions)
or @ least approximately: abs(y_i-cur_m*x_i-cur_b)/cur_m
Comments
Post a Comment