c++ - OpenCV image recognition - setting up ANN MLP -


I am new to the OpenCV world and to neural networks, but I have coding experience in C++/Java.


I created my first ANN MLP and it learned XOR:

#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>

#include <iostream>
#include <iomanip>

using namespace cv;
using namespace ml;
using namespace std;

/// Prints a CV_32F matrix to stdout, one bracketed row per line.
///
/// @param mat  matrix whose elements are read as float
/// @param prec number of digits printed after the decimal point
void print(Mat& mat, int prec)
{
    for (int i = 0; i < mat.size().height; i++)
    {
        cout << "[";
        for (int j = 0; j < mat.size().width; j++)
        {
            cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
            if (j != mat.size().width - 1)
                cout << ", ";
            else
                cout << "]" << endl;
        }
    }
}

/// Trains a 2-4-1 multi-layer perceptron on the XOR truth table and prints
/// the network's prediction for each of the four training samples.
int main()
{
    const int hiddenLayerSize = 4;

    // XOR truth table: one sample per row, two inputs per sample.
    float inputTrainingDataArray[4][2] = {
        { 0.0, 0.0 },
        { 0.0, 1.0 },
        { 1.0, 0.0 },
        { 1.0, 1.0 }
    };
    // The Mat wraps the array without copying, so the array must outlive it.
    Mat inputTrainingData = Mat(4, 2, CV_32F, inputTrainingDataArray);

    // Expected XOR result for each input row.
    float outputTrainingDataArray[4][1] = {
        { 0.0 },
        { 1.0 },
        { 1.0 },
        { 0.0 }
    };
    Mat outputTrainingData = Mat(4, 1, CV_32F, outputTrainingDataArray);

    Ptr<ANN_MLP> mlp = ANN_MLP::create();

    // Layer sizes: input, hidden, output.
    Mat layersSize = Mat(3, 1, CV_16U);
    layersSize.row(0) = Scalar(inputTrainingData.cols);
    layersSize.row(1) = Scalar(hiddenLayerSize);
    layersSize.row(2) = Scalar(outputTrainingData.cols);
    mlp->setLayerSizes(layersSize);

    mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM);

    TermCriteria termCrit = TermCriteria(
        TermCriteria::Type::COUNT + TermCriteria::Type::EPS,
        100000000,
        0.000000000000000001
    );
    mlp->setTermCriteria(termCrit);

    mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP);

    Ptr<TrainData> trainingData = TrainData::create(
        inputTrainingData,
        SampleTypes::ROW_SAMPLE,
        outputTrainingData
    );

    mlp->train(trainingData
        /*, ANN_MLP::TrainFlags::UPDATE_WEIGHTS
        + ANN_MLP::TrainFlags::NO_INPUT_SCALE
        + ANN_MLP::TrainFlags::NO_OUTPUT_SCALE*/
    );

    // Run every training sample back through the trained network.
    for (int i = 0; i < inputTrainingData.rows; i++) {
        Mat sample = Mat(1, inputTrainingData.cols, CV_32F, inputTrainingDataArray[i]);
        Mat result;
        mlp->predict(sample, result);
        cout << sample << " -> ";// << result << endl;
        print(result, 0);
        cout << endl;
    }

    return 0;
}

It works for this simple problem, and I also taught the network 1-10 to binary conversion.


But now I need to use an MLP for simple image classification - road signs. I wrote code for loading the training images and preparing the matrices for learning, but I'm not able to train the network - it "learns" in 1 second even with 1 000 000 iterations, and it produces garbage results, the same for all inputs!


Here are the test images and the source code:

#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>

#include <algorithm>
#include <chrono>
#include <climits>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <windows.h>

using namespace cv;
using namespace ml;
using namespace std;
using namespace chrono;

// Every image is resized to WIDTH_SIZE x HEIGHT_SIZE before being fed to the
// network; the height is WIDTH_SIZE * sqrt(3) / 2, truncated to an integer.
const int WIDTH_SIZE = 50;
const int HEIGHT_SIZE = (int)(WIDTH_SIZE * sqrt(3)) / 2;
const int IMAGE_DATA_SIZE = WIDTH_SIZE * HEIGHT_SIZE;

/// Prints a CV_32F matrix to stdout, one bracketed row per line.
///
/// @param mat  matrix whose elements are read as float
/// @param prec number of digits printed after the decimal point
void print(Mat& mat, int prec)
{
    for (int i = 0; i < mat.size().height; i++)
    {
        cout << "[ ";
        for (int j = 0; j < mat.size().width; j++)
        {
            cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
            if (j != mat.size().width - 1)
                cout << ", ";
            else
                cout << " ]" << endl;
        }
    }
}

/// Loads an image as grayscale, resizes it to WIDTH_SIZE x HEIGHT_SIZE and
/// converts it to single-channel float scaled by 1/255.
///
/// @param imagePath   path of the image file to read
/// @param outputImage receives the converted image on success
/// @return false (after printing a message) when the file cannot be read
bool loadImage(string imagePath, Mat& outputImage)
{
    // Load the image in grayscale.
    Mat image = imread(imagePath, IMREAD_GRAYSCALE);
    Mat temp;

    // Check for invalid input.
    if (image.empty()) {
        cout << "could not open or find image" << std::endl;
        return false;
    }

    // Resize the image.
    Size size(WIDTH_SIZE, HEIGHT_SIZE);
    resize(image, temp, size, 0, 0, CV_INTER_AREA);

    // Convert to float, 1 channel.
    temp.convertTo(outputImage, CV_32FC1, 1.0/255.0);

    return true;
}

/// Lists the names (not full paths) of the regular files directly inside
/// `folder`, using the Win32 directory enumeration API.
///
/// @param folder directory to scan
/// @return file names; empty when the directory cannot be enumerated
vector<string> getFilesNamesInFolder(string folder)
{
    vector<string> names;
    // Build the search pattern in a std::string rather than a fixed-size
    // char buffer, so a long folder path cannot overflow it.
    const string searchPath = folder + "/*.*";
    // The ANSI variants are named explicitly because the pattern is a
    // narrow (char) string.
    WIN32_FIND_DATAA fd;
    HANDLE hFind = ::FindFirstFileA(searchPath.c_str(), &fd);
    if (hFind != INVALID_HANDLE_VALUE) {
        do {
            // Keep only non-directory entries; this also filters out the
            // "." and ".." pseudo-folders.
            if (!(fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
                names.push_back(fd.cFileName);
            }
        } while (::FindNextFileA(hFind, &fd));
        ::FindClose(hFind);
    }
    return names;
}

/// A road sign image together with the category letter and number parsed
/// from its file name.
class Sign {
public:
    enum class Category { A = 'A', B = 'B', C = 'C', D = 'D' };

    Mat image;
    Category category;
    int number;

    /// @param image preprocessed sign image
    /// @param name  file name without extension; its first character is
    ///              cast to Category and the text from index 2 onward is
    ///              parsed as the sign number
    Sign(Mat& image, string name) :image(image) {
        category = static_cast<Category>(name.at(0));
        number = stoi(name.substr(2, name.length()));
    };
};

/// Loads every file found in `folderName` as a Sign.
///
/// @param folderName directory path; it is concatenated directly with each
///                   file name, so it must end with a path separator
/// @return the signs that were read successfully
vector<Sign> loadSignsFromFolder(string folderName) {
    vector<Sign> roadSigns;

    for (string filename : getFilesNamesInFolder(folderName)) {
        Mat image;
        // Skip files that cannot be decoded: an empty Mat would otherwise
        // end up in the training set.
        if (!loadImage(folderName + filename, image)) {
            continue;
        }
        roadSigns.emplace_back(image, filename.substr(0, (filename.length() - 4))); //cut .png
    }

    return roadSigns;
}

/// Shows every sign in its own window and blocks until a key is pressed.
void showSignsInWindows(vector<Sign> roadSigns) {
    for (Sign sign : roadSigns) {
        string windowName = "sign " + to_string(sign.number);
        namedWindow(windowName, WINDOW_AUTOSIZE);
        imshow(windowName, sign.image);
    }
    waitKey(0);
}

/// Builds the network input matrix: one row per sign, each row being the
/// sign image flattened to a single row.
Mat getInputDataFromSignsVector(vector<Sign> roadSigns) {
    Mat roadSignsImageData;

    for (Sign sign : roadSigns) {
        Mat signImageDataInOneRow = sign.image.reshape(0, 1);
        roadSignsImageData.push_back(signImageDataInOneRow);
    }

    return roadSignsImageData;
}

/// Builds the network target matrix: one row per sign with
/// (number of signs + 1) columns, every entry -1 except a single 1 at
/// column k for the k-th sign (counting from 1). Column 0 is never set to 1.
Mat getOutputDataFromSignsVector(vector<Sign> roadSigns) {
    int signsCount = (int) roadSigns.size();
    int signsVectorSize = signsCount + 1;

    Mat roadSignsData(0, signsVectorSize, CV_32FC1);

    int i = 1;
    for (Sign sign : roadSigns) {
        vector<float> outputTraningVector(signsVectorSize);
        fill(outputTraningVector.begin(), outputTraningVector.end(), -1.0);
        outputTraningVector[i++] = 1.0;

        // Wrap the vector without copying; push_back copies the row into
        // roadSignsData before the vector goes out of scope.
        Mat tempMatrix(outputTraningVector, false);
        roadSignsData.push_back(tempMatrix.reshape(0, 1));
    }

    return roadSignsData;
}

/// Trains an MLP on the road sign images found in a fixed folder, reports
/// the training time and prints the network output for every training image.
int main(int argc, char* argv[])
{
    // NOTE(review): exactly one command-line argument is required, yet argv
    // is never read afterwards.
    if (argc != 2) {
        cout << " usage: display_image imagetoloadanddisplay" << endl;
        return -1;
    }

    const int hiddenLayerSize = 500;

    vector<Sign> roadSigns = loadSignsFromFolder("../../../znaki/a/");
    Mat inputTrainingData = getInputDataFromSignsVector(roadSigns);
    Mat outputTrainingData = getOutputDataFromSignsVector(roadSigns);

    Ptr<ANN_MLP> mlp = ANN_MLP::create();

    // Layer sizes: input, hidden, output.
    Mat layersSize = Mat(3, 1, CV_16U);
    layersSize.row(0) = Scalar(inputTrainingData.cols);
    layersSize.row(1) = Scalar(hiddenLayerSize);
    layersSize.row(2) = Scalar(outputTrainingData.cols);
    mlp->setLayerSizes(layersSize);

    mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM, 1.0, 1.0);

    // The two numeric arguments are the method-specific parameters; for
    // BACKPROP, OpenCV documents them as the weight-gradient scale and the
    // momentum scale.
    mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.05, 0.05);
    //mlp->setTrainMethod(ANN_MLP::TrainingMethods::RPROP);

    TermCriteria termCrit = TermCriteria(
        TermCriteria::Type::MAX_ITER //| TermCriteria::Type::EPS,
        ,100 //(int) INT_MAX
        ,0.000001
    );
    mlp->setTermCriteria(termCrit);

    Ptr<TrainData> trainingData = TrainData::create(
        inputTrainingData,
        SampleTypes::ROW_SAMPLE,
        outputTrainingData
    );

    auto start = system_clock::now();
    mlp->train(trainingData
        //, //ANN_MLP::TrainFlags::UPDATE_WEIGHTS
        , ANN_MLP::TrainFlags::NO_INPUT_SCALE
        + ANN_MLP::TrainFlags::NO_OUTPUT_SCALE
    );
    auto duration = duration_cast<milliseconds> (system_clock::now() - start);
    cout << "training time: " << duration.count() << "ms" << endl;

    // Run every training image back through the trained network.
    for (int i = 0; i < inputTrainingData.rows; i++) {
        Mat result;
        //mlp->predict(inputTrainingData.row(i), result);
        mlp->predict(roadSigns[i].image.reshape(0, 1), result);
        //cout << result << endl;
        print(result, 2);
    }

    //showSignsInWindows(roadSigns);
    return 0;
}

What is wrong in my code, given that XOR works but images do not? I checked the input and output matrices and they're correct... Could someone also explain when I should use ANN_MLP::TrainFlags::NO_INPUT_SCALE and ANN_MLP::TrainFlags::NO_OUTPUT_SCALE, and which values of the setActivationFunction and setTrainMethod parameters I should use?


thanks!

There was a problem with the backprop weight scale parameter - it was too big, and the ANN couldn't learn more difficult things.


I changed the line to mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.0001); and the hidden layer size to 100 (to speed up learning) - now it's working!


Comments

Popular posts from this blog

ios - RestKit 0.20 — CoreData: error: Failed to call designated initializer on NSManagedObject class (again) -

laravel - PDOException in Connector.php line 55: SQLSTATE[HY000] [1045] Access denied for user 'root'@'localhost' (using password: YES) -

java - Digest auth with Spring Security using javaconfig -