c++ - OpenCV image recognition - setting up ANN MLP -
I am new to the world of OpenCV and neural networks, but I have some coding experience in C++/Java.
I created my first ANN MLP, and it learned XOR:
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>

#include <iostream>
#include <iomanip>

using namespace cv;
using namespace ml;
using namespace std;

// Prints a single-channel float matrix row by row, one "[a, b, ...]" line per
// row, using fixed notation with `prec` digits after the decimal point.
void print(Mat& mat, int prec)
{
    for (int i = 0; i < mat.size().height; i++)
    {
        cout << "[";
        for (int j = 0; j < mat.size().width; j++)
        {
            cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
            if (j != mat.size().width - 1)
                cout << ", ";
            else
                cout << "]" << endl;
        }
    }
}

// Trains a 2-4-1 multi-layer perceptron on the XOR truth table and prints the
// prediction for each of the four training samples.
int main()
{
    const int hiddenLayerSize = 4;

    // XOR truth table: one sample per row, two inputs -> one output.
    float inputTrainingDataArray[4][2] = {
        { 0.0, 0.0 },
        { 0.0, 1.0 },
        { 1.0, 0.0 },
        { 1.0, 1.0 }
    };
    Mat inputTrainingData = Mat(4, 2, CV_32F, inputTrainingDataArray);

    float outputTrainingDataArray[4][1] = {
        { 0.0 },
        { 1.0 },
        { 1.0 },
        { 0.0 }
    };
    Mat outputTrainingData = Mat(4, 1, CV_32F, outputTrainingDataArray);

    Ptr<ANN_MLP> mlp = ANN_MLP::create();

    // Layer sizes: input width, hidden layer, output width.
    Mat layersSize = Mat(3, 1, CV_16U);
    layersSize.row(0) = Scalar(inputTrainingData.cols);
    layersSize.row(1) = Scalar(hiddenLayerSize);
    layersSize.row(2) = Scalar(outputTrainingData.cols);
    mlp->setLayerSizes(layersSize);

    mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM);

    TermCriteria termCrit = TermCriteria(
        TermCriteria::Type::COUNT + TermCriteria::Type::EPS,
        100000000,
        0.000000000000000001
    );
    mlp->setTermCriteria(termCrit);

    mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP);

    Ptr<TrainData> trainingData = TrainData::create(
        inputTrainingData,
        SampleTypes::ROW_SAMPLE,
        outputTrainingData
    );

    mlp->train(trainingData
        /*, ANN_MLP::TrainFlags::UPDATE_WEIGHTS
        + ANN_MLP::TrainFlags::NO_INPUT_SCALE
        + ANN_MLP::TrainFlags::NO_OUTPUT_SCALE*/
    );

    // Run every training sample back through the trained network.
    for (int i = 0; i < inputTrainingData.rows; i++) {
        Mat sample = Mat(1, inputTrainingData.cols, CV_32F, inputTrainingDataArray[i]);
        Mat result;
        mlp->predict(sample, result);
        cout << sample << " -> ";// << result << endl;
        print(result, 0);
        cout << endl;
    }

    return 0;
}
It works for this simple problem; I also taught the network a 1-10 to binary conversion.
But I need to use the MLP for simple image classification - road signs. I wrote the code for loading the training images and preparing the matrices for learning, but I'm not able to train the network - it "learns" in 1 second even with 1 000 000 iterations, and it produces garbage results, the same for all inputs!
Here are my test images and the source code:
#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv2/ml/ml.hpp>

#include <algorithm>
#include <chrono>
#include <climits>
#include <cmath>
#include <cstdio>
#include <iomanip>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <windows.h>

using namespace cv;
using namespace ml;
using namespace std;
using namespace chrono;

// Every sign image is resized to WIDTH_SIZE x HEIGHT_SIZE before training.
const int WIDTH_SIZE = 50;
const int HEIGHT_SIZE = (int)(WIDTH_SIZE * sqrt(3)) / 2;
const int IMAGE_DATA_SIZE = WIDTH_SIZE * HEIGHT_SIZE;

// Prints a single-channel float matrix row by row, one "[ a, b, ... ]" line
// per row, using fixed notation with `prec` digits after the decimal point.
void print(Mat& mat, int prec)
{
    for (int i = 0; i < mat.size().height; i++)
    {
        cout << "[ ";
        for (int j = 0; j < mat.size().width; j++)
        {
            cout << fixed << setw(2) << setprecision(prec) << mat.at<float>(i, j);
            if (j != mat.size().width - 1)
                cout << ", ";
            else
                cout << " ]" << endl;
        }
    }
}

// Loads `imagePath` as grayscale, resizes it to WIDTH_SIZE x HEIGHT_SIZE and
// writes it to `outputImage` as a 1-channel float image scaled by 1/255.
// Returns false (and leaves `outputImage` untouched) if the file cannot be read.
bool loadImage(string imagePath, Mat& outputImage)
{
    // Load the image in grayscale.
    Mat image = imread(imagePath, IMREAD_GRAYSCALE);
    Mat temp;

    // Check for invalid input.
    if (image.empty()) {
        cout << "could not open or find image" << std::endl;
        return false;
    }

    // Resize the image to the common training size.
    Size size(WIDTH_SIZE, HEIGHT_SIZE);
    resize(image, temp, size, 0, 0, CV_INTER_AREA);

    // Convert to float, 1 channel.
    temp.convertTo(outputImage, CV_32FC1, 1.0 / 255.0);

    return true;
}

// Returns the names (without the folder prefix) of all non-directory entries
// found in `folder`, using the Win32 FindFirstFile/FindNextFile API.
vector<string> getFilesNamesInFolder(string folder)
{
    vector<string> names;
    char search_path[200];
    sprintf(search_path, "%s/*.*", folder.c_str());
    WIN32_FIND_DATA fd;
    HANDLE hFind = ::FindFirstFile(search_path, &fd);
    if (hFind != INVALID_HANDLE_VALUE) {
        do {
            // Keep only real files; dropping the '!' would instead return
            // directories, including the two default entries "." and "..".
            if (!(fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
                names.push_back(fd.cFileName);
            }
        } while (::FindNextFile(hFind, &fd));
        ::FindClose(hFind);
    }
    return names;
}

// A road-sign training sample: the preprocessed image plus the category and
// number parsed from its file name.
class Sign {
public:
    enum class Category { A = 'a', B = 'b', C = 'c', D = 'd' };

    Mat image;
    Category category;
    int number;

    // `name` is the file name without extension: its first character is taken
    // as the category and everything from index 2 onwards as the sign number.
    Sign(Mat& image, string name) : image(image) {
        category = static_cast<Category>(name.at(0));
        number = stoi(name.substr(2, name.length()));
    };
};

// Loads every file found in `folderName` as a Sign. `folderName` is
// concatenated directly with the file name, so it must end with a separator.
vector<Sign> loadSignsFromFolder(string folderName)
{
    vector<Sign> roadSigns;

    for (string filename : getFilesNamesInFolder(folderName)) {
        Mat image;
        loadImage(folderName + filename, image);
        roadSigns.emplace_back(image, filename.substr(0, (filename.length() - 4))); // cut ".png"
    }

    return roadSigns;
}

// Shows each sign in its own window and blocks until a key is pressed.
void showSignsInWindows(vector<Sign> roadSigns)
{
    for (const Sign& sign : roadSigns) {
        string windowName = "sign " + to_string(sign.number);
        namedWindow(windowName, WINDOW_AUTOSIZE);
        imshow(windowName, sign.image);
    }
    waitKey(0);
}

// Builds the training input matrix: one row per sign, each row being the
// sign's image flattened with reshape(0, 1).
Mat getInputDataFromSignsVector(vector<Sign> roadSigns)
{
    Mat roadSignsImageData;

    for (const Sign& sign : roadSigns) {
        Mat signImageDataInOneRow = sign.image.reshape(0, 1);
        roadSignsImageData.push_back(signImageDataInOneRow);
    }

    return roadSignsImageData;
}

// Builds the training output matrix: one row per sign, (count + 1) columns,
// filled with -1 except for a single 1 at column (sign index + 1).
Mat getOutputDataFromSignsVector(vector<Sign> roadSigns)
{
    int signsCount = (int) roadSigns.size();
    int signsVectorSize = signsCount + 1;

    Mat roadSignsData(0, signsVectorSize, CV_32FC1);

    int i = 1;
    for (const Sign& sign : roadSigns) {
        (void)sign; // only the position of the sign in the vector is used
        vector<float> outputTrainingVector(signsVectorSize);
        fill(outputTrainingVector.begin(), outputTrainingVector.end(), -1.0);
        outputTrainingVector[i++] = 1.0;

        Mat tempMatrix(outputTrainingVector, false);
        roadSignsData.push_back(tempMatrix.reshape(0, 1));
    }

    return roadSignsData;
}

// Loads the road-sign images, trains an MLP with one hidden layer on them and
// prints the network output for every training image.
int main(int argc, char* argv[])
{
    if (argc != 2) {
        cout << " usage: display_image imagetoloadanddisplay" << endl;
        return -1;
    }

    const int hiddenLayerSize = 500;

    vector<Sign> roadSigns = loadSignsFromFolder("../../../znaki/a/");
    Mat inputTrainingData = getInputDataFromSignsVector(roadSigns);
    Mat outputTrainingData = getOutputDataFromSignsVector(roadSigns);

    Ptr<ANN_MLP> mlp = ANN_MLP::create();

    // Layer sizes: input width, hidden layer, output width.
    Mat layersSize = Mat(3, 1, CV_16U);
    layersSize.row(0) = Scalar(inputTrainingData.cols);
    layersSize.row(1) = Scalar(hiddenLayerSize);
    layersSize.row(2) = Scalar(outputTrainingData.cols);
    mlp->setLayerSizes(layersSize);

    mlp->setActivationFunction(ANN_MLP::ActivationFunctions::SIGMOID_SYM, 1.0, 1.0);

    // These are the values as posted in the question; the resolution at the
    // end of this post lowers the first numeric argument to 0.0001.
    mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.05, 0.05);
    //mlp->setTrainMethod(ANN_MLP::TrainingMethods::RPROP);

    TermCriteria termCrit = TermCriteria(
        TermCriteria::Type::MAX_ITER //| TermCriteria::Type::EPS,
        , 100 //(int) INT_MAX
        , 0.000001
    );
    mlp->setTermCriteria(termCrit);

    Ptr<TrainData> trainingData = TrainData::create(
        inputTrainingData,
        SampleTypes::ROW_SAMPLE,
        outputTrainingData
    );

    auto start = system_clock::now();
    mlp->train(trainingData
        //,
        //ANN_MLP::TrainFlags::UPDATE_WEIGHTS
        , ANN_MLP::TrainFlags::NO_INPUT_SCALE
        + ANN_MLP::TrainFlags::NO_OUTPUT_SCALE
    );
    auto duration = duration_cast<milliseconds>(system_clock::now() - start);
    cout << "training time: " << duration.count() << "ms" << endl;

    // Run every training image back through the trained network.
    for (int i = 0; i < inputTrainingData.rows; i++) {
        Mat result;
        //mlp->predict(inputTrainingData.row(i), result);
        mlp->predict(roadSigns[i].image.reshape(0, 1), result);
        //cout << result << endl;
        print(result, 2);
    }

    //showSignsInWindows(roadSigns);

    return 0;
}
What is wrong in my code, given that XOR works but the images do not? I checked the input and output matrices and they're correct... Could you also explain when I should use ANN_MLP::TrainFlags::NO_INPUT_SCALE and ANN_MLP::TrainFlags::NO_OUTPUT_SCALE, and which values of the setActivationFunction and setTrainMethod parameters I should use?
thanks!
The problem was in the backprop weight scale parameter - it was too big, so the ANN couldn't learn more difficult things.
I changed the line to mlp->setTrainMethod(ANN_MLP::TrainingMethods::BACKPROP, 0.0001);
and the hidden layer size to 100 (to speed up learning) - now it's working!
Comments
Post a Comment