Apriori Algorithm - Mining association rules in Java -


Scan the transactions to find L1. for (k = 2; L(k-1) is not empty; k++) { generate Ck from L(k-1); count the occurrences of the itemsets in Ck; find Lk }

L1: the set of frequent 1-itemsets, with counts no less than the support. Ck: the set of candidate k-itemsets. Lk: the subset of Ck with counts no less than the support.

transactions (dataset.txt)

a, b, e    b, d    b, c   a, b, d    a, c    b, c    a, c    a, b, c, e   a, b, c    f 

support(min) = 20%

Our association-rule data mining task has multiple parameters and stages:

  1. Generate candidates, scan, count, and check the minimum support.
    Here it generates candidates only up to C3; C2 is not being counted, and C3 gives the wrong value.

C3 should be:

{a, b, c}   {a, b, d}   {a, b, e}   {a, c, d}   {a, c, e}   {a, d, e}   {b, c, d}   {b, c, e}   {b, d, e}   {c, d, e}     

However, we're getting:

a b c  c d  e e 

This is the wrong result. The first error is that C2 and C3 are not being counted, and C3 also gives the wrong result.

My code follows:

main.java

package apriori;  import java.io.bufferedreader; import java.io.file; import java.io.filereader; import java.io.filewriter; import java.io.ioexception; import java.util.arraylist; import java.util.collections; import java.util.hashmap; import java.util.hashset; import java.util.map; import java.util.scanner; import java.util.set; import java.io.*; import java.util.*;  @suppresswarnings("unused") public class main {     public static int minsup = 2;      public static void main(string args[]) throws ioexception {     filewriter summary= new filewriter("summary.txt");     string freqitems = "frequent.txt";     string infreqitems = "infrequent.txt";     long starttime = system.nanotime();     do{         candidategen.candgen();         candidategen.candgen();         candidategen.candgen();      }while(supportcounter.itemsize > 0);  //  writing summary file                 long endtime = system.nanotime();         linenumberreader  frelnr = new linenumberreader(new filereader(freqitems));         linenumberreader  infrelnr = new linenumberreader(new filereader(infreqitems));         frelnr.skip(long.max_value);         infrelnr.skip(long.max_value);         long totaltime = (endtime - starttime);         summary.write("minsup = "+minsup+system.getproperty("line.separator" )+                         "total t(c): "+candidategen.gettime()+" nano seconds"+system.getproperty("line.separator" )+                         "total t(l): "+supportcounter.gettime()+" nano seconds"+system.getproperty("line.separator" )+                         "total time of execution = "+totaltime+" nano seconds"+system.getproperty("line.separator" )+                         "frequent itemsets: "+(frelnr.getlinenumber() - 1)+system.getproperty("line.separator")+                         "infrequent itemsets: "+(infrelnr.getlinenumber() - 1)+system.getproperty("line.separator"));         summary.close();         frelnr.close();         infrelnr.close();     } } 

supportcounter.java

package apriori;  import java.io.file; import java.io.filewriter; import java.io.ioexception; import java.util.arraylist; import java.util.collections; import java.util.hashmap; import java.util.scanner;  public class supportcounter{     static long starttime = system.nanotime();     static int callcount = 0;     static int itemsize=0;     public static void supcoun()     {     //  hashmap map = new hashmap();      //  string dataset = "dataset.txt";//determines name of file         try {             callcount = callcount +1;              if(callcount ==1){                  hashmap map = new hashmap();              string dataset = "dataset.txt";//determines name of file         //  callcount = callcount +1;             filewriter lk = new filewriter("l"+callcount+".txt");             scanner list = new scanner(new file(dataset));               while (list.hasnext()) {                  string word = list.next();                 if (!list.hasnext()){                     lk.write("time of execution : "+ gettime()+" nano seconds"+system.getproperty("line.separator" ));                 }                 if(map.containskey(word)) {                     //itemsize=1;                   integer count = (integer)map.get(word);                   map.put(word, new integer(count.intvalue() + 1));                 } else {                    map.put(word, new integer(1));                 }               }             arraylist arraylist = new arraylist(map.keyset());             collections.sort(arraylist);             (int = 0; < arraylist.size(); i++) {               string key = (string)arraylist.get(i);               integer count = (integer)map.get(key);             if( count >= main.minsup)                     {                     lk.write(key + " : " + count + system.getproperty( "line.separator" ));                     }                 }                   list.close();             lk.close();             }//call count = 1 if end              else if(callcount > 1){ // 
write lk                 countfre(callcount);              }//else-if end         } // try end          catch (ioexception e)          {             e.printstacktrace();         }     }//supcoun end  private static void countfre(int filenumber) throws ioexception{          arraylist<string> ckwords = new arraylist<string>();         arraylist<string> dbwords = new arraylist<string>();         filewriter lk = new filewriter("l"+filenumber+".txt");         file ck = new file("c"+filenumber+".txt");         scanner ckscan = new scanner(ck);//.usedelimiter(":");           file dataset = new file("dataset.txt");          scanner dbscan = new scanner(dataset).usedelimiter("\n");           int j1,i1 =0;          if(ckscan.hasnext())          {          ckscan.nextline();          }          lk.write("time of execution : "+ gettime()+" nano seconds"+system.getproperty("line.separator" ));          while(dbscan.hasnext())          {               string wrd = dbscan.nextline();              dbwords.add(wrd);          }          while(ckscan.hasnext())          {              string wrd2 = ckscan.nextline();              ckwords.add(wrd2);          }          int counter =0;          ckscan = new scanner(ck);//.usedelimiter(":");          if(ckscan.hasnext())          ckscan.nextline();          while(ckscan.hasnext())          {              dbscan = new scanner(dataset).usedelimiter("\n");              string wrd2 = ckscan.nextline();              ckwords.add(wrd2);               for(j1=0;j1<dbwords.size();j1++){                  if(dbwords.get(j1).contains(wrd2)){                      counter++;                  }              }               if(counter >= main.minsup){               lk.write(wrd2+" : "+counter+ system.getproperty( "line.separator" ));              }             // system.out.println(wrd2+"--"+counter);        // system.out.println("--------------------------------------------------------------");          }          lk.close();                }     
//////////////end timer public static long gettime()     {         long endtime   = system.nanotime();         long totaltime = (long) ((endtime - starttime));         return(totaltime);     } } 

candidategen.java

package apriori;  import java.io.file; import java.io.filewriter; import java.io.ioexception; import java.util.arraylist; import java.util.arrays; import java.util.collections; import java.util.hashmap; import java.util.scanner;  public class candidategen {     static long starttime = system.nanotime();     static int callcount = 0;  public static void candgen()     {         //hashmap map = new hashmap();          try {             string dataset = "dataset.txt";//determines name of file             callcount = callcount +1;              if(callcount == 1)             {             hashmap map = new hashmap();             filewriter ck = new filewriter("c"+callcount+".txt");             filewriter infeq = new filewriter("infrequent.txt");             filewriter feq = new filewriter("frequent.txt");              scanner list = new scanner(new file(dataset));             while (list.hasnext()) {                  string word = list.next();                 if (!list.hasnext()){                     feq.write("time of execution : "+gettime()+" nano seconds"+system.getproperty("line.separator" ));                     infeq.write("time of execution : "+gettime()+" nano seconds"+ system.getproperty("line.separator" ));                     ck.write("time of execution : "+gettime()+" nano seconds"+ system.getproperty("line.separator" ));                 }                                if(map.containskey(word)) {                   integer count = (integer)map.get(word);                   map.put(word, new integer(count.intvalue() + 1));                 } else {                    map.put(word, new integer(1));                 }               }             arraylist arraylist = new arraylist(map.keyset());             collections.sort(arraylist);             (int = 0; < arraylist.size(); i++) {               string key = (string)arraylist.get(i);               integer count = (integer)map.get(key);              if( count < main.minsup)             {                 
infeq.write(key  + system.getproperty( "line.separator" ));             }             else{                 feq.write(key + system.getproperty( "line.separator" ));                 }             ck.write(key +" : "+count+system.getproperty( "line.separator" ));                }             //system.out.println("frequent , infrequent items separated frequent.txt , infrequent.txt ");             list.close();             infeq.close();             feq.close();             ck.close();             supportcounter.supcoun();         }          /// 2-itemset             else if(callcount == 2){ // write ck                 string lk = "l"+(callcount-1)+".txt";                 filewriter ck = new filewriter("c"+callcount+".txt");                 hashmap map = new hashmap();                 scanner list = new scanner(new file(lk));                 list.nextline();                  while (list.hasnext()) {                 string word = list.next();                  if(map.containskey(word)) {                       integer count = (integer)map.get(word);                        map.put(word, new integer(count.intvalue() + 1));                     } else {                        map.put(word, new integer(1));                     }                 list.nextline();                   }//while                   arraylist arraylist = new arraylist(map.keyset());                 collections.sort(arraylist);                  ck.write("time of execution : "+gettime()+" nano seconds"+ system.getproperty("line.separator" ));                  (int = 0; < arraylist.size(); i++) {                      (int j = i+1; j < arraylist.size(); j++) {                        string key = (string)arraylist.get(i);                       string key2 = (string)arraylist.get(j);                       ck.write(key + " " + key2 + system.getproperty( "line.separator" ));                    // system.out.println(key + "," + key2 );                     }                 }                 ck.close();            
     countfre(callcount);                 supportcounter.supcoun();             }//else-if end      /// 3-itemset             else if(callcount >2){             string lk = "l"+(callcount-1)+".txt";             filewriter ck = new filewriter("c"+callcount+".txt");             scanner list = new scanner(new file(lk));             scanner list2 = new scanner(new file(lk));             list.nextline();             int c=0;             arraylist arraylist= new arraylist();             arraylist arraylist2= new arraylist();             //hashmap map = new hashmap();              while(list.hasnext())             {                 string word = list.next();                 c++;                 //system.out.println(word);                 if(word.contains(":"))                 {                     list.nextline();                     c=0;                     continue;                 }                 else if(c == callcount)                 {                     if(list.hasnext())                     {                      list.nextline();                     continue;                     }                     else                         break;                 }                 //system.out.println(word);                 arraylist.add(word);                 }              list2.nextline();             list2.nextline();             while(list2.hasnext())             {                 string word = list2.next();                 c++;                 //system.out.println(word);                 if(word.contains(":"))                 {                     list2.nextline();                     c=0;                     continue;                 }                 else if(c == callcount)                 {                     if(list2.hasnext())                     {                      list2.nextline();                     continue;                     }                     else                         break;                 }                 //system.out.println(word);           
      arraylist2.add(word);              }             int el = 0;             string set3,set4;             arraylist arraylist3= new arraylist();             //scanner scanarray = new scanner((readable) arraylist2);             for(int i=0;i<arraylist.size();i++)             {                  c++;                 set3 = (string) arraylist.get(i);                 for(int j=0;j<(arraylist2.size());j++)                 {                      set4 = (string)arraylist2.get(j);                     if(set3.contains(set4))                         {                             //system.out.println(i+" "+j);                             i++;                             //system.out.println(i+" "+j);                             //j++;                             //system.out.println(i-1+" "+i+" "+(j+1)+"-");                             string w = (string)arraylist.get(i-1);                             string w2 = (string) arraylist.get(i);                             //j++;                             string w3 = (string) arraylist2.get(j+1);                             system.out.println(w +" "+w2+" "+w3);                             ck.write(w+" "+w2+" "+w3+system.getproperty("line.separator"));                             arraylist3.add(w);                             arraylist3.add(w2);                             arraylist3.add(w3);                             el++;                              //system.out.println(el);                         }                     i=i+1;                     //j=j+2;                     }                 //  j=j=0;                 }             for(int i=0;i<el;i++)             {             //  system.out.println(arraylist3.get(i));             }             //}             ck.close();              countfre(callcount);          }//else-if end      }         catch (ioexception e)          {             e.printstacktrace();         }       }  private static void countfre(int filenumber) throws ioexception{      arraylist<string> ckwords 
= new arraylist<string>();     arraylist<string> dbwords = new arraylist<string>();      file ck = new file("c2.txt");      scanner ckscan = new scanner(ck);//.usedelimiter(":");       file dataset = new file("dataset.txt");      scanner dbscan = new scanner(dataset).usedelimiter("\n");       int j1,i1 =0;      ckscan.nextline();      while(dbscan.hasnext())      {          string wrd = dbscan.nextline();          dbwords.add(wrd);      }      while(ckscan.hasnext())      {          string wrd2 = ckscan.nextline();          ckwords.add(wrd2);      }      int counter =0;      ckscan = new scanner(ck);//.usedelimiter(":");      ckscan.nextline();      while(ckscan.hasnext())      {          dbscan = new scanner(dataset).usedelimiter("\n");          string wrd2 = ckscan.nextline();          ckwords.add(wrd2);          for(j1=0;j1<dbwords.size();j1++){              if(dbwords.get(j1).contains(wrd2)){                  counter++;              }       }  //      system.out.println(wrd2+"--"+counter);   //  system.out.println("--------------------------------------------------------------");      }   }         // end of timer     public static   double gettime()     {         long endtime   = system.nanotime();         double totaltime = (double) ((endtime - starttime));         return (double) (totaltime);     } } 
  1. Mining rules from the frequent itemsets: the code does not support mining rules yet. For example:

    rule    confidence    support
    a => b 66% 40%
    a => c 66% 40%
    a => e 33% 20%
    b => c 57% 40%
    b => d 29% 20%
    b => e 29% 20%

Any critique is appreciated. Thanks in advance.


Comments

Popular posts from this blog

ios - RestKit 0.20 — CoreData: error: Failed to call designated initializer on NSManagedObject class (again) -

laravel - PDOException in Connector.php line 55: SQLSTATE[HY000] [1045] Access denied for user 'root'@'localhost' (using password: YES) -

java - Digest auth with Spring Security using javaconfig -