Apriori Algorithm - Mining association rules in Java -
scan the transactions to find L1; for (k = 2; Lk-1 is not empty; k++) { generate Ck from Lk-1; count the occurrences of the itemsets in Ck; find Lk }
L1: the set of frequent 1-itemsets whose counts are no less than the minimum support; Ck: the set of candidate k-itemsets; Lk: the subset of Ck whose counts are no less than the minimum support
transactions (dataset.txt)
a, b, e b, d b, c a, b, d a, c b, c a, c a, b, c, e a, b, c f
support(min) = 20%
Our association rule data mining task has multiple parameters and stages:
- generate candidates, scan the transactions, count, and check against the minimum support
Here it is generating candidates up to C3, but C2 is not being counted and C3 is giving the wrong values.
C3 should be:
{a, b, c} {a, b, d} {a, b, e} {a, c, d} {a, c, e} {a, d, e} {b, c, d} {b, c, e} {b, d, e} {c, d, e}
However, we are getting:
a b c c d e e
This is the wrong result. The first error is that C2 and C3 are not being counted, and the second is that C3 is giving the wrong result.
My code follows:
main.java
package apriori; import java.io.bufferedreader; import java.io.file; import java.io.filereader; import java.io.filewriter; import java.io.ioexception; import java.util.arraylist; import java.util.collections; import java.util.hashmap; import java.util.hashset; import java.util.map; import java.util.scanner; import java.util.set; import java.io.*; import java.util.*; @suppresswarnings("unused") public class main { public static int minsup = 2; public static void main(string args[]) throws ioexception { filewriter summary= new filewriter("summary.txt"); string freqitems = "frequent.txt"; string infreqitems = "infrequent.txt"; long starttime = system.nanotime(); do{ candidategen.candgen(); candidategen.candgen(); candidategen.candgen(); }while(supportcounter.itemsize > 0); // writing summary file long endtime = system.nanotime(); linenumberreader frelnr = new linenumberreader(new filereader(freqitems)); linenumberreader infrelnr = new linenumberreader(new filereader(infreqitems)); frelnr.skip(long.max_value); infrelnr.skip(long.max_value); long totaltime = (endtime - starttime); summary.write("minsup = "+minsup+system.getproperty("line.separator" )+ "total t(c): "+candidategen.gettime()+" nano seconds"+system.getproperty("line.separator" )+ "total t(l): "+supportcounter.gettime()+" nano seconds"+system.getproperty("line.separator" )+ "total time of execution = "+totaltime+" nano seconds"+system.getproperty("line.separator" )+ "frequent itemsets: "+(frelnr.getlinenumber() - 1)+system.getproperty("line.separator")+ "infrequent itemsets: "+(infrelnr.getlinenumber() - 1)+system.getproperty("line.separator")); summary.close(); frelnr.close(); infrelnr.close(); } }
supportcounter.java
package apriori; import java.io.file; import java.io.filewriter; import java.io.ioexception; import java.util.arraylist; import java.util.collections; import java.util.hashmap; import java.util.scanner; public class supportcounter{ static long starttime = system.nanotime(); static int callcount = 0; static int itemsize=0; public static void supcoun() { // hashmap map = new hashmap(); // string dataset = "dataset.txt";//determines name of file try { callcount = callcount +1; if(callcount ==1){ hashmap map = new hashmap(); string dataset = "dataset.txt";//determines name of file // callcount = callcount +1; filewriter lk = new filewriter("l"+callcount+".txt"); scanner list = new scanner(new file(dataset)); while (list.hasnext()) { string word = list.next(); if (!list.hasnext()){ lk.write("time of execution : "+ gettime()+" nano seconds"+system.getproperty("line.separator" )); } if(map.containskey(word)) { //itemsize=1; integer count = (integer)map.get(word); map.put(word, new integer(count.intvalue() + 1)); } else { map.put(word, new integer(1)); } } arraylist arraylist = new arraylist(map.keyset()); collections.sort(arraylist); (int = 0; < arraylist.size(); i++) { string key = (string)arraylist.get(i); integer count = (integer)map.get(key); if( count >= main.minsup) { lk.write(key + " : " + count + system.getproperty( "line.separator" )); } } list.close(); lk.close(); }//call count = 1 if end else if(callcount > 1){ // write lk countfre(callcount); }//else-if end } // try end catch (ioexception e) { e.printstacktrace(); } }//supcoun end private static void countfre(int filenumber) throws ioexception{ arraylist<string> ckwords = new arraylist<string>(); arraylist<string> dbwords = new arraylist<string>(); filewriter lk = new filewriter("l"+filenumber+".txt"); file ck = new file("c"+filenumber+".txt"); scanner ckscan = new scanner(ck);//.usedelimiter(":"); file dataset = new file("dataset.txt"); scanner dbscan = new scanner(dataset).usedelimiter("\n"); int j1,i1 =0; 
if(ckscan.hasnext()) { ckscan.nextline(); } lk.write("time of execution : "+ gettime()+" nano seconds"+system.getproperty("line.separator" )); while(dbscan.hasnext()) { string wrd = dbscan.nextline(); dbwords.add(wrd); } while(ckscan.hasnext()) { string wrd2 = ckscan.nextline(); ckwords.add(wrd2); } int counter =0; ckscan = new scanner(ck);//.usedelimiter(":"); if(ckscan.hasnext()) ckscan.nextline(); while(ckscan.hasnext()) { dbscan = new scanner(dataset).usedelimiter("\n"); string wrd2 = ckscan.nextline(); ckwords.add(wrd2); for(j1=0;j1<dbwords.size();j1++){ if(dbwords.get(j1).contains(wrd2)){ counter++; } } if(counter >= main.minsup){ lk.write(wrd2+" : "+counter+ system.getproperty( "line.separator" )); } // system.out.println(wrd2+"--"+counter); // system.out.println("--------------------------------------------------------------"); } lk.close(); } //////////////end timer public static long gettime() { long endtime = system.nanotime(); long totaltime = (long) ((endtime - starttime)); return(totaltime); } }
candidategen.java
package apriori; import java.io.file; import java.io.filewriter; import java.io.ioexception; import java.util.arraylist; import java.util.arrays; import java.util.collections; import java.util.hashmap; import java.util.scanner; public class candidategen { static long starttime = system.nanotime(); static int callcount = 0; public static void candgen() { //hashmap map = new hashmap(); try { string dataset = "dataset.txt";//determines name of file callcount = callcount +1; if(callcount == 1) { hashmap map = new hashmap(); filewriter ck = new filewriter("c"+callcount+".txt"); filewriter infeq = new filewriter("infrequent.txt"); filewriter feq = new filewriter("frequent.txt"); scanner list = new scanner(new file(dataset)); while (list.hasnext()) { string word = list.next(); if (!list.hasnext()){ feq.write("time of execution : "+gettime()+" nano seconds"+system.getproperty("line.separator" )); infeq.write("time of execution : "+gettime()+" nano seconds"+ system.getproperty("line.separator" )); ck.write("time of execution : "+gettime()+" nano seconds"+ system.getproperty("line.separator" )); } if(map.containskey(word)) { integer count = (integer)map.get(word); map.put(word, new integer(count.intvalue() + 1)); } else { map.put(word, new integer(1)); } } arraylist arraylist = new arraylist(map.keyset()); collections.sort(arraylist); (int = 0; < arraylist.size(); i++) { string key = (string)arraylist.get(i); integer count = (integer)map.get(key); if( count < main.minsup) { infeq.write(key + system.getproperty( "line.separator" )); } else{ feq.write(key + system.getproperty( "line.separator" )); } ck.write(key +" : "+count+system.getproperty( "line.separator" )); } //system.out.println("frequent , infrequent items separated frequent.txt , infrequent.txt "); list.close(); infeq.close(); feq.close(); ck.close(); supportcounter.supcoun(); } /// 2-itemset else if(callcount == 2){ // write ck string lk = "l"+(callcount-1)+".txt"; filewriter ck = new 
filewriter("c"+callcount+".txt"); hashmap map = new hashmap(); scanner list = new scanner(new file(lk)); list.nextline(); while (list.hasnext()) { string word = list.next(); if(map.containskey(word)) { integer count = (integer)map.get(word); map.put(word, new integer(count.intvalue() + 1)); } else { map.put(word, new integer(1)); } list.nextline(); }//while arraylist arraylist = new arraylist(map.keyset()); collections.sort(arraylist); ck.write("time of execution : "+gettime()+" nano seconds"+ system.getproperty("line.separator" )); (int = 0; < arraylist.size(); i++) { (int j = i+1; j < arraylist.size(); j++) { string key = (string)arraylist.get(i); string key2 = (string)arraylist.get(j); ck.write(key + " " + key2 + system.getproperty( "line.separator" )); // system.out.println(key + "," + key2 ); } } ck.close(); countfre(callcount); supportcounter.supcoun(); }//else-if end /// 3-itemset else if(callcount >2){ string lk = "l"+(callcount-1)+".txt"; filewriter ck = new filewriter("c"+callcount+".txt"); scanner list = new scanner(new file(lk)); scanner list2 = new scanner(new file(lk)); list.nextline(); int c=0; arraylist arraylist= new arraylist(); arraylist arraylist2= new arraylist(); //hashmap map = new hashmap(); while(list.hasnext()) { string word = list.next(); c++; //system.out.println(word); if(word.contains(":")) { list.nextline(); c=0; continue; } else if(c == callcount) { if(list.hasnext()) { list.nextline(); continue; } else break; } //system.out.println(word); arraylist.add(word); } list2.nextline(); list2.nextline(); while(list2.hasnext()) { string word = list2.next(); c++; //system.out.println(word); if(word.contains(":")) { list2.nextline(); c=0; continue; } else if(c == callcount) { if(list2.hasnext()) { list2.nextline(); continue; } else break; } //system.out.println(word); arraylist2.add(word); } int el = 0; string set3,set4; arraylist arraylist3= new arraylist(); //scanner scanarray = new scanner((readable) arraylist2); for(int 
i=0;i<arraylist.size();i++) { c++; set3 = (string) arraylist.get(i); for(int j=0;j<(arraylist2.size());j++) { set4 = (string)arraylist2.get(j); if(set3.contains(set4)) { //system.out.println(i+" "+j); i++; //system.out.println(i+" "+j); //j++; //system.out.println(i-1+" "+i+" "+(j+1)+"-"); string w = (string)arraylist.get(i-1); string w2 = (string) arraylist.get(i); //j++; string w3 = (string) arraylist2.get(j+1); system.out.println(w +" "+w2+" "+w3); ck.write(w+" "+w2+" "+w3+system.getproperty("line.separator")); arraylist3.add(w); arraylist3.add(w2); arraylist3.add(w3); el++; //system.out.println(el); } i=i+1; //j=j+2; } // j=j=0; } for(int i=0;i<el;i++) { // system.out.println(arraylist3.get(i)); } //} ck.close(); countfre(callcount); }//else-if end } catch (ioexception e) { e.printstacktrace(); } } private static void countfre(int filenumber) throws ioexception{ arraylist<string> ckwords = new arraylist<string>(); arraylist<string> dbwords = new arraylist<string>(); file ck = new file("c2.txt"); scanner ckscan = new scanner(ck);//.usedelimiter(":"); file dataset = new file("dataset.txt"); scanner dbscan = new scanner(dataset).usedelimiter("\n"); int j1,i1 =0; ckscan.nextline(); while(dbscan.hasnext()) { string wrd = dbscan.nextline(); dbwords.add(wrd); } while(ckscan.hasnext()) { string wrd2 = ckscan.nextline(); ckwords.add(wrd2); } int counter =0; ckscan = new scanner(ck);//.usedelimiter(":"); ckscan.nextline(); while(ckscan.hasnext()) { dbscan = new scanner(dataset).usedelimiter("\n"); string wrd2 = ckscan.nextline(); ckwords.add(wrd2); for(j1=0;j1<dbwords.size();j1++){ if(dbwords.get(j1).contains(wrd2)){ counter++; } } // system.out.println(wrd2+"--"+counter); // system.out.println("--------------------------------------------------------------"); } } // end of timer public static double gettime() { long endtime = system.nanotime(); double totaltime = (double) ((endtime - starttime)); return (double) (totaltime); } }
Mining rules from the frequent itemsets: the code does not yet support mining rules. Example:
rule confidence support
a => b 66% 40%
a => c 66% 40%
a => e 33% 20%
b => c 57% 40%
b => d 29% 20%
b => e 29% 20%
Any critique is appreciated. Thanks in advance.
Comments
Post a Comment