数据挖掘第三次实验报告.doc_第1页
数据挖掘第三次实验报告.doc_第2页
数据挖掘第三次实验报告.doc_第3页
数据挖掘第三次实验报告.doc_第4页
数据挖掘第三次实验报告.doc_第5页
已阅读5页,还剩7页未读 继续免费阅读

下载本文档

版权说明:本文档由用户提供并上传,收益归属内容提供方,若内容存在侵权,请进行举报或认领

文档简介

第三次实验报告 班级:数理系091001 学号:091001102 姓名:高攀 指导老师:刘建伟Aprior 算法1. 实验目的: 首先从理论角度分析了关联规则挖掘算法与聚类挖掘算法原理及其应用领域,然后介绍了Aprior算法的实现及封装,并设计了可视化界面,对算法进行了测试。 2. 实验要求: 利用Aprior 算法实现Algorithm数学计算方法。3.实验原理:Apriori算法使用一种称作逐层迭代的候选产生测试(candidate generation and test)的方法,k-项目集用于探索(k+1)-项目集。首先,找出频繁1-项目集的集合,该集合记作L 。L 用于找频繁2-向募集到集合L ,而L 用于找L ,如此下去,直到不能找到频繁k-项目集。找每一个L 均需要一次数据库的扫描。4.实验内容: package datamining;import java.io.*;import java.util.*;/* * A bare bone clean implementation of the Apriori * algorithm for finding frequent itemsets. Good for educational * purposes and as a root class for experimenting on * optimizations. * * In the latest version the use of DataHandler is added for reading * the database. * * author Michael Holler * version 0.8, 16.03.2004 */public class Apriori int pass;/ number of passes int total;/ total number of frequent itemsets int minsup;/ minimal support of itemset String filename;/ the filename of the database Item root;/ the root item of the Trie BufferedWriter writer;/ the buffer to write the output to DataHandler dh;/ the handler for the database /* * Default constructur for creating a Apriori object. */ public Apriori() this.pass = 0; this.minsup = 4; this.dh = new DataHandler(test.dat); this.root = new Item(0); /* * Constructur for creating a Apriori object with parameters. * * paramfilenamethe name of the database file * paramminsupthe minimal support threshold * paramoutfilethe name of the output file */ public Apriori(String filename, int minsup, String outfile) this.pass = 0; this.minsup = minsup; this.dh = new DataHandler(filename); this.root = new Item(0); try if (!outfile.equals() writer = new BufferedWriter(new FileWriter(outfile); catch (Exception e) /* * Constructur for creating a Apriori object with parameters. * This one is used with other mining algorithms. * * paramminsupthe minimal support threshold * paramdatahandlerthe handler for the database */ public Apriori(int minsup, DataHandler datahandler) this.pass = 0; this.minsup = minsup; this.dh = datahandler; this.root = new Item(0); /* * The workhorse method for the basic implementation of * the Apriori algorithm. */ public void findFrequentSets() boolean running = true; int candidates = 0, transactions= 0, pruned = 0, itemsets; while (running) this.pass+; candidates = this.generateCandidates(this.root, new Vector(), 1); transactions = this.countSupport(); pruned = this.pruneCandidates(this.root); itemsets = candidates - pruned; / correct the candidate count on first pass for printing if (this.pass = 1)candidates = total; total += itemsets; if (itemsets 1) running = false; System.out.println(pass: + this.pass + , total: + total + , candidates: + candidates + , pruned: + pruned); /* * Method for generating new candidates. * Copies the siblings of an item to its children. * * paramitemthe item to which generated items are added * paramdepththe depth of recursion * returnthe number of new candidates generated */ public int generateCandidates(Item item, Vector current, int depth) Vector v = item.getChildren(); Item child = item; int generated = 0; for (Enumeration e = v.elements(); e.hasMoreElements(); ) child = (Item)e.nextElement(); current.add(child); if (depth = this.pass-1) generated += this.copySiblings(child, v, current); else generated += this.generateCandidates(child, current, depth+1); current.remove(child); return generated; /* * Method for copying the siblings of an Item to its children. * * paramitemthe item to which the siblings are copied * paramsiblingsthe siblings to be copied * paramcurrentthe current itemset to be generated * returnthe number of siblings copied */ public int copySiblings(Item item, Vector siblings, Vector current) Enumeration e = siblings.elements(); Item parent = item; Item sibling = new Item(); int copied = 0; while (sibling.getLabel() parent.getLabel() & e.hasMoreElements() sibling = (Item)e.nextElement(); while (e.hasMoreElements() sibling = (Item)e.nextElement(); current.add(sibling); if (this.pass = 2 | this.checkSubsets(current, this.root.getChildren(), 0, 1) parent.addChild(new Item(sibling.getLabel(); copied+; current.remove(sibling); return copied; /* * Checks if the subsets of the itemset to be generated are all frequent. * * paramcurrent the current itemset to be generated * paramchildrenthe children in the trie on this depth * parammarkthe mark in the current itemset * paramdepthdepth of recursion * returntrue if the subsets are frequent, else false */ public boolean checkSubsets(Vector current, Vector children, int mark, int depth) boolean ok = true; Item child; int index; int i = depth; if (children = null) return false; while (ok & (mark = 0) if (depth 0; items = this.dh.read() rowcount+; if (this.pass = 1) this.root.incSupport(); this.total += generateFirstCandidates(items); else countSupport(root, items, 0, 1); return rowcount; /* * Method generates the first candidates by adding each item * found in the database to the children of the root item. Also * counts the supports of the items found in the database. * * paramitemsthe array of integer items from the database * returnthe number of candidates generated */ public int generateFirstCandidates(int items) Vector v = root.getChildren(); Enumeration e = v.elements(); Item item = new Item(); int generated = 0; for (int i = 0; i items.length; i+) while (e.hasMoreElements() & item.getLabel() itemsi) int index = v.indexOf(item); Item child = new Item(itemsi); child.incSupport(); this.root.addChild(child, index); generated+; else Item child = new Item(itemsi); child.incSupport(); this.root.addChild(child); generated+; return generated; /* * Adds the cover of the Item given as paramater and all the * Items in Trie below it. * * paramitemthe item the cover of which is to be counted * paramitemsthe array of integer items from the database * parami the position in the array * paramdepththe depth of recursion */ public void countSupport(Item item, int items, int i, int depth) Vector v = item.getChildren(); Item child; int tmp; Enumeration e = v.elements(); / loop through the children to check while (e.hasMoreElements() child = (Item)e.nextElement(); / break, if the whole transaction is checked if (i = items.length) break; / do a linear search for the child in the transaction starting from i tmp = i; while (tmp items.length & itemstmp child.getLabel() tmp+; / if the same item exists, increase support or go deaper if (tmp items.length & child.getLabel() = itemstmp) if (depth = this.pass) child.incSupport(); else countSupport(child, items, tmp+1, depth+1); i = tmp+1; /* * Method for pruning the candidates. Removes items that are * not frequent from the Trie. * * paramitemthe item the children of which will be pruned * returnthe number of items pruned from the candidates */ public int pruneCandidates(Item item) Vector v = item.getChildren(); Item child = item; int pruned = 0; for (Enumeration e = new Vector(v).elements(); e.hasMoreElements(); ) child = (Item)e.nextElement(); / check infrequency, existence and that it is fully counted if (child.getSupport() this.minsup) v.remove(child);pruned+; else pruned += pruneCandidates(child); return pruned; /* * Method gets and returns the root of the * candidate trie. * * returnthe root of the candidate trie */ public Item getTrie() return this.root; /* * Method prints the itemsets to the system output and to a file * if the name of an output file exists. */ public void printFrequentSets() if (this.writer != null) print(root, ); System.out.println(nnumber of frequent itemsets found: + this.total); /* * Loops through the Trie recursively adding * paths and subpaths to the output string along the way. * * paramitemthe item where the recursion is * paramstrthe string of the gatherd itemset */ public void print(Item item, String str) Vector v = item.getChildren(); for (Enumeration e = v.elements(); e.hasMoreElements(); ) item = (Item)e.nextElement(); try this.writer.write(str + item.getLabel() + ( + item.getSupport() + )n); this.writer.flush(); catch (Exception x) System.out.println(no output file); if (item.hasChildren() print(item, str + item.getLabel() + ); /* * Main method for testing the algorithm. * * param argsthe arguments can contain the

温馨提示

  • 1. 本站所有资源如无特殊说明,都需要本地电脑安装OFFICE2007和PDF阅读器。图纸软件为CAD,CAXA,PROE,UG,SolidWorks等.压缩文件请下载最新的WinRAR软件解压。
  • 2. 本站的文档不包含任何第三方提供的附件图纸等,如果需要附件,请联系上传者。文件的所有权益归上传用户所有。
  • 3. 本站RAR压缩包中若带图纸,网页内容里面会有图纸预览,若没有图纸预览就没有图纸。
  • 4. 未经权益所有人同意不得将文件中的内容挪作商业或盈利用途。
  • 5. 人人文库网仅提供信息存储空间,仅对用户上传内容的表现方式做保护处理,对用户上传分享的文档内容本身不做任何修改或编辑,并不能对任何下载内容负责。
  • 6. 下载文件中如有侵权或不适当内容,请与我们联系,我们立即纠正。
  • 7. 本站不保证下载资源的准确性、安全性和完整性, 同时也不承担用户因使用这些下载资源对自己和他人造成任何形式的伤害或损失。

评论

0/150

提交评论