




版权说明:本文档由用户提供并上传,收益归属内容提供方,若内容存在侵权,请进行举报或认领
文档简介
1、【决策树】C4.5 算法建立决策树 JAVA 练习。以下程序是我练习写的,不一定正确,也没做存储优化。有问题请留言交流。转载请挂链接。当前的属性为:age income student credit_rating。当前的数据集为(最后一列是 TARGET_VALUE):youth high no fair no;youth high no excellent no;middle_aged high no fair yes;senior low yes fair yes;senior low yes excellent no;middle_aged low yes excellent yes;youth medium
2、 no fair no;youth low yes fair yes;senior medium yes fair yes;youth medium yes excellent yes;middle_aged high yes fair yes;senior medium no excellent no。C4.5 建立树类:package C45Test;import java.util.ArrayList;import java.util.List;import java.util.Map;public class DecisionTree public TreeNode createDT(ListArra
3、yList data,List attributeList) System.out.println(当前的DATA为); for(int i=0;idata.size();i+) ArrayList temp = data.get(i); for(int j=0;jtemp.size();j+) System.out.print(temp.get(j)+ ); System.out.println(); System.out.println(-); System.out.println(当前的ATTR为); for(int i=0;iattributeList.size();i+) Syste
4、m.out.print(attributeList.get(i)+ ); System.out.println(); System.out.println(-); TreeNode node = new TreeNode(); String result = InfoGain.IsPure(InfoGain.getTarget(data); if(result != null) node.setNodeName(leafNode); node.setTargetFunValue(result); return node; if(attributeList.size() = 0) node.se
5、tTargetFunValue(result); return node; else InfoGain gain = new InfoGain(data,attributeList); double maxGain = 0.0; int attrIndex = -1; for(int i=0;iattributeList.size();i+) double tempGain = gain.getGainRatio(i); if(maxGain tempGain) maxGain = tempGain; attrIndex = i; System.out.println(选择出的最大增益率属性为
6、: + attributeList.get(attrIndex); node.setAttributeValue(attributeList.get(attrIndex); ListArrayList resultData = null; Map attrvalueMap = gain.getAttributeValue(attrIndex); for(Map.Entry entry : attrvalueMap.entrySet() resultData = gain.getData4Value(entry.getKey(), attrIndex); TreeNode leafNode =
7、null; System.out.println(当前为+attributeList.get(attrIndex)+的+entry.getKey()+分支。); if(resultData.size() = 0) leafNode = new TreeNode(); leafNode.setNodeName(attributeList.get(attrIndex); leafNode.setTargetFunValue(result); leafNode.setAttributeValue(entry.getKey(); else for (int j = 0; j resultData.si
8、ze(); j+) resultData.get(j).remove(attrIndex); ArrayList resultAttr = new ArrayList(attributeList); resultAttr.remove(attrIndex); leafNode = createDT(resultData,resultAttr); node.getChildTreeNode().add(leafNode); node.getPathName().add(entry.getKey(); return node; class TreeNode private String attri
9、buteValue; private List childTreeNode; private List pathName; private String targetFunValue; private String nodeName; public TreeNode(String nodeName) this.nodeName = nodeName; this.childTreeNode = new ArrayList(); this.pathName = new ArrayList(); public TreeNode() this.childTreeNode = new ArrayList
10、(); this.pathName = new ArrayList(); public String getAttributeValue() return attributeValue; public void setAttributeValue(String attributeValue) this.attributeValue = attributeValue; public List getChildTreeNode() return childTreeNode; public void setChildTreeNode(List childTreeNode) this.childTre
11、eNode = childTreeNode; public String getTargetFunValue() return targetFunValue; public void setTargetFunValue(String targetFunValue) this.targetFunValue = targetFunValue; public String getNodeName() return nodeName; public void setNodeName(String nodeName) this.nodeName = nodeName; public List getPa
12、thName() return pathName; public void setPathName(List pathName) this.pathName = pathName; 增益率计算类(取log的时候底用的是e,没用2)package C45Test;import java.util.ArrayList;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Set;/C
13、 4.5 实现public class InfoGain private ListArrayList data; private List attribute; public InfoGain(ListArrayList data,List attribute) this.data = new ArrayListArrayList(); for(int i=0;idata.size();i+) List temp = data.get(i); ArrayList t = new ArrayList(); for(int j=0;jtemp.size();j+) t.add(temp.get(j
14、); this.data.add(t); this.attribute = new ArrayList(); for(int k=0;kattribute.size();k+) this.attribute.add(attribute.get(k); /*this.data = data; this.attribute = attribute;*/ /获得熵 public double getEntropy() Map targetValueMap = getTargetValue(); Set targetkey = targetValueMap.keySet(); double entro
15、py = 0.0; for(String key : targetkey) double p = MathUtils.div(double)targetValueMap.get(key), (double)data.size(); entropy += (-1) * p * Math.log(p); return entropy; /获得InfoA public double getInfoAttribute(int attributeIndex) Map attributeValueMap = getAttributeValue(attributeIndex); double infoA =
16、 0.0; for(Map.Entry entry : attributeValueMap.entrySet() int size = data.size(); double attributeP = MathUtils.div(double)entry.getValue() , (double) size); Map targetValueMap = getAttributeValueTargetValue(entry.getKey(),attributeIndex); long totalCount = 0L; for(Map.Entry entryValue :targetValueMa
17、p.entrySet() totalCount += entryValue.getValue(); double valueSum = 0.0; for(Map.Entry entryTargetValue : targetValueMap.entrySet() double p = MathUtils.div(double)entryTargetValue.getValue(), (double)totalCount); valueSum += Math.log(p) * p; infoA += (-1) * attributeP * valueSum; return infoA; /得到属
18、性值在决策空间的比例 public Map getAttributeValueTargetValue(String attributeName,int attributeIndex) Map targetValueMap = new HashMap(); IteratorArrayList iterator = data.iterator(); while(iterator.hasNext() List tempList = iterator.next(); if(attributeName.equalsIgnoreCase(tempList.get(attributeIndex) int s
19、ize = tempList.size(); String key = tempList.get(size - 1); Long value = targetValueMap.get(key); targetValueMap.put(key, value != null ? +value :1L); return targetValueMap; /得到属性在决策空间上的数量 public Map getAttributeValue(int attributeIndex) Map attributeValueMap = new HashMap(); for(ArrayList note : da
20、ta) String key = note.get(attributeIndex); Long value = attributeValueMap.get(key); attributeValueMap.put(key, value != null ? +value :1L); return attributeValueMap; public ListArrayList getData4Value(String attrValue,int attrIndex) ListArrayList resultData = new ArrayListArrayList(); IteratorArrayL
21、ist iterator = data.iterator(); for(;iterator.hasNext();) ArrayList templist = iterator.next(); if(templist.get(attrIndex).equalsIgnoreCase(attrValue) ArrayList temp = (ArrayList) templist.clone(); resultData.add(temp); return resultData; /获得增益率 public double getGainRatio(int attributeIndex) return
22、MathUtils.div(getGain(attributeIndex), getSplitInfo(attributeIndex); /获得增益量 public double getGain(int attributeIndex) return getEntropy() - getInfoAttribute(attributeIndex); /得到惩罚因子 public double getSplitInfo(int attributeIndex) Map attributeValueMap = getAttributeValue(attributeIndex); double split
23、A = 0.0; for(Map.Entry entry : attributeValueMap.entrySet() int size = data.size(); double attributeP = MathUtils.div(double)entry.getValue() , (double) size); splitA += attributeP * Math.log(attributeP) * (-1); return splitA; /得到目标函数在当前集合范围内的离散的值 public Map getTargetValue() Map targetValueMap = new
24、 HashMap(); IteratorArrayList iterator = data.iterator(); while(iterator.hasNext() List tempList = iterator.next(); String key = tempList.get(tempList.size() - 1); Long value = targetValueMap.get(key); targetValueMap.put(key, value != null ? +value : 1L); return targetValueMap; /获得TARGET值 public sta
25、tic List getTarget(ListArrayList data) List list = new ArrayList(); for(ArrayList temp : data) int index = temp.size() -1; String value = temp.get(index); list.add(value); return list; /判断当前纯度是否100% public static String IsPure(List list) Set set = new HashSet(); for(String name :list) set.add(name);
26、 if(set.size() 1) return null; Iterator iterator = set.iterator(); return iterator.next(); 测试类,数据集读取以上的分别放到2个List中。package C45Test;import java.util.ArrayList;import java.util.List;import C45Test.DecisionTree.TreeNode;public class MainC45 private static final ListArrayList dataList = new ArrayListArr
27、ayList(); private static final List attributeList = new ArrayList(); public static void main(String args) DecisionTree dt = new DecisionTree(); TreeNode node = dt.createDT(configData(),configAttribute(); System.out.println(); 大数运算工具类package C45Test;import java.math.BigDecimal;public abstract class M
28、athUtils /默认余数长度 private static final int DIV_SCALE = 10; /受限于DOUBLE长度 public static double add(double value1,double value2) BigDecimal big1 = new BigDecimal(String.valueOf(value1); BigDecimal big2 = new BigDecimal(String.valueOf(value2); return big1.add(big2).doubleValue(); /大数加法 public static double add(String value1,String value2) BigDecimal big1 = new BigD
温馨提示
- 1. 本站所有资源如无特殊说明,都需要本地电脑安装 OFFICE2007 和 PDF 阅读器。图纸软件为 CAD、CAXA、PROE、UG、SolidWorks 等。压缩文件请下载最新的 WinRAR 软件解压。
- 2. 本站的文档不包含任何第三方提供的附件图纸等,如果需要附件,请联系上传者。文件的所有权益归上传用户所有。
- 3. 本站RAR压缩包中若带图纸,网页内容里面会有图纸预览,若没有图纸预览就没有图纸。
- 4. 未经权益所有人同意不得将文件中的内容挪作商业或盈利用途。
- 5. 人人文库网仅提供信息存储空间,仅对用户上传内容的表现方式做保护处理,对用户上传分享的文档内容本身不做任何修改或编辑,并不能对任何下载内容负责。
- 6. 下载文件中如有侵权或不适当内容,请与我们联系,我们立即纠正。
- 7. 本站不保证下载资源的准确性、安全性和完整性,同时也不承担用户因使用这些下载资源对自己和他人造成任何形式的伤害或损失。
最新文档
- 甘肃应急预案管理办法
- 2025全国两会应知应会测试题及答案
- 出租车计费程序课件
- 出租车安全培训课件
- 学生安全行为守则汇编
- 北海辅警考试题库(含答案)
- 注册会计师考试经济法科目试题及答案指导
- 2025年无房产证买卖合同
- 2025共有产权房租赁合同
- 冲床安全生产培训课件
- 加油站消防安全管理制度
- 2025-2030中国光保真度(Li-Fi)行业市场发展趋势与前景展望战略研究报告
- 全册知识点(素材)六年级上册科学青岛版
- 学校食堂管理工作资料汇编
- 心衰患者的麻醉处理1例课件
- 电竞酒店服务礼仪与职业素养培训
- 《公路运输网络规划》课件
- 物业客服管家工作内容培训
- 2025年机关事业单位工人招聘《机动车驾驶员》技师 考试题库与参考答案
- 2025年长江陆水枢纽工程局有限公司招聘笔试参考题库含答案解析
- 2024年技能竞赛钳工理论考试题库600题(含答案)
评论
0/150
提交评论