




版权说明:本文档由用户提供并上传,收益归属内容提供方,若内容存在侵权,请进行举报或认领
文档简介
1、精选优质文档-倾情为你奉上 机器学习课内实验报告(1) ID3算法实现决策树 2015 - 2016学年 第 2 学期 专业:智能科学与技术 班级:智能1301班 学号: 姓名:张争辉 一、 实验目的:理解ID3算法的基本原理,并且编程实现。二、 实验要求:使用C/C++/MATLAB实现ID3算法。输入:若干行,每行 5 个字符串,表示Outlook Temperature Humidity Wind Play ball如上表。输出:决策树。实验结果如下:输入: Sunny Hot High Weak No Sunny Hot High Strong No Overcast Hot High Weak Yes
2、 Rain Mild High Weak Yes Rain Cool Normal Weak Yes Rain Cool Normal Strong No Overcast Cool Normal Strong Yes Sunny Mild High Weak No Sunny Cool Normal Weak Yes Rain Mild Normal Weak Yes Sunny Mild Normal Strong Yes Overcast Mild High Strong Yes Overcast Hot Normal Weak Yes Rain Mild High Strong No输
3、出:Outlook Rain Wind Strong No Weak Yes Overcast Yes Sunny Humidity Normal Yes High No 三、 具体实现:实现算法如下:#include <iostream>#include <fstream>#include <math.h>#include <string>using namespace std;#define ROW 14#define COL 5#define log2 0.typedef struct TNode char data15; char wei
4、ght15; TNode * firstchild,*nextsibling;*tree;typedef struct LNode char OutLook15; char Temperature15; char Humidity15; char Wind15; char PlayTennis5; LNode *next;*link;typedef struct AttrNode char attributes15;/属性 int attr_Num;/属性的个数 AttrNode *next;*Attributes;char * ExamplesROWCOL = /"OverCast
5、","Cool","High","Strong","No", /"Rain","Hot","Normal","Strong","Yes", "Sunny","Hot","High","Weak","No", "Sunny","Hot","High",
6、"Strong","No", "OverCast","Hot","High","Weak","Yes", "Rain","Mild","High","Weak","Yes", "Rain","Cool","Normal","Weak","Yes", "
7、;Rain","Cool","Normal","Strong","No", "OverCast","Cool","Normal","Strong","Yes", "Sunny","Mild","High","Weak","No", "Sunny","Cool",&quo
8、t;Normal","Weak","Yes", "Rain","Mild","Normal","Weak","Yes", "Sunny","Mild","Normal","Strong","Yes", "OverCast","Mild","Normal","Strong"
9、;,"Yes", "OverCast","Hot","Normal","Weak","Yes", "Rain","Mild","High","Strong","No" ;char * Attributes_kind4 = "OutLook","Temperature","Humidity","Wind&quo
10、t;int Attr_kind4 = 3,3,2,2;char * OutLook_kind3 = "Sunny","OverCast","Rain"char * Temperature_kind3 = "Hot","Mild","Cool"char * Humidity_kind2 = "High","Normal"char * Wind_kind2 = "Weak","Strong"/*int
11、 i_Exampple145 = 0,0,0,0,1, 0,0,0,1,1, 1,0,0,1,0, 2,1,0,0,0, 2,2,1,0,0, 2,2,1,1,1, 1,2,1,1,0, 0,1,0,0,1, 0,2,1,0,0, 2,1,1,0,0, 0,1,1,1,0, 1,1,1,1,0, 1,1,1,0,0, 2,1,0,0,1 ;*/void treelists(tree T);void InitAttr(Attributes &attr_link,char * Attributes_kind,int Attr_kind);void InitLink(link &L,
12、char * ExamplesCOL);void ID3(tree &T,link L,link Target_Attr,Attributes attr);void PN_Num(link L,int &positve,int &negative);double Gain(int positive,int negative,char * atrribute,link L,Attributes attr_L);void main() link LL,p; Attributes attr_L,q; tree T; T = new TNode; T->firstchil
13、d = T->nextsibling = NULL; strcpy(T->weight,""); strcpy(T->data,""); attr_L = new AttrNode; attr_L->next = NULL; LL = new LNode; LL->next = NULL; /成功建立两个链表 InitLink(LL,Examples); InitAttr(attr_L,Attributes_kind,Attr_kind); ID3(T,LL,NULL,attr_L); cout<<"决
14、策树以广义表形式输出如下:"<<endl; treelists(T);/以广义表的形式输出树/cout<<Gain(9,5,"OutLook",LL,attr_L)<<endl; cout<<endl;/以广义表的形式输出树void treelists(tree T) tree p; if(!T) return; cout<<""<<T->weight<<"" cout<<T->data; p = T->firs
15、tchild; if (p) cout<<"(" while (p) treelists(p); p = p->nextsibling; if (p)cout<<',' cout<<")" void InitAttr(Attributes &attr_link,char * Attributes_kind,int Attr_kind) Attributes p; for (int i =0;i < 4;i+) p = new AttrNode; p->next = NULL;
16、strcpy(p->attributes,Attributes_kindi); p->attr_Num = Attr_kindi; p->next = attr_link->next; attr_link->next = p; void InitLink(link &LL,char * ExamplesCOL) link p; for (int i = 0;i < ROW;i+) p = new LNode; p->next = NULL; strcpy(p->OutLook,Examplesi0); strcpy(p->Tempe
17、rature,Examplesi1); strcpy(p->Humidity,Examplesi2); strcpy(p->Wind,Examplesi3); strcpy(p->PlayTennis,Examplesi4); p->next = LL->next; LL->next = p; void PN_Num(link L,int &positve,int &negative) positve = 0; negative = 0; link p; p = L->next; while (p) if (strcmp(p->P
18、layTennis,"No") = 0) negative+; else if(strcmp(p->PlayTennis,"Yes") = 0) positve+; p = p->next; /计算信息增益/link L: 样本集合S/attr_L:属性集合double Gain(int positive,int negative,char * atrribute,link L,Attributes attr_L) int atrr_kinds;/每个属性中的值的个数 Attributes p = attr_L->next; link
19、q = L->next; int attr_th = 0;/第几个属性 while (p) if (strcmp(p->attributes,atrribute) = 0) atrr_kinds = p->attr_Num; break; p = p->next; attr_th+; double entropy,gain=0; double p1 = 1.0*positive/(positive + negative); double p2 = 1.0*negative/(positive + negative); entropy = -p1*log(p1)/log2
20、 - p2*log(p2)/log2;/集合熵 gain = entropy; /获取每个属性值在训练样本中出现的个数 /获取每个属性值所对应的正例和反例的个数 /声明一个3*atrr_kinds的数组 int * kinds= new int * 3; for (int j =0;j < 3;j+) kindsj = new intatrr_kinds;/保存每个属性值在训练样本中出现的个数 /初始化 for (int j = 0;j< 3;j+) for (int i =0;i < atrr_kinds;i+) kindsji = 0; while (q) if (str
21、cmp("OutLook",atrribute) = 0) for (int i = 0;i < atrr_kinds;i+) if(strcmp(q->OutLook,OutLook_kindi) = 0) kinds0i+; if(strcmp(q->PlayTennis,"Yes") = 0) kinds1i+; else kinds2i+; else if (strcmp("Temperature",atrribute) = 0) for (int i = 0;i < atrr_kinds;i+) if
22、(strcmp(q->Temperature,Temperature_kindi) = 0) kinds0i+; if(strcmp(q->PlayTennis,"Yes") = 0) kinds1i+; else kinds2i+; else if (strcmp("Humidity",atrribute) = 0) for (int i = 0;i < atrr_kinds;i+) if(strcmp(q->Humidity,Humidity_kindi) = 0) kinds0i+; if(strcmp(q->Play
23、Tennis,"Yes") = 0) kinds1i+;/ else kinds2i+; else if (strcmp("Wind",atrribute) = 0) for (int i = 0;i < atrr_kinds;i+) if(strcmp(q->Wind,Wind_kindi) = 0) kinds0i+; if(strcmp(q->PlayTennis,"Yes") = 0) kinds1i+; else kinds2i+; q = q->next; /计算信息增益 double * gain
24、_kind = new doubleatrr_kinds; int positive_kind = 0,negative_kind = 0; for (int j = 0;j < atrr_kinds;j+) if (kinds0j != 0 && kinds1j != 0 && kinds2j != 0) p1 = 1.0*kinds1j/kinds0j; p2 = 1.0*kinds2j/kinds0j; gain_kindj = -p1*log(p1)/log2-p2*log(p2)/log2; gain = gain - (1.0*kinds0j/
25、(positive + negative)*gain_kindj; else gain_kindj = 0; return gain;/在ID3算法中的训练样本子集合与属性子集合的链表需要进行清空void FreeLink(link &Link) link p,q; p = Link->next; Link->next = NULL; while (p) q = p; p = p->next; free(q); void ID3(tree &T,link L,link Target_Attr,Attributes attr) Attributes p,max,
26、attr_child,p1; link q,link_child,q1; tree r,tree_p; int positive =0,negative =0; PN_Num(L,positive,negative); /初始化两个子集合 attr_child = new AttrNode; attr_child->next = NULL; link_child = new LNode; link_child->next = NULL; if (positive = 0)/全是反例 strcpy(T->data,"No"); return; else if
27、( negative = 0)/全是正例 strcpy(T->data,"Yes"); return; p = attr->next; /属性链表 double gain,g = 0; /*/ /* 建立属性子集合与训练样本子集合有两个方案: 一:在原来链表的基础上进行删除; 二:另外申请空间进行存储子集合; 采用第二种方法虽然浪费了空间,但也省了很多事情,避免了变量之间的应用混乱 */ /*/ if(p) while (p) gain = Gain(positive,negative,p->attributes,L,attr); cout<<
28、p->attributes<<" "<<gain<<endl; if(gain > g) g = gain; max = p;/寻找信息增益最大的属性 p = p->next; strcpy(T->data,max->attributes);/增加决策树的节点 cout<<"信息增益最大的属性:max->attributes = "<<max->attributes<<endl<<endl; /下面开始建立决策树 /创建属性子集合
29、 p = attr->next; while (p) if (strcmp(p->attributes,max->attributes) != 0) p1 = new AttrNode; strcpy(p1->attributes,p->attributes); p1->attr_Num = p->attr_Num; p1->next = NULL; p1->next = attr_child->next; attr_child->next = p1; p = p->next; /需要区分出是哪一种属性 /建立每一层的第一
30、个节点 if (strcmp("OutLook",max->attributes) = 0) r = new TNode; r->firstchild = r->nextsibling = NULL; strcpy(r->weight,OutLook_kind0); T->firstchild = r; /获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child q = L->next; while (q) if (strcmp(q->OutLook,OutLook_kind0) = 0) q1
31、 = new LNode; strcpy(q1->OutLook,q->OutLook); strcpy(q1->Humidity,q->Humidity); strcpy(q1->Temperature,q->Temperature); strcpy(q1->Wind,q->Wind); strcpy(q1->PlayTennis,q->PlayTennis); q1->next = NULL; q1->next = link_child->next; link_child->next = q1; q = q
32、->next; else if (strcmp("Temperature",max->attributes) = 0) r = new TNode; r->firstchild = r->nextsibling = NULL; strcpy(r->weight,Temperature_kind0); T->firstchild = r; /获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child q = L->next; while (q) if (strcmp(q->Temperatur
33、e,Temperature_kind0) = 0) q1 = new LNode; strcpy(q1->OutLook,q->OutLook); strcpy(q1->Humidity,q->Humidity); strcpy(q1->Temperature,q->Temperature); strcpy(q1->Wind,q->Wind); strcpy(q1->PlayTennis,q->PlayTennis); q1->next = NULL; q1->next = link_child->next; lin
34、k_child->next = q1; q = q->next; else if (strcmp("Humidity",max->attributes) = 0) r = new TNode; r->firstchild = r->nextsibling = NULL; strcpy(r->weight,Humidity_kind0); T->firstchild = r; /获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child q = L->next; while (q) if (s
35、trcmp(q->Humidity,Humidity_kind0) = 0) q1 = new LNode; strcpy(q1->OutLook,q->OutLook); strcpy(q1->Humidity,q->Humidity); strcpy(q1->Temperature,q->Temperature); strcpy(q1->Wind,q->Wind); strcpy(q1->PlayTennis,q->PlayTennis); q1->next = NULL; q1->next = link_chi
36、ld->next; link_child->next = q1; q = q->next; else if (strcmp("Wind",max->attributes) = 0) r = new TNode; r->firstchild = r->nextsibling = NULL; strcpy(r->weight,Wind_kind0); T->firstchild = r; /获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child q = L->next; while (
37、q) if (strcmp(q->Wind,Wind_kind0) = 0) q1 = new LNode; strcpy(q1->OutLook,q->OutLook); strcpy(q1->Humidity,q->Humidity); strcpy(q1->Temperature,q->Temperature); strcpy(q1->Wind,q->Wind); strcpy(q1->PlayTennis,q->PlayTennis); q1->next = NULL; q1->next = link_chi
38、ld->next; link_child->next = q1; q = q->next; int p = 0,n = 0; PN_Num(link_child,p,n); if (p != 0 && n != 0) ID3(T->firstchild,link_child,Target_Attr,attr_child); FreeLink(link_child); else if(p = 0) strcpy(T->firstchild->data,"No"); FreeLink(link_child); /strcpy(
39、T->firstchild->data,q1->PlayTennis);/-此处应该需要修改-:) else if(n = 0) strcpy(T->firstchild->data,"Yes"); FreeLink(link_child); /建立每一层上的其他节点 tree_p = T->firstchild; for (int i = 1;i < max->attr_Num;i+) /需要区分出是哪一种属性 if (strcmp("OutLook",max->attributes) = 0) r
40、 = new TNode; r->firstchild = r->nextsibling = NULL; strcpy(r->weight,OutLook_kindi); tree_p->nextsibling = r; /获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child q = L->next; while (q) if (strcmp(q->OutLook,OutLook_kindi) = 0) q1 = new LNode; strcpy(q1->OutLook,q->OutLook); str
41、cpy(q1->Humidity,q->Humidity); strcpy(q1->Temperature,q->Temperature); strcpy(q1->Wind,q->Wind); strcpy(q1->PlayTennis,q->PlayTennis); q1->next = NULL; q1->next = link_child->next; link_child->next = q1; q = q->next; else if (strcmp("Temperature",max-
42、>attributes) = 0) r = new TNode; r->firstchild = r->nextsibling = NULL; strcpy(r->weight,Temperature_kindi); tree_p->nextsibling = r; /获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child q = L->next; while (q) if (strcmp(q->Temperature,Temperature_kindi) = 0) q1 = new LNode; strcpy(
43、q1->OutLook,q->OutLook); strcpy(q1->Humidity,q->Humidity); strcpy(q1->Temperature,q->Temperature); strcpy(q1->Wind,q->Wind); strcpy(q1->PlayTennis,q->PlayTennis); q1->next = NULL; q1->next = link_child->next; link_child->next = q1; q = q->next; else if (s
44、trcmp("Humidity",max->attributes) = 0) r = new TNode; r->firstchild = r->nextsibling = NULL; strcpy(r->weight,Humidity_kindi); tree_p->nextsibling = r; /获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child q = L->next; while (q) if (strcmp(q->Humidity,Humidity_kindi) = 0) q1 = new LNode; strcpy(q1->OutLook,q->OutLook); strcpy(q1->Humidity,q->Humidity); strcpy(q1->Temp
温馨提示
- 1. 本站所有资源如无特殊说明,都需要本地电脑安装OFFICE2007和PDF阅读器。图纸软件为CAD,CAXA,PROE,UG,SolidWorks等。压缩文件请下载最新的WinRAR软件解压。
- 2. 本站的文档不包含任何第三方提供的附件图纸等,如果需要附件,请联系上传者。文件的所有权益归上传用户所有。
- 3. 本站RAR压缩包中若带图纸,网页内容里面会有图纸预览,若没有图纸预览就没有图纸。
- 4. 未经权益所有人同意不得将文件中的内容挪作商业或盈利用途。
- 5. 人人文库网仅提供信息存储空间,仅对用户上传内容的表现方式做保护处理,对用户上传分享的文档内容本身不做任何修改或编辑,并不能对任何下载内容负责。
- 6. 下载文件中如有侵权或不适当内容,请与我们联系,我们立即纠正。
- 7. 本站不保证下载资源的准确性、安全性和完整性, 同时也不承担用户因使用这些下载资源对自己和他人造成任何形式的伤害或损失。
最新文档
- 矿上招工考试题及答案
- 肯德基考试题目及答案
- 解析卷-青岛版9年级数学下册期末试题含完整答案详解(全优)
- 考试题及答案普工
- 2025年教师招聘之《幼儿教师招聘》通关练习试题附参考答案详解【基础题】
- 静止运动考试题及答案
- 水解蒸煮工标准化作业考核试卷及答案
- 押题宝典教师招聘之《小学教师招聘》通关考试题库(精练)附答案详解
- 经验护航考试题及答案
- 积材工职业技能考核试卷及答案
- 地毯更换简易施工合同协议书
- 实验小学二年级体育集体备课教案
- 西方文化概论(第二版)课件全套 曹顺庆 第0-6章 绪论 西方文化的渊源与流变、西方文学 -西方社会生活与习俗
- 李白课件教学课件
- 百年郎酒试题专项测试题及答案
- 托管中心学生托管合同协议书
- 高中生物近5年生物高考真题分类和解析(神经调节)
- 押金管理制度
- 人教版(2024) 七年级上册英语培优补差教学工作计划
- 职业素养提升第2版(大学生职业素养指导课程)全套教学课件
- JJF 1033-2023 计量标准考核规范
评论
0/150
提交评论