




版权说明:本文档由用户提供并上传,收益归属内容提供方,若内容存在侵权,请进行举报或认领
文档简介
1、精选优质文档-倾情为你奉上机器学习课内实验报告(1) ID算法实现决策树2015 - 2016学年 第 2 学期专业:智能科学与技术班级:智能1301班学号:姓名:张争辉一、 实验目的:理解ID3算法的基本原理,并且编程实现。二、 实验要求:使用C/C+/MATLAB实现ID3算法。输入:若干行,每行 5 个字符串,表示Outlook Temperature Humidity Wind Play ball如上表。输出:决策树。实验结果如下:输入: Sunny Hot High Weak No Sunny Hot High Strong No Overcast Hot High Weak Yes
2、 Rain Mild High Weak Yes Rain Cool Normal Weak Yes Rain Cool Normal Strong No Overcast Cool Normal Strong Yes Sunny Mild High Weak No Sunny Cool Normal Weak Yes Rain Mild Normal Weak Yes Sunny Mild Normal Strong Yes Overcast Mild High Strong Yes Overcast Hot Normal Weak Yes Rain Mild High Strong No输
3、出:Outlook Rain Wind Strong No Weak Yes Overcast Yes Sunny Humidity Normal Yes High No 三、 具体实现:实现算法如下:#include <iostream>#include <fstream>#include <math.h>#include <string>using namespace std;#define ROW 14#define COL 5#define log2 0.typedef struct TNode char data15; char wei
4、ght15; TNode * firstchild,*nextsibling;*tree;typedef struct LNode char OutLook15; char Temperature15; char Humidity15; char Wind15; char PlayTennis5; LNode *next;*link;typedef struct AttrNode char attributes15;/属性 int attr_Num;/属性的个数 AttrNode *next;*Attributes;char * ExamplesROWCOL = /"OverCast
5、","Cool","High","Strong","No", /"Rain","Hot","Normal","Strong","Yes", "Sunny","Hot","High","Weak","No", "Sunny","Hot","High",
6、"Strong","No", "OverCast","Hot","High","Weak","Yes", "Rain","Mild","High","Weak","Yes", "Rain","Cool","Normal","Weak","Yes", "
7、;Rain","Cool","Normal","Strong","No", "OverCast","Cool","Normal","Strong","Yes", "Sunny","Mild","High","Weak","No", "Sunny","Cool",&quo
8、t;Normal","Weak","Yes", "Rain","Mild","Normal","Weak","Yes", "Sunny","Mild","Normal","Strong","Yes", "OverCast","Mild","Normal","Strong"
9、;,"Yes", "OverCast","Hot","Normal","Weak","Yes", "Rain","Mild","High","Strong","No" ;char * Attributes_kind4 = "OutLook","Temperature","Humidity","Wind&quo
10、t;int Attr_kind4 = 3,3,2,2;char * OutLook_kind3 = "Sunny","OverCast","Rain"char * Temperature_kind3 = "Hot","Mild","Cool"char * Humidity_kind2 = "High","Normal"char * Wind_kind2 = "Weak","Strong"/*int
11、 i_Exampple145 = 0,0,0,0,1, 0,0,0,1,1, 1,0,0,1,0, 2,1,0,0,0, 2,2,1,0,0, 2,2,1,1,1, 1,2,1,1,0, 0,1,0,0,1, 0,2,1,0,0, 2,1,1,0,0, 0,1,1,1,0, 1,1,1,1,0, 1,1,1,0,0, 2,1,0,0,1 ;*/void treelists(tree T);void InitAttr(Attributes &attr_link,char * Attributes_kind,int Attr_kind);void InitLink(link &L,
12、char * ExamplesCOL);void ID3(tree &T,link L,link Target_Attr,Attributes attr);void PN_Num(link L,int &positve,int &negative);double Gain(int positive,int negative,char * atrribute,link L,Attributes attr_L);void main() link LL,p; Attributes attr_L,q; tree T; T = new TNode; T->firstchil
13、d = T->nextsibling = NULL; strcpy(T->weight,""); strcpy(T->data,""); attr_L = new AttrNode; attr_L->next = NULL; LL = new LNode; LL->next = NULL; /成功建立两个链表 InitLink(LL,Examples); InitAttr(attr_L,Attributes_kind,Attr_kind); ID3(T,LL,NULL,attr_L); cout<<"决
14、策树以广义表形式输出如下:"<<endl; treelists(T);/以广义表的形式输出树/cout<<Gain(9,5,"OutLook",LL,attr_L)<<endl; cout<<endl;/以广义表的形式输出树void treelists(tree T) tree p; if(!T) return; cout<<""<<T->weight<<"" cout<<T->data; p = T->firs
15、tchild; if (p) cout<<"(" while (p) treelists(p); p = p->nextsibling; if (p)cout<<',' cout<<")" void InitAttr(Attributes &attr_link,char * Attributes_kind,int Attr_kind) Attributes p; for (int i =0;i < 4;i+) p = new AttrNode; p->next = NULL;
16、strcpy(p->attributes,Attributes_kindi); p->attr_Num = Attr_kindi; p->next = attr_link->next; attr_link->next = p; void InitLink(link &LL,char * ExamplesCOL) link p; for (int i = 0;i < ROW;i+) p = new LNode; p->next = NULL; strcpy(p->OutLook,Examplesi0); strcpy(p->Tempe
17、rature,Examplesi1); strcpy(p->Humidity,Examplesi2); strcpy(p->Wind,Examplesi3); strcpy(p->PlayTennis,Examplesi4); p->next = LL->next; LL->next = p; void PN_Num(link L,int &positve,int &negative) positve = 0; negative = 0; link p; p = L->next; while (p) if (strcmp(p->P
18、layTennis,"No") = 0) negative+; else if(strcmp(p->PlayTennis,"Yes") = 0) positve+; p = p->next; /计算信息增益/link L: 样本集合S/attr_L:属性集合double Gain(int positive,int negative,char * atrribute,link L,Attributes attr_L) int atrr_kinds;/每个属性中的值的个数 Attributes p = attr_L->next; link
19、q = L->next; int attr_th = 0;/第几个属性 while (p) if (strcmp(p->attributes,atrribute) = 0) atrr_kinds = p->attr_Num; break; p = p->next; attr_th+; double entropy,gain=0; double p1 = 1.0*positive/(positive + negative); double p2 = 1.0*negative/(positive + negative); entropy = -p1*log(p1)/log2
20、 - p2*log(p2)/log2;/集合熵 gain = entropy; /获取每个属性值在训练样本中出现的个数 /获取每个属性值所对应的正例和反例的个数 /声明一个3*atrr_kinds的数组 int * kinds= new int * 3; for (int j =0;j < 3;j+) kindsj = new intatrr_kinds;/保存每个属性值在训练样本中出现的个数 /初始化 for (int j = 0;j< 3;j+) for (int i =0;i < atrr_kinds;i+) kindsji = 0; while (q) if (str
21、cmp("OutLook",atrribute) = 0) for (int i = 0;i < atrr_kinds;i+) if(strcmp(q->OutLook,OutLook_kindi) = 0) kinds0i+; if(strcmp(q->PlayTennis,"Yes") = 0) kinds1i+; else kinds2i+; else if (strcmp("Temperature",atrribute) = 0) for (int i = 0;i < atrr_kinds;i+) if
22、(strcmp(q->Temperature,Temperature_kindi) = 0) kinds0i+; if(strcmp(q->PlayTennis,"Yes") = 0) kinds1i+; else kinds2i+; else if (strcmp("Humidity",atrribute) = 0) for (int i = 0;i < atrr_kinds;i+) if(strcmp(q->Humidity,Humidity_kindi) = 0) kinds0i+; if(strcmp(q->Play
23、Tennis,"Yes") = 0) kinds1i+;/ else kinds2i+; else if (strcmp("Wind",atrribute) = 0) for (int i = 0;i < atrr_kinds;i+) if(strcmp(q->Wind,Wind_kindi) = 0) kinds0i+; if(strcmp(q->PlayTennis,"Yes") = 0) kinds1i+; else kinds2i+; q = q->next; /计算信息增益 double * gain
24、_kind = new doubleatrr_kinds; int positive_kind = 0,negative_kind = 0; for (int j = 0;j < atrr_kinds;j+) if (kinds0j != 0 && kinds1j != 0 && kinds2j != 0) p1 = 1.0*kinds1j/kinds0j; p2 = 1.0*kinds2j/kinds0j; gain_kindj = -p1*log(p1)/log2-p2*log(p2)/log2; gain = gain - (1.0*kinds0j/
25、(positive + negative)*gain_kindj; else gain_kindj = 0; return gain;/在ID3算法中的训练样本子集合与属性子集合的链表需要进行清空void FreeLink(link &Link) link p,q; p = Link->next; Link->next = NULL; while (p) q = p; p = p->next; free(q); void ID3(tree &T,link L,link Target_Attr,Attributes attr) Attributes p,max,
26、attr_child,p1; link q,link_child,q1; tree r,tree_p; int positive =0,negative =0; PN_Num(L,positive,negative); /初始化两个子集合 attr_child = new AttrNode; attr_child->next = NULL; link_child = new LNode; link_child->next = NULL; if (positive = 0)/全是反例 strcpy(T->data,"No"); return; else if
27、( negative = 0)/全是正例 strcpy(T->data,"Yes"); return; p = attr->next; /属性链表 double gain,g = 0; /*/ /* 建立属性子集合与训练样本子集合有两个方案: 一:在原来链表的基础上进行删除; 二:另外申请空间进行存储子集合; 采用第二种方法虽然浪费了空间,但也省了很多事情,避免了变量之间的应用混乱 */ /*/ if(p) while (p) gain = Gain(positive,negative,p->attributes,L,attr); cout<<
28、p->attributes<<" "<<gain<<endl; if(gain > g) g = gain; max = p;/寻找信息增益最大的属性 p = p->next; strcpy(T->data,max->attributes);/增加决策树的节点 cout<<"信息增益最大的属性:max->attributes = "<<max->attributes<<endl<<endl; /下面开始建立决策树 /创建属性子集合
29、 p = attr->next; while (p) if (strcmp(p->attributes,max->attributes) != 0) p1 = new AttrNode; strcpy(p1->attributes,p->attributes); p1->attr_Num = p->attr_Num; p1->next = NULL; p1->next = attr_child->next; attr_child->next = p1; p = p->next; /需要区分出是哪一种属性 /建立每一层的第一
30、个节点 if (strcmp("OutLook",max->attributes) = 0) r = new TNode; r->firstchild = r->nextsibling = NULL; strcpy(r->weight,OutLook_kind0); T->firstchild = r; /获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child q = L->next; while (q) if (strcmp(q->OutLook,OutLook_kind0) = 0) q1
31、 = new LNode; strcpy(q1->OutLook,q->OutLook); strcpy(q1->Humidity,q->Humidity); strcpy(q1->Temperature,q->Temperature); strcpy(q1->Wind,q->Wind); strcpy(q1->PlayTennis,q->PlayTennis); q1->next = NULL; q1->next = link_child->next; link_child->next = q1; q = q
32、->next; else if (strcmp("Temperature",max->attributes) = 0) r = new TNode; r->firstchild = r->nextsibling = NULL; strcpy(r->weight,Temperature_kind0); T->firstchild = r; /获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child q = L->next; while (q) if (strcmp(q->Temperatur
33、e,Temperature_kind0) = 0) q1 = new LNode; strcpy(q1->OutLook,q->OutLook); strcpy(q1->Humidity,q->Humidity); strcpy(q1->Temperature,q->Temperature); strcpy(q1->Wind,q->Wind); strcpy(q1->PlayTennis,q->PlayTennis); q1->next = NULL; q1->next = link_child->next; lin
34、k_child->next = q1; q = q->next; else if (strcmp("Humidity",max->attributes) = 0) r = new TNode; r->firstchild = r->nextsibling = NULL; strcpy(r->weight,Humidity_kind0); T->firstchild = r; /获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child q = L->next; while (q) if (s
35、trcmp(q->Humidity,Humidity_kind0) = 0) q1 = new LNode; strcpy(q1->OutLook,q->OutLook); strcpy(q1->Humidity,q->Humidity); strcpy(q1->Temperature,q->Temperature); strcpy(q1->Wind,q->Wind); strcpy(q1->PlayTennis,q->PlayTennis); q1->next = NULL; q1->next = link_chi
36、ld->next; link_child->next = q1; q = q->next; else if (strcmp("Wind",max->attributes) = 0) r = new TNode; r->firstchild = r->nextsibling = NULL; strcpy(r->weight,Wind_kind0); T->firstchild = r; /获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child q = L->next; while (
37、q) if (strcmp(q->Wind,Wind_kind0) = 0) q1 = new LNode; strcpy(q1->OutLook,q->OutLook); strcpy(q1->Humidity,q->Humidity); strcpy(q1->Temperature,q->Temperature); strcpy(q1->Wind,q->Wind); strcpy(q1->PlayTennis,q->PlayTennis); q1->next = NULL; q1->next = link_chi
38、ld->next; link_child->next = q1; q = q->next; int p = 0,n = 0; PN_Num(link_child,p,n); if (p != 0 && n != 0) ID3(T->firstchild,link_child,Target_Attr,attr_child); FreeLink(link_child); else if(p = 0) strcpy(T->firstchild->data,"No"); FreeLink(link_child); /strcpy(
39、T->firstchild->data,q1->PlayTennis);/-此处应该需要修改-:) else if(n = 0) strcpy(T->firstchild->data,"Yes"); FreeLink(link_child); /建立每一层上的其他节点 tree_p = T->firstchild; for (int i = 1;i < max->attr_Num;i+) /需要区分出是哪一种属性 if (strcmp("OutLook",max->attributes) = 0) r
40、 = new TNode; r->firstchild = r->nextsibling = NULL; strcpy(r->weight,OutLook_kindi); tree_p->nextsibling = r; /获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child q = L->next; while (q) if (strcmp(q->OutLook,OutLook_kindi) = 0) q1 = new LNode; strcpy(q1->OutLook,q->OutLook); str
41、cpy(q1->Humidity,q->Humidity); strcpy(q1->Temperature,q->Temperature); strcpy(q1->Wind,q->Wind); strcpy(q1->PlayTennis,q->PlayTennis); q1->next = NULL; q1->next = link_child->next; link_child->next = q1; q = q->next; else if (strcmp("Temperature",max-
42、>attributes) = 0) r = new TNode; r->firstchild = r->nextsibling = NULL; strcpy(r->weight,Temperature_kindi); tree_p->nextsibling = r; /获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child q = L->next; while (q) if (strcmp(q->Temperature,Temperature_kindi) = 0) q1 = new LNode; strcpy(
43、q1->OutLook,q->OutLook); strcpy(q1->Humidity,q->Humidity); strcpy(q1->Temperature,q->Temperature); strcpy(q1->Wind,q->Wind); strcpy(q1->PlayTennis,q->PlayTennis); q1->next = NULL; q1->next = link_child->next; link_child->next = q1; q = q->next; else if (s
44、trcmp("Humidity",max->attributes) = 0) r = new TNode; r->firstchild = r->nextsibling = NULL; strcpy(r->weight,Humidity_kindi); tree_p->nextsibling = r; /获取与属性值相关的训练样例Example(vi),建立一个新的训练样本链表link_child q = L->next; while (q) if (strcmp(q->Humidity,Humidity_kindi) = 0) q1 = new LNode; strcpy(q1->OutLook,q->OutLook); strcpy(q1->Humidity,q->Humidity); strcpy(q1->Temp
温馨提示
- 1. 本站所有资源如无特殊说明,都需要本地电脑安装OFFICE2007和PDF阅读器。图纸软件为CAD,CAXA,PROE,UG,SolidWorks等.压缩文件请下载最新的WinRAR软件解压。
- 2. 本站的文档不包含任何第三方提供的附件图纸等,如果需要附件,请联系上传者。文件的所有权益归上传用户所有。
- 3. 本站RAR压缩包中若带图纸,网页内容里面会有图纸预览,若没有图纸预览就没有图纸。
- 4. 未经权益所有人同意不得将文件中的内容挪作商业或盈利用途。
- 5. 人人文库网仅提供信息存储空间,仅对用户上传内容的表现方式做保护处理,对用户上传分享的文档内容本身不做任何修改或编辑,并不能对任何下载内容负责。
- 6. 下载文件中如有侵权或不适当内容,请与我们联系,我们立即纠正。
- 7. 本站不保证下载资源的准确性、安全性和完整性, 同时也不承担用户因使用这些下载资源对自己和他人造成任何形式的伤害或损失。
最新文档
- 农村合资合作建房合同范本
- 不标准水电施工合同范本
- 内江小区保安合同范本
- 东亮驾照合同范本
- 两方协议合同范本
- 买房假合同范例
- 农村秸秆销售合同范本
- 合同范本押金退还
- 化工重苯销售合同范例
- 卡车购车合同范本
- 无人机固定翼行业报告
- 《茎和叶》名师课件
- 玻璃体腔注射-操作流程和注意事项(特选参考)课件
- JGJ114-2014 钢筋焊接网混凝土结构技术规程
- 110kV升压站构支架组立施工方案
- CONSORT2010流程图(FlowDiagram)【模板】文档
- 前厅月工作计划
- 生物医学工程伦理 课件全套 第1-10章 生物医学工程与伦理-医学技术选择与应用的伦理问题
- 新战略营销课件
- 人文地理学考试名词解释全套
- 统编版五年级下册第五单元 习作:形形色色的人 课件 (共16张PPT)
评论
0/150
提交评论