机器学习实验报告_第1页
机器学习实验报告_第2页
机器学习实验报告_第3页
机器学习实验报告_第4页
机器学习实验报告_第5页
已阅读5页,还剩14页未读 继续免费阅读

下载本文档

版权说明:本文档由用户提供并上传,收益归属内容提供方,若内容存在侵权,请进行举报或认领

文档简介

1、机器学习课内实验报告(1) ID 算法实现决策树2015 - 2016 学年 第 2学期专业:智能科学与技术班级:智能 1301 班学号:06133029姓名:张争辉精选文档一、实验目的:理解 ID3 算法的基本原理,并且编程实现。二、实验要求:使用 C/C+/MATLAB 实现 ID3 算法。输入:若干行,每行 5个字符串,表示OutlookTemperatureHumidityWindPlay ball如上表。输出:决策树。实验结果如下:输入:SunnyHotHighWeakNoSunnyHotHighStrongNoOvercastHotHighWeakYesRainMildHighWe

2、akYesRainCoolNormalWeakYesRainCoolNormalStrongNoOvercastCoolNormalStrongYesSunnyMildHighWeakNoSunnyCoolNormalWeakYesRainMildNormalWeakYesSunnyMildNormalStrongYesOvercastMildHighStrongYesOvercastHotNormalWeakYesRainMildHighStrongNo输出:OutlookRainWindStrongNoWeakYesOvercastYesSunnyHumidity2精选文档NormalYe

3、sHighNo三、具体实现:实现算法如下:#include <iostream>#include <fstream>#include <math.h>#include <string>using namespace std;#define ROW 14#define COL 5#define log2 0.69314718055typedef struct TNodechar data15;char weight15;TNode * firstchild,*nextsibling;*tree;typedef struct LNodecharOut

4、Look15;charTemperature15;charHumidity15;charWind15;charPlayTennis5;LNode *next;*link;typedef struct AttrNodecharattributes15;/属性intattr_Num;/属性的个数AttrNode *next;*Attributes;char * ExamplesROWCOL = /"OverCast","Cool","High","Strong","No",/ "Rain&

5、quot;,"Hot","Normal","Strong","Yes","Sunny","Hot","High","Weak","No","Sunny","Hot","High","Strong","No","OverCast","Hot","High",&q

6、uot;Weak","Yes",3精选文档"Rain","Mild","High","Weak","Yes","Rain","Cool","Normal","Weak","Yes","Rain","Cool","Normal","Strong","No","O

7、verCast","Cool","Normal","Strong","Yes","Sunny","Mild","High","Weak","No","Sunny","Cool","Normal","Weak","Yes","Rain","Mild","Norm

8、al","Weak","Yes","Sunny","Mild","Normal","Strong","Yes","OverCast","Mild","Normal","Strong","Yes","OverCast","Hot","Normal","Weak",&quo

9、t;Yes","Rain","Mild","High","Strong","No"char * Attributes_kind4 = "OutLook","Temperature","Humidity","Wind"intAttr_kind4 = 3,3,2,2;char * OutLook_kind3 = "Sunny","OverCast","Rain&

10、quot; char * Temperature_kind3 = "Hot","Mild","Cool" char * Humidity_kind2 = "High","Normal"char * Wind_kind2 = "Weak","Strong"/*int i_Exampple145 = 0,0,0,0,1,0,0,0,1,1,1,0,0,1,0,2,1,0,0,0,2,2,1,0,0,2,2,1,1,1,1,2,1,1,0,0,1,0,0,1,0

11、,2,1,0,0,2,1,1,0,0,0,1,1,1,0,1,1,1,1,0,1,1,1,0,0,2,1,0,0,1;*/void treelists(tree T);void InitAttr(Attributes &attr_link,char * Attributes_kind,int Attr_kind); void InitLink(link &L,char * ExamplesCOL);void ID3(tree &T,link L,link Target_Attr,Attributes attr); void PN_Num(link L,int &

12、positve,int &negative);double Gain(int positive,int negative,char * atrribute,link L,Attributes attr_L);void main()4精选文档link LL,p;Attributes attr_L,q;tree T;T = new TNode;T->firstchild = T->nextsibling = NULL;strcpy(T->weight,"");strcpy(T->data,"");attr_L = new At

13、trNode;attr_L->next = NULL;LL = new LNode; LL->next = NULL;/成功建立两个链表InitLink(LL,Examples);InitAttr(attr_L,Attributes_kind,Attr_kind);ID3(T,LL,NULL,attr_L);cout<<"决策树以广义表形式输出如下:"<<endl;treelists(T);/以广义表的形式输出树/ cout<<Gain(9,5,"OutLook",LL,attr_L)<<end

14、l;cout<<endl;/以广义表的形式输出树void treelists(tree T)tree p;if(!T)return;cout<<""<<T->weight<<""cout<<T->data;p = T->firstchild;if (p)cout<<"("while (p)5精选文档treelists(p);p = p->nextsibling;if (p)cout<<','cout<<

15、;")"void InitAttr(Attributes &attr_link,char * Attributes_kind,int Attr_kind)Attributes p;for (int i =0;i < 4;i+)p = new AttrNode;p->next = NULL;strcpy(p->attributes,Attributes_kindi);p->attr_Num = Attr_kindi;p->next = attr_link->next;attr_link->next = p;void InitL

16、ink(link &LL,char * ExamplesCOL)link p;for (int i = 0;i < ROW;i+)p = new LNode;p->next = NULL;strcpy(p->OutLook,Examplesi0);strcpy(p->Temperature,Examplesi1);strcpy(p->Humidity,Examplesi2);strcpy(p->Wind,Examplesi3);strcpy(p->PlayTennis,Examplesi4);p->next = LL->next;L

17、L->next = p;void PN_Num(link L,int &positve,int &negative)6精选文档positve = 0;negative = 0;link p;p = L->next;while (p)if (strcmp(p->PlayTennis,"No") = 0)negative+;else if(strcmp(p->PlayTennis,"Yes") = 0)positve+;p = p->next;/计算信息增益/link L:样本集合 S/attr_L :属性集合d

18、ouble Gain(int positive,int negative,char * atrribute,link L,Attributes attr_L)int atrr_kinds;/ 每个属性中的值的个数Attributes p = attr_L->next;link q = L->next;int attr_th = 0;/第几个属性while (p)if (strcmp(p->attributes,atrribute) = 0)atrr_kinds = p->attr_Num;break;p = p->next;attr_th+;double entr

19、opy,gain=0;double p1 = 1.0*positive/(positive + negative);double p2 = 1.0*negative/(positive + negative);entropy = -p1*log(p1)/log2 - p2*log(p2)/log2;/ 集合熵7精选文档gain = entropy;/获取每个属性值在训练样本中出现的个数/获取每个属性值所对应的正例和反例的个数/声明一个 3*atrr_kinds 的数组int * kinds= new int * 3;for (int j =0;j < 3;j+)kindsj = new

20、intatrr_kinds;/ 保存每个属性值在训练样本中出现的个数/初始化for (int j = 0;j< 3;j+)for (int i =0;i < atrr_kinds;i+)kindsji = 0;while (q)if (strcmp("OutLook",atrribute) = 0)for (int i = 0;i < atrr_kinds;i+)if(strcmp(q->OutLook,OutLook_kindi) = 0)kinds0i+;if(strcmp(q->PlayTennis,"Yes") =

21、0)kinds1i+;elsekinds2i+;else if (strcmp("Temperature",atrribute) = 0)for (int i = 0;i < atrr_kinds;i+)if(strcmp(q->Temperature,Temperature_kindi) = 0)kinds0i+;8精选文档if(strcmp(q->PlayTennis,"Yes") = 0)kinds1i+;elsekinds2i+;else if (strcmp("Humidity",atrribute) =

22、0)for (int i = 0;i < atrr_kinds;i+)if(strcmp(q->Humidity,Humidity_kindi) = 0)kinds0i+;if(strcmp(q->PlayTennis,"Yes") = 0)kinds1i+;/elsekinds2i+;else if (strcmp("Wind",atrribute) = 0)for (int i = 0;i < atrr_kinds;i+)if(strcmp(q->Wind,Wind_kindi) = 0)kinds0i+;if(strc

23、mp(q->PlayTennis,"Yes") = 0)kinds1i+;elsekinds2i+;q = q->next;/计算信息增益double * gain_kind = new doubleatrr_kinds;int positive_kind = 0,negative_kind = 0;9精选文档for (int j = 0;j < atrr_kinds;j+)if (kinds0j != 0 && kinds1j != 0 && kinds2j != 0)p1 = 1.0*kinds1j/kinds0j;p2

24、 = 1.0*kinds2j/kinds0j;gain_kindj = -p1*log(p1)/log2-p2*log(p2)/log2;gain = gain - (1.0*kinds0j/(positive + negative)*gain_kindj;elsegain_kindj = 0;return gain;/在 ID3 算法中的训练样本子集合与属性子集合的链表需要进行清空 void FreeLink(link &Link)link p,q;p = Link->next;Link->next = NULL;while (p)q = p;p = p->next

25、;free(q);void ID3(tree &T,link L,link Target_Attr,Attributes attr)Attributes p,max,attr_child,p1;link q,link_child,q1;tree r,tree_p;int positive =0,negative =0;PN_Num(L,positive,negative);/初始化两个子集合10精选文档attr_child = new AttrNode;attr_child->next = NULL;link_child = new LNode;link_child->ne

26、xt = NULL;if (positive = 0)/ 全是反例strcpy(T->data,"No");return;else if( negative = 0)/全是正例strcpy(T->data,"Yes");return;p = attr->next; /属性链表double gain,g = 0;/* */* 建立属性子集合与训练样本子集合有两个方案:一:在原来链表的基础上进行删除;二:另外申请空间进行存储子集合;采用第二种方法虽然浪费了空间,但也省了很多事情,避免了变量之间的应用混乱*/* */if(p)while (p

27、)gain = Gain(positive,negative,p->attributes,L,attr);cout<<p->attributes<<""<<gain<<endl;if(gain > g)g = gain;max = p;/寻找信息增益最大的属性11精选文档p = p->next;strcpy(T->data,max->attributes);/增加决策树的节点cout<<" 信 息 增 益 最 大 的 属 性 : max->attributes =

28、 "<<max->attributes<<endl<<endl;/下面开始建立决策树/创建属性子集合p = attr->next;while (p)if (strcmp(p->attributes,max->attributes) != 0)p1 = new AttrNode;strcpy(p1->attributes,p->attributes);p1->attr_Num = p->attr_Num;p1->next = NULL;p1->next = attr_child->ne

29、xt;attr_child->next = p1;p = p->next;/需要区分出是哪一种属性/建立每一层的第一个节点if (strcmp("OutLook",max->attributes) = 0)r = new TNode;r->firstchild = r->nextsibling = NULL;strcpy(r->weight,OutLook_kind0);T->firstchild = r;/获取与属性值相关的训练样例 Example(vi), 建立一个新的训练样本链表 link_childq = L->nex

30、t;while (q)if (strcmp(q->OutLook,OutLook_kind0) = 0)q1 = new LNode;strcpy(q1->OutLook,q->OutLook);strcpy(q1->Humidity,q->Humidity);12精选文档strcpy(q1->Temperature,q->Temperature);strcpy(q1->Wind,q->Wind);strcpy(q1->PlayTennis,q->PlayTennis);q1->next = NULL;q1->nex

31、t = link_child->next;link_child->next = q1;q = q->next;else if (strcmp("Temperature",max->attributes) = 0)r = new TNode;r->firstchild = r->nextsibling = NULL;strcpy(r->weight,Temperature_kind0);T->firstchild = r;/获取与属性值相关的训练样例 Example(vi), 建立一个新的训练样本链表 link_childq =

32、 L->next;while (q)if (strcmp(q->Temperature,Temperature_kind0) = 0)q1 = new LNode;strcpy(q1->OutLook,q->OutLook);strcpy(q1->Humidity,q->Humidity);strcpy(q1->Temperature,q->Temperature);strcpy(q1->Wind,q->Wind);strcpy(q1->PlayTennis,q->PlayTennis);q1->next = NUL

33、L;q1->next = link_child->next;link_child->next = q1;q = q->next;else if (strcmp("Humidity",max->attributes) = 0)r = new TNode;r->firstchild = r->nextsibling = NULL;strcpy(r->weight,Humidity_kind0);T->firstchild = r;13精选文档/获取与属性值相关的训练样例 Example(vi), 建立一个新的训练样本链表 l

34、ink_childq = L->next;while (q)if (strcmp(q->Humidity,Humidity_kind0) = 0)q1 = new LNode;strcpy(q1->OutLook,q->OutLook);strcpy(q1->Humidity,q->Humidity);strcpy(q1->Temperature,q->Temperature);strcpy(q1->Wind,q->Wind);strcpy(q1->PlayTennis,q->PlayTennis);q1->next

35、 = NULL;q1->next = link_child->next;link_child->next = q1;q = q->next;else if (strcmp("Wind",max->attributes) = 0)r = new TNode;r->firstchild = r->nextsibling = NULL;strcpy(r->weight,Wind_kind0);T->firstchild = r;/获取与属性值相关的训练样例 Example(vi), 建立一个新的训练样本链表 link_chil

36、dq = L->next;while (q)if (strcmp(q->Wind,Wind_kind0) = 0)q1 = new LNode;strcpy(q1->OutLook,q->OutLook);strcpy(q1->Humidity,q->Humidity);strcpy(q1->Temperature,q->Temperature);strcpy(q1->Wind,q->Wind);strcpy(q1->PlayTennis,q->PlayTennis);q1->next = NULL;q1->n

37、ext = link_child->next;14精选文档link_child->next = q1;q = q->next;int p = 0,n = 0;PN_Num(link_child,p,n);if (p != 0 && n != 0)ID3(T->firstchild,link_child,Target_Attr,attr_child);FreeLink(link_child);else if(p = 0)strcpy(T->firstchild->data,"No");FreeLink(link_child)

38、;/ strcpy(T->firstchild->data,q1->PlayTennis);/-此处应该需要修改-:)else if(n = 0)strcpy(T->firstchild->data,"Yes");FreeLink(link_child);/建立每一层上的其他节点tree_p = T->firstchild;for (int i = 1;i < max->attr_Num;i+)/需要区分出是哪一种属性if (strcmp("OutLook",max->attributes) = 0)

39、r = new TNode;r->firstchild = r->nextsibling = NULL;strcpy(r->weight,OutLook_kindi);tree_p->nextsibling = r;/获取与属性值相关的训练样例 Example(vi), 建立一个新的训练样本链表 link_childq = L->next;while (q)if (strcmp(q->OutLook,OutLook_kindi) = 0)15精选文档q1 = new LNode;strcpy(q1->OutLook,q->OutLook);str

40、cpy(q1->Humidity,q->Humidity);strcpy(q1->Temperature,q->Temperature);strcpy(q1->Wind,q->Wind);strcpy(q1->PlayTennis,q->PlayTennis);q1->next = NULL;q1->next = link_child->next;link_child->next = q1;q = q->next;else if (strcmp("Temperature",max->attr

41、ibutes) = 0)r = new TNode;r->firstchild = r->nextsibling = NULL;strcpy(r->weight,Temperature_kindi);tree_p->nextsibling = r;/获取与属性值相关的训练样例 Example(vi), 建立一个新的训练样本链表 link_childq = L->next;while (q)if (strcmp(q->Temperature,Temperature_kindi) = 0)q1 = new LNode;strcpy(q1->OutLook,

42、q->OutLook);strcpy(q1->Humidity,q->Humidity);strcpy(q1->Temperature,q->Temperature);strcpy(q1->Wind,q->Wind);strcpy(q1->PlayTennis,q->PlayTennis);q1->next = NULL;q1->next = link_child->next;link_child->next = q1;q = q->next;else if (strcmp("Humidity&quo

43、t;,max->attributes) = 0)16精选文档r = new TNode;r->firstchild = r->nextsibling = NULL;strcpy(r->weight,Humidity_kindi);tree_p->nextsibling = r;/获取与属性值相关的训练样例 Example(vi), 建立一个新的训练样本链表 link_childq = L->next;while (q)if (strcmp(q->Humidity,Humidity_kindi) = 0)q1 = new LNode;strcpy(q1->OutLook,q->OutLook);strcpy(q1->Humidity,q->Humidity);strcpy(q1->Temperature,q-&g

温馨提示

  • 1. 本站所有资源如无特殊说明,都需要本地电脑安装OFFICE2007和PDF阅读器。图纸软件为CAD,CAXA,PROE,UG,SolidWorks等.压缩文件请下载最新的WinRAR软件解压。
  • 2. 本站的文档不包含任何第三方提供的附件图纸等,如果需要附件,请联系上传者。文件的所有权益归上传用户所有。
  • 3. 本站RAR压缩包中若带图纸,网页内容里面会有图纸预览,若没有图纸预览就没有图纸。
  • 4. 未经权益所有人同意不得将文件中的内容挪作商业或盈利用途。
  • 5. 人人文库网仅提供信息存储空间,仅对用户上传内容的表现方式做保护处理,对用户上传分享的文档内容本身不做任何修改或编辑,并不能对任何下载内容负责。
  • 6. 下载文件中如有侵权或不适当内容,请与我们联系,我们立即纠正。
  • 7. 本站不保证下载资源的准确性、安全性和完整性, 同时也不承担用户因使用这些下载资源对自己和他人造成任何形式的伤害或损失。

评论

0/150

提交评论