数据结构课程设计-文章中单词查找.docx

上传人：伐*** IP属地：宁夏 上传时间：2019-01-23 格式：DOCX 页数：21 大小：775.51KB 积分：15 举报 版权申诉

已阅读5页，还剩16页未读，继续免费阅读

版权说明：本文档由用户提供并上传，收益归属内容提供方，若内容存在侵权，请进行举报或认领

文档简介

数据结构课程设计报告 题目：文章中单词查找 专业：软件工程 起止时间：2015.07.06—2015.07.10 集美大学计算机工程学院软件工程教研室制 2015 年 7 月 09 日 目录 一 引言 1 二 系统功能和原始数据 1 三 程序总体设计 1 四 功能模块函数设计和调试 5 五 程序清单 9 六 课程设计总结 19 七 参考资料 19 一、引言 本课程实习是在理论学习和基础实验的基础上，学习开发规模较大的程序，运用已掌握的应用数据结构解决实际问题的基本方法。通过对程序结构的分析、设计和开发的过程，提高综合应用数据结构的能力，为学习软件专业课程创建较扎实的理论基础和实践基础。本次任务是设计一个能够实现从存放多篇英文文章的文件目录中读取文件，并统计各篇文章单词个数，或查找指定单词在各篇文章中出现的位置的程序，并鼓励开发者通过多种渠道提高程序运行效率。通过本次课程设计不仅可以加深对所学知识的理解，也提高了把知识应用到实践中的能力。二、系统功能和原始数据(1)系统功能有多篇英文文章存放于文件中，每行约等于80个字符，每页约等于40行。分别放于多个文件中，并实现如下功能：（1）统计文件的个数，统计每篇文章的单词个数，统计文章中不重复单词个数（2）查找一个单词所在的文章，页号，行号，测试三种情况可能的时间，该单词仅出现一次，出现多次，不出现。(2)原始数据存放于文件中的多篇英文文章三、程序总体设计（1）数据结构主程序下定义数据结构：typedef structchar datamaxlength; /串数据域int length;/串长度sqstring;/串类型typedef structunsigned int count; /已查找到的个数int localpage100; /存放页码int localrow100; / 存放行数searchout; /暂存单词100个查找结果wordcount类下定义数据结构：typedef struct nodechar data; /节点数据unsigned int count /出现次数struct node *next; /next指向下一个字母节点struct node *sibling; / sibling指向相邻节点word;/统计下节点类型typedef structint top;/栈顶word* datamaxlength; /栈数据域stack; /输出统计结果字母栈类型（2）模块划分和层次结构划分和层次结构（3）函数原型清单主程序下函数清单函数原型：void countallpaper() 函数功能：统计所有文件中单词函数原型：void search()函数功能：查找函数函数原型：void getfiles(unsigned int &files_num,char filenamemaxfiles20)函数功能：获取文件夹下所有txt文件：files_num为文件数，filename为文件名数组函数原型：unsigned int winapi count(pvoid param)函数功能：线程函数用于统计单词函数原型：void outfile(searchout &s,file *fout)函数功能：将暂存于searchout的查找结果输出到文件函数原型：int mate(sqstring t)函数功能：查找单词sqstring t，返回查找时间wordcount类下函数清单public：构造函数：wordcount(char* filesname)函数功能：统计filesname文件的所有单词函数原型：unsigned int getusedtime(void)函数功能：获取wordcount对象的usedtime值private：函数原型：void init(word *&node,int ch)函数功能：使用ch字符初始化节点函数原型：void insert(word *&p,char ch)函数功能：在p节点前插入值域为ch的节点函数原型：void jointree(word *&p,char ch)函数功能：将字母字符ch插入p节点的next域函数原型：void fout(word *r,int &d,file* 
fout)函数功能：将树r保存通过fout输出（4）程序总体框架（5）程序组织stdafx.h（主程序头文件）：定义程序需要使用到的常量、结构体，引用程序所需要的文件。article.cpp（主程序源文件）：主要包含主程序的函数具体实现方法。wordcount.h（类头文件）：主要包含类成员、方法声明，定义结构体和常数等。wordcount.cpp（类源文件）：主要包含函数具体实现方法。四、功能模块函数设计和调试（一）算法描述：（1）统计单词模块统计单词模块使用树形存储结构（如下图）和类设计实现，实现过程如下：类（wordcount）在构造时（使用wordcount（char *）构造函数），每次从文件中读取一个字符，并判断是否为字母字符。若为字母字符，则将在当前节点（假设为q）的next域（下一层）的节点中查找到一个字母的合适位置（使每一层有序），若该层没有该字母的节点则新建节点。反复读取字符，直到读取一个非字母字符，则将q->count加1，q=root（root为根）。反复上述过程直到读完整篇文章。最后，将树遍历并输出到文件。树形存储结构统计单词模块流程图（2）查找单词模块查找单词时，实现过程如下：每次从文件中读取字符以填满数组temp[80]，再将数组头指针（first）置为0，使数组尾指针（rear）指向最后一个非字母字符并将其置为'\n'（如下图1）。将目标单词（sqstring t）与数组比较，若匹配长度为t.length，比较第t.length是否为字母，若不为字母则记录单词位置，否则不记录。数组头指针指向下一个非字母字符，若该字符为'\n'，行加1。读取下一个字符，重复匹配过程直到first=rear或temp[first]=EOF则退出本循环。将temp数组中rear后的字符复制到temp前面单元，重复上述过程，直到文章结束。图1查找单词模块流程图（二）程序调试（1）统计单词（2）查找单词五、程序清单主程序：stdafx.h#pragma once#include targetver.h#include #include #include#include#include #include #include #include #include #define maxfiles 30/文件夹下最多文件数#define filenamelength 20/文件名最大长度#define pagesize 40/单页最大行数#define maxlength 30/单词最大长度#define rowsize 80/单行最大长度#define pathin f:testpaper#define pathout f:testcounttypedef structchar datamaxlength;int length;sqstring;/串typedef structunsigned int count;int localpage100,localrow100;/存放页码，行数searchout;/暂存单词查找结果typedef structchar filesmaxfiles/2filenamelength;/unsigned int files_num;/文件数param;/用于多线程传参数article.cpp#include stdafx.h#include wordcount.hvoid getfiles(unsigned int &files_num,char filenamemaxfiles20)/获取f:testpaper文件夹下所有txt文件 files_num文件数 filename文件名数组long handle;files_num=0;struct _finddata_t fileinfo;char path50=pathin;strcat(path,*.txt);if(handle=_findfirst(path,&fileinfo)=-1l)printf(没有找到匹配的项目n);elseint i=0;for(;fileii!=0;i+)/将fileii复制到filename数组filenamefiles_numi=fileii;filenamefiles_numi=0;/添加0files_num+;while( _findnext(handle,&fileinfo)=0&files_nummaxfiles)for(i=0;fileii!=0;i+)/将fileii复制到filename数组filenamefiles_numi=fileii;filenamefiles_numi=0;/添加0files_num+;_findclose(handle);/线程函数unsigned int winapi 
count(pvoid param)wordcount(char*)param);printf(222);return null;void countallpaper()/统计所有文件char filesmaxfiles20;/存放文件列表数组maxfiles为可读最大文件数，20为文件名长度unsigned int files_num=0;getfiles(files_num,files);handle hthreadmaxfiles;unsigned int threadid;/设置参数clock_t t1=clock();/创建线程printf();for(int i=0;ifiles_num;i+)hthreadi=(handle)_beginthreadex(null,0,count,filesi,0,&threadid);/等待线程结束for(int i=0;ifiles_num;i+)waitforsingleobject(hthreadi, -1);closehandle(hthreadi);printf(n已统计该文件夹下%d个txt文件单词个数,n本次统计耗时:%dmsn,files_num,clock()-t1);printf(注：统计结果存放于%s目录下n,pathout);void initsqstring(sqstring &s,char str)int i;for(i=0;stri!=0;i+)s.datai=stri;s.datai=0;s.length=i;/输出查找结果到文件void outfile(searchout &s,file *fout)/if(s.count=0)fputs(there is no word which you want in this article.n,fout);return;for(int i=0;is.count%100;i+)fprintf(fout,%dt%dn,s.localpagei,s.localrowi);/查找单词int mate(sqstring t)clock_t t1;int j=0,page,row,k,rear; /k为数组头指针，first为尾指针bool flag;char ch,filesmaxfiles20,infile50,outfile50=pathout;char temp81;/files存放文件列表数组maxfiles为可读最大文件数，20为文件名长度strcat(outfile,searchresult.txt);unsigned int files_num=0,totaltime=0,usedtime;/文件个数getfiles(files_num,files);searchout s;file *fin,*fout;if(fout=fopen(outfile,w)=null)printf(cant creat a new txt:%sn,outfile);return -1;fprintf(fout,word %s in every article location(pagetrow):n,t.data);for(unsigned int i=0;ifiles_num;i+)/打开文件strcpy(infile,f:testpaper);strcat(infile,filesi);if(fin=fopen(infile,r)=null)printf(cant open %sn,infile);return -1;/初始化s.count=0;page=row=1;rear=79;flag=false;/文件末尾标志fprintf(fout,nnword in %s location:n,filesi);t1=clock();while(!flag)rear+;for(k=0;rear80;k+,rear+)tempk=temprear;for(;k=0;k-)/k指向最后一个非字母if(tempkz|(tempkz&tempk=0)tempk=n;/换为换行符k=0;/k指向temp头ch=tempk;/开始匹配，直到文章结束为止while(k(rear)&ch!=eof) /rear为temp可读长度j=0;while(ch=t.dataj&jt.length)ch=temp+k;/指向下一个字符j+;if(j=t.length) 
/匹配if(chz|(chz&ch=a&ch=z)|(ch=a)&kpagesize)/pagesize为最大行数row=1;page+;ch=temp+k;if(ch=eof)flag=true;usedtime=clock()-t1;outfile(s,fout);/读完一篇输出sfprintf(fout,the numbre of words:%d,s.count);fprintf(fout,nused time:%d,usedtime);fclose(fin);totaltime+=usedtime;fprintf(fout,nnused totaltime:%d,totaltime);fclose(fout);return totaltime;void search()char searchmaxlength;sqstring t;int time;printf(请输入要查找的单词：);fflush(stdin);/清空输入缓冲区scanf(%s,search);initsqstring(t,search);time=mate(t);if(time!=-1)printf(已统计可匹配单词：%sn,search);printf(本次统计耗时:%d msn,time);elseprintf(统计失败，请检查后再试！n);int _tmain(int argc, _tchar* argv)char ifcontinue;int choose;doprintf(welcomen);printf(*请将需要统计的英文文章存放于%s目录下n,pathin);printf(ntttt1、统计单词nntttt2、查找单词n);printf(n);printf(请输入功能号(1/2)：);fflush(stdin);/清空输入缓冲区scanf(%d,&choose);if(choose=1)countallpaper();else if(choose=2)search();printf(统计结果已存放于%ssearchresult.txt目录下文档.n,pathout);elseprintf(输入不正确！n);fflush(stdin);printf(n是否继续？(y/y)：);scanf(%c,&ifcontinue);std:system(cls);while(ifcontinue=y|ifcontinue=y);printf(nntt程序已退出.nnn);printf(n);system(pause);return 0;wordcount类：wordcount.h#pragma oncetypedef struct nodechar data;unsigned int count;struct node *next,*sibling;word;typedef structint top;word* datamaxlength;stack;class wordcountprivate:stack st;word *root;unsigned int usedtime;void initstack(stack &s);void init(word *&node,int ch);void insert(word *&p,char ch);void jointree(word *&p,char ch);void fout(word *r,int &d,file* fout,unsigned int &sum);public:wordcount();wordcount(char* filesname);unsigned int getusedtime(void);wordcount(void);wordcount.cpp#include stdafx.h#include wordcount.hwordcount:wordcount()wordcount:wordcount(char* filename)char outfile50=pathout,infile50=pathin;/文件名char s120;char ch;/文件读入字符,outtxt_name文件名int i=0,d=0;/d为不同单词数,j为当前行截止下标unsigned int sum=0;root=new word();word* p=root;initstack(st);clock_t t1;/生成文件目录和文件名for(i=0;filenamei!=.;i+)s1i=filenamei;s1i=0;strcat(outfile,s1);strcat(outfile,count.txt0);strcat(infile,filename);file 
*fin,*fout;if(fin=fopen(infile,r)=null)printf(cant open %sn,infile);elseif(fout=fopen(outfile,w)=null)printf(cant creat a new txt: %sn,outfile);t1=clock();/开始计时while(ch=fgetc(fin)!=eof)/开始统计个数jointree(p,ch);usedtime=clock()-t1;fclose(fin);fout(root-next,d,fout,sum);fprintf(fout,%s%d,the number of all words:,sum);fprintf(fout,%s%d,nthe number of different words:,d);fprintf(fout,%s%d,nused time:,usedtime);/将计时差存入txtfclose(fout);wordcount:wordcount(void)void wordcount:initstack(stack &s)s.top=-1;void wordcount:init(word *&node,int ch)node=new word();node-data=ch;node-count=0;node-sibling=node-next=null;void wordcount:insert(word *&p,char ch)/p节点前插入新节点word *newnode;/p节点后插入新节点newnodeinit(newnode,p-data);newnode-sibling=p-sibling;p-sibling=newnode;newnode-count=p-count;/将p的值赋予newnodenewnode-next=p-next;p-next=null;/p节点赋新值p-data=ch;p-count=0;void wordcount:jointree(word *&p,char ch)/将读入的字母字符加入树if(ch=a&ch=z)|(ch=a)/是否为字母if(p-next=null)word *newnode;/创建并初始化节点init(newnode,ch);p-next=newnode;p=p-next;return;p=p-next;while(p-sibling!=null&p-datasibling;if(p-data=ch)return;else if(p-datach)insert(p,ch);/在p前面插入新节点return;else if(p-sibling=null)word *newnode;/创建并初始化节点init(newnode,ch);p-sibling=newnode;p=newnode;elsep-count+;p=root;void wordcount:fout(word *r,int &d

人人文库> 全部分类> 专业文献 > 工程机械

温馨提示

1. 本站所有资源如无特殊说明，都需要本地电脑安装OFFICE2007和PDF阅读器。图纸软件为CAD,CAXA,PROE,UG,SolidWorks等.压缩文件请下载最新的WinRAR软件解压。
2. 本站的文档不包含任何第三方提供的附件图纸等，如果需要附件，请联系上传者。文件的所有权益归上传用户所有。
3. 本站RAR压缩包中若带图纸，网页内容里面会有图纸预览，若没有图纸预览就没有图纸。
4. 未经权益所有人同意不得将文件中的内容挪作商业或盈利用途。
5. 人人文库网仅提供信息存储空间，仅对用户上传内容的表现方式做保护处理，对用户上传分享的文档内容本身不做任何修改或编辑，并不能对任何下载内容负责。
6. 下载文件中如有侵权或不适当内容，请与我们联系，我们立即纠正。
7. 本站不保证下载资源的准确性、安全性和完整性, 同时也不承担用户因使用这些下载资源对自己和他人造成任何形式的伤害或损失。

数据结构课程设计-文章中单词查找.docx

文档简介

温馨提示

最新文档

评论

数据结构课程设计-文章中单词查找.docx

文档简介

温馨提示

最新文档

评论

相关文档