版权说明:本文档由用户提供并上传,收益归属内容提供方,若内容存在侵权,请进行举报或认领
文档简介
用CilkPlus实现并行性#include<stdio.h>#include<windows.h>#include<mmsystem.h>#include<math.h>#include<cilk/cilk.h>#include<cilk/reducer_opadd.h>constlongintVERYBIG=100000;//*******************************************************************************************intmain(void){ inti; DWORDstarttime,elapsedtime; //---------------------------------------------------------------------- //Outputastartmessage //printf("NoneParallelTimingsfor%diterations\n\n",VERYBIG); printf("CilkPlusParallelTimingsfor%diterations\n\n",VERYBIG); //repeatexperimentseveraltimes for(i=0;i<6;i++) { //getstartingtime starttime=timeGetTime(); //resetchecksum&runningtotal cilk::reducer_opadd<longint>sum(0); cilk::reducer_opadd<double>total(0.0); //WorkLoop,dosomeworkbyloopingVERYBIGtimes cilk_for(intj=0;j<VERYBIG;j++) { longintk; doublesumx,sumy; //incrementchecksum sum+=1; //Calculatefirstarithmeticseries sumx=0.0; for(k=0;k<j;k++) sumx=sumx+(double)k; //Calculatesecondarithmeticseries sumy=0.0; for(k=j;k>0;k--) sumy=sumy+(double)k; if(sumx>0.0)total=total+1.0/sqrt(sumx); if(sumy>0.0)total=total+1.0/sqrt(sumy); } //getendingtimeanduseittodetermineelapsedtime elapsedtime=timeGetTime()-starttime; //reportelapsedtime printf("TimeElapsed%10dmSecsTotal=%lfCheckSum=%ld\n",(int)elapsedtime,total.get_value(),sum.get_value()); } //returnintegerasrequiredbyfunctionheader return0;}Windows多线程SemaphoreOpenMP错误检测#include<stdio.h>#include<omp.h>staticlongnum_steps=10000*4;doublestep,gsum1,gsum2;voidSafeAdd(doublesum1,doublesum2,omp_lock_t&lock1,omp_lock_t&lock2){ //lockgsum1andupdate omp_set_lock(&lock1); gsum1+=sum1; //lockgsum2andupdate omp_set_lock(&lock2); gsum2+=sum2; omp_unset_lock(&lock2); omp_unset_lock(&lock1);}intmain(){ inti; doublex1,x2; omp_lock_tlock1,lock2; gsum1=0.0; gsum2=0.0; omp_init_lock(&lock1); omp_init_lock(&lock2); printf("CalculatingPi...\n"); step=1.0/(double)num_steps; for(i=0;i<num_steps;i+=4) { doublesum1,sum2; #pragmaompparallelsections//#pragmaompparallelsectionsprivate(x1,x2,sum1,sum2)数据竞争 { #pragmaompsection { //calculatefirstbar x1=(i+0.5)*step; x1*=x1; sum1=4.0/(1.0+x1); //calculatesecondbar x2=(i+1.5)*step; x2*=x2; sum2=4.0/(1.0+x2); SafeAdd(sum1,sum2,lock1,lock2); } #pragmaompsection { //calculatethirdbar x1=(i+2.5)*step; x1*=x1; sum1=4.0/(1.0+x1); //calculatefourthbar x2=(i+3.5)*step; x2*=x2; sum2=4.0/(1.0+x2); SafeAdd(sum1,sum2,lock2,lock1);//死锁 } } } //calacvalueofpi doublepi=step*(gsum1+gsum2); printf("pi:%2.21f\n",pi); omp_destroy_lock(&lock1); omp_destroy_lock(&lock2);}内存错误MPI#include"mpi.h"#include<stdio.h>#include<string.h>intmain(intargc,char*argv[]){inti,rank,size,namelen;charname[MPI_MAX_PROCESSOR_NAME];MPI_Statusstat;MPI_Init(&argc,&argv);MPI_Comm_size(MPI_COMM_WORLD,&size);MPI_Comm_rank(MPI_COMM_WORLD,&rank);MPI_Get_processor_name(name,&namelen);if(rank==0){ printf("Helloworld:rank%dof%drunningon%s\n",rank,size,name); for(i=1;i<size;i++){ MPI_Recv(&rank,1,MPI_INT,i,1,MPI_COMM_WORLD,&stat); MPI_Recv(&size,1,MPI_INT,i,1,MPI_COMM_WORLD,&stat); MPI_Recv(&namelen,1,MPI_INT,i,1,MPI_COMM_WORLD,&stat); MPI_Recv(name,namelen+1,MPI_CHAR,i,1,MPI_COMM_WORLD,&stat); printf("Helloworld:rank%dof%drunningon%s\n",rank,size,name); }}else{ MPI_Send(&rank,1,MPI_INT,0,1,MPI_COMM_WORLD); MPI_Send(&size,1,MPI_INT,0,1,MPI_COMM_WORLD); MPI_Send(&namelen,1,MPI_INT,0,1,MPI_COMM_WORLD); MPI_Send(name,namelen+1,MPI_CHAR,0,1,MPI_COMM_WORLD);}MPI_Finalize();return(0);}CUDA#include<stdio.h>#include<cuda_runtime.h>#defineNUM_THREADS256#defineN1000boolInitCUDA();voidmatgen(float*a,intlda,intn);clock_tmatmult(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn);voidcompare_mat(constfloat*a,intlda,constfloat*b,intldb,intn);clock_tmatmultCUDA(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn);__global__staticvoidmatMultCUDA(constfloat*a,size_tlda,constfloat*b,size_tldb,float*c,size_tldc,intn);intmain(){ float*a,*b,*c,*d; if(!InitCUDA())return0; a=(float*)malloc(sizeof(float)*N*N); b=(float*)malloc(sizeof(float)*N*N); c=(float*)malloc(sizeof(float)*N*N); d=(float*)malloc(sizeof(float)*N*N); srand(0); matgen(a,N,N); matgen(b,N,N); clock_ttime1=matmultCUDA(a,N,b,N,c,N,N); clock_ttime2=matmult(a,N,b,N,d,N,N); compare_mat(c,N,d,N,N); doublesec1=(double)time1/CLOCKS_PER_SEC; doublesec2=(double)time2/CLOCKS_PER_SEC; printf("Timeused:%.2fseconds(%.2lfGFLOPS)inCUDA,Timeused:%.2fseconds(%.2lfGFLOPS)inCPU\n",sec1,2.0*N*N*N/(sec1*1E9),sec2,2.0*N*N*N/(sec2*1E9)); return0;}boolInitCUDA(){ intcount; cudaGetDeviceCount(&count); if(count==0){ fprintf(stderr,"Thereisnodevice.\n"); returnfalse; } inti; for(i=0;i<count;i++){ cudaDevicePropprop; if(cudaGetDeviceProperties(&prop,i)==cudaSuccess){ if(prop.major>=1){ break; } } } if(i==count){ fprintf(stderr,"ThereisnodevicesupportingCUDA1.x.\n"); returnfalse; } cudaSetDevice(i); returntrue;}voidmatgen(float*a,intlda,intn){ inti,j; for(i=0;i<n;i++){ for(j=0;j<n;j++){ a[i*lda+j]=(float)rand()/RAND_MAX+ (float)rand()/(RAND_MAX*RAND_MAX); } }}clock_tmatmult(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn){ clock_tstart,end; inti,j,k; start=clock(); for(i=0;i<n;i++){ for(j=0;j<n;j++){ doublet=0; for(k=0;k<n;k++){ t+=a[i*lda+k]*b[k*ldb+j]; } c[i*ldc+j]=t; } } end=clock(); returnend-start;}voidcompare_mat(constfloat*a,intlda,constfloat*b,intldb,intn){ floatmax_err=0; floataverage_err=0; inti,j; for(i=0;i<n;i++){ for(j=0;j<n;j++){ if(b[i*ldb+j]!=0){ floaterr=fabs((a[i*lda+j]- b[i*ldb+j])/b[i*ldb+j]); if(max_err<err)max_err=err; average_err+=err; } } } printf("Maxerror:%gAverageerror:%g\n",max_err,average_err/(n*n));}clock_tmatmultCUDA(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn){ float*ac,*bc,*cc; clock_tstart,end; start=clock(); size_tpitch_a,pitch_b,pitch_c; cudaMallocPitch((void**)&ac,&pitch_a,sizeof(float)*n,n); cudaMallocPitch((void**)&bc,&pitch_b,sizeof(float)*n,n); cudaMallocPitch((void**)&cc,&pitch_c,sizeof(float)*n,n); cudaMemcpy2D(ac,pitch_a,a,sizeof(float)*lda,sizeof(float)*n,n,cudaMemcpyHostToDevice); cudaMemcpy2D(bc,pitch_b,b,sizeof(float)*ldb,sizeof(float)*n,n,cudaMemcpyHostToDevice); //intblocks=(n+NUM_THREADS-1)/NUM_THREADS; matMultCUDA<<<n,NUM_THREADS,sizeof(float)*n>>>(ac,pitch_a/sizeof(float),bc,pitch_b/sizeof(float),cc,pitch_c/sizeof(float),n); cudaMemcpy2D(c,sizeof(float)*ldc,cc,pitch_c,sizeof(float)*n,n,cudaMemcpyDeviceToHost); cudaFree(ac); cudaFree(bc); cudaFree(cc); end=clock(); returnend-start;}__global__staticvoidmatMultCUDA(constfloat*a,size_tlda,constfloat*b,size_tldb,float*c,size_tldc,intn){ extern__shared__floatdata[]; constinttid=threadIdx.x; constintrow=blockIdx.x; inti,j; for(i=tid;i<n;i+=blockDim.x){ data[i]=a[row*lda+i]; } __syncthreads(); for(j=tid;j<n;j+=blockDim.x){ floatt=0; floaty=0; for(i=0;i<n;i++){ floatr; y-=data[i]*b[i*ldb+j]; r=t-y; y=(r-t)+y; t=r; } c[row*ldc+j]=t; }}Win32全局变量Win32事件Win32CriticalSectionWin32MutexesLinux#include<pthread.h>#include<stdlib.h>#defineMAX_THREADS512void*compute_pi(void*);....main(){...pthread_tp_threads[MAX_THREADS];pthread_attr_tattr;pthread_attr_init(&attr);for(i=0;i<num_threads;i++){hits[i]=i;pthread_create(&p_threads[i],&attr,compute_pi,(void*)&hits[i]);}for(i=0;i<num_threads;i++){pthread_join(p_threads[i],NULL);total_hits+=hits[i];}void*compute_pi(void*s){……pthread_exit(0);}Linux#include "unpthread.h"void *copyto(void*);staticint sockfd; /*globalforboththreadstoaccess*/staticFILE *fp;voidstr_cli(FILE*fp_arg,intsockfd_arg){ char recvline[MAXLINE]; pthread_t tid; sockfd=sockfd_arg; /*copyargumentstoexternals*/ fp=fp_arg; Pthread_create(&tid,NULL,copyto,NULL); while(Readline(sockfd,recvline,MAXLINE)>0) Fputs(recvline,stdout);}void*copyto(void*arg){ char sendline[MAXLINE]; while(Fgets(sendline,MAXLINE,fp)!=NULL) Writen(sockfd,sendline,strlen(sendline)); Shutdown(sockfd,SHUT_WR); /*EOFonstdin,sendFIN*/ return(NULL); /*return(i.e.,threadterminates)whenend-of-fileonstdin*/}#include "unpthread.h"staticvoid *doit(void*); /*eachthreadexecutesthisfunction*/intmain(intargc,char**argv){ int listenfd,connfd; socklen_t addrlen,len; structsockaddr *cliaddr; if(argc==2) listenfd=Tcp_listen(NULL,argv[1],&addrlen); elseif(argc==3) listenfd=Tcp_listen(argv[1],argv[2],&addrlen); else err_quit("usage:tcpserv01[<host>]<serviceorport>"); cliaddr=Malloc(addrlen); for(;;){ len=addrlen; connfd=Accept(listenfd,cliaddr,&len); Pthread_create(NULL,NULL,&doit,(void*)connfd); }}staticvoid*doit(void*arg){ Pthread_detach(pthread_self()); str_echo((int)arg); /*samefunctionasbefore*/ Close((int)arg); /*wearedonewithconnectedsocket*/ return(NULL);}Linuxmutexmain(){....pthread_mutex_init(&minimum_value_lock,NULL);....}void*find_min(void*list_ptr){....pthread_mutex_lock(&minimum_value_lock);if(my_min<minimum_value)minimum_value=my_min;/*andunlockthemutex*/pthread_mutex_unlock(&minimum_value_lock);#include<stdio.h>#include<pthread.h>#defineTHREAD_NUMBER10pthread_mutex_tmutex=PTHREAD_MUTEX_INITIALIZER;pthread_cond_tcond=PTHREAD_COND_INITIALIZER;intsum=0;void*th_counter(void*argc){inti;i=*(int*)argc;sleep(1);pthread_mutex_lock(&mutex);sum=sum+i;if(sum>10)pthread_cond_signal(&cond);pthread_mutex_unlock(&mutex);printf("count%disover\n",i);return;}void*waitsum(void*argc){pthread_mutex_lock(&mutex);while(sum<=10)pthread_cond_wait(&cond,&mutex);printf("Getasignalthatthesumhasbeenupto10!\n");pthread_mutex_unlock(&mutex);}intmain(void){pthread_tpt[THREAD_NUMBER];inti;intarg[THREAD_NUMBER];pthread_create(&pt[THREAD_NUMBER-1],NULL,waitsum,NULL);for(i=0;i<THREAD_NUMBER-1;i++){arg[i]=i;pthread_create(&pt[i],NULL,th_counter,(void*)&arg[i]);}for(i=0;i<THREAD_NUMBER;i++)pthread_detach(pt[i]);//pthread_join(pt[i],NULL);printf("Themainthreadiswaitingforallthethreadsfinishing...\n");sleep(5);printf("sumis%d\n",sum);pthread_mutex_destroy(&mutex);pthread_cond_destroy(&cond);return0;}Linux生产者消费者pthread_cond_tcond_queue_empty,cond_queue_full;pthread_mutex_ttask_queue_cond_lock;inttask_available;/*otherdatastructureshere*/main(){/*declarationsandinitializations*/task_available=0;pthread_init();pthread_cond_init(&cond_queue_empty,NULL);pthread_cond_init(&cond_queue_full,NULL);pthread_mutex_init(&task_queue_cond_lock,NULL);/*createandjoinproducerandconsumerthreads*/}void*producer(void*producer_thread_data){intinserted;while(!done()){create_task();pthread_mutex_lock(&task_queue_cond_lock);while(task_available==1)pthread_cond_wait(&cond_queue_empty,task_queue_cond_lock);insert_into_queue();task_available=1;pthread_cond_signal(&cond_queue_full);pthread_mutex_unlock(&task_queue_cond_lock);}}void*consumer(void*consumer_thread_data){while(!done()){pthread_mutex_lock(&task_queue_cond_lock);while(task_available==0)pthread_cond_wait(&cond_queue_full,&task_queue_cond_lock);my_task=extract_from_queue();task_available=0;pthread_cond_signal(&cond_queue_empty);pthread_mutex_unlock(&task_queue_cond_lock);process_task(my_task);}}Linux读写锁typedefstruct{intreaders;intwriter;pthread_cond_treaders_proceed;pthread_cond_twriter_proceed;intpending_writers;pthread_mutex_tread_write_lock;}mylib_rwlock_t;voidmylib_rwlock_init(mylib_rwlock_t*l){l->readers=l->writer=l->pending_writers=0;pthread_mutex_init(&(l->read_write_lock),NULL);pthread_cond_init(&(l->readers_proceed),NULL);pthread_cond_init(&(l->writer_proceed),NULL);}voidmylib_rwlock_rlock(mylib_rwlock_t*l){/*ifthereisawritelockorpendingwriters,performconditionwait..elseincrementcountofreadersandgrantreadlock*/pthread_mutex_lock(&(l->read_write_lock));while((l->pending_writers>0)||(l->writer>0))pthread_cond_wait(&(l->readers_proceed),&(l->read_write_lock));l->readers++;pthread_mutex_unlock(&(l->read_write_lock));}voidmylib_rwlock_wlock(mylib_rwlock_t*l){/*iftherearereadersorwriters,incrementpendingwriterscountandwait.Onbeingwoken,decrementpendingwriterscountandincrementwritercount*/pthread_mutex_lock(&(l->read_write_lock));while((l->writer>0)||(l->readers>0)){l->pending_writers++;pthread_cond_wait(&(l->writer_proceed),&(l->read_write_lock));}l->pending_writers--;l->writer++;pthread_mutex_unlock(&(l->read_write_lock));}voidmylib_rwlock_unlock(mylib_rwlock_t*l){/*ifthereisawritelockthenunlock,elseiftherearereadlocks,decrementcountofreadlocks.Ifthecountis0andthereisapendingwriter,letitthrough,elseiftherearependingreaders,letthemallgothrough*/pthread_mutex_lock(&(l->read_write_lock));if(l->writer>0)l->writer=0;elseif(l->readers>0)l->readers--;pthread_mutex_unlock(&(l->read_write_lock
温馨提示
- 1. 本站所有资源如无特殊说明,都需要本地电脑安装OFFICE2007和PDF阅读器。图纸软件为CAD,CAXA,PROE,UG,SolidWorks等.压缩文件请下载最新的WinRAR软件解压。
- 2. 本站的文档不包含任何第三方提供的附件图纸等,如果需要附件,请联系上传者。文件的所有权益归上传用户所有。
- 3. 本站RAR压缩包中若带图纸,网页内容里面会有图纸预览,若没有图纸预览就没有图纸。
- 4. 未经权益所有人同意不得将文件中的内容挪作商业或盈利用途。
- 5. 人人文库网仅提供信息存储空间,仅对用户上传内容的表现方式做保护处理,对用户上传分享的文档内容本身不做任何修改或编辑,并不能对任何下载内容负责。
- 6. 下载文件中如有侵权或不适当内容,请与我们联系,我们立即纠正。
- 7. 本站不保证下载资源的准确性、安全性和完整性, 同时也不承担用户因使用这些下载资源对自己和他人造成任何形式的伤害或损失。
最新文档
- 2025年度工伤事故赔偿免责协议书修订指南
- 2025年度城市更新项目土地置换合同协议3篇
- 2025年度合伙经营项目退出合作协议书
- 2025年度电影特效外聘演员聘用协议3篇
- 2025年度国际知识产权交易合同范文3篇
- 2025年度儿童教育机构开店合作协议书
- 永州职业技术学院《康乐服务与管理》2023-2024学年第一学期期末试卷
- 永州师范高等专科学校《设计基础造型》2023-2024学年第一学期期末试卷
- 营口理工学院《男生极限飞盘》2023-2024学年第一学期期末试卷
- 鹰潭职业技术学院《物理化学(2-2)》2023-2024学年第一学期期末试卷
- 装修工作的进度报告
- 《食品包装与安全》课件
- 普外科护士长述职报告
- 混凝土组织供应运输售后服务方案
- +山东省泰安市肥城市2023-2024学年七年级上学期期末考试地理试题+
- 内蒙古自治区呼和浩特市部分学校2023-2024学年九年级上学期期末数学试卷
- 文物保护工作的调研报告(16篇)
- 成长计划300字初中综合素质评价初三
- 口腔科会员制度
- 2023新能源场站一次调频控制系统技术规范
- 胸痛中心培训急性胸痛患者的早期快速甄别
评论
0/150
提交评论