并行代码学习资料_第1页
并行代码学习资料_第2页
并行代码学习资料_第3页
并行代码学习资料_第4页
并行代码学习资料_第5页
已阅读5页,还剩11页未读 继续免费阅读

下载本文档

版权说明:本文档由用户提供并上传,收益归属内容提供方,若内容存在侵权,请进行举报或认领

文档简介

用CilkPlus实现并行性#include<stdio.h>#include<windows.h>#include<mmsystem.h>#include<math.h>#include<cilk/cilk.h>#include<cilk/reducer_opadd.h>constlongintVERYBIG=100000;//*******************************************************************************************intmain(void){ inti; DWORDstarttime,elapsedtime; //---------------------------------------------------------------------- //Outputastartmessage //printf("NoneParallelTimingsfor%diterations\n\n",VERYBIG); printf("CilkPlusParallelTimingsfor%diterations\n\n",VERYBIG); //repeatexperimentseveraltimes for(i=0;i<6;i++) { //getstartingtime starttime=timeGetTime(); //resetchecksum&runningtotal cilk::reducer_opadd<longint>sum(0); cilk::reducer_opadd<double>total(0.0); //WorkLoop,dosomeworkbyloopingVERYBIGtimes cilk_for(intj=0;j<VERYBIG;j++) { longintk; doublesumx,sumy; //incrementchecksum sum+=1; //Calculatefirstarithmeticseries sumx=0.0; for(k=0;k<j;k++) sumx=sumx+(double)k; //Calculatesecondarithmeticseries sumy=0.0; for(k=j;k>0;k--) sumy=sumy+(double)k; if(sumx>0.0)total=total+1.0/sqrt(sumx); if(sumy>0.0)total=total+1.0/sqrt(sumy); } //getendingtimeanduseittodetermineelapsedtime elapsedtime=timeGetTime()-starttime; //reportelapsedtime printf("TimeElapsed%10dmSecsTotal=%lfCheckSum=%ld\n",(int)elapsedtime,total.get_value(),sum.get_value()); } //returnintegerasrequiredbyfunctionheader return0;}Windows多线程SemaphoreOpenMP错误检测#include<stdio.h>#include<omp.h>staticlongnum_steps=10000*4;doublestep,gsum1,gsum2;voidSafeAdd(doublesum1,doublesum2,omp_lock_t&lock1,omp_lock_t&lock2){ //lockgsum1andupdate omp_set_lock(&lock1); gsum1+=sum1; //lockgsum2andupdate omp_set_lock(&lock2); gsum2+=sum2; omp_unset_lock(&lock2); omp_unset_lock(&lock1);}intmain(){ inti; doublex1,x2; omp_lock_tlock1,lock2; gsum1=0.0; gsum2=0.0; omp_init_lock(&lock1); omp_init_lock(&lock2); printf("CalculatingPi...\n"); step=1.0/(double)num_steps; for(i=0;i<num_steps;i+=4) { doublesum1,sum2; 
#pragmaompparallelsections//#pragmaompparallelsectionsprivate(x1,x2,sum1,sum2)数据竞争 { #pragmaompsection { //calculatefirstbar x1=(i+0.5)*step; x1*=x1; sum1=4.0/(1.0+x1); //calculatesecondbar x2=(i+1.5)*step; x2*=x2; sum2=4.0/(1.0+x2); SafeAdd(sum1,sum2,lock1,lock2); } #pragmaompsection { //calculatethirdbar x1=(i+2.5)*step; x1*=x1; sum1=4.0/(1.0+x1); //calculatefourthbar x2=(i+3.5)*step; x2*=x2; sum2=4.0/(1.0+x2); SafeAdd(sum1,sum2,lock2,lock1);//死锁 } } } //calacvalueofpi doublepi=step*(gsum1+gsum2); printf("pi:%2.21f\n",pi); omp_destroy_lock(&lock1); omp_destroy_lock(&lock2);}内存错误MPI#include"mpi.h"#include<stdio.h>#include<string.h>intmain(intargc,char*argv[]){inti,rank,size,namelen;charname[MPI_MAX_PROCESSOR_NAME];MPI_Statusstat;MPI_Init(&argc,&argv);MPI_Comm_size(MPI_COMM_WORLD,&size);MPI_Comm_rank(MPI_COMM_WORLD,&rank);MPI_Get_processor_name(name,&namelen);if(rank==0){ printf("Helloworld:rank%dof%drunningon%s\n",rank,size,name); for(i=1;i<size;i++){ MPI_Recv(&rank,1,MPI_INT,i,1,MPI_COMM_WORLD,&stat); MPI_Recv(&size,1,MPI_INT,i,1,MPI_COMM_WORLD,&stat); MPI_Recv(&namelen,1,MPI_INT,i,1,MPI_COMM_WORLD,&stat); MPI_Recv(name,namelen+1,MPI_CHAR,i,1,MPI_COMM_WORLD,&stat); printf("Helloworld:rank%dof%drunningon%s\n",rank,size,name); }}else{ MPI_Send(&rank,1,MPI_INT,0,1,MPI_COMM_WORLD); MPI_Send(&size,1,MPI_INT,0,1,MPI_COMM_WORLD); MPI_Send(&namelen,1,MPI_INT,0,1,MPI_COMM_WORLD); MPI_Send(name,namelen+1,MPI_CHAR,0,1,MPI_COMM_WORLD);}MPI_Finalize();return(0);}CUDA#include<stdio.h>#include<cuda_runtime.h>#defineNUM_THREADS256#defineN1000boolInitCUDA();voidmatgen(float*a,intlda,intn);clock_tmatmult(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn);voidcompare_mat(constfloat*a,intlda,constfloat*b,intldb,intn);clock_tmatmultCUDA(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn);__global__staticvoidmatMultCUDA(constfloat*a,size_tlda,constfloat*b,size_tldb,float*c,size_tldc,intn);intmain(){ float*a,*b,*c,*d; if(!InitCUDA())return0; 
a=(float*)malloc(sizeof(float)*N*N); b=(float*)malloc(sizeof(float)*N*N); c=(float*)malloc(sizeof(float)*N*N); d=(float*)malloc(sizeof(float)*N*N); srand(0); matgen(a,N,N); matgen(b,N,N); clock_ttime1=matmultCUDA(a,N,b,N,c,N,N); clock_ttime2=matmult(a,N,b,N,d,N,N); compare_mat(c,N,d,N,N); doublesec1=(double)time1/CLOCKS_PER_SEC; doublesec2=(double)time2/CLOCKS_PER_SEC; printf("Timeused:%.2fseconds(%.2lfGFLOPS)inCUDA,Timeused:%.2fseconds(%.2lfGFLOPS)inCPU\n",sec1,2.0*N*N*N/(sec1*1E9),sec2,2.0*N*N*N/(sec2*1E9)); return0;}boolInitCUDA(){ intcount; cudaGetDeviceCount(&count); if(count==0){ fprintf(stderr,"Thereisnodevice.\n"); returnfalse; } inti; for(i=0;i<count;i++){ cudaDevicePropprop; if(cudaGetDeviceProperties(&prop,i)==cudaSuccess){ if(prop.major>=1){ break; } } } if(i==count){ fprintf(stderr,"ThereisnodevicesupportingCUDA1.x.\n"); returnfalse; } cudaSetDevice(i); returntrue;}voidmatgen(float*a,intlda,intn){ inti,j; for(i=0;i<n;i++){ for(j=0;j<n;j++){ a[i*lda+j]=(float)rand()/RAND_MAX+ (float)rand()/(RAND_MAX*RAND_MAX); } }}clock_tmatmult(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn){ clock_tstart,end; inti,j,k; start=clock(); for(i=0;i<n;i++){ for(j=0;j<n;j++){ doublet=0; for(k=0;k<n;k++){ t+=a[i*lda+k]*b[k*ldb+j]; } c[i*ldc+j]=t; } } end=clock(); returnend-start;}voidcompare_mat(constfloat*a,intlda,constfloat*b,intldb,intn){ floatmax_err=0; floataverage_err=0; inti,j; for(i=0;i<n;i++){ for(j=0;j<n;j++){ if(b[i*ldb+j]!=0){ floaterr=fabs((a[i*lda+j]- b[i*ldb+j])/b[i*ldb+j]); if(max_err<err)max_err=err; average_err+=err; } } } printf("Maxerror:%gAverageerror:%g\n",max_err,average_err/(n*n));}clock_tmatmultCUDA(constfloat*a,intlda,constfloat*b,intldb,float*c,intldc,intn){ float*ac,*bc,*cc; clock_tstart,end; start=clock(); size_tpitch_a,pitch_b,pitch_c; cudaMallocPitch((void**)&ac,&pitch_a,sizeof(float)*n,n); cudaMallocPitch((void**)&bc,&pitch_b,sizeof(float)*n,n); cudaMallocPitch((void**)&cc,&pitch_c,sizeof(float)*n,n); 
cudaMemcpy2D(ac,pitch_a,a,sizeof(float)*lda,sizeof(float)*n,n,cudaMemcpyHostToDevice); cudaMemcpy2D(bc,pitch_b,b,sizeof(float)*ldb,sizeof(float)*n,n,cudaMemcpyHostToDevice); //intblocks=(n+NUM_THREADS-1)/NUM_THREADS; matMultCUDA<<<n,NUM_THREADS,sizeof(float)*n>>>(ac,pitch_a/sizeof(float),bc,pitch_b/sizeof(float),cc,pitch_c/sizeof(float),n); cudaMemcpy2D(c,sizeof(float)*ldc,cc,pitch_c,sizeof(float)*n,n,cudaMemcpyDeviceToHost); cudaFree(ac); cudaFree(bc); cudaFree(cc); end=clock(); returnend-start;}__global__staticvoidmatMultCUDA(constfloat*a,size_tlda,constfloat*b,size_tldb,float*c,size_tldc,intn){ extern__shared__floatdata[]; constinttid=threadIdx.x; constintrow=blockIdx.x; inti,j; for(i=tid;i<n;i+=blockDim.x){ data[i]=a[row*lda+i]; } __syncthreads(); for(j=tid;j<n;j+=blockDim.x){ floatt=0; floaty=0; for(i=0;i<n;i++){ floatr; y-=data[i]*b[i*ldb+j]; r=t-y; y=(r-t)+y; t=r; } c[row*ldc+j]=t; }}Win32全局变量Win32事件Win32CriticalSectionWin32MutexesLinux#include<pthread.h>#include<stdlib.h>#defineMAX_THREADS512void*compute_pi(void*);....main(){...pthread_tp_threads[MAX_THREADS];pthread_attr_tattr;pthread_attr_init(&attr);for(i=0;i<num_threads;i++){hits[i]=i;pthread_create(&p_threads[i],&attr,compute_pi,(void*)&hits[i]);}for(i=0;i<num_threads;i++){pthread_join(p_threads[i],NULL);total_hits+=hits[i];}void*compute_pi(void*s){……pthread_exit(0);}Linux#include "unpthread.h"void *copyto(void*);staticint sockfd; /*globalforboththreadstoaccess*/staticFILE *fp;voidstr_cli(FILE*fp_arg,intsockfd_arg){ char recvline[MAXLINE]; pthread_t tid; sockfd=sockfd_arg; /*copyargumentstoexternals*/ fp=fp_arg; Pthread_create(&tid,NULL,copyto,NULL); while(Readline(sockfd,recvline,MAXLINE)>0) Fputs(recvline,stdout);}void*copyto(void*arg){ char sendline[MAXLINE]; while(Fgets(sendline,MAXLINE,fp)!=NULL) Writen(sockfd,sendline,strlen(sendline)); Shutdown(sockfd,SHUT_WR); /*EOFonstdin,sendFIN*/ return(NULL); /*return(i.e.,threadterminates)whenend-of-fileonstdin*/}#include "unpthread.h"staticvoid 
*doit(void*); /*eachthreadexecutesthisfunction*/intmain(intargc,char**argv){ int listenfd,connfd; socklen_t addrlen,len; structsockaddr *cliaddr; if(argc==2) listenfd=Tcp_listen(NULL,argv[1],&addrlen); elseif(argc==3) listenfd=Tcp_listen(argv[1],argv[2],&addrlen); else err_quit("usage:tcpserv01[<host>]<serviceorport>"); cliaddr=Malloc(addrlen); for(;;){ len=addrlen; connfd=Accept(listenfd,cliaddr,&len); Pthread_create(NULL,NULL,&doit,(void*)connfd); }}staticvoid*doit(void*arg){ Pthread_detach(pthread_self()); str_echo((int)arg); /*samefunctionasbefore*/ Close((int)arg); /*wearedonewithconnectedsocket*/ return(NULL);}Linuxmutexmain(){....pthread_mutex_init(&minimum_value_lock,NULL);....}void*find_min(void*list_ptr){....pthread_mutex_lock(&minimum_value_lock);if(my_min<minimum_value)minimum_value=my_min;/*andunlockthemutex*/pthread_mutex_unlock(&minimum_value_lock);#include<stdio.h>#include<pthread.h>#defineTHREAD_NUMBER10pthread_mutex_tmutex=PTHREAD_MUTEX_INITIALIZER;pthread_cond_tcond=PTHREAD_COND_INITIALIZER;intsum=0;void*th_counter(void*argc){inti;i=*(int*)argc;sleep(1);pthread_mutex_lock(&mutex);sum=sum+i;if(sum>10)pthread_cond_signal(&cond);pthread_mutex_unlock(&mutex);printf("count%disover\n",i);return;}void*waitsum(void*argc){pthread_mutex_lock(&mutex);while(sum<=10)pthread_cond_wait(&cond,&mutex);printf("Getasignalthatthesumhasbeenupto10!\n");pthread_mutex_unlock(&mutex);}intmain(void){pthread_tpt[THREAD_NUMBER];inti;intarg[THREAD_NUMBER];pthread_create(&pt[THREAD_NUMBER-1],NULL,waitsum,NULL);for(i=0;i<THREAD_NUMBER-1;i++){arg[i]=i;pthread_create(&pt[i],NULL,th_counter,(void*)&arg[i]);}for(i=0;i<THREAD_NUMBER;i++)pthread_detach(pt[i]);//pthread_join(pt[i],NULL);printf("Themainthreadiswaitingforallthethreadsfinishing...\n");sleep(5);printf("sumis%d\n",sum);pthread_mutex_destroy(&mutex);pthread_cond_destroy(&cond);return0;}Linux生产者消费者pthread_cond_tcond_queue_empty,cond_queue_full;pthread_mutex_ttask_queue_cond_lock;inttask_available;/*otherdatastructureshere*/m
ain(){/*declarationsandinitializations*/task_available=0;pthread_init();pthread_cond_init(&cond_queue_empty,NULL);pthread_cond_init(&cond_queue_full,NULL);pthread_mutex_init(&task_queue_cond_lock,NULL);/*createandjoinproducerandconsumerthreads*/}void*producer(void*producer_thread_data){intinserted;while(!done()){create_task();pthread_mutex_lock(&task_queue_cond_lock);while(task_available==1)pthread_cond_wait(&cond_queue_empty,task_queue_cond_lock);insert_into_queue();task_available=1;pthread_cond_signal(&cond_queue_full);pthread_mutex_unlock(&task_queue_cond_lock);}}void*consumer(void*consumer_thread_data){while(!done()){pthread_mutex_lock(&task_queue_cond_lock);while(task_available==0)pthread_cond_wait(&cond_queue_full,&task_queue_cond_lock);my_task=extract_from_queue();task_available=0;pthread_cond_signal(&cond_queue_empty);pthread_mutex_unlock(&task_queue_cond_lock);process_task(my_task);}}Linux读写锁typedefstruct{intreaders;intwriter;pthread_cond_treaders_proceed;pthread_cond_twriter_proceed;intpending_writers;pthread_mutex_tread_write_lock;}mylib_rwlock_t;voidmylib_rwlock_init(mylib_rwlock_t*l){l->readers=l->writer=l->pending_writers=0;pthread_mutex_init(&(l->read_write_lock),NULL);pthread_cond_init(&(l->readers_proceed),NULL);pthread_cond_init(&(l->writer_proceed),NULL);}voidmylib_rwlock_rlock(mylib_rwlock_t*l){/*ifthereisawritelockorpendingwriters,performconditionwait..elseincrementcountofreadersandgrantreadlock*/pthread_mutex_lock(&(l->read_write_lock));while((l->pending_writers>0)||(l->writer>0))pthread_cond_wait(&(l->readers_proceed),&(l->read_write_lock));l->readers++;pthread_mutex_unlock(&(l->read_write_lock));}voidmylib_rwlock_wlock(mylib_rwlock_t*l){/*iftherearereadersorwriters,incrementpendingwriterscountandwait.Onbeingwoken,decrementpendingwriterscountandincrementwritercount*/pthread_mutex_lock(&(l->read_write_lock));while((l->writer>0)||(l->readers>0)){l->pending_writers++;pthread_cond_wait(&(l->writer_proceed),&(l->read_write_lock));}l->pending_writer
s--;l->writer++;pthread_mutex_unlock(&(l->read_write_lock));}voidmylib_rwlock_unlock(mylib_rwlock_t*l){/*ifthereisawritelockthenunlock,elseiftherearereadlocks,decrementcountofreadlocks.Ifthecountis0andthereisapendingwriter,letitthrough,elseiftherearependingreaders,letthemallgothrough*/pthread_mutex_lock(&(l->read_write_lock));if(l->writer>0)l->writer=0;elseif(l->readers>0)l->readers--;pthread_mutex_unlock(&(l->read_write_lock

温馨提示

  • 1. 本站所有资源如无特殊说明,都需要本地电脑安装OFFICE2007和PDF阅读器。图纸软件为CAD,CAXA,PROE,UG,SolidWorks等。压缩文件请下载最新的WinRAR软件解压。
  • 2. 本站的文档不包含任何第三方提供的附件图纸等,如果需要附件,请联系上传者。文件的所有权益归上传用户所有。
  • 3. 本站RAR压缩包中若带图纸,网页内容里面会有图纸预览,若没有图纸预览就没有图纸。
  • 4. 未经权益所有人同意不得将文件中的内容挪作商业或盈利用途。
  • 5. 人人文库网仅提供信息存储空间,仅对用户上传内容的表现方式做保护处理,对用户上传分享的文档内容本身不做任何修改或编辑,并不能对任何下载内容负责。
  • 6. 下载文件中如有侵权或不适当内容,请与我们联系,我们立即纠正。
  • 7. 本站不保证下载资源的准确性、安全性和完整性,同时也不承担用户因使用这些下载资源对自己和他人造成任何形式的伤害或损失。

评论

0/150

提交评论