If you use prof, gprof, or xprofiler and the appropriate compiler flag (-p or -pg), you can profile your program. The message passing library is not enabled for prof or gprof profiling counts, but you can obtain profiling information by using the name-shifted MPI functions it provides.
See IBM Parallel Environment for AIX: Operation and Use, Volume 1 for more information on these commands.

To use nameshift profiling routines that are written to the C bindings with an MPI program written in C, or the FORTRAN bindings with an MPI program written in FORTRAN, follow steps 1 through 6 below. To use nameshift profiling routines that are written to the C bindings with an MPI program written in FORTRAN, complete steps 1 through 6 first, then continue with steps 7 through 17.
1. Create a source file that reroutes the MPI functions you want to profile. For example, myprof.c contains a profiling version of MPI_Init:

#include <stdio.h>
#include "mpi.h"

int MPI_Init(int *argc, char ***argv)
{
  int rc;

  printf("hello from profiling layer MPI_Init...\n");
  rc = PMPI_Init(argc, argv);
  printf("goodbye from profiling layer MPI_Init...\n");
  return(rc);
}
2. Create an export file, myprof.exp, that lists the routines your profiling library provides. The first line names the shared object; each following line names one exported symbol:

#!libmyprof.a(newmyprof.o)
MPI_Init
3. Create an import file, mpicore.imp, that lists the PMPI symbols to be resolved from the MPI library:

#!libmpi.a(mpicore.o)
PMPI_Init
4. Compile the profiling routines:

cc -c myprof.c -I/usr/lpp/ppe.poe/include

The -I flag defines the location of mpi.h.
5. Build the shared object, importing the PMPI symbols and exporting your profiling routines:

ld -o newmyprof.o myprof.o -bM:SRE -H512 -T512 -bnoentry -bI:mpicore.imp -bE:myprof.exp -lc
6. Archive the shared object into a library:

ar rv libmyprof.a newmyprof.o
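Your profiling library is now ready for a C program. As a hypothetical usage sketch (the program name hello.c and the use of the mpcc compile script are assumptions, not part of the steps above), you would link the profiling library ahead of the MPI library:

mpcc -o hello hello.c -L. -lmyprof

Because libmyprof.a satisfies the MPI_Init reference first, the application's call passes through the profiling version before reaching PMPI_Init.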
For an MPI program written in FORTRAN that uses profiling routines written to the C bindings, continue with these additional steps:

7. Extract the FORTRAN bindings object from the MPI library:

ar -xv /usr/lpp/ppe.poe/lib/libmpi.a mpifort.o
8. Relink it as a relocatable object:

ld -o mpifort.tmp mpifort.o -r -bnso -bnoentry
9. Extract the core MPI object from the MPI library:

ar -xv /usr/lpp/ppe.poe/lib/libmpi.a mpicore.o
10. Build an export list of the symbols exported by mpicore.o:

/usr/bin/dump -nvp mpicore.o | /usr/bin/grep "^\[" | cut -f2- | cut -c26- | grep -y "^exp" | cut -c35- | sort | uniq > mpicore.exp
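The resulting mpicore.exp lists one exported symbol per line; the exact contents depend on your level of libmpi.a. As a purely illustrative excerpt, it would include entries such as:

MPI_Init
PMPI_Init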
11. Create an import file, vt.exp, containing:

#!libvtd.a(dynamic.o)
VT_instaddr_depth
12. Build an export list of the symbols exported by mpifort.o:

/usr/bin/dump -nvp mpifort.o | /usr/bin/grep "^\[" | cut -f2- | cut -c26- | grep -y "^exp" | cut -c35- | sort | uniq > mpifort.exp
13. Insert the following as the first line of the new mpifort.exp file:

#!libpmpi.a(newmpifort.o)
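After this insertion, the head of mpifort.exp should look like the following (the binding symbols after the first line are hypothetical examples; the dump output from step 12 determines the real list):

#!libpmpi.a(newmpifort.o)
mpi_init
mpi_comm_rank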
14. Build the new FORTRAN bindings shared object, importing from the export lists you just created and from your profiling library:

ld -o newmpifort.o mpifort.tmp -bI:mpicore.exp -bI:myprof.exp -bI:vt.exp -bE:mpifort.exp -bM:SRE -H512 -T512 -bnoentry
15. Archive it into a library:

ar rv libpmpi.a newmpifort.o
16. Create a FORTRAN test program, hwinit.f:

c -------------------------------------
      program hwinit
      include 'mpif.h'
      integer forterr
c
      call MPI_INIT(forterr)
c
c Write comments to screen.
c
      write(6,*)'Hello from task '
c
      call MPI_FINALIZE(forterr)
c
      stop
      end
c
17. Compile it, linking with the new profiling library:

mpxlf -o hwinit hwinit.f -lpmpi -L.
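You can then run the program under the Parallel Operating Environment; for example (the task count is an arbitrary choice):

poe hwinit -procs 2

Each task's output should show the profiling layer's messages around MPI_INIT, confirming that the FORTRAN binding is routed through your C profiling routine.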
The following sample MPI program uses the name-shifted MPI interface to separate out the user and system CPU time spent inside MPI.
CPU MPI Time Example
#include "mpi.h" #include <sys/types.h> #include <time.h> #include <sys/times.h> #define ARRAY_SIZE 1000000 #define VALUE 123 struct tms mpitms; double mpi_elapsed; void main() { int in[ARRAY_SIZE],out[ARRAY_SIZE],tasks,me,src,dest; int i; MPI_Status status[2]; MPI_Request msgid [2]; for (i=0;i<ARRAY_SIZE;i++)out[i]=VALUE; MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&tasks); MPI_Comm_rank(MPI_COMM_WORLD,&me); mpi_elapsed = MPI_Wtime(); dest = (me==tasks-1) ? 0 : me+1; MPI_Isend(out,ARRAY_SIZE,MPI_INT,dest,5,MPI_COMM_WORLD,&msgid[0]); src = (me==0) ? tasks-1 : me-1; MPI_Irecv(in,ARRAY_SIZE,MPI_INT,src,5,MPI_COMM_WORLD,&msgid[1]); MPI_Waitall(2,msgid,status); for (i=0; i< ARRAY_SIZE; i++) { if(in[i] != VALUE ) printf("ERROR on node %d, in = %d\n",me,in[i]); break; } MPI_Barrier(MPI_COMM_WORLD); mpi_elapsed = MPI_Wtime() - mpi_elapsed; printf("MPI CPU times: user %f, system %f, total %f sec\n", ((float)mpitms.tms_utime)/CLK_TCK, ((float)mpitms.tms_stime)/CLK_TCK, (float)(mpitms.tms_utime+mpitms.tms_stime)/CLK_TCK); printf("MPI Elapsed time: %f sec\n", mpi_elapsed); MPI_Finalize(); } /***************************************/ /* Replacement functions for profiling */ /***************************************/ int MPI_Isend(void* buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request) { struct tms beforetms, aftertms; int rc; times(&beforetms); rc = PMPI_Isend(buf,count,datatype,dest,tag,comm,request); times(&aftertms); mpitms.tms_utime += (aftertms.tms_utime - beforetms.tms_utime); mpitms.tms_stime += (aftertms.tms_stime - beforetms.tms_stime); return (rc); } int MPI_Waitall(int count, MPI_Request *array_of_requests, MPI_Status *array_of_statuses) { struct tms beforetms, aftertms; int rc; times(&beforetms); rc = PMPI_Waitall(count,array_of_requests,array_of_statuses); times(&aftertms); mpitms.tms_utime += (aftertms.tms_utime - beforetms.tms_utime); mpitms.tms_stime += (aftertms.tms_stime - beforetms.tms_stime); return (rc); } int MPI_Irecv(void* buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request) { struct tms beforetms, aftertms; int rc; times(&beforetms); rc = PMPI_Irecv(buf,count,datatype,source,tag,comm,request); times(&aftertms); mpitms.tms_utime += (aftertms.tms_utime - beforetms.tms_utime); mpitms.tms_stime += (aftertms.tms_stime - beforetms.tms_stime); return (rc); } int MPI_Barrier(MPI_Comm comm ) { struct tms beforetms, aftertms; int rc; times(&beforetms); rc = PMPI_Barrier(comm); times(&aftertms); mpitms.tms_utime += (aftertms.tms_utime - beforetms.tms_utime); mpitms.tms_stime += (aftertms.tms_stime - beforetms.tms_stime); return (rc); }