KTH framework for Nek5000 toolboxes; testing version  0.0.1
mxm_wrapper.f
Go to the documentation of this file.
1  subroutine mxm(a,n1,b,n2,c,n3)
2 c
3 c Compute matrix-matrix product C = A*B
4 c for contiguously packed matrices A,B, and C.
5 c
6  real a(n1,n2),b(n2,n3),c(n1,n3)
7 c
8  include 'SIZE'
9  include 'CTIMER'
10  include 'OPCTR'
11  include 'TOTAL'
12 c
13  integer*8 tt
14  parameter(tt = 32)
15 
16 #ifdef TIMER2
17  if (isclld.eq.0) then
18  isclld=1
19  nrout=nrout+1
20  myrout=nrout
21  rname(myrout) = 'mxm '
22  endif
23  isbcnt = n1*n3*(2*n2-1)
24  dct(myrout) = dct(myrout) + (isbcnt)
25  ncall(myrout) = ncall(myrout) + 1
26  dcount = dcount + (isbcnt)
27  etime1 = dnekclock()
28 #endif
29 
30 
31 #ifdef BGQ
32  if (n2 .eq. 8 .and. mod(n1,4) .eq. 0
33  $ .and. mod(loc(a),tt).eq.0
34  $ .and. mod(loc(b),tt).eq.0
35  $ .and. mod(loc(c),tt).eq.0
36  $ ) then
37  call mxm_bgq_8(a, n1, b, n2, c, n3)
38  goto 111
39  endif
40  if (n2 .eq. 16 .and. mod(n1,4) .eq. 0
41  $ .and. mod(loc(a),tt).eq.0
42  $ .and. mod(loc(b),tt).eq.0
43  $ .and. mod(loc(c),tt).eq.0
44  $ ) then
45  call mxm_bgq_16(a, n1, b, n2, c, n3)
46  goto 111
47  endif
48  if (n2 .eq. 10 .and. mod(n1,4) .eq. 0 .and. mod(n3,2) .eq. 0
49  & .and. mod(loc(a),tt).eq.0
50  & .and. mod(loc(b),tt).eq.0
51  & .and. mod(loc(c),tt).eq.0
52  & ) then
53  call mxm_bgq_10(a, n1, b, n2, c, n3)
54  goto 111
55  endif
56  if (n2 .eq. 6 .and. mod(n1,4) .eq. 0 .and. mod(n3,2) .eq. 0
57  & .and. mod(loc(a),tt).eq.0
58  & .and. mod(loc(b),tt).eq.0
59  & .and. mod(loc(c),tt).eq.0
60  & ) then
61  call mxm_bgq_6(a, n1, b, n2, c, n3)
62  goto 111
63  endif
64 #endif
65 
66 #ifdef XSMM
67  if ((n1*n2*n3)**(1./3) .gt. 6) then
68  call libxsmm_dgemm('N','N',n1,n3,n2,1.0,a,n1,b,n2,0.0,c,n1)
69  goto 111
70  else
71  goto 101
72  endif
73 #endif
74 
75 #ifdef BLAS_MXM
76  call dgemm('N','N',n1,n3,n2,1.0,a,n1,b,n2,0.0,c,n1)
77  goto 111
78 #endif
79 
80  101 call mxmf2(a,n1,b,n2,c,n3)
81 
82  111 continue
83 #ifdef TIMER2
84  tmxmf = tmxmf + dnekclock() - etime1
85 #endif
86  return
87  end
88 c-----------------------------------------------------------------------
89  subroutine fgslib_mxm(a,n1,b,n2,c,n3)
90  real a(n1,n2),b(n2,n3),c(n1,n3)
91 
92  call mxmf2(a,n1,b,n2,c,n3)
93 
94  return
95  end
real *8 function dnekclock()
Definition: comm_mpi.f:393
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
Definition: dgemm.f:3
subroutine mxm_bgq_16(a, n1, b, n2, c, n3)
Definition: mxm_bgq.f:56
subroutine mxm_bgq_10(a, n1, b, n2, c, n3)
Definition: mxm_bgq.f:212
subroutine mxm_bgq_8(a, n1, b, n2, c, n3)
Definition: mxm_bgq.f:2
subroutine mxm_bgq_6(a, n1, b, n2, c, n3)
Definition: mxm_bgq.f:143
subroutine mxmf2(A, N1, B, N2, C, N3)
Definition: mxm_std.f:3
subroutine fgslib_mxm(a, n1, b, n2, c, n3)
Definition: mxm_wrapper.f:90
subroutine mxm(a, n1, b, n2, c, n3)
Definition: mxm_wrapper.f:2