KTH framework for Nek5000 toolboxes; testing version  0.0.1
mxm_bgq.f
Go to the documentation of this file.
1  subroutine mxm_bgq_8(a,n1,b,n2,c,n3)
2  implicit none
3 
4  integer n1, n2, n3
5  real(8) a(n1,n2),b(n2,n3)
6  real(8) c(n1,n3)
7 
8  integer i, j
9  vector(real(8)) av1, av2, av3, av4, av5, av6, av7, av8
10  vector(real(8)) bv1, bsv1, bsv2, bsv3, bsv4
11  vector(real(8)) bv2, bsv5, bsv6, bsv7, bsv8
12  vector(real(8)) cv
13 
14  call alignx(32, a(1,1))
15  call alignx(32, b(1,1))
16  call alignx(32, c(1,1))
17 
18  do i = 1, n1, 4
19  av1 = vec_ld(0,a(i,1))
20  av2 = vec_ld(0,a(i,2))
21  av3 = vec_ld(0,a(i,3))
22  av4 = vec_ld(0,a(i,4))
23  av5 = vec_ld(0,a(i,5))
24  av6 = vec_ld(0,a(i,6))
25  av7 = vec_ld(0,a(i,7))
26  av8 = vec_ld(0,a(i,8))
27 
28  do j = 1, n3
29  bv1 = vec_ld(0,b(1,j))
30  bv2 = vec_ld(0,b(5,j))
31  bsv1 = vec_splat(bv1, 0)
32  bsv2 = vec_splat(bv1, 1)
33  bsv3 = vec_splat(bv1, 2)
34  bsv4 = vec_splat(bv1, 3)
35  bsv5 = vec_splat(bv2, 0)
36  bsv6 = vec_splat(bv2, 1)
37  bsv7 = vec_splat(bv2, 2)
38  bsv8 = vec_splat(bv2, 3)
39 
40  cv = vec_mul(av1, bsv1)
41  cv = vec_madd(av2, bsv2, cv)
42  cv = vec_madd(av3, bsv3, cv)
43  cv = vec_madd(av4, bsv4, cv)
44  cv = vec_madd(av5, bsv5, cv)
45  cv = vec_madd(av6, bsv6, cv)
46  cv = vec_madd(av7, bsv7, cv)
47  cv = vec_madd(av8, bsv8, cv)
48 
49  call vec_st(cv, 0, c(i,j))
50  end do
51  end do
52  return
53  end subroutine mxm_bgq_8
54 
55  subroutine mxm_bgq_16(a,n1,b,n2,c,n3)
56  implicit none
57 
58  integer n1, n2, n3
59  real(8) a(n1,n2),b(n2,n3)
60  real(8) c(n1,n3)
61 
62  integer i, j
63 
64  vector(real(8)) av1, av2, av3, av4, av5, av6, av7, av8
65  vector(real(8)) av9, av10, av11, av12, av13, av14, av15, av16
66  vector(real(8)) bv1, bsv1, bsv2, bsv3, bsv4
67  vector(real(8)) bv2, bsv5, bsv6, bsv7, bsv8
68  vector(real(8)) bv3, bsv9, bsv10, bsv11, bsv12
69  vector(real(8)) bv4, bsv13, bsv14, bsv15, bsv16
70 
71  vector(real(8)) cv
72 
73  call alignx(32, a(1,1))
74  call alignx(32, b(1,1))
75  call alignx(32, c(1,1))
76 
77  do i = 1, n1, 4
78  av1 = vec_ld(0,a(i,1))
79  av2 = vec_ld(0,a(i,2))
80  av3 = vec_ld(0,a(i,3))
81  av4 = vec_ld(0,a(i,4))
82  av5 = vec_ld(0,a(i,5))
83  av6 = vec_ld(0,a(i,6))
84  av7 = vec_ld(0,a(i,7))
85  av8 = vec_ld(0,a(i,8))
86  av9 = vec_ld(0,a(i,9))
87  av10 = vec_ld(0,a(i,10))
88  av11 = vec_ld(0,a(i,11))
89  av12 = vec_ld(0,a(i,12))
90  av13 = vec_ld(0,a(i,13))
91  av14 = vec_ld(0,a(i,14))
92  av15 = vec_ld(0,a(i,15))
93  av16 = vec_ld(0,a(i,16))
94 
95  do j = 1, n3
96  bv1 = vec_ld(0,b(1,j))
97  bv2 = vec_ld(0,b(5,j))
98  bv3 = vec_ld(0,b(9,j))
99  bv4 = vec_ld(0,b(13,j))
100 
101  bsv1 = vec_splat(bv1, 0)
102  bsv2 = vec_splat(bv1, 1)
103  bsv3 = vec_splat(bv1, 2)
104  bsv4 = vec_splat(bv1, 3)
105  bsv5 = vec_splat(bv2, 0)
106  bsv6 = vec_splat(bv2, 1)
107  bsv7 = vec_splat(bv2, 2)
108  bsv8 = vec_splat(bv2, 3)
109  bsv9 = vec_splat(bv3, 0)
110  bsv10 = vec_splat(bv3, 1)
111  bsv11 = vec_splat(bv3, 2)
112  bsv12 = vec_splat(bv3, 3)
113  bsv13 = vec_splat(bv4, 0)
114  bsv14 = vec_splat(bv4, 1)
115  bsv15 = vec_splat(bv4, 2)
116  bsv16 = vec_splat(bv4, 3)
117 
118  cv = vec_mul(av1, bsv1)
119  cv = vec_madd(av2, bsv2, cv)
120  cv = vec_madd(av3, bsv3, cv)
121  cv = vec_madd(av4, bsv4, cv)
122  cv = vec_madd(av5, bsv5, cv)
123  cv = vec_madd(av6, bsv6, cv)
124  cv = vec_madd(av7, bsv7, cv)
125  cv = vec_madd(av8, bsv8, cv)
126  cv = vec_madd(av9, bsv9, cv)
127  cv = vec_madd(av10, bsv10, cv)
128  cv = vec_madd(av11, bsv11, cv)
129  cv = vec_madd(av12, bsv12, cv)
130  cv = vec_madd(av13, bsv13, cv)
131  cv = vec_madd(av14, bsv14, cv)
132  cv = vec_madd(av15, bsv15, cv)
133  cv = vec_madd(av16, bsv16, cv)
134 
135  call vec_st(cv, 0, c(i,j))
136  end do
137  end do
138  return
139  end subroutine mxm_bgq_16
140 
141 
142  subroutine mxm_bgq_6(a,n1,b,n2,c,n3)
143  implicit none
144 
145  integer n1, n2, n3
146  real(8) a(n1,n2),b(n2,n3)
147  real(8) c(n1,n3)
148 
149  integer i, j
150 
151  vector(real(8)) av1, av2, av3, av4, av5, av6
152  vector(real(8)) bv1, bsv1, bsv2, bsv3, bsv4
153  vector(real(8)) bv2, bsv5, bsv6
154 
155  vector(real(8)) cv
156 
157  call alignx(32, a(1,1))
158  call alignx(32, b(1,1))
159  call alignx(32, c(1,1))
160 
161  do i = 1, n1, 4
162  av1 = vec_ld(0,a(i,1))
163  av2 = vec_ld(0,a(i,2))
164  av3 = vec_ld(0,a(i,3))
165  av4 = vec_ld(0,a(i,4))
166  av5 = vec_ld(0,a(i,5))
167  av6 = vec_ld(0,a(i,6))
168 
169  do j = 1, n3, 2
170  bv1 = vec_ld(0,b(1,j))
171  bv2 = vec_ld(0,b(5,j))
172 
173  bsv1 = vec_splat(bv1, 0)
174  bsv2 = vec_splat(bv1, 1)
175  bsv3 = vec_splat(bv1, 2)
176  bsv4 = vec_splat(bv1, 3)
177  bsv5 = vec_splat(bv2, 0)
178  bsv6 = vec_splat(bv2, 1)
179 
180  cv = vec_mul(av1, bsv1)
181  cv = vec_madd(av2, bsv2, cv)
182  cv = vec_madd(av3, bsv3, cv)
183  cv = vec_madd(av4, bsv4, cv)
184  cv = vec_madd(av5, bsv5, cv)
185  cv = vec_madd(av6, bsv6, cv)
186 
187  call vec_st(cv, 0, c(i,j))
188 
189  bv1 = vec_ld(0,b(9,j))
190 
191  bsv1 = vec_splat(bv2, 2)
192  bsv2 = vec_splat(bv2, 3)
193  bsv3 = vec_splat(bv1, 0)
194  bsv4 = vec_splat(bv1, 1)
195  bsv5 = vec_splat(bv1, 2)
196  bsv6 = vec_splat(bv1, 3)
197 
198  cv = vec_mul(av1, bsv1)
199  cv = vec_madd(av2, bsv2, cv)
200  cv = vec_madd(av3, bsv3, cv)
201  cv = vec_madd(av4, bsv4, cv)
202  cv = vec_madd(av5, bsv5, cv)
203  cv = vec_madd(av6, bsv6, cv)
204 
205  call vec_st(cv, 0, c(i,j+1))
206  end do
207  end do
208  return
209  end subroutine mxm_bgq_6
210 
211  subroutine mxm_bgq_10(a,n1,b,n2,c,n3)
212  implicit none
213 
214  integer n1, n2, n3
215  real(8) a(n1,n2),b(n2,n3)
216  real(8) c(n1,n3)
217 
218  integer i, j
219 
220  vector(real(8)) av1, av2, av3, av4, av5, av6, av7, av8
221  vector(real(8)) av9, av10
222  vector(real(8)) bv1, bsv1, bsv2, bsv3, bsv4
223  vector(real(8)) bv2, bsv5, bsv6, bsv7, bsv8
224  vector(real(8)) bv3, bsv9, bsv10
225  vector(real(8)) cv
226 
227  call alignx(32, a(1,1))
228  call alignx(32, b(1,1))
229  call alignx(32, c(1,1))
230 
231  do i = 1, n1, 4
232  av1 = vec_ld(0,a(i,1))
233  av2 = vec_ld(0,a(i,2))
234  av3 = vec_ld(0,a(i,3))
235  av4 = vec_ld(0,a(i,4))
236  av5 = vec_ld(0,a(i,5))
237  av6 = vec_ld(0,a(i,6))
238  av7 = vec_ld(0,a(i,7))
239  av8 = vec_ld(0,a(i,8))
240  av9 = vec_ld(0,a(i,9))
241  av10 = vec_ld(0,a(i,10))
242 
243  do j = 1, n3, 2
244  bv1 = vec_ld(0,b(1,j))
245  bv2 = vec_ld(0,b(5,j))
246  bv3 = vec_ld(0,b(9,j))
247 
248  bsv1 = vec_splat(bv1, 0)
249  bsv2 = vec_splat(bv1, 1)
250  bsv3 = vec_splat(bv1, 2)
251  bsv4 = vec_splat(bv1, 3)
252  bsv5 = vec_splat(bv2, 0)
253  bsv6 = vec_splat(bv2, 1)
254  bsv7 = vec_splat(bv2, 2)
255  bsv8 = vec_splat(bv2, 3)
256  bsv9 = vec_splat(bv3, 0)
257  bsv10 = vec_splat(bv3, 1)
258 
259  cv = vec_mul(av1, bsv1)
260  cv = vec_madd(av2, bsv2, cv)
261  cv = vec_madd(av3, bsv3, cv)
262  cv = vec_madd(av4, bsv4, cv)
263  cv = vec_madd(av5, bsv5, cv)
264  cv = vec_madd(av6, bsv6, cv)
265  cv = vec_madd(av7, bsv7, cv)
266  cv = vec_madd(av8, bsv8, cv)
267  cv = vec_madd(av9, bsv9, cv)
268  cv = vec_madd(av10, bsv10, cv)
269 
270  call vec_st(cv, 0, c(i,j))
271 
272  bv1 = vec_ld(0,b(13,j))
273  bv2 = vec_ld(0,b(17,j))
274 
275  bsv1 = vec_splat(bv3, 2)
276  bsv2 = vec_splat(bv3, 3)
277  bsv3 = vec_splat(bv1, 0)
278  bsv4 = vec_splat(bv1, 1)
279  bsv5 = vec_splat(bv1, 2)
280  bsv6 = vec_splat(bv1, 3)
281  bsv7 = vec_splat(bv2, 0)
282  bsv8 = vec_splat(bv2, 1)
283  bsv9 = vec_splat(bv2, 2)
284  bsv10 = vec_splat(bv2, 3)
285 
286  cv = vec_mul(av1, bsv1)
287  cv = vec_madd(av2, bsv2, cv)
288  cv = vec_madd(av3, bsv3, cv)
289  cv = vec_madd(av4, bsv4, cv)
290  cv = vec_madd(av5, bsv5, cv)
291  cv = vec_madd(av6, bsv6, cv)
292  cv = vec_madd(av7, bsv7, cv)
293  cv = vec_madd(av8, bsv8, cv)
294  cv = vec_madd(av9, bsv9, cv)
295  cv = vec_madd(av10, bsv10, cv)
296 
297  call vec_st(cv, 0, c(i,j+1))
298  end do
299  end do
300  return
301  end subroutine mxm_bgq_10
302 
subroutine mxm_bgq_16(a, n1, b, n2, c, n3)
Definition: mxm_bgq.f:56
subroutine mxm_bgq_10(a, n1, b, n2, c, n3)
Definition: mxm_bgq.f:212
subroutine mxm_bgq_8(a, n1, b, n2, c, n3)
Definition: mxm_bgq.f:2
subroutine mxm_bgq_6(a, n1, b, n2, c, n3)
Definition: mxm_bgq.f:143