-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmmm.c
More file actions
75 lines (62 loc) · 2.18 KB
/
mmm.c
File metadata and controls
75 lines (62 loc) · 2.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/* Give mmm_ C linkage when this file is compiled as C++, so the symbol
   name is not mangled. */
#ifdef __cplusplus
extern "C" {
#endif
/*
 * mmm_ - square matrix multiply, c = a * b.
 *
 * len : pointer to the matrix order n.
 * a,b : input matrices; the definition reads element (r, s) at offset r*n + s.
 * c   : output matrix, same indexing; each of its n*n elements is zeroed and
 *       then accumulated into.
 *
 * NOTE(review): the trailing underscore and the by-pointer length look like a
 * Fortran-callable binding -- confirm against the callers.
 */
void mmm_( int *len, double *a, double *b, double*c );
#ifdef __cplusplus
}
#endif
/*
 * S E R I A L   C O D E
 *
 * mmm_ - square matrix multiply, c = a * b.
 *
 * len : pointer to the matrix order n; read once on entry. A value <= 0
 *       makes the call a no-op (no element of c is touched).
 * a,b : input matrices of n*n doubles; element (r, s) lives at offset r*n + s.
 * c   : output matrix, same layout. Each element is set to 0.0 and then
 *       accumulated into in place, exactly as a plain triple loop would.
 *
 * Compile-time variants (selected by defining the macro, with or without a
 * value; STRIP8 wins if both are defined):
 *   STRIP8 - inner k loop unrolled by 8
 *   STRIP4 - inner k loop unrolled by 4
 *   (none) - plain triple loop
 * In the unrolled variants the first n % stride terms are handled one at a
 * time, so the unrolled loop always covers a whole number of strides and
 * never reads past the end of a row or column.
 */
void mmm_( int *len, double *a, double *b, double *c ){
    /* Widen once: element offsets go up to n*n - 1, which overflows int
       (undefined behaviour) as soon as n exceeds 46340. */
    const long long n = *len;
    long long i, j, k;

#if defined(STRIP8)
    const long long stride = 8;
#elif defined(STRIP4)
    const long long stride = 4;
#endif
#if defined(STRIP8) || defined(STRIP4)
    /* Number of leading k terms that do not fill a whole stride. */
    const long long mod = n % stride;
#endif

    for (i = 0; i < n; i++) {
        const double *a_row = a + i * n;   /* row i of a */
        double *c_row = c + i * n;         /* row i of c */

        for (j = 0; j < n; j++) {
            const double *b_col = b + j;   /* column j of b; step by n per row */
            double *out = c_row + j;

            *out = 0.0;

#if defined(STRIP8) || defined(STRIP4)
            /* Remainder terms first, one product at a time. */
            for (k = 0; k < mod; k++) {
                *out += a_row[k] * b_col[k * n];
            }
#endif

#if defined(STRIP8)
            for (k = mod; k < n; k += stride) {
                *out += a_row[k    ] * b_col[ k      * n]
                      + a_row[k + 1] * b_col[(k + 1) * n]
                      + a_row[k + 2] * b_col[(k + 2) * n]
                      + a_row[k + 3] * b_col[(k + 3) * n]
                      + a_row[k + 4] * b_col[(k + 4) * n]
                      + a_row[k + 5] * b_col[(k + 5) * n]
                      + a_row[k + 6] * b_col[(k + 6) * n]
                      + a_row[k + 7] * b_col[(k + 7) * n];
            }
#elif defined(STRIP4)
            for (k = mod; k < n; k += stride) {
                *out += a_row[k    ] * b_col[ k      * n]
                      + a_row[k + 1] * b_col[(k + 1) * n]
                      + a_row[k + 2] * b_col[(k + 2) * n]
                      + a_row[k + 3] * b_col[(k + 3) * n];
            }
#else
            /* Normal matrix multiplication. */
            for (k = 0; k < n; k++) {
                *out += a_row[k] * b_col[k * n];
            }
#endif
        }
    }
}