1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
|
//gcc -msse2 -O3 -ftree-vectorize test_dcl.c dclhelpers.c os_generic.c -DFLT=double -lpthread -lcblas && valgrind ./a.out
#include "dclhelpers.h"
#include "os_generic.h"
#include <assert.h>
#include <cblas.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
void compareToCblas() {
FLT em1[12][20];
FLT em2[20][20];
FLT emo[2][20][20] = {};
int x, y;
for (y = 0; y < 12; y++)
for (x = 0; x < 20; x++)
em1[y][x] = (rand() % 1000) / 1000.0;
for (y = 0; y < 20; y++)
for (x = 0; x < 20; x++)
em2[y][x] = (rand() % 1000) / 1000.0;
int m = 12;
int n = 20;
int k = 20;
dclPrint(DMS(em1), 12, 20);
dclPrint(DMS(em2), 20, 12);
double times[2];
for (int z = 0; z < 2; z++) {
double start = OGGetAbsoluteTime();
for (int i = 0; i < 100000; i++) {
dclZero(DMS(emo[z]), 20, 20);
if (z == 0) {
dcldgemm(0, 0, m, n, k, 1.0, DMS(em1), DMS(em2), .1, DMS(emo[z]));
} else {
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, 1.0, DMS(em1), DMS(em2), .1,
DMS(emo[z]));
}
/*void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const double alpha, const double *A,
const int lda, const double *B, const int ldb,
const double beta, double *C, const int ldc);*/
}
printf("%s Elapsed: %f\n", z ? "CBlas" : "dcl", times[z] = OGGetAbsoluteTime() - start);
}
printf("%fx difference\n", times[0] / times[1]);
dclPrint(emo[0][0], 12, 20, 12);
dclPrint(emo[1][0], 12, 20, 12);
}
int main()
{
FLT A[2][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7} };
FLT B[4][2];
dclPrint( A[0], 4, 2, 4 );
dclTransp( B[0], 2, A[0], 4, 2, 4 );
dclPrint( B[0], 2, 4, 2 );
int i, j;
for( i = 0; i < 8; i++ )
{
printf( "%f\n", ((float*)(B[0]))[i] );
}
FLT M[3][3] = {
{ .32, 1, 0 },
{ 0, 1, 2 },
{ 1, 0, 1 } };
FLT Mo[3][3];
dclInv( Mo[0], 3, M[0], 3, 3 );
dclPrint( Mo[0], 3, 3, 3 );
FLT MM[3][3];
dclMul( MM[0], 3, M[0], 3, Mo[0], 3, 3, 3, 3 );
printf( "The following should be an identity matrix\n" );
dclPrint( MM[0], 3, 3, 3 );
{
FLT A[3][4];
dclIdentity( DMS(A), 3, 4);
dclPrint( DMS(A), 3, 4);
FLT x[4][2] = {
{7, -7}, {8, -8}, {9, -9}, {10, -10},
};
FLT R[4][2];
dclZero( DMS(R), 4, 2 );
// dclMul(R, 1, A[0], 4, x, 1, 4, 1, 3);
dcldgemm(0, 0, 3, 4, 2, 1, A[0], 4, x[0], 2, 0, R[0], 2);
dclPrint(DMS(x), 4, 2);
dclPrint(DMS(R), 3, 2);
for (int j = 0; j < 2; j++) {
for (int i = 0; i < 3; i++)
{
printf( "[%d][%d]\n", i, j );
assert(R[i][j] == x[i][j]);
}
assert(fabs(R[3][j]) < .0000001);
}
}
compareToCblas();
}
|