-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathprobe_heat_circqueue.c
174 lines (153 loc) · 5.85 KB
/
probe_heat_circqueue.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
/* Circular queue stencil code
* Kaushik Datta ([email protected])
* University of California Berkeley
*
* This code implements the circular queue algorithm for stencil codes.
* Intermediate queues, each with three revolving planes, store temporary
* results until the final result is written to the target array. Unlike
* the time skewing algorithm, this algorithm will perform redundant
* computation between adjacent slabs.
*
* NOTE: Only the cache block's y-dimension is used in this code; it
* specifies the size of the circular queue's y-dimension. The grid's
* y-dimension needs to be a multiple of the cache block's y-dimension.
*/
#include <stdio.h>
#include <stdlib.h>
#include "common.h"
#define MAX(x,y) (x > y ? x : y)
double *queuePlanes, *queuePlane0, *queuePlane1, *queuePlane2;
int *queuePlanesIndices;
/* This method creates the circular queues that will be needed for the
circular_queue() method. It is only called when more than one iteration
is being performed. */
void CircularQueueInit(int nx, int ty, int timesteps) {
int numPointsInQueuePlane, t;
queuePlanesIndices = (int *) malloc((timesteps-1) * sizeof(int));
if (queuePlanesIndices==NULL) {
printf("Error on array queuePlanesIndices malloc.\n");
exit(EXIT_FAILURE);
}
int queuePlanesIndexPtr = 0;
for (t=1; t < timesteps; t++) {
queuePlanesIndices[t-1] = queuePlanesIndexPtr;
numPointsInQueuePlane = (ty+2*(timesteps-t)) * nx;
queuePlanesIndexPtr += numPointsInQueuePlane;
}
queuePlanes = (double *) malloc(3 * queuePlanesIndexPtr * sizeof(double));
if (queuePlanes==NULL) {
printf("Error on array queuePlanes malloc.\n");
exit(EXIT_FAILURE);
}
queuePlane0 = queuePlanes;
queuePlane1 = &queuePlanes[queuePlanesIndexPtr];
queuePlane2 = &queuePlanes[2 * queuePlanesIndexPtr];
}
/* This method traverses each slab and uses the circular queues to perform the
specified number of iterations. The circular queue at a given timestep is
shrunken in the y-dimension from the circular queue at the previous timestep. */
#ifdef STENCILTEST
void StencilProbe_circqueue(double *A0, double *Anext, int nx, int ny, int nz,
int tx, int ty, int tz, int timesteps) {
#else
void StencilProbe(double *A0, double *Anext, int nx, int ny, int nz,
int tx, int ty, int tz, int timesteps) {
#endif
double *readQueuePlane0, *readQueuePlane1, *readQueuePlane2, *writeQueuePlane, *tempQueuePlane;
int blockMin_y, blockMax_y;
int writeBlockMin_y, writeBlockMax_y;
int writeBlockRealMin_y, writeBlockRealMax_y;
int readBlockUnitStride_y, writeBlockUnitStride_y;
int readOffset, writeOffset;
int i, j, k, s, t;
double fac = A0[0];
int numBlocks_y = (ny-2)/ty;
for (s=0; s < numBlocks_y; s++) {
for (k=1; k < (nz+timesteps-2); k++) {
for (t=0; t < timesteps; t++) {
if ((k > t) && (k < (nz+t-1))) {
if (t == 0) {
readQueuePlane0 = &A0[Index3D(nx, ny, 0, 0, k-1)];
readQueuePlane1 = &A0[Index3D(nx, ny, 0, 0, k)];
readQueuePlane2 = &A0[Index3D(nx, ny, 0, 0, k+1)];
}
else {
readQueuePlane0 = &queuePlane0[queuePlanesIndices[t-1]];
readQueuePlane1 = &queuePlane1[queuePlanesIndices[t-1]];
readQueuePlane2 = &queuePlane2[queuePlanesIndices[t-1]];
}
// determine the edges of the queues
writeBlockMin_y = s * ty - (timesteps-t) + 2;
writeBlockMax_y = (s+1) * ty + (timesteps-t);
writeBlockRealMin_y = writeBlockMin_y;
writeBlockRealMax_y = writeBlockMax_y;
if (writeBlockMin_y < 1) {
writeBlockMin_y = 0;
writeBlockRealMin_y = 1;
}
if (writeBlockMax_y > (ny-1)) {
writeBlockMax_y = ny;
writeBlockRealMax_y = ny-1;
}
if (t == (timesteps-1)) {
writeQueuePlane = Anext;
writeOffset = 0;
}
else {
writeQueuePlane = &queuePlane2[queuePlanesIndices[t]];
writeOffset = Index3D(nx, ny, 0, writeBlockMin_y, k-t);
}
if ((writeBlockMin_y == 0) || (t == 0)) {
readOffset = Index3D(nx, ny, 0, 0, k-t);
}
else {
readOffset = Index3D(nx, ny, 0, writeBlockMin_y-1, k-t);
}
// use ghost cells for the bottommost and topmost planes
if (k == (t+1)) {
readQueuePlane0 = A0;
}
if (k == (nz+t-2)) {
readQueuePlane2 = &A0[Index3D(nx, ny, 0, 0, nz-1)];
}
// copy ghost cells
if (t < (timesteps-1)) {
for (j=(writeBlockMin_y+1); j < (writeBlockMax_y-1); j++) {
writeQueuePlane[Index3D(nx, ny, 0, j, k-t) - writeOffset] = readQueuePlane1[Index3D(nx, ny, 0, j, k-t) - readOffset];
writeQueuePlane[Index3D(nx, ny, nx-1, j, k-t) - writeOffset] = readQueuePlane1[Index3D(nx, ny, nx-1, j, k-t) - readOffset];
}
if (writeBlockMin_y == 0) {
for (i=1; i < (nx-1); i++) {
writeQueuePlane[Index3D(nx, ny, i, writeBlockMin_y, k-t) - writeOffset] = readQueuePlane1[Index3D(nx, ny, i, writeBlockMin_y, k-t) - readOffset];
}
}
if (writeBlockMax_y == ny) {
for (i=1; i < (nx-1); i++) {
writeQueuePlane[Index3D(nx, ny, i, writeBlockRealMax_y, k-t) - writeOffset] = readQueuePlane1[Index3D(nx, ny, i, writeBlockRealMax_y, k-t) - readOffset];
}
}
}
// actual calculations
for (j=writeBlockRealMin_y; j < writeBlockRealMax_y; j++) {
for (i=1; i < (nx-1); i++) {
writeQueuePlane[Index3D(nx, ny, i, j, k-t) - writeOffset] =
readQueuePlane0[Index3D(nx, ny, i, j, k-t) - readOffset] +
readQueuePlane2[Index3D(nx, ny, i, j, k-t) - readOffset] +
readQueuePlane1[Index3D(nx, ny, i, j-1, k-t) - readOffset] +
readQueuePlane1[Index3D(nx, ny, i-1, j, k-t) - readOffset] +
readQueuePlane1[Index3D(nx, ny, i+1, j, k-t) - readOffset] +
readQueuePlane1[Index3D(nx, ny, i, j+1, k-t) - readOffset]
- 6.0 * readQueuePlane1[Index3D(nx, ny, i, j, k-t) - readOffset] / (fac*fac);
}
}
}
}
if (t > 0) {
tempQueuePlane = queuePlane0;
queuePlane0 = queuePlane1;
queuePlane1 = queuePlane2;
queuePlane2 = tempQueuePlane;
}
}
}
}