#define MISALIGNMENT 0
OrcArray *
-orc_array_new (int n, int m, int element_size, int misalignment)
+orc_array_new (int n, int m, int element_size, int misalignment,
+ int alignment) /* alignment: unit multiplied by misalignment to form the data offset; 0 selects element_size */
{
OrcArray *ar;
void *data;
#ifdef HAVE_POSIX_MEMALIGN
int ret;
#endif
+ int offset; /* offset added when deriving ar->data from ar->alloc_data */
ar = malloc (sizeof(OrcArray));
memset (ar, 0, sizeof(OrcArray));
#endif
ar->alloc_data = data;
+ if (alignment == 0) alignment = element_size; /* 0 falls back to element_size, the unit the removed expression used */
+ offset = (alignment * misalignment) & (ALIGNMENT - 1); /* masked with ALIGNMENT - 1; assumes ALIGNMENT is a power of two - TODO confirm, its definition is not visible here */
+
ar->data = ORC_PTR_OFFSET (ar->alloc_data,
- ar->stride * EXTEND_ROWS + element_size * misalignment);
+ ar->stride * EXTEND_ROWS + offset); /* data begins ar->stride * EXTEND_ROWS + offset past alloc_data */
return ar;
}
ORC_PATTERN_FLOAT_DENORMAL
};
-OrcArray *orc_array_new (int n, int m, int element_size, int misalignment);
+OrcArray *orc_array_new (int n, int m, int element_size, int misalignment,
+ int alignment); /* alignment: new trailing argument; passing 0 makes orc_array_new use element_size instead */
void orc_array_free (OrcArray *array);
void orc_array_set_pattern (OrcArray *array, int value);
if (program->vars[i].vartype == ORC_VAR_TYPE_SRC) {
src[i-ORC_VAR_S1] = orc_array_new (n, m, program->vars[i].size,
- misalignment);
+ misalignment, program->vars[i].alignment);
orc_array_set_random (src[i-ORC_VAR_S1], &rand_context);
misalignment++;
} else if (program->vars[i].vartype == ORC_VAR_TYPE_DEST) {
dest_exec[i-ORC_VAR_D1] = orc_array_new (n, m, program->vars[i].size,
- misalignment);
+ misalignment, program->vars[i].alignment);
orc_array_set_pattern (dest_exec[i], ORC_OOB_VALUE);
dest_emul[i-ORC_VAR_D1] = orc_array_new (n, m, program->vars[i].size,
- misalignment);
+ misalignment, program->vars[i].alignment);
orc_array_set_pattern (dest_emul[i], ORC_OOB_VALUE);
misalignment++;
} else if (program->vars[i].vartype == ORC_VAR_TYPE_PARAM) {
if (program->vars[i].vartype == ORC_VAR_TYPE_SRC) {
src[i-ORC_VAR_S1] = orc_array_new (n, m, program->vars[i].size,
- misalignment);
+ misalignment, program->vars[i].alignment);
orc_array_set_random (src[i-ORC_VAR_S1], &rand_context);
misalignment++;
} else if (program->vars[i].vartype == ORC_VAR_TYPE_DEST) {
dest_exec[i-ORC_VAR_D1] = orc_array_new (n, m, program->vars[i].size,
- misalignment);
+ misalignment, program->vars[i].alignment);
orc_array_set_pattern (dest_exec[i], ORC_OOB_VALUE);
dest_emul[i-ORC_VAR_D1] = orc_array_new (n, m, program->vars[i].size,
- misalignment);
+ misalignment, program->vars[i].alignment);
orc_array_set_pattern (dest_emul[i], ORC_OOB_VALUE);
misalignment++;
} else if (program->vars[i].vartype == ORC_VAR_TYPE_PARAM) {
if (program->vars[i].name == NULL) continue;
if (program->vars[i].vartype == ORC_VAR_TYPE_SRC) {
- src[i-ORC_VAR_S1] = orc_array_new (n, m, program->vars[i].size, 0);
+ src[i-ORC_VAR_S1] = orc_array_new (n, m, program->vars[i].size, 0, 0);
orc_array_set_random (src[i-ORC_VAR_S1], &rand_context);
} else if (program->vars[i].vartype == ORC_VAR_TYPE_DEST) {
- dest[i-ORC_VAR_D1] = orc_array_new (n, m, program->vars[i].size, 0);
+ dest[i-ORC_VAR_D1] = orc_array_new (n, m, program->vars[i].size, 0, 0);
orc_array_set_pattern (dest[i], ORC_OOB_VALUE);
} else if (program->vars[i].vartype == ORC_VAR_TYPE_PARAM) {
switch (program->vars[i].param_type) {
avgub v, t1, t2
+# memcpy_aligned: copyb from s1 to d1; both 1-byte operands are declared "align 16".
+.function memcpy_aligned
+.dest 1 d1 align 16 void
+.source 1 s1 align 16 void
+
+copyb d1, s1
+
+
+# memcpy_large: copyb from s1 to d1, declared with ".n minimum 4096" and no alignment on either operand.
+.function memcpy_large
+.n minimum 4096
+.dest 1 d1 void
+.source 1 s1 void
+
+copyb d1, s1
+
+
+# memcpy_small: copyb from s1 to d1, declared with ".n maximum 4096" and no alignment on either operand.
+.function memcpy_small
+.n maximum 4096
+.dest 1 d1 void
+.source 1 s1 void
+
+copyb d1, s1
+
+
+# memcpy_aligned_n16: copyb from s1 to d1; operands are "align 16" and n is declared ".n multiple 16".
+.function memcpy_aligned_n16
+.n multiple 16
+.dest 1 d1 align 16 void
+.source 1 s1 align 16 void
+
+copyb d1, s1
+
+