From 768fc5d07c35a583b3a1657ca617041ee4e2f0ee Mon Sep 17 00:00:00 2001 From: David Schleef Date: Sun, 18 Oct 2009 21:02:30 -0700 Subject: [PATCH] sse: Special case for constant n programs --- orc-test/orctest.c | 15 ++++-- orc/orcprogram-sse.c | 109 ++++++++++++++++++++++++++----------------- testsuite/exec_opcodes_sys.c | 75 +++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 47 deletions(-) diff --git a/orc-test/orctest.c b/orc-test/orctest.c index 5ba4c7c..e3ef888 100644 --- a/orc-test/orctest.c +++ b/orc-test/orctest.c @@ -292,7 +292,7 @@ OrcTestResult orc_test_compare_output_full (OrcProgram *program, int flags) { OrcExecutor *ex; - int n = 64 + (orc_random(&rand_context)&0xf); + int n; int m; OrcArray *dest_exec[4] = { NULL, NULL, NULL, NULL }; OrcArray *dest_emul[4] = { NULL, NULL, NULL, NULL }; @@ -308,7 +308,6 @@ orc_test_compare_output_full (OrcProgram *program, int flags) ORC_DEBUG ("got here"); -flags |= ORC_TEST_FLAGS_FLOAT; if (!(flags & ORC_TEST_FLAGS_BACKUP)) { OrcTarget *target; unsigned int flags; @@ -322,10 +321,20 @@ flags |= ORC_TEST_FLAGS_FLOAT; } } + if (program->constant_n > 0) { + n = program->constant_n; + } else { + n = 64 + (orc_random(&rand_context)&0xf); + } + ex = orc_executor_new (program); orc_executor_set_n (ex, n); if (program->is_2d) { - m = 8 + (orc_random(&rand_context)&0xf); + if (program->constant_m > 0) { + m = program->constant_m; + } else { + m = 8 + (orc_random(&rand_context)&0xf); + } } else { m = 1; } diff --git a/orc/orcprogram-sse.c b/orc/orcprogram-sse.c index f28b89d..5852716 100644 --- a/orc/orcprogram-sse.c +++ b/orc/orcprogram-sse.c @@ -16,6 +16,7 @@ #define SIZE 65536 +#define ORC_SSE_ALIGNED_DEST_CUTOFF 64 void orc_sse_emit_loop (OrcCompiler *compiler); @@ -535,65 +536,85 @@ orc_compiler_sse_assemble (OrcCompiler *compiler) sse_load_constants_inner (compiler); - if (compiler->loop_shift > 0) { + if (compiler->program->constant_n > 0 && + compiler->program->constant_n <= 
ORC_SSE_ALIGNED_DEST_CUTOFF) { + int n_left = compiler->program->constant_n; int save_loop_shift; - int l; + int loop_shift; save_loop_shift = compiler->loop_shift; - compiler->vars[align_var].is_aligned = FALSE; - - for (l=0;l<save_loop_shift;l++){ - compiler->loop_shift = l; - ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); - - orc_x86_emit_test_imm_memoffset (compiler, 4, 1<<compiler->loop_shift, - (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), compiler->exec_reg); - orc_x86_emit_je (compiler, 12 + compiler->loop_shift); + while (n_left >= (1<<compiler->loop_shift)) { orc_sse_emit_loop (compiler); - orc_x86_emit_label (compiler, 12 + compiler->loop_shift); - } + n_left -= 1<<compiler->loop_shift; + } + for(loop_shift = compiler->loop_shift-1; loop_shift>=0; loop_shift--) { + if (n_left >= (1<<loop_shift)) { + compiler->loop_shift = loop_shift; + orc_sse_emit_loop (compiler); + n_left -= 1<<loop_shift; + } + } + compiler->loop_shift = save_loop_shift; + } else { + if (compiler->loop_shift > 0) { + int save_loop_shift; + int l; + + save_loop_shift = compiler->loop_shift; + compiler->vars[align_var].is_aligned = FALSE; + + for (l=0;l<save_loop_shift;l++){ + compiler->loop_shift = l; + ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); + + orc_x86_emit_test_imm_memoffset (compiler, 4, 1<<compiler->loop_shift, + (int)ORC_STRUCT_OFFSET(OrcExecutor,counter1), compiler->exec_reg); + orc_x86_emit_je (compiler, 12 + compiler->loop_shift); + orc_sse_emit_loop (compiler); + orc_x86_emit_label (compiler, 12 + compiler->loop_shift); + } - compiler->loop_shift = save_loop_shift; - compiler->vars[align_var].is_aligned = TRUE; - } + compiler->loop_shift = save_loop_shift; + compiler->vars[align_var].is_aligned = TRUE; + } - orc_x86_emit_label (compiler, 1); + orc_x86_emit_label (compiler, 1); - orc_x86_emit_cmp_imm_memoffset (compiler, 4, 0, - (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), compiler->exec_reg); - orc_x86_emit_je (compiler, 3); + orc_x86_emit_cmp_imm_memoffset (compiler, 4, 0, + (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), compiler->exec_reg); + orc_x86_emit_je (compiler, 3); + + ORC_ASM_CODE(compiler, 
"# LOOP SHIFT %d\n", compiler->loop_shift); + orc_x86_emit_align (compiler); + orc_x86_emit_label (compiler, 2); + orc_sse_emit_loop (compiler); + orc_x86_emit_dec_memoffset (compiler, 4, + (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), + compiler->exec_reg); + orc_x86_emit_jne (compiler, 2); + orc_x86_emit_label (compiler, 3); - ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); - orc_x86_emit_align (compiler); - orc_x86_emit_label (compiler, 2); - orc_sse_emit_loop (compiler); - orc_x86_emit_dec_memoffset (compiler, 4, - (int)ORC_STRUCT_OFFSET(OrcExecutor,counter2), - compiler->exec_reg); - orc_x86_emit_jne (compiler, 2); - orc_x86_emit_label (compiler, 3); + if (compiler->loop_shift > 0) { + int save_loop_shift; + int l; - if (compiler->loop_shift > 0) { - int save_loop_shift; - int l; + save_loop_shift = compiler->loop_shift; + compiler->vars[align_var].is_aligned = FALSE; - save_loop_shift = compiler->loop_shift; - compiler->vars[align_var].is_aligned = FALSE; + for(l=save_loop_shift - 1; l >= 0; l--) { + compiler->loop_shift = l; + ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); - for(l=save_loop_shift - 1; l >= 0; l--) { - compiler->loop_shift = l; - ORC_ASM_CODE(compiler, "# LOOP SHIFT %d\n", compiler->loop_shift); + orc_x86_emit_test_imm_memoffset (compiler, 4, 1<<compiler->loop_shift, + (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg); + orc_x86_emit_je (compiler, 8 + compiler->loop_shift); + orc_sse_emit_loop (compiler); + orc_x86_emit_label (compiler, 8 + compiler->loop_shift); + } - orc_x86_emit_test_imm_memoffset (compiler, 4, 1<<compiler->loop_shift, - (int)ORC_STRUCT_OFFSET(OrcExecutor,counter3), compiler->exec_reg); - orc_x86_emit_je (compiler, 8 + compiler->loop_shift); - orc_sse_emit_loop (compiler); - orc_x86_emit_label (compiler, 8 + compiler->loop_shift); + compiler->loop_shift = save_loop_shift; } - - compiler->loop_shift = save_loop_shift; } if (compiler->program->is_2d) { diff --git 
a/testsuite/exec_opcodes_sys.c b/testsuite/exec_opcodes_sys.c index 84b880c..a6cd99f 100644 --- a/testsuite/exec_opcodes_sys.c +++ b/testsuite/exec_opcodes_sys.c @@ -14,6 +14,8 @@ void test_opcode_const (OrcStaticOpcode *opcode); void test_opcode_param (OrcStaticOpcode *opcode); void test_opcode_inplace (OrcStaticOpcode *opcode); void test_opcode_src_2d (OrcStaticOpcode *opcode); +void test_opcode_src_const_n (OrcStaticOpcode *opcode); +void test_opcode_src_const_n_2d (OrcStaticOpcode *opcode); int main (int argc, char *argv[]) @@ -66,6 +68,22 @@ main (int argc, char *argv[]) opcode_set->opcodes[i].src_size[1]); test_opcode_src_2d (opcode_set->opcodes + i); } + for(i=0;i<opcode_set->n_opcodes;i++){ + printf("/* %s src const n %d,%d,%d */\n", + opcode_set->opcodes[i].name, + opcode_set->opcodes[i].dest_size[0], + opcode_set->opcodes[i].src_size[0], + opcode_set->opcodes[i].src_size[1]); + test_opcode_src_const_n (opcode_set->opcodes + i); + } + for(i=0;i<opcode_set->n_opcodes;i++){ + printf("/* %s src const n 2d %d,%d,%d */\n", + opcode_set->opcodes[i].name, + opcode_set->opcodes[i].dest_size[0], + opcode_set->opcodes[i].src_size[0], + opcode_set->opcodes[i].src_size[1]); + test_opcode_src_const_n_2d (opcode_set->opcodes + i); + } if (error) return 1; return 0; @@ -207,3 +225,60 @@ test_opcode_src_2d (OrcStaticOpcode *opcode) orc_program_free (p); } +void +test_opcode_src_const_n (OrcStaticOpcode *opcode) +{ + OrcProgram *p; + char s[40]; + int ret; + + if (opcode->src_size[1] == 0) { + p = orc_program_new_ds (opcode->dest_size[0], opcode->src_size[0]); + } else { + p = orc_program_new_dss (opcode->dest_size[0], opcode->src_size[0], + opcode->src_size[1]); + } + + sprintf(s, "test_s_%s", opcode->name); + orc_program_set_name (p, s); + orc_program_set_constant_n (p, 8); + + orc_program_append_str (p, opcode->name, "d1", "s1", "s2"); + + ret = orc_test_compare_output (p); + if (!ret) { + error = TRUE; + } + + orc_program_free (p); +} + +void +test_opcode_src_const_n_2d (OrcStaticOpcode 
*opcode) +{ + OrcProgram *p; + char s[40]; + int ret; + + if (opcode->src_size[1] == 0) { + p = orc_program_new_ds (opcode->dest_size[0], opcode->src_size[0]); + } else { + p = orc_program_new_dss (opcode->dest_size[0], opcode->src_size[0], + opcode->src_size[1]); + } + + sprintf(s, "test_s_%s", opcode->name); + orc_program_set_name (p, s); + orc_program_set_2d (p); + orc_program_set_constant_n (p, 8); + + orc_program_append_str (p, opcode->name, "d1", "s1", "s2"); + + ret = orc_test_compare_output (p); + if (!ret) { + error = TRUE; + } + + orc_program_free (p); +} + -- 2.7.4