/* Generate reloads for matching OUT and INS (array of input operand
numbers with end marker -1) with reg class GOAL_CLASS. Add input
- and output reloads correspondingly to the lists *BEFORE and
- *AFTER. */
+ and output reloads correspondingly to the lists *BEFORE and *AFTER.
+ OUT might be negative. In this case we generate input reloads for
+ matched input operands INS. */
static void
match_reload (signed char out, signed char *ins, enum reg_class goal_class,
rtx *before, rtx *after)
rtx new_in_reg, new_out_reg, reg, clobber;
enum machine_mode inmode, outmode;
rtx in_rtx = *curr_id->operand_loc[ins[0]];
- rtx out_rtx = *curr_id->operand_loc[out];
+ rtx out_rtx = out < 0 ? in_rtx : *curr_id->operand_loc[out];
- outmode = curr_operand_mode[out];
inmode = curr_operand_mode[ins[0]];
+ outmode = out < 0 ? inmode : curr_operand_mode[out];
push_to_sequence (*before);
if (inmode != outmode)
{
= lra_create_new_reg_with_unique_value (outmode, out_rtx,
goal_class, "");
}
- /* In and out operand can be got from transformations before
- processing insn constraints. One example of such transformations
- is subreg reloading (see function simplify_operand_subreg). The
- new pseudos created by the transformations might have inaccurate
+ /* In operand can be got from transformations before processing insn
+ constraints. One example of such transformations is subreg
+ reloading (see function simplify_operand_subreg). The new
+ pseudos created by the transformations might have inaccurate
class (ALL_REGS) and we should make their classes more
accurate. */
narrow_reload_pseudo_class (in_rtx, goal_class);
- narrow_reload_pseudo_class (out_rtx, goal_class);
lra_emit_move (copy_rtx (new_in_reg), in_rtx);
*before = get_insns ();
end_sequence ();
*curr_id->operand_loc[in] = new_in_reg;
}
lra_update_dups (curr_id, ins);
+ if (out < 0)
+ return;
+ /* See a comment for the input operand above. */
+ narrow_reload_pseudo_class (out_rtx, goal_class);
if (find_reg_note (curr_insn, REG_UNUSED, out_rtx) == NULL_RTX)
{
start_sequence ();
int n_alternatives;
int commutative;
signed char goal_alt_matched[MAX_RECOG_OPERANDS][MAX_RECOG_OPERANDS];
+ signed char match_inputs[MAX_RECOG_OPERANDS + 1];
rtx before, after;
bool alt_p = false;
/* Flag that the insn has been changed through a transformation. */
&& (curr_static_id->operand[goal_alt_matched[i][0]].type
== OP_OUT))
{
- signed char arr[2];
-
- arr[0] = i;
- arr[1] = -1;
- match_reload (goal_alt_matched[i][0], arr,
+ /* generate reloads for input and matched outputs. */
+ match_inputs[0] = i;
+ match_inputs[1] = -1;
+ match_reload (goal_alt_matched[i][0], match_inputs,
goal_alt[i], &before, &after);
}
else if (curr_static_id->operand[i].type == OP_OUT
&& (curr_static_id->operand[goal_alt_matched[i][0]].type
== OP_IN))
+ /* Generate reloads for output and matched inputs. */
match_reload (i, goal_alt_matched[i], goal_alt[i], &before, &after);
+ else if (curr_static_id->operand[i].type == OP_IN
+ && (curr_static_id->operand[goal_alt_matched[i][0]].type
+ == OP_IN))
+ {
+ /* Generate reloads for matched inputs. */
+ match_inputs[0] = i;
+ for (j = 0; (k = goal_alt_matched[i][j]) >= 0; j++)
+ match_inputs[j + 1] = k;
+ match_inputs[j + 1] = -1;
+ match_reload (-1, match_inputs, goal_alt[i], &before, &after);
+ }
else
/* We must generate code in any case when function
process_alt_operands decides that it is possible. */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse3 -fno-expensive-optimizations" } */
+
+typedef double __m128d __attribute__ ((__vector_size__ (16)));
+
+extern double p1[];
+extern double p2[];
+extern double ck[];
+extern int n;
+
+__attribute__((__noinline__, __noclone__)) int chk_pd (double *v1, double *v2)
+{
+ return v2[n] != v1[n];
+}
+
+static inline void sse3_test_movddup_reg_subsume_ldsd (double *i1, double *r)
+{
+ __m128d t1 = (__m128d){*i1, 0};
+ __m128d t2 = __builtin_ia32_shufpd (t1, t1, 0);
+ __builtin_ia32_storeupd (r, t2);
+}
+
+int sse3_test (void)
+{
+ int i = 0;
+ int fail = 0;
+ for (; i < 80; i += 1)
+ {
+ ck[0] = p1[0];
+ fail += chk_pd (ck, p2);
+ sse3_test_movddup_reg_subsume_ldsd (p1, p2);
+ }
+ return fail;
+}