1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
5 Free Software Foundation, Inc.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
33 #include "insn-config.h"
38 #include "langhooks.h"
/* Forward declarations of file-local helpers used by the bit-field and
   arithmetic expansion routines below.  */
static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT, rtx);
static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				   unsigned HOST_WIDE_INT, rtx);
static rtx extract_fixed_bit_field (enum machine_mode, rtx,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, rtx, int);
static rtx mask_rtx (enum machine_mode, int, int, int);
static rtx lshift_value (enum machine_mode, rtx, int, int);
static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT, int);
static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
/* Test whether a value is zero or a power of two.  */
60 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
62 /* Nonzero means divides or modulus operations are relatively cheap for
63 powers of two, so don't use branches; emit the operation instead.
64 Usually, this will mean that the MD file will emit non-branch
67 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
68 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
70 #ifndef SLOW_UNALIGNED_ACCESS
71 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
74 /* For compilers that support multiple targets with different word sizes,
75 MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
76 is the H8/300(H) compiler. */
78 #ifndef MAX_BITS_PER_WORD
79 #define MAX_BITS_PER_WORD BITS_PER_WORD
82 /* Reduce conditional compilation elsewhere. */
85 #define CODE_FOR_insv CODE_FOR_nothing
86 #define gen_insv(a,b,c,d) NULL_RTX
90 #define CODE_FOR_extv CODE_FOR_nothing
91 #define gen_extv(a,b,c,d) NULL_RTX
95 #define CODE_FOR_extzv CODE_FOR_nothing
96 #define gen_extzv(a,b,c,d) NULL_RTX
/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  */
static int zero_cost;		/* Cost of the constant zero.  */
static int add_cost[NUM_MACHINE_MODES];		/* Cost of a PLUS, per mode.  */
static int neg_cost[NUM_MACHINE_MODES];		/* Cost of a NEG, per mode.  */
/* Cost of an ASHIFT by N, per mode and shift count.  */
static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
/* Cost of (x << N) + y, per mode and shift count.  */
static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
/* Cost of (x << N) - y, per mode and shift count.  */
static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];		/* Cost of a MULT, per mode.  */
static int sdiv_cost[NUM_MACHINE_MODES];	/* Cost of signed DIV, per mode.  */
static int udiv_cost[NUM_MACHINE_MODES];	/* Cost of UDIV, per mode.  */
/* Cost of a widening multiply (zero-extended operands), indexed by the
   wider result mode.  */
static int mul_widen_cost[NUM_MACHINE_MODES];
/* Cost of extracting the high part of a widening multiply
   (truncate of lshiftrt of wide mult), indexed by the narrow mode.  */
static int mul_highpart_cost[NUM_MACHINE_MODES];
  /* Scratch RTL skeletons used only for cost measurement; each rtx_def is
     immediately followed by storage for its operand rtunion fields.
     NOTE(review): the enclosing declaration and function signature are
     elided in this extraction — this appears to be the body of the cost
     initializer (init_expmed); several lines are missing throughout.  */
  struct rtx_def reg;	rtunion reg_fld[2];
  struct rtx_def plus;	rtunion plus_fld1;
  struct rtx_def mult;	rtunion mult_fld1;
  struct rtx_def sdiv;	rtunion sdiv_fld1;
  struct rtx_def udiv;	rtunion udiv_fld1;
  struct rtx_def sdiv_32;	rtunion sdiv_32_fld1;
  struct rtx_def smod_32;	rtunion smod_32_fld1;
  struct rtx_def wide_mult;	rtunion wide_mult_fld1;
  struct rtx_def wide_lshr;	rtunion wide_lshr_fld1;
  struct rtx_def wide_trunc;
  struct rtx_def shift;	rtunion shift_fld1;
  struct rtx_def shift_mult;	rtunion shift_mult_fld1;
  struct rtx_def shift_add;	rtunion shift_add_fld1;
  struct rtx_def shift_sub;	rtunion shift_sub_fld1;
  /* Precomputed constants: pow2[m] == 1<<m, cint[m] == m.  */
  rtx pow2[MAX_BITS_PER_WORD];
  rtx cint[MAX_BITS_PER_WORD];
  enum machine_mode mode, wider_mode;
  zero_cost = rtx_cost (const0_rtx, 0);
  for (m = 1; m < MAX_BITS_PER_WORD; m++)
      pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
      cint[m] = GEN_INT (m);
  memset (&all, 0, sizeof all);
  PUT_CODE (&all.reg, REG);
  /* Avoid using hard regs in ways which may be unsupported.  */
  SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
  PUT_CODE (&all.plus, PLUS);
  XEXP (&all.plus, 0) = &all.reg;
  XEXP (&all.plus, 1) = &all.reg;
  PUT_CODE (&all.neg, NEG);
  XEXP (&all.neg, 0) = &all.reg;
  PUT_CODE (&all.mult, MULT);
  XEXP (&all.mult, 0) = &all.reg;
  XEXP (&all.mult, 1) = &all.reg;
  PUT_CODE (&all.sdiv, DIV);
  XEXP (&all.sdiv, 0) = &all.reg;
  XEXP (&all.sdiv, 1) = &all.reg;
  PUT_CODE (&all.udiv, UDIV);
  XEXP (&all.udiv, 0) = &all.reg;
  XEXP (&all.udiv, 1) = &all.reg;
  /* Division by the constant 32, used to probe whether pow2 sdiv/smod
     are cheap in each mode (see sdiv_pow2_cheap/smod_pow2_cheap).  */
  PUT_CODE (&all.sdiv_32, DIV);
  XEXP (&all.sdiv_32, 0) = &all.reg;
  XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
  PUT_CODE (&all.smod_32, MOD);
  XEXP (&all.smod_32, 0) = &all.reg;
  XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
  PUT_CODE (&all.zext, ZERO_EXTEND);
  XEXP (&all.zext, 0) = &all.reg;
  /* Widening multiply: (mult (zext reg) (zext reg)).  */
  PUT_CODE (&all.wide_mult, MULT);
  XEXP (&all.wide_mult, 0) = &all.zext;
  XEXP (&all.wide_mult, 1) = &all.zext;
  /* High-part pattern: (truncate (lshiftrt (wide_mult) bitsize)).  */
  PUT_CODE (&all.wide_lshr, LSHIFTRT);
  XEXP (&all.wide_lshr, 0) = &all.wide_mult;
  PUT_CODE (&all.wide_trunc, TRUNCATE);
  XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
  PUT_CODE (&all.shift, ASHIFT);
  XEXP (&all.shift, 0) = &all.reg;
  /* Shift-by-multiply form, combined into shift+add and shift-sub.  */
  PUT_CODE (&all.shift_mult, MULT);
  XEXP (&all.shift_mult, 0) = &all.reg;
  PUT_CODE (&all.shift_add, PLUS);
  XEXP (&all.shift_add, 0) = &all.shift_mult;
  XEXP (&all.shift_add, 1) = &all.reg;
  PUT_CODE (&all.shift_sub, MINUS);
  XEXP (&all.shift_sub, 0) = &all.shift_mult;
  XEXP (&all.shift_sub, 1) = &all.reg;
  /* Walk every integer mode, stamp the skeletons with that mode, and
     record the rtx_cost of each operation.  */
  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
       mode = GET_MODE_WIDER_MODE (mode))
      PUT_MODE (&all.reg, mode);
      PUT_MODE (&all.plus, mode);
      PUT_MODE (&all.neg, mode);
      PUT_MODE (&all.mult, mode);
      PUT_MODE (&all.sdiv, mode);
      PUT_MODE (&all.udiv, mode);
      PUT_MODE (&all.sdiv_32, mode);
      PUT_MODE (&all.smod_32, mode);
      PUT_MODE (&all.wide_trunc, mode);
      PUT_MODE (&all.shift, mode);
      PUT_MODE (&all.shift_mult, mode);
      PUT_MODE (&all.shift_add, mode);
      PUT_MODE (&all.shift_sub, mode);
      add_cost[mode] = rtx_cost (&all.plus, SET);
      neg_cost[mode] = rtx_cost (&all.neg, SET);
      mul_cost[mode] = rtx_cost (&all.mult, SET);
      sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
      udiv_cost[mode] = rtx_cost (&all.udiv, SET);
      /* "Cheap" means no more expensive than a few additions.  */
      sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
			       <= 2 * add_cost[mode]);
      smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
			       <= 4 * add_cost[mode]);
      /* Widening-multiply costs need the next wider mode, if any.  */
      wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
	  PUT_MODE (&all.zext, wider_mode);
	  PUT_MODE (&all.wide_mult, wider_mode);
	  PUT_MODE (&all.wide_lshr, wider_mode);
	  XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
	  mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
	  mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
      /* Per-shift-count costs, up to the narrower of word size and
	 mode size.  */
      shift_cost[mode][0] = 0;
      shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
      n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
      for (m = 1; m < n; m++)
	  XEXP (&all.shift, 1) = cint[m];
	  XEXP (&all.shift_mult, 1) = pow2[m];
	  shift_cost[mode][m] = rtx_cost (&all.shift, SET);
	  shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
	  shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */
negate_rtx (enum machine_mode mode, rtx x)
  /* First try to fold the negation at the RTL level (constants etc.).  */
  rtx result = simplify_unary_operation (NEG, mode, x, mode);
  /* Emit an explicit negation insn.  NOTE(review): the guard between
     these two statements is elided in this extraction.  */
  result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
/* Report on the availability of insv/extv/extzv and the desired mode
   of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
   is false; else the mode of the specified operand.  If OPNO is -1,
   all the caller cares about is whether the insn is available.  */
mode_for_extraction (enum extraction_pattern pattern, int opno)
  const struct insn_data *data;
      /* Bit-field insertion pattern.  */
      data = &insn_data[CODE_FOR_insv];
      return MAX_MACHINE_MODE;
      /* Sign-extending bit-field extraction pattern.  */
      data = &insn_data[CODE_FOR_extv];
      return MAX_MACHINE_MODE;
      /* Zero-extending bit-field extraction pattern.  */
      data = &insn_data[CODE_FOR_extzv];
      return MAX_MACHINE_MODE;
  /* Everyone who uses this function used to follow it with
     if (result == VOIDmode) result = word_mode; */
  if (data->operand[opno].mode == VOIDmode)
  return data->operand[opno].mode;
/* Return true if X, of mode MODE, matches the predicate for operand
   OPNO of instruction ICODE.  Allow volatile memories, regardless of
   the ambient volatile_ok setting.  */
check_predicate_volatile_ok (enum insn_code icode, int opno,
			     rtx x, enum machine_mode mode)
  bool save_volatile_ok, result;
  /* Save the global flag so the predicate call cannot leak a changed
     volatile_ok setting to the rest of the compiler.  NOTE(review):
     the statement that sets volatile_ok before the call appears to be
     elided in this extraction.  */
  save_volatile_ok = volatile_ok;
  result = insn_data[(int) icode].operand[opno].predicate (x, mode);
  volatile_ok = save_volatile_ok;
/* A subroutine of store_bit_field, with the same arguments.  Return true
   if the operation could be implemented.

   If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
   no other way of implementing the operation.  If FALLBACK_P is false,
   return false instead.

   NOTE(review): many lines of this function are elided in this
   extraction; comments below annotate only the visible code.  */
store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		   unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
		   rtx value, bool fallback_p)
    = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset, bitpos;
  enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
  /* Strip SUBREGs, folding their byte offset into BITNUM.  */
  while (GET_CODE (op0) == SUBREG)
      /* The following line once was done only if WORDS_BIG_ENDIAN,
	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
	 meaningful at a much higher level; when structures are copied
	 between memory and regs, the higher-numbered regs
	 always get higher addresses.  */
      int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
      int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
      /* Paradoxical subregs need special handling on big endian machines.  */
      if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
	  int difference = inner_mode_size - outer_mode_size;
	  if (WORDS_BIG_ENDIAN)
	    byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
	  if (BYTES_BIG_ENDIAN)
	    byte_offset += difference % UNITS_PER_WORD;
	byte_offset = SUBREG_BYTE (op0);
      bitnum += byte_offset * BITS_PER_UNIT;
      op0 = SUBREG_REG (op0);
  /* No action is needed if the target is a register and if the field
     lies completely outside that register.  This can occur if the source
     code contains an out-of-bounds access to a small array.  */
  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
  /* Use vec_set patterns for inserting parts of vectors whenever
  if (VECTOR_MODE_P (GET_MODE (op0))
      && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
      && fieldmode == GET_MODE_INNER (GET_MODE (op0))
      && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
      && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
      enum machine_mode outermode = GET_MODE (op0);
      enum machine_mode innermode = GET_MODE_INNER (outermode);
      int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
      int pos = bitnum / GET_MODE_BITSIZE (innermode);
      rtx rtxpos = GEN_INT (pos);
      enum machine_mode mode0 = insn_data[icode].operand[0].mode;
      enum machine_mode mode1 = insn_data[icode].operand[1].mode;
      enum machine_mode mode2 = insn_data[icode].operand[2].mode;
      if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
	src = copy_to_mode_reg (mode1, src);
      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
	/* NOTE(review): MODE1 looks wrong here — RTXPOS is operand 2 and
	   was checked against MODE2 just above; this should probably be
	   copy_to_mode_reg (mode2, rtxpos).  Confirm before changing.  */
	rtxpos = copy_to_mode_reg (mode1, rtxpos);
      /* We could handle this, but we should always be called with a pseudo
	 for our targets and all insns should take them as outputs.  */
      gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
		  && (*insn_data[icode].operand[1].predicate) (src, mode1)
		  && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
      pat = GEN_FCN (icode) (dest, src, rtxpos);
  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */
  offset = bitnum / unit;
  bitpos = bitnum % unit;
  byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
		+ (offset * UNITS_PER_WORD);
      && bitsize == GET_MODE_BITSIZE (fieldmode)
	  ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
	      || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
	     && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
	  : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
	     || (offset * BITS_PER_UNIT % bitsize == 0
		 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
	op0 = adjust_address (op0, fieldmode, offset);
      else if (GET_MODE (op0) != fieldmode)
	op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
      emit_move_insn (op0, value);
  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
	  op0 = adjust_address (op0, imode, 0);
	    gcc_assert (imode != BLKmode);
	    op0 = gen_lowpart (imode, op0);
  /* We may be accessing data outside the field, which means
     we can alias adjacent data.  */
      op0 = shallow_copy_rtx (op0);
      set_mem_alias_set (op0, 0);
      set_mem_expr (op0, 0);
  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (optab_handler (movstrict_optab, fieldmode)->insn_code
	  != CODE_FOR_nothing))
      int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
      rtx start = get_last_insn ();
      /* Get appropriate low part of the value being stored.  */
      if (GET_CODE (value) == CONST_INT || REG_P (value))
	value = gen_lowpart (fieldmode, value);
      else if (!(GET_CODE (value) == SYMBOL_REF
		 || GET_CODE (value) == LABEL_REF
		 || GET_CODE (value) == CONST))
	value = convert_to_mode (fieldmode, value, 0);
      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
	value = copy_to_mode_reg (fieldmode, value);
      if (GET_CODE (op0) == SUBREG)
	  /* Else we've got some float mode source being extracted into
	     a different float mode destination -- this combination of
	     subregs results in Severe Tire Damage.  */
	  gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
		      || GET_MODE_CLASS (fieldmode) == MODE_INT
		      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
	  op0 = SUBREG_REG (op0);
      insn = (GEN_FCN (icode)
	      (gen_rtx_SUBREG (fieldmode, op0,
			       (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
			       + (offset * UNITS_PER_WORD)),
	delete_insns_since (start);
  /* Handle fields bigger than a word.  */
  if (bitsize > BITS_PER_WORD)
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 However, only do that if the value is not BLKmode.  */
      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force
      fieldmode = GET_MODE (value);
      if (fieldmode == VOIDmode)
	fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
      last = get_last_insn ();
      for (i = 0; i < nwords; i++)
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards ? nwords - i - 1 : i);
	  unsigned int bit_offset = (backwards
				     ? MAX ((int) bitsize - ((int) i + 1)
				     : (int) i * BITS_PER_WORD);
	  rtx value_word = operand_subword_force (value, wordnum, fieldmode);
	  /* Recurse word by word; undo everything on failure.  */
	  if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
					    bitsize - i * BITS_PER_WORD),
				  bitnum + bit_offset, word_mode,
				  value_word, fallback_p))
	      delete_insns_since (last);
  /* From here on we can assume that the field to be stored in is
     a full-word (whatever type that is), since it is shorter than a word.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */
      || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	  /* Since this is a destination (lvalue), we can't copy
	     it to a pseudo.  We can remove a SUBREG that does not
	     change the size of the operand.  Such a SUBREG may
	     have been added above.  */
	  gcc_assert (GET_CODE (op0) == SUBREG
		      && (GET_MODE_SIZE (GET_MODE (op0))
			  == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
	  op0 = SUBREG_REG (op0);
	op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
			      op0, (offset * UNITS_PER_WORD));
  /* If VALUE has a floating-point or complex mode, access it as an
     integer of the corresponding size.  This can occur on a machine
     with 64 bit registers that uses SFmode for float.  It can also
     occur for unaligned float or complex fields.  */
  if (GET_MODE (value) != VOIDmode
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
      && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
      value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
      emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */
      && GET_MODE (value) != BLKmode
      && GET_MODE_BITSIZE (op_mode) >= bitsize
      && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
	    && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
      && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
      && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
      int xbitpos = bitpos;
      rtx last = get_last_insn ();
      /* Add OFFSET into OP0's address.  */
	xop0 = adjust_address (xop0, byte_mode, offset);
      /* If xop0 is a register, we need it in OP_MODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
      if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
	xop0 = gen_rtx_SUBREG (op_mode, xop0, 0);
      /* On big-endian machines, we count bits from the most significant.
	 If the bit field insn does not, we must invert.  */
      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	xbitpos = unit - bitsize - xbitpos;
      /* We have been counting XBITPOS within UNIT.
	 Count instead within the size of the register.  */
      if (BITS_BIG_ENDIAN && !MEM_P (xop0))
	xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
      unit = GET_MODE_BITSIZE (op_mode);
      /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
      if (GET_MODE (value) != op_mode)
	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	      /* Optimization:  Don't bother really extending VALUE
		 if it has all the bits we will actually use.  However,
		 if we must narrow it, be sure we do it correctly.  */
	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
		  tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
		    tmp = simplify_gen_subreg (op_mode,
					       force_reg (GET_MODE (value),
					       GET_MODE (value), 0);
		value1 = gen_lowpart (op_mode, value1);
	  else if (GET_CODE (value) == CONST_INT)
	    value1 = gen_int_mode (INTVAL (value), op_mode);
	    /* Parse phase is supposed to make VALUE's data type
	       match that of the component reference, which is a type
	       at least as wide as the field; so VALUE should have
	       a mode that corresponds to that type.  */
	    gcc_assert (CONSTANT_P (value));
      /* If this machine's insv insists on a register,
	 get VALUE1 into a register.  */
      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
	value1 = force_reg (op_mode, value1);
      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
	  delete_insns_since (last);
  /* If OP0 is a memory, try copying it to a register and seeing if a
     cheap register alternative is available.  */
  if (HAVE_insv && MEM_P (op0))
      enum machine_mode bestmode;
      /* Get the mode to use for inserting into this field.  If OP0 is
	 BLKmode, get the smallest mode consistent with the alignment. If
	 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
	 mode. Otherwise, use the smallest mode containing the field.  */
      if (GET_MODE (op0) == BLKmode
	  || (op_mode != MAX_MACHINE_MODE
	      && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
	bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
				  (op_mode == MAX_MACHINE_MODE
				   ? VOIDmode : op_mode),
				  MEM_VOLATILE_P (op0));
	bestmode = GET_MODE (op0);
      if (bestmode != VOIDmode
	  && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
	  && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
	       && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
	  rtx last, tempreg, xop0;
	  unsigned HOST_WIDE_INT xoffset, xbitpos;
	  last = get_last_insn ();
	  /* Adjust address to point to the containing unit of
	     that mode.  Compute the offset as a multiple of this unit,
	     counting in bytes.  */
	  unit = GET_MODE_BITSIZE (bestmode);
	  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
	  xbitpos = bitnum % unit;
	  xop0 = adjust_address (op0, bestmode, xoffset);
	  /* Fetch that unit, store the bitfield in it, then store
	  tempreg = copy_to_reg (xop0);
	  if (store_bit_field_1 (tempreg, bitsize, xbitpos,
				 fieldmode, orig_value, false))
	      emit_move_insn (xop0, tempreg);
	  delete_insns_since (last);
  /* Last resort: shift/mask in place.  */
  store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
		 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
  /* With FALLBACK_P true the worker must always succeed.  */
  if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0 except offset by OFFSET bytes.
   (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
   (If OP0 is a register, it may be a full word or a narrower mode,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)

   NOTE(review): some lines of this function are elided in this
   extraction.  */
store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
		       unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos, rtx value)
  enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
      gcc_assert (!offset);
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	  store_split_bit_field (op0, bitsize, bitpos, value);
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.
	 We don't want a mode bigger than the destination.  */
      mode = GET_MODE (op0);
      if (GET_MODE_BITSIZE (mode) == 0
	  || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			    MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
      if (mode == VOIDmode)
	  /* The only way this should occur is if the field spans word
	  store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
      total_bits = GET_MODE_BITSIZE (mode);
      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
      if (bitpos >= total_bits)
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = adjust_address (op0, mode, offset);
  mode = GET_MODE (op0);
  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */
  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;
  /* Now BITPOS is always the distance between our lsb

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */
  if (GET_CODE (value) == CONST_INT)
      HOST_WIDE_INT v = INTVAL (value);
      /* Mask the constant down to the field width.  */
      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
      /* All-ones field value needs no masking step later.  */
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
	value = lshift_value (mode, value, bitpos, bitsize);
      /* Non-constant path: must AND off stray high bits unless the value
	 already exactly fills from BITPOS to the top of MODE.  */
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));
      if (GET_MODE (value) != mode)
	  if ((REG_P (value) || GET_CODE (value) == SUBREG)
	      && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
	    value = gen_lowpart (mode, value);
	    value = convert_to_mode (mode, value, 1);
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */
  /* We keep the intermediates in registers to allow CSE to combine
     consecutive bitfield assignments.  */
  temp = force_reg (mode, op0);
      temp = expand_binop (mode, and_optab, temp,
			   mask_rtx (mode, bitpos, bitsize, 1),
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
  /* Now logical-or VALUE into OP0, unless it is zero.  */
      temp = expand_binop (mode, ior_optab, temp, value,
			   NULL_RTX, 1, OPTAB_LIB_WIDEN);
      temp = force_reg (mode, temp);
      op0 = copy_rtx (op0);
  emit_move_insn (op0, temp);
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   VALUE is the value to store.

   This does not yet handle fields wider than BITS_PER_WORD.

   NOTE(review): some lines of this function are elided in this
   extraction.  */
store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
		       unsigned HOST_WIDE_INT bitpos, rtx value)
  unsigned int bitsdone = 0;
  /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
    unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
      rtx word = gen_lowpart_common (word_mode, value);
      if (word && (value != word))
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       : word_mode, value));
  /* Store the field one within-unit piece at a time.  */
  while (bitsdone < bitsize)
      unsigned HOST_WIDE_INT thissize;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;
      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;
      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);
      if (BYTES_BIG_ENDIAN)
	  /* We must do an endian conversion exactly the same way as it is
	     done in extract_bit_field, so that the two calls to
	     extract_fixed_bit_field will have comparable arguments.  */
	  if (!MEM_P (value) || GET_MODE (value) == BLKmode)
	    total_bits = BITS_PER_WORD;
	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));
	  /* Fetch successively less significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.  */
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    total_bits - bitsize + bitsdone,
	  /* Fetch successively more significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	    part = extract_fixed_bit_field (word_mode, value, 0, thissize,
					    bitsdone, NULL_RTX, 1);
      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	  int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
	  word = operand_subword_force (SUBREG_REG (op0), word_offset,
					GET_MODE (SUBREG_REG (op0)));
      else if (REG_P (op0))
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
      /* OFFSET is in UNITs, and UNIT is in bits.
	 store_fixed_bit_field wants offset in bytes.  */
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
      bitsdone += thissize;
1112 /* A subroutine of extract_bit_field_1 that converts return value X
1113 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1114 to extract_bit_field. */
1117 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1118 enum machine_mode tmode, bool unsignedp)
1120 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1123 /* If the x mode is not a scalar integral, first convert to the
1124 integer mode of that size and then access it as a floating-point
1125 value via a SUBREG. */
1126 if (!SCALAR_INT_MODE_P (tmode))
1128 enum machine_mode smode;
1130 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1131 x = convert_to_mode (smode, x, unsignedp);
1132 x = force_reg (smode, x);
1133 return gen_lowpart (tmode, x);
1136 return convert_to_mode (tmode, x, unsignedp);
1139 /* A subroutine of extract_bit_field, with the same arguments.
1140 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1141 if we can find no other means of implementing the operation.
1142 if FALLBACK_P is false, return NULL instead. */
1145 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1146 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1147 enum machine_mode mode, enum machine_mode tmode,
1151 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1152 unsigned HOST_WIDE_INT offset, bitpos;
1154 enum machine_mode int_mode;
1155 enum machine_mode ext_mode;
1156 enum machine_mode mode1;
1157 enum insn_code icode;
1160 if (tmode == VOIDmode)
1163 while (GET_CODE (op0) == SUBREG)
1165 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1166 op0 = SUBREG_REG (op0);
1169 /* If we have an out-of-bounds access to a register, just return an
1170 uninitialized register of the required mode. This can occur if the
1171 source code contains an out-of-bounds access to a small array. */
1172 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1173 return gen_reg_rtx (tmode);
1176 && mode == GET_MODE (op0)
1178 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1180 /* We're trying to extract a full register from itself. */
1184 /* See if we can get a better vector mode before extracting. */
1185 if (VECTOR_MODE_P (GET_MODE (op0))
1187 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1189 enum machine_mode new_mode;
1190 int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1192 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1193 new_mode = MIN_MODE_VECTOR_FLOAT;
1194 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1195 new_mode = MIN_MODE_VECTOR_FRACT;
1196 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1197 new_mode = MIN_MODE_VECTOR_UFRACT;
1198 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1199 new_mode = MIN_MODE_VECTOR_ACCUM;
1200 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1201 new_mode = MIN_MODE_VECTOR_UACCUM;
1203 new_mode = MIN_MODE_VECTOR_INT;
1205 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1206 if (GET_MODE_NUNITS (new_mode) == nunits
1207 && GET_MODE_INNER (new_mode) == tmode
1208 && targetm.vector_mode_supported_p (new_mode))
1210 if (new_mode != VOIDmode)
1211 op0 = gen_lowpart (new_mode, op0);
1214 /* Use vec_extract patterns for extracting parts of vectors whenever
1216 if (VECTOR_MODE_P (GET_MODE (op0))
1218 && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1219 != CODE_FOR_nothing)
1220 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1221 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1223 enum machine_mode outermode = GET_MODE (op0);
1224 enum machine_mode innermode = GET_MODE_INNER (outermode);
1225 int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1226 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1227 rtx rtxpos = GEN_INT (pos);
1229 rtx dest = NULL, pat, seq;
1230 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1231 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1232 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1234 if (innermode == tmode || innermode == mode)
1238 dest = gen_reg_rtx (innermode);
1242 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1243 dest = copy_to_mode_reg (mode0, dest);
1245 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1246 src = copy_to_mode_reg (mode1, src);
1248 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1249 rtxpos = copy_to_mode_reg (mode1, rtxpos);
1251 /* We could handle this, but we should always be called with a pseudo
1252 for our targets and all insns should take them as outputs. */
1253 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1254 && (*insn_data[icode].operand[1].predicate) (src, mode1)
1255 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1257 pat = GEN_FCN (icode) (dest, src, rtxpos);
1265 return gen_lowpart (tmode, dest);
1270 /* Make sure we are playing with integral modes. Pun with subregs
1273 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1274 if (imode != GET_MODE (op0))
1277 op0 = adjust_address (op0, imode, 0);
1280 gcc_assert (imode != BLKmode);
1281 op0 = gen_lowpart (imode, op0);
1283 /* If we got a SUBREG, force it into a register since we
1284 aren't going to be able to do another SUBREG on it. */
1285 if (GET_CODE (op0) == SUBREG)
1286 op0 = force_reg (imode, op0);
1291 /* We may be accessing data outside the field, which means
1292 we can alias adjacent data. */
1295 op0 = shallow_copy_rtx (op0);
1296 set_mem_alias_set (op0, 0);
1297 set_mem_expr (op0, 0);
1300 /* Extraction of a full-word or multi-word value from a structure
1301 in a register or aligned memory can be done with just a SUBREG.
1302 A subword value in the least significant part of a register
1303 can also be extracted with a SUBREG. For this, we need the
1304 byte offset of the value in op0. */
1306 bitpos = bitnum % unit;
1307 offset = bitnum / unit;
1308 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1310 /* If OP0 is a register, BITPOS must count within a word.
1311 But as we have it, it counts within whatever size OP0 now has.
1312 On a bigendian machine, these are not the same, so convert. */
1313 if (BYTES_BIG_ENDIAN
1315 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1316 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1318 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1319 If that's wrong, the solution is to test for it and set TARGET to 0
1322 /* Only scalar integer modes can be converted via subregs. There is an
1323 additional problem for FP modes here in that they can have a precision
1324 which is different from the size. mode_for_size uses precision, but
1325 we want a mode based on the size, so we must avoid calling it for FP
1327 mode1 = (SCALAR_INT_MODE_P (tmode)
1328 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1331 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1332 && bitpos % BITS_PER_WORD == 0)
1333 || (mode1 != BLKmode
1334 /* ??? The big endian test here is wrong. This is correct
1335 if the value is in a register, and if mode_for_size is not
1336 the same mode as op0. This causes us to get unnecessarily
1337 inefficient code from the Thumb port when -mbig-endian. */
1338 && (BYTES_BIG_ENDIAN
1339 ? bitpos + bitsize == BITS_PER_WORD
1342 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1343 GET_MODE_BITSIZE (GET_MODE (op0)))
1344 && GET_MODE_SIZE (mode1) != 0
1345 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1347 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1348 || (offset * BITS_PER_UNIT % bitsize == 0
1349 && MEM_ALIGN (op0) % bitsize == 0)))))
1352 op0 = adjust_address (op0, mode1, offset);
1353 else if (mode1 != GET_MODE (op0))
1355 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1358 goto no_subreg_mode_swap;
1362 return convert_to_mode (tmode, op0, unsignedp);
1365 no_subreg_mode_swap:
1367 /* Handle fields bigger than a word. */
1369 if (bitsize > BITS_PER_WORD)
1371 /* Here we transfer the words of the field
1372 in the order least significant first.
1373 This is because the most significant word is the one which may
1374 be less than full. */
1376 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1379 if (target == 0 || !REG_P (target))
1380 target = gen_reg_rtx (mode);
1382 /* Indicate for flow that the entire target reg is being set. */
1383 emit_clobber (target);
1385 for (i = 0; i < nwords; i++)
1387 /* If I is 0, use the low-order word in both field and target;
1388 if I is 1, use the next to lowest word; and so on. */
1389 /* Word number in TARGET to use. */
1390 unsigned int wordnum
1392 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1394 /* Offset from start of field in OP0. */
1395 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1396 ? MAX (0, ((int) bitsize - ((int) i + 1)
1397 * (int) BITS_PER_WORD))
1398 : (int) i * BITS_PER_WORD);
1399 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1401 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1402 bitsize - i * BITS_PER_WORD),
1403 bitnum + bit_offset, 1, target_part, mode,
1406 gcc_assert (target_part);
1408 if (result_part != target_part)
1409 emit_move_insn (target_part, result_part);
1414 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1415 need to be zero'd out. */
1416 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1418 unsigned int i, total_words;
1420 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1421 for (i = nwords; i < total_words; i++)
1423 (operand_subword (target,
1424 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1431 /* Signed bit field: sign-extend with two arithmetic shifts. */
1432 target = expand_shift (LSHIFT_EXPR, mode, target,
1433 build_int_cst (NULL_TREE,
1434 GET_MODE_BITSIZE (mode) - bitsize),
1436 return expand_shift (RSHIFT_EXPR, mode, target,
1437 build_int_cst (NULL_TREE,
1438 GET_MODE_BITSIZE (mode) - bitsize),
1442 /* From here on we know the desired field is smaller than a word. */
1444 /* Check if there is a correspondingly-sized integer field, so we can
1445 safely extract it as one size of integer, if necessary; then
1446 truncate or extend to the size that is wanted; then use SUBREGs or
1447 convert_to_mode to get one of the modes we really wanted. */
1449 int_mode = int_mode_for_mode (tmode);
1450 if (int_mode == BLKmode)
1451 int_mode = int_mode_for_mode (mode);
1452 /* Should probably push op0 out to memory and then do a load. */
1453 gcc_assert (int_mode != BLKmode);
1455 /* OFFSET is the number of words or bytes (UNIT says which)
1456 from STR_RTX to the first word or byte containing part of the field. */
1460 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1463 op0 = copy_to_reg (op0);
1464 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1465 op0, (offset * UNITS_PER_WORD));
1470 /* Now OFFSET is nonzero only for memory operands. */
1471 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1472 icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1473 if (ext_mode != MAX_MACHINE_MODE
1475 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1476 /* If op0 is a register, we need it in EXT_MODE to make it
1477 acceptable to the format of ext(z)v. */
1478 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1479 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1480 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1481 && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1483 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1484 rtx bitsize_rtx, bitpos_rtx;
1485 rtx last = get_last_insn ();
1487 rtx xtarget = target;
1488 rtx xspec_target = target;
1489 rtx xspec_target_subreg = 0;
1492 /* If op0 is a register, we need it in EXT_MODE to make it
1493 acceptable to the format of ext(z)v. */
1494 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1495 xop0 = gen_rtx_SUBREG (ext_mode, xop0, 0);
1497 /* Get ref to first byte containing part of the field. */
1498 xop0 = adjust_address (xop0, byte_mode, xoffset);
1500 /* On big-endian machines, we count bits from the most significant.
1501 If the bit field insn does not, we must invert. */
1502 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1503 xbitpos = unit - bitsize - xbitpos;
1505 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1506 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1507 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1509 unit = GET_MODE_BITSIZE (ext_mode);
1512 xtarget = xspec_target = gen_reg_rtx (tmode);
1514 if (GET_MODE (xtarget) != ext_mode)
1516 if (REG_P (xtarget))
1518 xtarget = gen_lowpart (ext_mode, xtarget);
1519 if (GET_MODE_SIZE (ext_mode)
1520 > GET_MODE_SIZE (GET_MODE (xspec_target)))
1521 xspec_target_subreg = xtarget;
1524 xtarget = gen_reg_rtx (ext_mode);
1527 /* If this machine's ext(z)v insists on a register target,
1528 make sure we have one. */
1529 if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1530 xtarget = gen_reg_rtx (ext_mode);
1532 bitsize_rtx = GEN_INT (bitsize);
1533 bitpos_rtx = GEN_INT (xbitpos);
1536 ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1537 : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1541 if (xtarget == xspec_target)
1543 if (xtarget == xspec_target_subreg)
1544 return xspec_target;
1545 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1547 delete_insns_since (last);
1550 /* If OP0 is a memory, try copying it to a register and seeing if a
1551 cheap register alternative is available. */
1552 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1554 enum machine_mode bestmode;
1556 /* Get the mode to use for inserting into this field. If
1557 OP0 is BLKmode, get the smallest mode consistent with the
1558 alignment. If OP0 is a non-BLKmode object that is no
1559 wider than EXT_MODE, use its mode. Otherwise, use the
1560 smallest mode containing the field. */
1562 if (GET_MODE (op0) == BLKmode
1563 || (ext_mode != MAX_MACHINE_MODE
1564 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1565 bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1566 (ext_mode == MAX_MACHINE_MODE
1567 ? VOIDmode : ext_mode),
1568 MEM_VOLATILE_P (op0));
1570 bestmode = GET_MODE (op0);
1572 if (bestmode != VOIDmode
1573 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1574 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1576 unsigned HOST_WIDE_INT xoffset, xbitpos;
1578 /* Compute the offset as a multiple of this unit,
1579 counting in bytes. */
1580 unit = GET_MODE_BITSIZE (bestmode);
1581 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1582 xbitpos = bitnum % unit;
1584 /* Make sure the register is big enough for the whole field. */
1585 if (xoffset * BITS_PER_UNIT + unit
1586 >= offset * BITS_PER_UNIT + bitsize)
1588 rtx last, result, xop0;
1590 last = get_last_insn ();
1592 /* Fetch it to a register in that size. */
1593 xop0 = adjust_address (op0, bestmode, xoffset);
1594 xop0 = force_reg (bestmode, xop0);
1595 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1597 mode, tmode, false);
1601 delete_insns_since (last);
1609 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1610 bitpos, target, unsignedp);
1611 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1614 /* Generate code to extract a byte-field from STR_RTX
1615 containing BITSIZE bits, starting at BITNUM,
1616 and put it in TARGET if possible (if TARGET is nonzero).
1617 Regardless of TARGET, we return the rtx for where the value is placed.
1619 STR_RTX is the structure containing the byte (a REG or MEM).
1620 UNSIGNEDP is nonzero if this is an unsigned bit field.
1621 MODE is the natural mode of the field value once extracted.
1622 TMODE is the mode the caller would like the value to have;
1623 but the value may be returned with type MODE instead.
1625 If a TARGET is specified and we can store in it at no extra cost,
1626 we do so, and return TARGET.
1627 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1628 if they are equally easy. */
1631 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1632 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1633 enum machine_mode mode, enum machine_mode tmode)
1635 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1636 target, mode, tmode, true);
1639 /* Extract a bit field using shifts and boolean operations
1640 Returns an rtx to represent the value.
1641 OP0 addresses a register (word) or memory (byte).
1642 BITPOS says which bit within the word or byte the bit field starts in.
1643 OFFSET says how many bytes farther the bit field starts;
1644 it is 0 if OP0 is a register.
1645 BITSIZE says how many bits long the bit field is.
1646 (If OP0 is a register, it may be narrower than a full word,
1647 but BITPOS still counts within a full word,
1648 which is significant on bigendian machines.)
1650 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1651 If TARGET is nonzero, attempts to store the value there
1652 and return TARGET, but this is not guaranteed.
1653 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1656 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1657 unsigned HOST_WIDE_INT offset,
1658 unsigned HOST_WIDE_INT bitsize,
1659 unsigned HOST_WIDE_INT bitpos, rtx target,
1662 unsigned int total_bits = BITS_PER_WORD;
1663 enum machine_mode mode;
1665 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1667 /* Special treatment for a bit field split across two registers. */
1668 if (bitsize + bitpos > BITS_PER_WORD)
1669 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1673 /* Get the proper mode to use for this field. We want a mode that
1674 includes the entire field. If such a mode would be larger than
1675 a word, we won't be doing the extraction the normal way. */
1677 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1678 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1680 if (mode == VOIDmode)
1681 /* The only way this should occur is if the field spans word
1683 return extract_split_bit_field (op0, bitsize,
1684 bitpos + offset * BITS_PER_UNIT,
1687 total_bits = GET_MODE_BITSIZE (mode);
1689 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1690 be in the range 0 to total_bits-1, and put any excess bytes in
1692 if (bitpos >= total_bits)
1694 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1695 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1699 /* Get ref to an aligned byte, halfword, or word containing the field.
1700 Adjust BITPOS to be position within a word,
1701 and OFFSET to be the offset of that word.
1702 Then alter OP0 to refer to that word. */
1703 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1704 offset -= (offset % (total_bits / BITS_PER_UNIT));
1705 op0 = adjust_address (op0, mode, offset);
1708 mode = GET_MODE (op0);
1710 if (BYTES_BIG_ENDIAN)
1711 /* BITPOS is the distance between our msb and that of OP0.
1712 Convert it to the distance from the lsb. */
1713 bitpos = total_bits - bitsize - bitpos;
1715 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1716 We have reduced the big-endian case to the little-endian case. */
1722 /* If the field does not already start at the lsb,
1723 shift it so it does. */
1724 tree amount = build_int_cst (NULL_TREE, bitpos);
1725 /* Maybe propagate the target for the shift. */
1726 /* But not if we will return it--could confuse integrate.c. */
1727 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1728 if (tmode != mode) subtarget = 0;
1729 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1731 /* Convert the value to the desired mode. */
1733 op0 = convert_to_mode (tmode, op0, 1);
1735 /* Unless the msb of the field used to be the msb when we shifted,
1736 mask out the upper bits. */
1738 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1739 return expand_binop (GET_MODE (op0), and_optab, op0,
1740 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1741 target, 1, OPTAB_LIB_WIDEN);
1745 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1746 then arithmetic-shift its lsb to the lsb of the word. */
1747 op0 = force_reg (mode, op0);
1751 /* Find the narrowest integer mode that contains the field. */
1753 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1754 mode = GET_MODE_WIDER_MODE (mode))
1755 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1757 op0 = convert_to_mode (mode, op0, 0);
1761 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1764 = build_int_cst (NULL_TREE,
1765 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1766 /* Maybe propagate the target for the shift. */
1767 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1768 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1771 return expand_shift (RSHIFT_EXPR, mode, op0,
1772 build_int_cst (NULL_TREE,
1773 GET_MODE_BITSIZE (mode) - bitsize),
1777 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1778 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1779 complement of that if COMPLEMENT. The mask is truncated if
1780 necessary to the width of mode MODE. The mask is zero-extended if
1781 BITSIZE+BITPOS is too small for MODE. */
1784 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1786 HOST_WIDE_INT masklow, maskhigh;
1790 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1791 masklow = (HOST_WIDE_INT) -1 << bitpos;
1795 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1796 masklow &= ((unsigned HOST_WIDE_INT) -1
1797 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1799 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1802 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1806 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1807 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1808 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1814 maskhigh = ~maskhigh;
1818 return immed_double_const (masklow, maskhigh, mode);
1821 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1822 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1825 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1827 unsigned HOST_WIDE_INT v = INTVAL (value);
1828 HOST_WIDE_INT low, high;
1830 if (bitsize < HOST_BITS_PER_WIDE_INT)
1831 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1833 if (bitpos < HOST_BITS_PER_WIDE_INT)
1836 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1841 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1844 return immed_double_const (low, high, mode);
1847 /* Extract a bit field that is split across two words
1848 and return an RTX for the result.
1850 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1851 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1852 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1855 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1856 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1859 unsigned int bitsdone = 0;
1860 rtx result = NULL_RTX;
1863 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1865 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1866 unit = BITS_PER_WORD;
1868 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1870 while (bitsdone < bitsize)
1872 unsigned HOST_WIDE_INT thissize;
1874 unsigned HOST_WIDE_INT thispos;
1875 unsigned HOST_WIDE_INT offset;
1877 offset = (bitpos + bitsdone) / unit;
1878 thispos = (bitpos + bitsdone) % unit;
1880 /* THISSIZE must not overrun a word boundary. Otherwise,
1881 extract_fixed_bit_field will call us again, and we will mutually
1883 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1884 thissize = MIN (thissize, unit - thispos);
1886 /* If OP0 is a register, then handle OFFSET here.
1888 When handling multiword bitfields, extract_bit_field may pass
1889 down a word_mode SUBREG of a larger REG for a bitfield that actually
1890 crosses a word boundary. Thus, for a SUBREG, we must find
1891 the current word starting from the base register. */
1892 if (GET_CODE (op0) == SUBREG)
1894 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1895 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1896 GET_MODE (SUBREG_REG (op0)));
1899 else if (REG_P (op0))
1901 word = operand_subword_force (op0, offset, GET_MODE (op0));
1907 /* Extract the parts in bit-counting order,
1908 whose meaning is determined by BYTES_PER_UNIT.
1909 OFFSET is in UNITs, and UNIT is in bits.
1910 extract_fixed_bit_field wants offset in bytes. */
1911 part = extract_fixed_bit_field (word_mode, word,
1912 offset * unit / BITS_PER_UNIT,
1913 thissize, thispos, 0, 1);
1914 bitsdone += thissize;
1916 /* Shift this part into place for the result. */
1917 if (BYTES_BIG_ENDIAN)
1919 if (bitsize != bitsdone)
1920 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1921 build_int_cst (NULL_TREE, bitsize - bitsdone),
1926 if (bitsdone != thissize)
1927 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1928 build_int_cst (NULL_TREE,
1929 bitsdone - thissize), 0, 1);
1935 /* Combine the parts with bitwise or. This works
1936 because we extracted each part as an unsigned bit field. */
1937 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1943 /* Unsigned bit field: we are done. */
1946 /* Signed bit field: sign-extend with two arithmetic shifts. */
1947 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1948 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1950 return expand_shift (RSHIFT_EXPR, word_mode, result,
1951 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1955 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1956 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1957 MODE, fill the upper bits with zeros. Fail if the layout of either
1958 mode is unknown (as for CC modes) or if the extraction would involve
1959 unprofitable mode punning. Return the value on success, otherwise
1962 This is different from gen_lowpart* in these respects:
1964 - the returned value must always be considered an rvalue
1966 - when MODE is wider than SRC_MODE, the extraction involves
1969 - when MODE is smaller than SRC_MODE, the extraction involves
1970 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1972 In other words, this routine performs a computation, whereas the
1973 gen_lowpart* routines are conceptually lvalue or rvalue subreg
1977 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1979 enum machine_mode int_mode, src_int_mode;
1981 if (mode == src_mode)
1984 if (CONSTANT_P (src))
1985 return simplify_gen_subreg (mode, src, src_mode,
1986 subreg_lowpart_offset (mode, src_mode));
1988 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
1991 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
1992 && MODES_TIEABLE_P (mode, src_mode))
1994 rtx x = gen_lowpart_common (mode, src);
1999 src_int_mode = int_mode_for_mode (src_mode);
2000 int_mode = int_mode_for_mode (mode);
2001 if (src_int_mode == BLKmode || int_mode == BLKmode)
2004 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2006 if (!MODES_TIEABLE_P (int_mode, mode))
2009 src = gen_lowpart (src_int_mode, src);
2010 src = convert_modes (int_mode, src_int_mode, src, true);
2011 src = gen_lowpart (mode, src);
2015 /* Add INC into TARGET. */
2018 expand_inc (rtx target, rtx inc)
2020 rtx value = expand_binop (GET_MODE (target), add_optab,
2022 target, 0, OPTAB_LIB_WIDEN);
2023 if (value != target)
2024 emit_move_insn (target, value);
2027 /* Subtract DEC from TARGET. */
2030 expand_dec (rtx target, rtx dec)
2032 rtx value = expand_binop (GET_MODE (target), sub_optab,
2034 target, 0, OPTAB_LIB_WIDEN);
2035 if (value != target)
2036 emit_move_insn (target, value);
2039 /* Output a shift instruction for expression code CODE,
2040 with SHIFTED being the rtx for the value to shift,
2041 and AMOUNT the tree for the amount to shift by.
2042 Store the result in the rtx TARGET, if that is convenient.
2043 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2044 Return the rtx for where the value is. */
2047 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2048 tree amount, rtx target, int unsignedp)
2051 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2052 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2053 optab lshift_optab = ashl_optab;
2054 optab rshift_arith_optab = ashr_optab;
2055 optab rshift_uns_optab = lshr_optab;
2056 optab lrotate_optab = rotl_optab;
2057 optab rrotate_optab = rotr_optab;
2058 enum machine_mode op1_mode;
2061 op1 = expand_normal (amount);
2062 op1_mode = GET_MODE (op1);
2064 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2065 shift amount is a vector, use the vector/vector shift patterns. */
2066 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2068 lshift_optab = vashl_optab;
2069 rshift_arith_optab = vashr_optab;
2070 rshift_uns_optab = vlshr_optab;
2071 lrotate_optab = vrotl_optab;
2072 rrotate_optab = vrotr_optab;
2075 /* Previously detected shift-counts computed by NEGATE_EXPR
2076 and shifted in the other direction; but that does not work
2079 if (SHIFT_COUNT_TRUNCATED)
2081 if (GET_CODE (op1) == CONST_INT
2082 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2083 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2084 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2085 % GET_MODE_BITSIZE (mode));
2086 else if (GET_CODE (op1) == SUBREG
2087 && subreg_lowpart_p (op1))
2088 op1 = SUBREG_REG (op1);
2091 if (op1 == const0_rtx)
2094 /* Check whether its cheaper to implement a left shift by a constant
2095 bit count by a sequence of additions. */
2096 if (code == LSHIFT_EXPR
2097 && GET_CODE (op1) == CONST_INT
2099 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2100 && INTVAL (op1) < MAX_BITS_PER_WORD
2101 && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode]
2102 && shift_cost[mode][INTVAL (op1)] != MAX_COST)
2105 for (i = 0; i < INTVAL (op1); i++)
2107 temp = force_reg (mode, shifted);
2108 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2109 unsignedp, OPTAB_LIB_WIDEN);
2114 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2116 enum optab_methods methods;
2119 methods = OPTAB_DIRECT;
2120 else if (attempt == 1)
2121 methods = OPTAB_WIDEN;
2123 methods = OPTAB_LIB_WIDEN;
2127 /* Widening does not work for rotation. */
2128 if (methods == OPTAB_WIDEN)
2130 else if (methods == OPTAB_LIB_WIDEN)
2132 /* If we have been unable to open-code this by a rotation,
2133 do it as the IOR of two shifts. I.e., to rotate A
2134 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2135 where C is the bitsize of A.
2137 It is theoretically possible that the target machine might
2138 not be able to perform either shift and hence we would
2139 be making two libcalls rather than just the one for the
2140 shift (similarly if IOR could not be done). We will allow
2141 this extremely unlikely lossage to avoid complicating the
2144 rtx subtarget = target == shifted ? 0 : target;
2145 tree new_amount, other_amount;
2147 tree type = TREE_TYPE (amount);
2148 if (GET_MODE (op1) != TYPE_MODE (type)
2149 && GET_MODE (op1) != VOIDmode)
2150 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2151 new_amount = make_tree (type, op1);
2153 = fold_build2 (MINUS_EXPR, type,
2154 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2157 shifted = force_reg (mode, shifted);
2159 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2160 mode, shifted, new_amount, 0, 1);
2161 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2162 mode, shifted, other_amount, subtarget, 1);
2163 return expand_binop (mode, ior_optab, temp, temp1, target,
2164 unsignedp, methods);
2167 temp = expand_binop (mode,
2168 left ? lrotate_optab : rrotate_optab,
2169 shifted, op1, target, unsignedp, methods);
2172 temp = expand_binop (mode,
2173 left ? lshift_optab : rshift_uns_optab,
2174 shifted, op1, target, unsignedp, methods);
2176 /* Do arithmetic shifts.
2177 Also, if we are going to widen the operand, we can just as well
2178 use an arithmetic right-shift instead of a logical one. */
2179 if (temp == 0 && ! rotate
2180 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2182 enum optab_methods methods1 = methods;
2184 /* If trying to widen a log shift to an arithmetic shift,
2185 don't accept an arithmetic shift of the same size. */
2187 methods1 = OPTAB_MUST_WIDEN;
2189 /* Arithmetic shift */
2191 temp = expand_binop (mode,
2192 left ? lshift_optab : rshift_arith_optab,
2193 shifted, op1, target, unsignedp, methods1);
2196 /* We used to try extzv here for logical right shifts, but that was
2197 only useful for one machine, the VAX, and caused poor code
2198 generation there for lshrdi3, so the code was deleted and a
2199 define_expand for lshrsi3 was added to vax.md. */
2219 /* This structure holds the "cost" of a multiply sequence. The
2220 "cost" field holds the total rtx_cost of every operator in the
2221 synthetic multiplication sequence, hence cost(a op b) is defined
2222 as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2223 The "latency" field holds the minimum possible latency of the
2224 synthetic multiply, on a hypothetical infinitely parallel CPU.
2225 This is the critical path, or the maximum height, of the expression
2226 tree which is the sum of rtx_costs on the most expensive path from
2227 any leaf to the root. Hence latency(a op b) is defined as zero for
2228 leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */
2231 short cost; /* Total rtx_cost of the multiplication sequence. */
2232 short latency; /* The latency of the multiplication sequence. */
2235 /* This macro is used to compare a pointer to a mult_cost against an
2236 single integer "rtx_cost" value. This is equivalent to the macro
2237 CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */
2238 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
2239 || ((X)->cost == (Y) && (X)->latency < (Y)))
2241 /* This macro is used to compare two pointers to mult_costs against
2242 each other. The macro returns true if X is cheaper than Y.
2243 Currently, the cheaper of two mult_costs is the one with the
2244 lower "cost". If "cost"s are tied, the lower latency is cheaper. */
2245 #define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
2246 || ((X)->cost == (Y)->cost \
2247 && (X)->latency < (Y)->latency))
2249 /* This structure records a sequence of operations.
2250 `ops' is the number of operations recorded.
2251 `cost' is their total cost.
2252 The operations are stored in `op' and the corresponding
2253 logarithms of the integer coefficients in `log'.
2255 These are the operations:
2256 alg_zero total := 0;
2257 alg_m total := multiplicand;
2258 alg_shift total := total * coeff
2259 alg_add_t_m2 total := total + multiplicand * coeff;
2260 alg_sub_t_m2 total := total - multiplicand * coeff;
2261 alg_add_factor total := total * coeff + total;
2262 alg_sub_factor total := total * coeff - total;
2263 alg_add_t2_m total := total * coeff + multiplicand;
2264 alg_sub_t2_m total := total * coeff - multiplicand;
2266 The first operand must be either alg_zero or alg_m. */
2270 struct mult_cost cost;
2272 /* The size of the OP and LOG fields are not directly related to the
2273 word size, but the worst-case algorithms will be if we have few
2274 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2275 In that case we will generate shift-by-2, add, shift-by-2, add,...,
2276 in total wordsize operations. */
2277 enum alg_code op[MAX_BITS_PER_WORD];
2278 char log[MAX_BITS_PER_WORD];
2281 /* The entry for our multiplication cache/hash table. */
2282 struct alg_hash_entry {
2283 /* The number we are multiplying by. */
2284 unsigned HOST_WIDE_INT t;
2286 /* The mode in which we are multiplying something by T. */
2287 enum machine_mode mode;
2289 /* The best multiplication algorithm for t. */
2292 /* The cost of multiplication if ALG_CODE is not alg_impossible.
2293 Otherwise, the cost within which multiplication by T is
2295 struct mult_cost cost;
2298 /* The number of cache/hash entries. */
2299 #if HOST_BITS_PER_WIDE_INT == 64
2300 #define NUM_ALG_HASH_ENTRIES 1031
2302 #define NUM_ALG_HASH_ENTRIES 307
2305 /* Each entry of ALG_HASH caches alg_code for some integer. This is
2306 actually a hash table. If we have a collision, that the older
2307 entry is kicked out. */
2308 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2310 /* Indicates the type of fixup needed after a constant multiplication.
2311 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2312 the result should be negated, and ADD_VARIANT means that the
2313 multiplicand should be added to the result. */
2314 enum mult_variant {basic_variant, negate_variant, add_variant};
2316 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2317 const struct mult_cost *, enum machine_mode mode);
2318 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2319 struct algorithm *, enum mult_variant *, int);
2320 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2321 const struct algorithm *, enum mult_variant);
2322 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2323 int, rtx *, int *, int *);
2324 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2325 static rtx extract_high_half (enum machine_mode, rtx);
2326 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2327 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2329 /* Compute and return the best algorithm for multiplying by T.
2330 The algorithm must cost less than cost_limit
2331 If retval.cost >= COST_LIMIT, no algorithm was found and all
2332 other field of the returned struct are undefined.
2333 MODE is the machine mode of the multiplication. */
2336 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2337 const struct mult_cost *cost_limit, enum machine_mode mode)
2340 struct algorithm *alg_in, *best_alg;
2341 struct mult_cost best_cost;
2342 struct mult_cost new_limit;
2343 int op_cost, op_latency;
2344 unsigned HOST_WIDE_INT q;
2345 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2347 bool cache_hit = false;
2348 enum alg_code cache_alg = alg_zero;
2350 /* Indicate that no algorithm is yet found. If no algorithm
2351 is found, this value will be returned and indicate failure. */
2352 alg_out->cost.cost = cost_limit->cost + 1;
2353 alg_out->cost.latency = cost_limit->latency + 1;
2355 if (cost_limit->cost < 0
2356 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2359 /* Restrict the bits of "t" to the multiplication's mode. */
2360 t &= GET_MODE_MASK (mode);
2362 /* t == 1 can be done in zero cost. */
2366 alg_out->cost.cost = 0;
2367 alg_out->cost.latency = 0;
2368 alg_out->op[0] = alg_m;
2372 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2376 if (MULT_COST_LESS (cost_limit, zero_cost))
2381 alg_out->cost.cost = zero_cost;
2382 alg_out->cost.latency = zero_cost;
2383 alg_out->op[0] = alg_zero;
2388 /* We'll be needing a couple extra algorithm structures now. */
2390 alg_in = XALLOCA (struct algorithm);
2391 best_alg = XALLOCA (struct algorithm);
2392 best_cost = *cost_limit;
2394 /* Compute the hash index. */
2395 hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2397 /* See if we already know what to do for T. */
2398 if (alg_hash[hash_index].t == t
2399 && alg_hash[hash_index].mode == mode
2400 && alg_hash[hash_index].alg != alg_unknown)
2402 cache_alg = alg_hash[hash_index].alg;
2404 if (cache_alg == alg_impossible)
2406 /* The cache tells us that it's impossible to synthesize
2407 multiplication by T within alg_hash[hash_index].cost. */
2408 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2409 /* COST_LIMIT is at least as restrictive as the one
2410 recorded in the hash table, in which case we have no
2411 hope of synthesizing a multiplication. Just
2415 /* If we get here, COST_LIMIT is less restrictive than the
2416 one recorded in the hash table, so we may be able to
2417 synthesize a multiplication. Proceed as if we didn't
2418 have the cache entry. */
2422 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2423 /* The cached algorithm shows that this multiplication
2424 requires more cost than COST_LIMIT. Just return. This
2425 way, we don't clobber this cache entry with
2426 alg_impossible but retain useful information. */
2438 goto do_alg_addsub_t_m2;
2440 case alg_add_factor:
2441 case alg_sub_factor:
2442 goto do_alg_addsub_factor;
2445 goto do_alg_add_t2_m;
2448 goto do_alg_sub_t2_m;
2456 /* If we have a group of zero bits at the low-order part of T, try
2457 multiplying by the remaining bits and then doing a shift. */
2462 m = floor_log2 (t & -t); /* m = number of low zero bits */
2466 /* The function expand_shift will choose between a shift and
2467 a sequence of additions, so the observed cost is given as
2468 MIN (m * add_cost[mode], shift_cost[mode][m]). */
2469 op_cost = m * add_cost[mode];
2470 if (shift_cost[mode][m] < op_cost)
2471 op_cost = shift_cost[mode][m];
2472 new_limit.cost = best_cost.cost - op_cost;
2473 new_limit.latency = best_cost.latency - op_cost;
2474 synth_mult (alg_in, q, &new_limit, mode);
2476 alg_in->cost.cost += op_cost;
2477 alg_in->cost.latency += op_cost;
2478 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2480 struct algorithm *x;
2481 best_cost = alg_in->cost;
2482 x = alg_in, alg_in = best_alg, best_alg = x;
2483 best_alg->log[best_alg->ops] = m;
2484 best_alg->op[best_alg->ops] = alg_shift;
2491 /* If we have an odd number, add or subtract one. */
2494 unsigned HOST_WIDE_INT w;
2497 for (w = 1; (w & t) != 0; w <<= 1)
2499 /* If T was -1, then W will be zero after the loop. This is another
2500 case where T ends with ...111. Handling this with (T + 1) and
2501 subtract 1 produces slightly better code and results in algorithm
2502 selection much faster than treating it like the ...0111 case
2506 /* Reject the case where t is 3.
2507 Thus we prefer addition in that case. */
2510 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2512 op_cost = add_cost[mode];
2513 new_limit.cost = best_cost.cost - op_cost;
2514 new_limit.latency = best_cost.latency - op_cost;
2515 synth_mult (alg_in, t + 1, &new_limit, mode);
2517 alg_in->cost.cost += op_cost;
2518 alg_in->cost.latency += op_cost;
2519 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2521 struct algorithm *x;
2522 best_cost = alg_in->cost;
2523 x = alg_in, alg_in = best_alg, best_alg = x;
2524 best_alg->log[best_alg->ops] = 0;
2525 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2530 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2532 op_cost = add_cost[mode];
2533 new_limit.cost = best_cost.cost - op_cost;
2534 new_limit.latency = best_cost.latency - op_cost;
2535 synth_mult (alg_in, t - 1, &new_limit, mode);
2537 alg_in->cost.cost += op_cost;
2538 alg_in->cost.latency += op_cost;
2539 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2541 struct algorithm *x;
2542 best_cost = alg_in->cost;
2543 x = alg_in, alg_in = best_alg, best_alg = x;
2544 best_alg->log[best_alg->ops] = 0;
2545 best_alg->op[best_alg->ops] = alg_add_t_m2;
2552 /* Look for factors of t of the form
2553 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2554 If we find such a factor, we can multiply by t using an algorithm that
2555 multiplies by q, shift the result by m and add/subtract it to itself.
2557 We search for large factors first and loop down, even if large factors
2558 are less probable than small; if we find a large factor we will find a
2559 good sequence quickly, and therefore be able to prune (by decreasing
2560 COST_LIMIT) the search. */
2562 do_alg_addsub_factor:
2563 for (m = floor_log2 (t - 1); m >= 2; m--)
2565 unsigned HOST_WIDE_INT d;
2567 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2568 if (t % d == 0 && t > d && m < maxm
2569 && (!cache_hit || cache_alg == alg_add_factor))
2571 /* If the target has a cheap shift-and-add instruction use
2572 that in preference to a shift insn followed by an add insn.
2573 Assume that the shift-and-add is "atomic" with a latency
2574 equal to its cost, otherwise assume that on superscalar
2575 hardware the shift may be executed concurrently with the
2576 earlier steps in the algorithm. */
2577 op_cost = add_cost[mode] + shift_cost[mode][m];
2578 if (shiftadd_cost[mode][m] < op_cost)
2580 op_cost = shiftadd_cost[mode][m];
2581 op_latency = op_cost;
2584 op_latency = add_cost[mode];
2586 new_limit.cost = best_cost.cost - op_cost;
2587 new_limit.latency = best_cost.latency - op_latency;
2588 synth_mult (alg_in, t / d, &new_limit, mode);
2590 alg_in->cost.cost += op_cost;
2591 alg_in->cost.latency += op_latency;
2592 if (alg_in->cost.latency < op_cost)
2593 alg_in->cost.latency = op_cost;
2594 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2596 struct algorithm *x;
2597 best_cost = alg_in->cost;
2598 x = alg_in, alg_in = best_alg, best_alg = x;
2599 best_alg->log[best_alg->ops] = m;
2600 best_alg->op[best_alg->ops] = alg_add_factor;
2602 /* Other factors will have been taken care of in the recursion. */
2606 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2607 if (t % d == 0 && t > d && m < maxm
2608 && (!cache_hit || cache_alg == alg_sub_factor))
2610 /* If the target has a cheap shift-and-subtract insn use
2611 that in preference to a shift insn followed by a sub insn.
2612 Assume that the shift-and-sub is "atomic" with a latency
2613 equal to it's cost, otherwise assume that on superscalar
2614 hardware the shift may be executed concurrently with the
2615 earlier steps in the algorithm. */
2616 op_cost = add_cost[mode] + shift_cost[mode][m];
2617 if (shiftsub_cost[mode][m] < op_cost)
2619 op_cost = shiftsub_cost[mode][m];
2620 op_latency = op_cost;
2623 op_latency = add_cost[mode];
2625 new_limit.cost = best_cost.cost - op_cost;
2626 new_limit.latency = best_cost.latency - op_latency;
2627 synth_mult (alg_in, t / d, &new_limit, mode);
2629 alg_in->cost.cost += op_cost;
2630 alg_in->cost.latency += op_latency;
2631 if (alg_in->cost.latency < op_cost)
2632 alg_in->cost.latency = op_cost;
2633 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2635 struct algorithm *x;
2636 best_cost = alg_in->cost;
2637 x = alg_in, alg_in = best_alg, best_alg = x;
2638 best_alg->log[best_alg->ops] = m;
2639 best_alg->op[best_alg->ops] = alg_sub_factor;
2647 /* Try shift-and-add (load effective address) instructions,
2648 i.e. do a*3, a*5, a*9. */
2655 if (m >= 0 && m < maxm)
2657 op_cost = shiftadd_cost[mode][m];
2658 new_limit.cost = best_cost.cost - op_cost;
2659 new_limit.latency = best_cost.latency - op_cost;
2660 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2662 alg_in->cost.cost += op_cost;
2663 alg_in->cost.latency += op_cost;
2664 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2666 struct algorithm *x;
2667 best_cost = alg_in->cost;
2668 x = alg_in, alg_in = best_alg, best_alg = x;
2669 best_alg->log[best_alg->ops] = m;
2670 best_alg->op[best_alg->ops] = alg_add_t2_m;
2680 if (m >= 0 && m < maxm)
2682 op_cost = shiftsub_cost[mode][m];
2683 new_limit.cost = best_cost.cost - op_cost;
2684 new_limit.latency = best_cost.latency - op_cost;
2685 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2687 alg_in->cost.cost += op_cost;
2688 alg_in->cost.latency += op_cost;
2689 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2691 struct algorithm *x;
2692 best_cost = alg_in->cost;
2693 x = alg_in, alg_in = best_alg, best_alg = x;
2694 best_alg->log[best_alg->ops] = m;
2695 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2703 /* If best_cost has not decreased, we have not found any algorithm. */
2704 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2706 /* We failed to find an algorithm. Record alg_impossible for
2707 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2708 we are asked to find an algorithm for T within the same or
2709 lower COST_LIMIT, we can immediately return to the
2711 alg_hash[hash_index].t = t;
2712 alg_hash[hash_index].mode = mode;
2713 alg_hash[hash_index].alg = alg_impossible;
2714 alg_hash[hash_index].cost = *cost_limit;
2718 /* Cache the result. */
2721 alg_hash[hash_index].t = t;
2722 alg_hash[hash_index].mode = mode;
2723 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2724 alg_hash[hash_index].cost.cost = best_cost.cost;
2725 alg_hash[hash_index].cost.latency = best_cost.latency;
2728 /* If we are getting a too long sequence for `struct algorithm'
2729 to record, make this search fail. */
2730 if (best_alg->ops == MAX_BITS_PER_WORD)
2733 /* Copy the algorithm from temporary space to the space at alg_out.
2734 We avoid using structure assignment because the majority of
2735 best_alg is normally undefined, and this is a critical function. */
2736 alg_out->ops = best_alg->ops + 1;
2737 alg_out->cost = best_cost;
2738 memcpy (alg_out->op, best_alg->op,
2739 alg_out->ops * sizeof *alg_out->op);
2740 memcpy (alg_out->log, best_alg->log,
2741 alg_out->ops * sizeof *alg_out->log);
2744 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2745 Try three variations:
2747 - a shift/add sequence based on VAL itself
2748 - a shift/add sequence based on -VAL, followed by a negation
2749 - a shift/add sequence based on VAL - 1, followed by an addition.
2751 Return true if the cheapest of these cost less than MULT_COST,
2752 describing the algorithm in *ALG and final fixup in *VARIANT. */
2755 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2756 struct algorithm *alg, enum mult_variant *variant,
2759 struct algorithm alg2;
2760 struct mult_cost limit;
2763 /* Fail quickly for impossible bounds. */
2767 /* Ensure that mult_cost provides a reasonable upper bound.
2768 Any constant multiplication can be performed with less
2769 than 2 * bits additions. */
2770 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
2771 if (mult_cost > op_cost)
2772 mult_cost = op_cost;
2774 *variant = basic_variant;
2775 limit.cost = mult_cost;
2776 limit.latency = mult_cost;
2777 synth_mult (alg, val, &limit, mode);
2779 /* This works only if the inverted value actually fits in an
2781 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2783 op_cost = neg_cost[mode];
2784 if (MULT_COST_LESS (&alg->cost, mult_cost))
2786 limit.cost = alg->cost.cost - op_cost;
2787 limit.latency = alg->cost.latency - op_cost;
2791 limit.cost = mult_cost - op_cost;
2792 limit.latency = mult_cost - op_cost;
2795 synth_mult (&alg2, -val, &limit, mode);
2796 alg2.cost.cost += op_cost;
2797 alg2.cost.latency += op_cost;
2798 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2799 *alg = alg2, *variant = negate_variant;
2802 /* This proves very useful for division-by-constant. */
2803 op_cost = add_cost[mode];
2804 if (MULT_COST_LESS (&alg->cost, mult_cost))
2806 limit.cost = alg->cost.cost - op_cost;
2807 limit.latency = alg->cost.latency - op_cost;
2811 limit.cost = mult_cost - op_cost;
2812 limit.latency = mult_cost - op_cost;
2815 synth_mult (&alg2, val - 1, &limit, mode);
2816 alg2.cost.cost += op_cost;
2817 alg2.cost.latency += op_cost;
2818 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2819 *alg = alg2, *variant = add_variant;
2821 return MULT_COST_LESS (&alg->cost, mult_cost);
2824 /* A subroutine of expand_mult, used for constant multiplications.
2825 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2826 convenient. Use the shift/add sequence described by ALG and apply
2827 the final fixup specified by VARIANT. */
2830 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2831 rtx target, const struct algorithm *alg,
2832 enum mult_variant variant)
2834 HOST_WIDE_INT val_so_far;
2835 rtx insn, accum, tem;
2837 enum machine_mode nmode;
2839 /* Avoid referencing memory over and over and invalid sharing
2841 op0 = force_reg (mode, op0);
2843 /* ACCUM starts out either as OP0 or as a zero, depending on
2844 the first operation. */
2846 if (alg->op[0] == alg_zero)
2848 accum = copy_to_mode_reg (mode, const0_rtx);
2851 else if (alg->op[0] == alg_m)
2853 accum = copy_to_mode_reg (mode, op0);
2859 for (opno = 1; opno < alg->ops; opno++)
2861 int log = alg->log[opno];
2862 rtx shift_subtarget = optimize ? 0 : accum;
2864 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2867 rtx accum_target = optimize ? 0 : accum;
2869 switch (alg->op[opno])
2872 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2873 build_int_cst (NULL_TREE, log),
2879 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2880 build_int_cst (NULL_TREE, log),
2882 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2883 add_target ? add_target : accum_target);
2884 val_so_far += (HOST_WIDE_INT) 1 << log;
2888 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2889 build_int_cst (NULL_TREE, log),
2891 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2892 add_target ? add_target : accum_target);
2893 val_so_far -= (HOST_WIDE_INT) 1 << log;
2897 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2898 build_int_cst (NULL_TREE, log),
2901 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2902 add_target ? add_target : accum_target);
2903 val_so_far = (val_so_far << log) + 1;
2907 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2908 build_int_cst (NULL_TREE, log),
2909 shift_subtarget, 0);
2910 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2911 add_target ? add_target : accum_target);
2912 val_so_far = (val_so_far << log) - 1;
2915 case alg_add_factor:
2916 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2917 build_int_cst (NULL_TREE, log),
2919 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2920 add_target ? add_target : accum_target);
2921 val_so_far += val_so_far << log;
2924 case alg_sub_factor:
2925 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2926 build_int_cst (NULL_TREE, log),
2928 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2930 ? add_target : (optimize ? 0 : tem)));
2931 val_so_far = (val_so_far << log) - val_so_far;
2938 /* Write a REG_EQUAL note on the last insn so that we can cse
2939 multiplication sequences. Note that if ACCUM is a SUBREG,
2940 we've set the inner register and must properly indicate
2943 tem = op0, nmode = mode;
2944 if (GET_CODE (accum) == SUBREG)
2946 nmode = GET_MODE (SUBREG_REG (accum));
2947 tem = gen_lowpart (nmode, op0);
2950 insn = get_last_insn ();
2951 set_unique_reg_note (insn, REG_EQUAL,
2952 gen_rtx_MULT (nmode, tem,
2953 GEN_INT (val_so_far)));
2956 if (variant == negate_variant)
2958 val_so_far = -val_so_far;
2959 accum = expand_unop (mode, neg_optab, accum, target, 0);
2961 else if (variant == add_variant)
2963 val_so_far = val_so_far + 1;
2964 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2967 /* Compare only the bits of val and val_so_far that are significant
2968 in the result mode, to avoid sign-/zero-extension confusion. */
2969 val &= GET_MODE_MASK (mode);
2970 val_so_far &= GET_MODE_MASK (mode);
2971 gcc_assert (val == val_so_far);
2976 /* Perform a multiplication and return an rtx for the result.
2977 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2978 TARGET is a suggestion for where to store the result (an rtx).
2980 We check specially for a constant integer as OP1.
2981 If you want this check for OP0 as well, then before calling
2982 you should swap the two operands if OP0 would be constant. */
2985 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2988 enum mult_variant variant;
2989 struct algorithm algorithm;
2992 /* Handling const0_rtx here allows us to use zero as a rogue value for
2994 if (op1 == const0_rtx)
2996 if (op1 == const1_rtx)
2998 if (op1 == constm1_rtx)
2999 return expand_unop (mode,
3000 GET_MODE_CLASS (mode) == MODE_INT
3001 && !unsignedp && flag_trapv
3002 ? negv_optab : neg_optab,
3005 /* These are the operations that are potentially turned into a sequence
3006 of shifts and additions. */
3007 if (SCALAR_INT_MODE_P (mode)
3008 && (unsignedp || !flag_trapv))
3010 HOST_WIDE_INT coeff = 0;
3011 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3013 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3014 less than or equal in size to `unsigned int' this doesn't matter.
3015 If the mode is larger than `unsigned int', then synth_mult works
3016 only if the constant value exactly fits in an `unsigned int' without
3017 any truncation. This means that multiplying by negative values does
3018 not work; results are off by 2^32 on a 32 bit machine. */
3020 if (GET_CODE (op1) == CONST_INT)
3022 /* Attempt to handle multiplication of DImode values by negative
3023 coefficients, by performing the multiplication by a positive
3024 multiplier and then inverting the result. */
3025 if (INTVAL (op1) < 0
3026 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3028 /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3029 result is interpreted as an unsigned coefficient.
3030 Exclude cost of op0 from max_cost to match the cost
3031 calculation of the synth_mult. */
3032 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3035 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3036 &variant, max_cost))
3038 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3039 NULL_RTX, &algorithm,
3041 return expand_unop (mode, neg_optab, temp, target, 0);
3044 else coeff = INTVAL (op1);
3046 else if (GET_CODE (op1) == CONST_DOUBLE)
3048 /* If we are multiplying in DImode, it may still be a win
3049 to try to work with shifts and adds. */
3050 if (CONST_DOUBLE_HIGH (op1) == 0)
3051 coeff = CONST_DOUBLE_LOW (op1);
3052 else if (CONST_DOUBLE_LOW (op1) == 0
3053 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3055 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3056 + HOST_BITS_PER_WIDE_INT;
3057 return expand_shift (LSHIFT_EXPR, mode, op0,
3058 build_int_cst (NULL_TREE, shift),
3063 /* We used to test optimize here, on the grounds that it's better to
3064 produce a smaller program when -O is not used. But this causes
3065 such a terrible slowdown sometimes that it seems better to always
3069 /* Special case powers of two. */
3070 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3071 return expand_shift (LSHIFT_EXPR, mode, op0,
3072 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3075 /* Exclude cost of op0 from max_cost to match the cost
3076 calculation of the synth_mult. */
3077 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3078 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3080 return expand_mult_const (mode, op0, coeff, target,
3081 &algorithm, variant);
3085 if (GET_CODE (op0) == CONST_DOUBLE)
3092 /* Expand x*2.0 as x+x. */
3093 if (GET_CODE (op1) == CONST_DOUBLE
3094 && SCALAR_FLOAT_MODE_P (mode))
3097 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3099 if (REAL_VALUES_EQUAL (d, dconst2))
3101 op0 = force_reg (GET_MODE (op0), op0);
3102 return expand_binop (mode, add_optab, op0, op0,
3103 target, unsignedp, OPTAB_LIB_WIDEN);
3107 /* This used to use umul_optab if unsigned, but for non-widening multiply
3108 there is no difference between signed and unsigned. */
3109 op0 = expand_binop (mode,
3111 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3112 ? smulv_optab : smul_optab,
3113 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3118 /* Return the smallest n such that 2**n >= X. */
3121 ceil_log2 (unsigned HOST_WIDE_INT x)
3123 return floor_log2 (x - 1) + 1;
3126 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3127 replace division by D, and put the least significant N bits of the result
3128 in *MULTIPLIER_PTR and return the most significant bit.
3130 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3131 needed precision is in PRECISION (should be <= N).
3133 PRECISION should be as small as possible so this function can choose
3134 multiplier more freely.
3136 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3137 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3139 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3140 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3143 unsigned HOST_WIDE_INT
3144 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3145 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3147 HOST_WIDE_INT mhigh_hi, mlow_hi;
3148 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3149 int lgup, post_shift;
3151 unsigned HOST_WIDE_INT nl, dummy1;
3152 HOST_WIDE_INT nh, dummy2;
3154 /* lgup = ceil(log2(divisor)); */
3155 lgup = ceil_log2 (d);
3157 gcc_assert (lgup <= n);
3160 pow2 = n + lgup - precision;
3162 /* We could handle this with some effort, but this case is much
3163 better handled directly with a scc insn, so rely on caller using
3165 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3167 /* mlow = 2^(N + lgup)/d */
3168 if (pow >= HOST_BITS_PER_WIDE_INT)
3170 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3176 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3178 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3179 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3181 /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3182 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3183 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3185 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3186 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3187 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3189 gcc_assert (!mhigh_hi || nh - d < d);
3190 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3191 /* Assert that mlow < mhigh. */
3192 gcc_assert (mlow_hi < mhigh_hi
3193 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3195 /* If precision == N, then mlow, mhigh exceed 2^N
3196 (but they do not exceed 2^(N+1)). */
3198 /* Reduce to lowest terms. */
3199 for (post_shift = lgup; post_shift > 0; post_shift--)
3201 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3202 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3212 *post_shift_ptr = post_shift;
3214 if (n < HOST_BITS_PER_WIDE_INT)
3216 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3217 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3218 return mhigh_lo >= mask;
3222 *multiplier_ptr = GEN_INT (mhigh_lo);
3227 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3228 congruent to 1 (mod 2**N). */
3230 static unsigned HOST_WIDE_INT
3231 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3233 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3235 /* The algorithm notes that the choice y = x satisfies
3236 x*y == 1 mod 2^3, since x is assumed odd.
3237 Each iteration doubles the number of bits of significance in y. */
3239 unsigned HOST_WIDE_INT mask;
3240 unsigned HOST_WIDE_INT y = x;
3243 mask = (n == HOST_BITS_PER_WIDE_INT
3244 ? ~(unsigned HOST_WIDE_INT) 0
3245 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3249 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3255 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3256 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3257 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3258 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3261 The result is put in TARGET if that is convenient.
3263 MODE is the mode of operation. */
3266 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3267 rtx op1, rtx target, int unsignedp)
3270 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3272 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3273 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3275 tem = expand_and (mode, tem, op1, NULL_RTX);
3277 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3280 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3281 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3283 tem = expand_and (mode, tem, op0, NULL_RTX);
3284 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3290 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3293 extract_high_half (enum machine_mode mode, rtx op)
3295 enum machine_mode wider_mode;
3297 if (mode == word_mode)
3298 return gen_highpart (mode, op);
3300 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3302 wider_mode = GET_MODE_WIDER_MODE (mode);
3303 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3304 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3305 return convert_modes (mode, wider_mode, op, 0);
3308 /* Like expand_mult_highpart, but only consider using a multiplication
3309 optab. OP1 is an rtx for the constant operand. */
/* NOTE(review): this extracted block is missing interior lines -- the
   return type, the function braces, and the declarations of moptab, tem
   and size are elided; the leading "NNNN" tokens are line-number residue
   from the extraction, not C code.  The visible logic is a cascade of
   cost-gated strategies; each strategy returns on success and otherwise
   falls through to the next.  */
3312 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3313 rtx target, int unsignedp, int max_cost)
3315 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3316 enum machine_mode wider_mode;
3321 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3323 wider_mode = GET_MODE_WIDER_MODE (mode);
3324 size = GET_MODE_BITSIZE (mode);
3326 /* Firstly, try using a multiplication insn that only generates the needed
3327 high part of the product, and in the sign flavor of unsignedp. */
3328 if (mul_highpart_cost[mode] < max_cost)
3330 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3331 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3332 unsignedp, OPTAB_DIRECT);
3337 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3338 Need to adjust the result after the multiplication. */
/* The cost gate below budgets for the fix-up: two sign-bit shifts and up
   to four add/sub operations performed by expand_mult_highpart_adjust.  */
3339 if (size - 1 < BITS_PER_WORD
3340 && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3341 + 4 * add_cost[mode] < max_cost))
3343 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3344 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3345 unsignedp, OPTAB_DIRECT);
3347 /* We used the wrong signedness. Adjust the result. */
3348 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3352 /* Try widening multiplication. */
3353 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3354 if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3355 && mul_widen_cost[wider_mode] < max_cost)
3357 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3358 unsignedp, OPTAB_WIDEN);
3360 return extract_high_half (mode, tem);
3363 /* Try widening the mode and perform a non-widening multiplication. */
3364 if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3365 && size - 1 < BITS_PER_WORD
3366 && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3368 rtx insns, wop0, wop1;
3370 /* We need to widen the operands, for example to ensure the
3371 constant multiplier is correctly sign or zero extended.
3372 Use a sequence to clean-up any instructions emitted by
3373 the conversions if things don't work out. */
3375 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3376 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3377 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3378 unsignedp, OPTAB_WIDEN);
3379 insns = get_insns ();
/* NOTE(review): the elided lines here presumably end the sequence and
   emit INSNS on success (end_sequence / emit_insn) -- TODO confirm
   against the full source.  */
3385 return extract_high_half (mode, tem);
3389 /* Try widening multiplication of opposite signedness, and adjust. */
3390 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3391 if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3392 && size - 1 < BITS_PER_WORD
3393 && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3394 + 4 * add_cost[mode] < max_cost))
3396 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3397 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3400 tem = extract_high_half (mode, tem);
3401 /* We used the wrong signedness. Adjust the result. */
3402 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3410 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3411 putting the high half of the result in TARGET if that is convenient,
3412 and return where the result is. If the operation can not be performed,
3415 MODE is the mode of operation and result.
3417 UNSIGNEDP nonzero means unsigned multiply.
3419 MAX_COST is the total allowed cost for the expanded RTL. */
/* NOTE(review): interior lines are elided in this extraction (return
   type, braces, and the declarations of extra_cost and tem among them);
   the "NNNN" prefixes are extraction residue, not C code.  */
3422 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3423 rtx target, int unsignedp, int max_cost)
3425 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3426 unsigned HOST_WIDE_INT cnst1;
3428 bool sign_adjust = false;
3429 enum mult_variant variant;
3430 struct algorithm alg;
3433 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3434 /* We can't support modes wider than HOST_BITS_PER_INT. */
3435 gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3437 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3439 /* We can't optimize modes wider than BITS_PER_WORD.
3440 ??? We might be able to perform double-word arithmetic if
3441 mode == word_mode, however all the cost calculations in
3442 synth_mult etc. assume single-word operations. */
3443 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3444 return expand_mult_highpart_optab (mode, op0, op1, target,
3445 unsignedp, max_cost);
/* Budget for the final high-half extraction shift.  */
3447 extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3449 /* Check whether we try to multiply by a negative constant. */
3450 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
/* NOTE(review): the elided line here presumably sets sign_adjust = true
   before adding the correction cost -- TODO confirm.  */
3453 extra_cost += add_cost[mode];
3456 /* See whether shift/add multiplication is cheap enough. */
3457 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3458 max_cost - extra_cost))
3460 /* See whether the specialized multiplication optabs are
3461 cheaper than the shift/add version. */
3462 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3463 alg.cost.cost + extra_cost)&#59;
3467 tem = convert_to_mode (wider_mode, op0, unsignedp);
3468 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3469 tem = extract_high_half (mode, tem);
3471 /* Adjust result for signedness. */
/* Subtracting OP0 corrects the high half when the constant's sign bit
   was set and the multiply was done as unsigned.  */
3473 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3477 return expand_mult_highpart_optab (mode, op0, op1, target,
3478 unsignedp, max_cost);
3482 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
/* NOTE(review): interior lines are elided in this extraction (return
   type, braces, the declaration of logd, and some control-flow lines);
   the "NNNN" prefixes are extraction residue.  Two strategies are
   visible: a branch-free mask-and-correct sequence for cheap-branch-
   avoidance targets, and a compare-and-jump fix-up otherwise.  */
3485 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3487 unsigned HOST_WIDE_INT masklow, maskhigh;
3488 rtx result, temp, shift, label;
3491 logd = floor_log2 (d);
3492 result = gen_reg_rtx (mode);
3494 /* Avoid conditional branches when they're expensive. */
3495 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3496 && optimize_insn_for_speed_p ())
/* SIGNMASK becomes all-ones when OP0 is negative, all-zeros otherwise.  */
3498 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3502 signmask = force_reg (mode, signmask);
3503 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3504 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3506 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3507 which instruction sequence to use. If logical right shifts
3508 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3509 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3511 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3512 if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3513 || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
/* Branch-free remainder: fold OP0 through the sign mask (conditional
   negate), mask to the low LOGD bits, then fold back through the sign
   mask to restore the sign of the remainder.  */
3515 temp = expand_binop (mode, xor_optab, op0, signmask,
3516 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3517 temp = expand_binop (mode, sub_optab, temp, signmask,
3518 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3519 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3520 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3521 temp = expand_binop (mode, xor_optab, temp, signmask,
3522 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3523 temp = expand_binop (mode, sub_optab, temp, signmask,
3524 NULL_RTX, 1, OPTAB_LIB_WIDEN);
/* Cheaper variant: turn SIGNMASK into the bias (d - 1 for negative
   OP0, 0 otherwise) via a logical shift, add it, mask, subtract.  */
3528 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3529 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3530 signmask = force_reg (mode, signmask);
3532 temp = expand_binop (mode, add_optab, op0, signmask,
3533 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3534 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3535 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3536 temp = expand_binop (mode, sub_optab, temp, signmask,
3537 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3543 /* Mask contains the mode's signbit and the significant bits of the
3544 modulus. By including the signbit in the operation, many targets
3545 can avoid an explicit compare operation in the following comparison
3548 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3549 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3551 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
/* Mode wider than a host word: the sign bit lives in MASKHIGH.  */
3555 maskhigh = (HOST_WIDE_INT) -1
3556 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3558 temp = expand_binop (mode, and_optab, op0,
3559 immed_double_const (masklow, maskhigh, mode),
3560 result, 1, OPTAB_LIB_WIDEN);
3562 emit_move_insn (result, temp);
/* If RESULT >= 0 the masked value is already the remainder; otherwise
   fall through and correct it below.  */
3564 label = gen_label_rtx ();
3565 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3567 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3568 0, OPTAB_LIB_WIDEN);
3569 masklow = (HOST_WIDE_INT) -1 << logd;
3571 temp = expand_binop (mode, ior_optab, temp,
3572 immed_double_const (masklow, maskhigh, mode),
3573 result, 1, OPTAB_LIB_WIDEN);
3574 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3575 0, OPTAB_LIB_WIDEN);
3577 emit_move_insn (result, temp);
3582 /* Expand signed division of OP0 by a power of two D in mode MODE.
3583 This routine is only called for positive values of D. */
/* NOTE(review): interior lines are elided in this extraction (return
   type, braces, declarations of temp/label/logd, and parts of the
   BRANCH_COST conditions); the "NNNN" prefixes are extraction residue.
   The common technique in every visible strategy: bias a negative OP0
   by D-1 (or by 1 when D == 2) so that the arithmetic right shift by
   LOGD rounds toward zero rather than toward -infinity.  */
3586 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3592 logd = floor_log2 (d);
3593 shift = build_int_cst (NULL_TREE, logd);
/* Strategy 1 (condition partly elided): store-flag gives 1 for a
   negative OP0; add it and shift.  Presumably gated on D == 2 --
   TODO confirm against the full source.  */
3596 && BRANCH_COST (optimize_insn_for_speed_p (),
3599 temp = gen_reg_rtx (mode);
3600 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3601 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3602 0, OPTAB_LIB_WIDEN);
3603 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3606 #ifdef HAVE_conditional_move
/* Strategy 2: compute OP0 + (D-1) unconditionally and select it with a
   conditional move when OP0 < 0, then shift.  */
3607 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3612 /* ??? emit_conditional_move forces a stack adjustment via
3613 compare_from_rtx so, if the sequence is discarded, it will
3614 be lost. Do it now instead. */
3615 do_pending_stack_adjust ();
3618 temp2 = copy_to_mode_reg (mode, op0);
3619 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3620 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3621 temp = force_reg (mode, temp);
3623 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3624 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3625 mode, temp, temp2, mode, 0);
3628 rtx seq = get_insns ();
3631 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
/* Strategy 3: materialize the bias branch-free.  Store-flag with -1
   gives an all-ones mask for negative OP0; either AND it with D-1 or
   (when the AND mask is no cheaper) shift the mask logically right by
   USHIFT to produce D-1 directly.  */
3637 if (BRANCH_COST (optimize_insn_for_speed_p (),
3640 int ushift = GET_MODE_BITSIZE (mode) - logd;
3642 temp = gen_reg_rtx (mode);
3643 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3644 if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3645 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3646 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3648 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3649 build_int_cst (NULL_TREE, ushift),
3651 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3652 0, OPTAB_LIB_WIDEN);
3653 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
/* Fallback: branch around the bias when OP0 >= 0, then shift.  */
3656 label = gen_label_rtx ();
3657 temp = copy_to_mode_reg (mode, op0);
3658 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3659 expand_inc (temp, GEN_INT (d - 1));
3661 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3664 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3665 if that is convenient, and returning where the result is.
3666 You may request either the quotient or the remainder as the result;
3667 specify REM_FLAG nonzero to get the remainder.
3669 CODE is the expression code for which kind of division this is;
3670 it controls how rounding is done. MODE is the machine mode to use.
3671 UNSIGNEDP nonzero means do unsigned division. */
3673 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3674 and then correct it by or'ing in missing high bits
3675 if result of ANDI is nonzero.
3676 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3677 This could optimize to a bfexts instruction.
3678 But C doesn't use these operations, so their optimizations are
3680 /* ??? For modulo, we don't actually need the highpart of the first product,
3681 the low part will do nicely. And for small divisors, the second multiply
3682 can also be a low-part only multiply or even be completely left out.
3683 E.g. to calculate the remainder of a division by 3 with a 32 bit
3684 multiply, multiply with 0x55555556 and extract the upper two bits;
3685 the result is exact for inputs up to 0x1fffffff.
3686 The input range can be reduced by using cross-sum rules.
3687 For odd divisors >= 3, the following table gives right shift counts
3688 so that if a number is shifted by an integer multiple of the given
3689 amount, the remainder stays the same:
3690 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3691 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3692 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3693 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3694 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3696 Cross-sum rules for even numbers can be derived by leaving as many bits
3697 to the right alone as the divisor has zeros to the right.
3698 E.g. if x is an unsigned 32 bit number:
3699 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3703 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3704 rtx op0, rtx op1, rtx target, int unsignedp)
3706 enum machine_mode compute_mode;
3708 rtx quotient = 0, remainder = 0;
3712 optab optab1, optab2;
3713 int op1_is_constant, op1_is_pow2 = 0;
3714 int max_cost, extra_cost;
3715 static HOST_WIDE_INT last_div_const = 0;
3716 static HOST_WIDE_INT ext_op1;
3718 op1_is_constant = GET_CODE (op1) == CONST_INT;
3719 if (op1_is_constant)
3721 ext_op1 = INTVAL (op1);
3723 ext_op1 &= GET_MODE_MASK (mode);
3724 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3725 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3729 This is the structure of expand_divmod:
3731 First comes code to fix up the operands so we can perform the operations
3732 correctly and efficiently.
3734 Second comes a switch statement with code specific for each rounding mode.
3735 For some special operands this code emits all RTL for the desired
3736 operation, for other cases, it generates only a quotient and stores it in
3737 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3738 to indicate that it has not done anything.
3740 Last comes code that finishes the operation. If QUOTIENT is set and
3741 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3742 QUOTIENT is not set, it is computed using trunc rounding.
3744 We try to generate special code for division and remainder when OP1 is a
3745 constant. If |OP1| = 2**n we can use shifts and some other fast
3746 operations. For other values of OP1, we compute a carefully selected
3747 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3750 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3751 half of the product. Different strategies for generating the product are
3752 implemented in expand_mult_highpart.
3754 If what we actually want is the remainder, we generate that by another
3755 by-constant multiplication and a subtraction. */
3757 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3758 code below will malfunction if we are, so check here and handle
3759 the special case if so. */
3760 if (op1 == const1_rtx)
3761 return rem_flag ? const0_rtx : op0;
3763 /* When dividing by -1, we could get an overflow.
3764 negv_optab can handle overflows. */
3765 if (! unsignedp && op1 == constm1_rtx)
3769 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3770 ? negv_optab : neg_optab, op0, target, 0);
3774 /* Don't use the function value register as a target
3775 since we have to read it as well as write it,
3776 and function-inlining gets confused by this. */
3777 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3778 /* Don't clobber an operand while doing a multi-step calculation. */
3779 || ((rem_flag || op1_is_constant)
3780 && (reg_mentioned_p (target, op0)
3781 || (MEM_P (op0) && MEM_P (target))))
3782 || reg_mentioned_p (target, op1)
3783 || (MEM_P (op1) && MEM_P (target))))
3786 /* Get the mode in which to perform this computation. Normally it will
3787 be MODE, but sometimes we can't do the desired operation in MODE.
3788 If so, pick a wider mode in which we can do the operation. Convert
3789 to that mode at the start to avoid repeated conversions.
3791 First see what operations we need. These depend on the expression
3792 we are evaluating. (We assume that divxx3 insns exist under the
3793 same conditions that modxx3 insns and that these insns don't normally
3794 fail. If these assumptions are not correct, we may generate less
3795 efficient code in some cases.)
3797 Then see if we find a mode in which we can open-code that operation
3798 (either a division, modulus, or shift). Finally, check for the smallest
3799 mode for which we can do the operation with a library call. */
3801 /* We might want to refine this now that we have division-by-constant
3802 optimization. Since expand_mult_highpart tries so many variants, it is
3803 not straightforward to generalize this. Maybe we should make an array
3804 of possible modes in init_expmed? Save this for GCC 2.7. */
3806 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3807 ? (unsignedp ? lshr_optab : ashr_optab)
3808 : (unsignedp ? udiv_optab : sdiv_optab));
3809 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3811 : (unsignedp ? udivmod_optab : sdivmod_optab));
3813 for (compute_mode = mode; compute_mode != VOIDmode;
3814 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3815 if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3816 || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3819 if (compute_mode == VOIDmode)
3820 for (compute_mode = mode; compute_mode != VOIDmode;
3821 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3822 if (optab_libfunc (optab1, compute_mode)
3823 || optab_libfunc (optab2, compute_mode))
3826 /* If we still couldn't find a mode, use MODE, but expand_binop will
3828 if (compute_mode == VOIDmode)
3829 compute_mode = mode;
3831 if (target && GET_MODE (target) == compute_mode)
3834 tquotient = gen_reg_rtx (compute_mode);
3836 size = GET_MODE_BITSIZE (compute_mode);
3838 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3839 (mode), and thereby get better code when OP1 is a constant. Do that
3840 later. It will require going over all usages of SIZE below. */
3841 size = GET_MODE_BITSIZE (mode);
3844 /* Only deduct something for a REM if the last divide done was
3845 for a different constant. Then set the constant of the last
3847 max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
3848 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3849 && INTVAL (op1) == last_div_const))
3850 max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
3852 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3854 /* Now convert to the best mode to use. */
3855 if (compute_mode != mode)
3857 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3858 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3860 /* convert_modes may have placed op1 into a register, so we
3861 must recompute the following. */
3862 op1_is_constant = GET_CODE (op1) == CONST_INT;
3863 op1_is_pow2 = (op1_is_constant
3864 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3866 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3869 /* If one of the operands is a volatile MEM, copy it into a register. */
3871 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3872 op0 = force_reg (compute_mode, op0);
3873 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3874 op1 = force_reg (compute_mode, op1);
3876 /* If we need the remainder or if OP1 is constant, we need to
3877 put OP0 in a register in case it has any queued subexpressions. */
3878 if (rem_flag || op1_is_constant)
3879 op0 = force_reg (compute_mode, op0);
3881 last = get_last_insn ();
3883 /* Promote floor rounding to trunc rounding for unsigned operations. */
3886 if (code == FLOOR_DIV_EXPR)
3887 code = TRUNC_DIV_EXPR;
3888 if (code == FLOOR_MOD_EXPR)
3889 code = TRUNC_MOD_EXPR;
3890 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3891 code = TRUNC_DIV_EXPR;
3894 if (op1 != const0_rtx)
3897 case TRUNC_MOD_EXPR:
3898 case TRUNC_DIV_EXPR:
3899 if (op1_is_constant)
3903 unsigned HOST_WIDE_INT mh;
3904 int pre_shift, post_shift;
3907 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3908 & GET_MODE_MASK (compute_mode));
3910 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3912 pre_shift = floor_log2 (d);
3916 = expand_binop (compute_mode, and_optab, op0,
3917 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3921 return gen_lowpart (mode, remainder);
3923 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3924 build_int_cst (NULL_TREE,
3928 else if (size <= HOST_BITS_PER_WIDE_INT)
3930 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3932 /* Most significant bit of divisor is set; emit an scc
3934 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3935 compute_mode, 1, 1);
3941 /* Find a suitable multiplier and right shift count
3942 instead of multiplying with D. */
3944 mh = choose_multiplier (d, size, size,
3945 &ml, &post_shift, &dummy);
3947 /* If the suggested multiplier is more than SIZE bits,
3948 we can do better for even divisors, using an
3949 initial right shift. */
3950 if (mh != 0 && (d & 1) == 0)
3952 pre_shift = floor_log2 (d & -d);
3953 mh = choose_multiplier (d >> pre_shift, size,
3955 &ml, &post_shift, &dummy);
3965 if (post_shift - 1 >= BITS_PER_WORD)
3969 = (shift_cost[compute_mode][post_shift - 1]
3970 + shift_cost[compute_mode][1]
3971 + 2 * add_cost[compute_mode]);
3972 t1 = expand_mult_highpart (compute_mode, op0, ml,
3974 max_cost - extra_cost);
3977 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3981 (RSHIFT_EXPR, compute_mode, t2,
3982 build_int_cst (NULL_TREE, 1),
3984 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3987 quotient = expand_shift
3988 (RSHIFT_EXPR, compute_mode, t4,
3989 build_int_cst (NULL_TREE, post_shift - 1),
3996 if (pre_shift >= BITS_PER_WORD
3997 || post_shift >= BITS_PER_WORD)
4001 (RSHIFT_EXPR, compute_mode, op0,
4002 build_int_cst (NULL_TREE, pre_shift),
4005 = (shift_cost[compute_mode][pre_shift]
4006 + shift_cost[compute_mode][post_shift]);
4007 t2 = expand_mult_highpart (compute_mode, t1, ml,
4009 max_cost - extra_cost);
4012 quotient = expand_shift
4013 (RSHIFT_EXPR, compute_mode, t2,
4014 build_int_cst (NULL_TREE, post_shift),
4019 else /* Too wide mode to use tricky code */
4022 insn = get_last_insn ();
4024 && (set = single_set (insn)) != 0
4025 && SET_DEST (set) == quotient)
4026 set_unique_reg_note (insn,
4028 gen_rtx_UDIV (compute_mode, op0, op1));
4030 else /* TRUNC_DIV, signed */
4032 unsigned HOST_WIDE_INT ml;
4033 int lgup, post_shift;
4035 HOST_WIDE_INT d = INTVAL (op1);
4036 unsigned HOST_WIDE_INT abs_d;
4038 /* Since d might be INT_MIN, we have to cast to
4039 unsigned HOST_WIDE_INT before negating to avoid
4040 undefined signed overflow. */
4042 ? (unsigned HOST_WIDE_INT) d
4043 : - (unsigned HOST_WIDE_INT) d);
4045 /* n rem d = n rem -d */
4046 if (rem_flag && d < 0)
4049 op1 = gen_int_mode (abs_d, compute_mode);
4055 quotient = expand_unop (compute_mode, neg_optab, op0,
4057 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4059 /* This case is not handled correctly below. */
4060 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4061 compute_mode, 1, 1);
4065 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4066 && (rem_flag ? smod_pow2_cheap[compute_mode]
4067 : sdiv_pow2_cheap[compute_mode])
4068 /* We assume that cheap metric is true if the
4069 optab has an expander for this mode. */
4070 && ((optab_handler ((rem_flag ? smod_optab
4072 compute_mode)->insn_code
4073 != CODE_FOR_nothing)
4074 || (optab_handler(sdivmod_optab,
4076 ->insn_code != CODE_FOR_nothing)))
4078 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4082 remainder = expand_smod_pow2 (compute_mode, op0, d);
4084 return gen_lowpart (mode, remainder);
4087 if (sdiv_pow2_cheap[compute_mode]
4088 && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4089 != CODE_FOR_nothing)
4090 || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4091 != CODE_FOR_nothing)))
4092 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4094 gen_int_mode (abs_d,
4098 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4100 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4101 negate the quotient. */
4104 insn = get_last_insn ();
4106 && (set = single_set (insn)) != 0
4107 && SET_DEST (set) == quotient
4108 && abs_d < ((unsigned HOST_WIDE_INT) 1
4109 << (HOST_BITS_PER_WIDE_INT - 1)))
4110 set_unique_reg_note (insn,
4112 gen_rtx_DIV (compute_mode,
4119 quotient = expand_unop (compute_mode, neg_optab,
4120 quotient, quotient, 0);
4123 else if (size <= HOST_BITS_PER_WIDE_INT)
4125 choose_multiplier (abs_d, size, size - 1,
4126 &mlr, &post_shift, &lgup);
4127 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4128 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4132 if (post_shift >= BITS_PER_WORD
4133 || size - 1 >= BITS_PER_WORD)
4136 extra_cost = (shift_cost[compute_mode][post_shift]
4137 + shift_cost[compute_mode][size - 1]
4138 + add_cost[compute_mode]);
4139 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4141 max_cost - extra_cost);
4145 (RSHIFT_EXPR, compute_mode, t1,
4146 build_int_cst (NULL_TREE, post_shift),
4149 (RSHIFT_EXPR, compute_mode, op0,
4150 build_int_cst (NULL_TREE, size - 1),
4154 = force_operand (gen_rtx_MINUS (compute_mode,
4159 = force_operand (gen_rtx_MINUS (compute_mode,
4167 if (post_shift >= BITS_PER_WORD
4168 || size - 1 >= BITS_PER_WORD)
4171 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4172 mlr = gen_int_mode (ml, compute_mode);
4173 extra_cost = (shift_cost[compute_mode][post_shift]
4174 + shift_cost[compute_mode][size - 1]
4175 + 2 * add_cost[compute_mode]);
4176 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4178 max_cost - extra_cost);
4181 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4185 (RSHIFT_EXPR, compute_mode, t2,
4186 build_int_cst (NULL_TREE, post_shift),
4189 (RSHIFT_EXPR, compute_mode, op0,
4190 build_int_cst (NULL_TREE, size - 1),
4194 = force_operand (gen_rtx_MINUS (compute_mode,
4199 = force_operand (gen_rtx_MINUS (compute_mode,
4204 else /* Too wide mode to use tricky code */
4207 insn = get_last_insn ();
4209 && (set = single_set (insn)) != 0
4210 && SET_DEST (set) == quotient)
4211 set_unique_reg_note (insn,
4213 gen_rtx_DIV (compute_mode, op0, op1));
4218 delete_insns_since (last);
4221 case FLOOR_DIV_EXPR:
4222 case FLOOR_MOD_EXPR:
4223 /* We will come here only for signed operations. */
4224 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4226 unsigned HOST_WIDE_INT mh;
4227 int pre_shift, lgup, post_shift;
4228 HOST_WIDE_INT d = INTVAL (op1);
4233 /* We could just as easily deal with negative constants here,
4234 but it does not seem worth the trouble for GCC 2.6. */
4235 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4237 pre_shift = floor_log2 (d);
4240 remainder = expand_binop (compute_mode, and_optab, op0,
4241 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4242 remainder, 0, OPTAB_LIB_WIDEN);
4244 return gen_lowpart (mode, remainder);
4246 quotient = expand_shift
4247 (RSHIFT_EXPR, compute_mode, op0,
4248 build_int_cst (NULL_TREE, pre_shift),
4255 mh = choose_multiplier (d, size, size - 1,
4256 &ml, &post_shift, &lgup);
4259 if (post_shift < BITS_PER_WORD
4260 && size - 1 < BITS_PER_WORD)
4263 (RSHIFT_EXPR, compute_mode, op0,
4264 build_int_cst (NULL_TREE, size - 1),
4266 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4267 NULL_RTX, 0, OPTAB_WIDEN);
4268 extra_cost = (shift_cost[compute_mode][post_shift]
4269 + shift_cost[compute_mode][size - 1]
4270 + 2 * add_cost[compute_mode]);
4271 t3 = expand_mult_highpart (compute_mode, t2, ml,
4273 max_cost - extra_cost);
4277 (RSHIFT_EXPR, compute_mode, t3,
4278 build_int_cst (NULL_TREE, post_shift),
4280 quotient = expand_binop (compute_mode, xor_optab,
4281 t4, t1, tquotient, 0,
4289 rtx nsign, t1, t2, t3, t4;
4290 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4291 op0, constm1_rtx), NULL_RTX);
4292 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4294 nsign = expand_shift
4295 (RSHIFT_EXPR, compute_mode, t2,
4296 build_int_cst (NULL_TREE, size - 1),
4298 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4300 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4305 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4307 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4316 delete_insns_since (last);
4318 /* Try using an instruction that produces both the quotient and
4319 remainder, using truncation. We can easily compensate the quotient
4320 or remainder to get floor rounding, once we have the remainder.
4321 Notice that we compute also the final remainder value here,
4322 and return the result right away. */
4323 if (target == 0 || GET_MODE (target) != compute_mode)
4324 target = gen_reg_rtx (compute_mode);
4329 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4330 quotient = gen_reg_rtx (compute_mode);
4335 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4336 remainder = gen_reg_rtx (compute_mode);
4339 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4340 quotient, remainder, 0))
4342 /* This could be computed with a branch-less sequence.
4343 Save that for later. */
4345 rtx label = gen_label_rtx ();
4346 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4347 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4348 NULL_RTX, 0, OPTAB_WIDEN);
4349 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4350 expand_dec (quotient, const1_rtx);
4351 expand_inc (remainder, op1);
4353 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4356 /* No luck with division elimination or divmod. Have to do it
4357 by conditionally adjusting op0 *and* the result. */
4359 rtx label1, label2, label3, label4, label5;
4363 quotient = gen_reg_rtx (compute_mode);
4364 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4365 label1 = gen_label_rtx ();
4366 label2 = gen_label_rtx ();
4367 label3 = gen_label_rtx ();
4368 label4 = gen_label_rtx ();
4369 label5 = gen_label_rtx ();
4370 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4371 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4372 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4373 quotient, 0, OPTAB_LIB_WIDEN);
4374 if (tem != quotient)
4375 emit_move_insn (quotient, tem);
4376 emit_jump_insn (gen_jump (label5));
4378 emit_label (label1);
4379 expand_inc (adjusted_op0, const1_rtx);
4380 emit_jump_insn (gen_jump (label4));
4382 emit_label (label2);
4383 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4384 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4385 quotient, 0, OPTAB_LIB_WIDEN);
4386 if (tem != quotient)
4387 emit_move_insn (quotient, tem);
4388 emit_jump_insn (gen_jump (label5));
4390 emit_label (label3);
4391 expand_dec (adjusted_op0, const1_rtx);
4392 emit_label (label4);
4393 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4394 quotient, 0, OPTAB_LIB_WIDEN);
4395 if (tem != quotient)
4396 emit_move_insn (quotient, tem);
4397 expand_dec (quotient, const1_rtx);
4398 emit_label (label5);
4406 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4409 unsigned HOST_WIDE_INT d = INTVAL (op1);
4410 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4411 build_int_cst (NULL_TREE, floor_log2 (d)),
4413 t2 = expand_binop (compute_mode, and_optab, op0,
4415 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4416 t3 = gen_reg_rtx (compute_mode);
4417 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4418 compute_mode, 1, 1);
4422 lab = gen_label_rtx ();
4423 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4424 expand_inc (t1, const1_rtx);
4429 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4435 /* Try using an instruction that produces both the quotient and
4436 remainder, using truncation. We can easily compensate the
4437 quotient or remainder to get ceiling rounding, once we have the
4438 remainder. Notice that we compute also the final remainder
4439 value here, and return the result right away. */
4440 if (target == 0 || GET_MODE (target) != compute_mode)
4441 target = gen_reg_rtx (compute_mode);
4445 remainder = (REG_P (target)
4446 ? target : gen_reg_rtx (compute_mode));
4447 quotient = gen_reg_rtx (compute_mode);
4451 quotient = (REG_P (target)
4452 ? target : gen_reg_rtx (compute_mode));
4453 remainder = gen_reg_rtx (compute_mode);
4456 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4459 /* This could be computed with a branch-less sequence.
4460 Save that for later. */
4461 rtx label = gen_label_rtx ();
4462 do_cmp_and_jump (remainder, const0_rtx, EQ,
4463 compute_mode, label);
4464 expand_inc (quotient, const1_rtx);
4465 expand_dec (remainder, op1);
4467 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4470 /* No luck with division elimination or divmod. Have to do it
4471 by conditionally adjusting op0 *and* the result. */
4474 rtx adjusted_op0, tem;
4476 quotient = gen_reg_rtx (compute_mode);
4477 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4478 label1 = gen_label_rtx ();
4479 label2 = gen_label_rtx ();
4480 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4481 compute_mode, label1);
4482 emit_move_insn (quotient, const0_rtx);
4483 emit_jump_insn (gen_jump (label2));
4485 emit_label (label1);
4486 expand_dec (adjusted_op0, const1_rtx);
4487 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4488 quotient, 1, OPTAB_LIB_WIDEN);
4489 if (tem != quotient)
4490 emit_move_insn (quotient, tem);
4491 expand_inc (quotient, const1_rtx);
4492 emit_label (label2);
4497 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4498 && INTVAL (op1) >= 0)
4500 /* This is extremely similar to the code for the unsigned case
4501 above. For 2.7 we should merge these variants, but for
4502 2.6.1 I don't want to touch the code for unsigned since that
4503 get used in C. The signed case will only be used by other
4507 unsigned HOST_WIDE_INT d = INTVAL (op1);
4508 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4509 build_int_cst (NULL_TREE, floor_log2 (d)),
4511 t2 = expand_binop (compute_mode, and_optab, op0,
4513 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4514 t3 = gen_reg_rtx (compute_mode);
4515 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4516 compute_mode, 1, 1);
4520 lab = gen_label_rtx ();
4521 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4522 expand_inc (t1, const1_rtx);
4527 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4533 /* Try using an instruction that produces both the quotient and
4534 remainder, using truncation. We can easily compensate the
4535 quotient or remainder to get ceiling rounding, once we have the
4536 remainder. Notice that we compute also the final remainder
4537 value here, and return the result right away. */
4538 if (target == 0 || GET_MODE (target) != compute_mode)
4539 target = gen_reg_rtx (compute_mode);
4542 remainder= (REG_P (target)
4543 ? target : gen_reg_rtx (compute_mode));
4544 quotient = gen_reg_rtx (compute_mode);
4548 quotient = (REG_P (target)
4549 ? target : gen_reg_rtx (compute_mode));
4550 remainder = gen_reg_rtx (compute_mode);
4553 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4556 /* This could be computed with a branch-less sequence.
4557 Save that for later. */
4559 rtx label = gen_label_rtx ();
4560 do_cmp_and_jump (remainder, const0_rtx, EQ,
4561 compute_mode, label);
4562 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4563 NULL_RTX, 0, OPTAB_WIDEN);
4564 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4565 expand_inc (quotient, const1_rtx);
4566 expand_dec (remainder, op1);
4568 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4571 /* No luck with division elimination or divmod. Have to do it
4572 by conditionally adjusting op0 *and* the result. */
4574 rtx label1, label2, label3, label4, label5;
4578 quotient = gen_reg_rtx (compute_mode);
4579 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4580 label1 = gen_label_rtx ();
4581 label2 = gen_label_rtx ();
4582 label3 = gen_label_rtx ();
4583 label4 = gen_label_rtx ();
4584 label5 = gen_label_rtx ();
4585 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4586 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4587 compute_mode, label1);
4588 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4589 quotient, 0, OPTAB_LIB_WIDEN);
4590 if (tem != quotient)
4591 emit_move_insn (quotient, tem);
4592 emit_jump_insn (gen_jump (label5));
4594 emit_label (label1);
4595 expand_dec (adjusted_op0, const1_rtx);
4596 emit_jump_insn (gen_jump (label4));
4598 emit_label (label2);
4599 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4600 compute_mode, label3);
4601 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4602 quotient, 0, OPTAB_LIB_WIDEN);
4603 if (tem != quotient)
4604 emit_move_insn (quotient, tem);
4605 emit_jump_insn (gen_jump (label5));
4607 emit_label (label3);
4608 expand_inc (adjusted_op0, const1_rtx);
4609 emit_label (label4);
4610 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4611 quotient, 0, OPTAB_LIB_WIDEN);
4612 if (tem != quotient)
4613 emit_move_insn (quotient, tem);
4614 expand_inc (quotient, const1_rtx);
4615 emit_label (label5);
4620 case EXACT_DIV_EXPR:
4621 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4623 HOST_WIDE_INT d = INTVAL (op1);
4624 unsigned HOST_WIDE_INT ml;
4628 pre_shift = floor_log2 (d & -d);
4629 ml = invert_mod2n (d >> pre_shift, size);
4630 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4631 build_int_cst (NULL_TREE, pre_shift),
4632 NULL_RTX, unsignedp);
4633 quotient = expand_mult (compute_mode, t1,
4634 gen_int_mode (ml, compute_mode),
4637 insn = get_last_insn ();
4638 set_unique_reg_note (insn,
4640 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4646 case ROUND_DIV_EXPR:
4647 case ROUND_MOD_EXPR:
4652 label = gen_label_rtx ();
4653 quotient = gen_reg_rtx (compute_mode);
4654 remainder = gen_reg_rtx (compute_mode);
4655 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4658 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4659 quotient, 1, OPTAB_LIB_WIDEN);
4660 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4661 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4662 remainder, 1, OPTAB_LIB_WIDEN);
4664 tem = plus_constant (op1, -1);
4665 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4666 build_int_cst (NULL_TREE, 1),
4668 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4669 expand_inc (quotient, const1_rtx);
4670 expand_dec (remainder, op1);
4675 rtx abs_rem, abs_op1, tem, mask;
4677 label = gen_label_rtx ();
4678 quotient = gen_reg_rtx (compute_mode);
4679 remainder = gen_reg_rtx (compute_mode);
4680 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4683 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4684 quotient, 0, OPTAB_LIB_WIDEN);
4685 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4686 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4687 remainder, 0, OPTAB_LIB_WIDEN);
4689 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4690 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4691 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4692 build_int_cst (NULL_TREE, 1),
4694 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4695 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4696 NULL_RTX, 0, OPTAB_WIDEN);
4697 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4698 build_int_cst (NULL_TREE, size - 1),
4700 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4701 NULL_RTX, 0, OPTAB_WIDEN);
4702 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4703 NULL_RTX, 0, OPTAB_WIDEN);
4704 expand_inc (quotient, tem);
4705 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4706 NULL_RTX, 0, OPTAB_WIDEN);
4707 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4708 NULL_RTX, 0, OPTAB_WIDEN);
4709 expand_dec (remainder, tem);
4712 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4720 if (target && GET_MODE (target) != compute_mode)
4725 /* Try to produce the remainder without producing the quotient.
4726 If we seem to have a divmod pattern that does not require widening,
4727 don't try widening here. We should really have a WIDEN argument
4728 to expand_twoval_binop, since what we'd really like to do here is
4729 1) try a mod insn in compute_mode
4730 2) try a divmod insn in compute_mode
4731 3) try a div insn in compute_mode and multiply-subtract to get
4733 4) try the same things with widening allowed. */
4735 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4738 ((optab_handler (optab2, compute_mode)->insn_code
4739 != CODE_FOR_nothing)
4740 ? OPTAB_DIRECT : OPTAB_WIDEN));
4743 /* No luck there. Can we do remainder and divide at once
4744 without a library call? */
4745 remainder = gen_reg_rtx (compute_mode);
4746 if (! expand_twoval_binop ((unsignedp
4750 NULL_RTX, remainder, unsignedp))
4755 return gen_lowpart (mode, remainder);
4758 /* Produce the quotient. Try a quotient insn, but not a library call.
4759 If we have a divmod in this mode, use it in preference to widening
4760 the div (for this test we assume it will not fail). Note that optab2
4761 is set to the one of the two optabs that the call below will use. */
4763 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4764 op0, op1, rem_flag ? NULL_RTX : target,
4766 ((optab_handler (optab2, compute_mode)->insn_code
4767 != CODE_FOR_nothing)
4768 ? OPTAB_DIRECT : OPTAB_WIDEN));
4772 /* No luck there. Try a quotient-and-remainder insn,
4773 keeping the quotient alone. */
4774 quotient = gen_reg_rtx (compute_mode);
4775 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4777 quotient, NULL_RTX, unsignedp))
4781 /* Still no luck. If we are not computing the remainder,
4782 use a library call for the quotient. */
4783 quotient = sign_expand_binop (compute_mode,
4784 udiv_optab, sdiv_optab,
4786 unsignedp, OPTAB_LIB_WIDEN);
4793 if (target && GET_MODE (target) != compute_mode)
4798 /* No divide instruction either. Use library for remainder. */
4799 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4801 unsignedp, OPTAB_LIB_WIDEN);
4802 /* No remainder function. Try a quotient-and-remainder
4803 function, keeping the remainder. */
4806 remainder = gen_reg_rtx (compute_mode);
4807 if (!expand_twoval_binop_libfunc
4808 (unsignedp ? udivmod_optab : sdivmod_optab,
4810 NULL_RTX, remainder,
4811 unsignedp ? UMOD : MOD))
4812 remainder = NULL_RTX;
4817 /* We divided. Now finish doing X - Y * (X / Y). */
4818 remainder = expand_mult (compute_mode, quotient, op1,
4819 NULL_RTX, unsignedp);
4820 remainder = expand_binop (compute_mode, sub_optab, op0,
4821 remainder, target, unsignedp,
4826 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4829 /* Return a tree node with data type TYPE, describing the value of X.
4830 Usually this is an VAR_DECL, if there is no obvious better choice.
4831 X may be an expression, however we only support those expressions
4832 generated by loop.c. */
/* NOTE(review): this is a sampled listing — the left-hand numbers are the
   original file's line numbers and many lines (including the `tree' return
   type and most `case' labels of the switch below) are elided.  Comments on
   individual arms are therefore hedged where the label is not visible. */
4835 make_tree (tree type, rtx x)
/* Dispatch on the RTL code of X and build the equivalent tree. */
4839 switch (GET_CODE (x))
/* Presumably the CONST_INT arm: HI holds the high half of the double-int;
   the elided condition appears to sign-extend INTVAL (x) into HI unless the
   type is unsigned and narrower than HOST_WIDE_INT — TODO confirm against
   the elided lines.  */
4843 HOST_WIDE_INT hi = 0;
4846 && !(TYPE_UNSIGNED (type)
4847 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4848 < HOST_BITS_PER_WIDE_INT)))
4851 t = build_int_cst_wide (type, INTVAL (x), hi);
/* CONST_DOUBLE handling: VOIDmode means an integer double constant;
   otherwise (below) it is a real constant.  */
4857 if (GET_MODE (x) == VOIDmode)
4858 t = build_int_cst_wide (type,
4859 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
/* Real-valued constant: recover the REAL_VALUE_TYPE and build a REAL_CST. */
4864 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4865 t = build_real (type, d);
/* CONST_VECTOR: rebuild each element recursively (in reverse so the
   tree_cons list ends up in forward order) and wrap in a VECTOR_CST.  */
4872 int units = CONST_VECTOR_NUNITS (x);
4873 tree itype = TREE_TYPE (type);
4878 /* Build a tree with vector elements. */
4879 for (i = units - 1; i >= 0; --i)
4881 rtx elt = CONST_VECTOR_ELT (x, i);
4882 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4885 return build_vector (type, t);
/* Binary/unary arithmetic codes map 1:1 onto tree codes, recursing on the
   operands.  (The `case PLUS:' etc. labels are elided in this listing.)  */
4889 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4890 make_tree (type, XEXP (x, 1)));
4893 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4894 make_tree (type, XEXP (x, 1)));
4897 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4900 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4901 make_tree (type, XEXP (x, 1)));
4904 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4905 make_tree (type, XEXP (x, 1)));
/* Right shifts: trees have one RSHIFT_EXPR whose signedness comes from the
   operand type, so force an unsigned type for a logical shift ...  */
4908 t = unsigned_type_for (type);
4909 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4910 make_tree (t, XEXP (x, 0)),
4911 make_tree (type, XEXP (x, 1))));
/* ... and a signed type for an arithmetic shift.  */
4914 t = signed_type_for (type);
4915 return fold_convert (type, build2 (RSHIFT_EXPR, t,
4916 make_tree (t, XEXP (x, 0)),
4917 make_tree (type, XEXP (x, 1))));
/* Division: pick the signed variant of TYPE (unless TYPE is a real type,
   where signedness does not apply) ...  */
4920 if (TREE_CODE (type) != REAL_TYPE)
4921 t = signed_type_for (type);
4925 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4926 make_tree (t, XEXP (x, 0)),
4927 make_tree (t, XEXP (x, 1))));
/* ... and the unsigned variant for UDIV.  */
4929 t = unsigned_type_for (type);
4930 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4931 make_tree (t, XEXP (x, 0)),
4932 make_tree (t, XEXP (x, 1))));
/* SIGN_EXTEND / ZERO_EXTEND: convert via a type of the inner mode with the
   matching signedness, then to TYPE.  */
4936 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4937 GET_CODE (x) == ZERO_EXTEND);
4938 return fold_convert (type, make_tree (t, XEXP (x, 0)));
/* Pass-through wrapper (label elided; presumably CONST/TRUNCATE-like).  */
4941 return make_tree (type, XEXP (x, 0));
/* SYMBOL_REF with a known decl: take the decl's address directly.  */
4944 t = SYMBOL_REF_DECL (x);
4946 return fold_convert (type, build_fold_addr_expr (t));
4947 /* else fall through. */
/* Default: wrap X in an anonymous VAR_DECL whose DECL_RTL is X itself.  */
4950 t = build_decl (VAR_DECL, NULL_TREE, type);
4952 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4953 ptr_mode. So convert. */
4954 if (POINTER_TYPE_P (type))
4955 x = convert_memory_address (TYPE_MODE (type), x);
4957 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4958 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4959 t->decl_with_rtl.rtl = x;
4965 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4966 and returning TARGET.
4968 If TARGET is 0, a pseudo-register or constant is returned. */
/* NOTE(review): sampled listing — the return type line, braces, and the
   final `return target;' of this function are elided here.  */
4971 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
/* Both operands VOIDmode means both are constants: fold at compile time.  */
4975 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4976 tem = simplify_binary_operation (AND, mode, op0, op1);
/* Otherwise emit a real AND, widening or using a libcall if needed.  */
4978 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
/* If the result landed somewhere other than TARGET, copy it over.  */
4982 else if (tem != target)
4983 emit_move_insn (target, tem);
4987 /* Helper function for emit_store_flag. */
/* NOTE(review): sampled listing — the `static rtx' line, the NORMALIZEP
   parameter, and several statements of this helper are elided.  The visible
   code normalizes the raw scc/cstore result in SUBTARGET into TARGET.  */
4989 emit_store_flag_1 (rtx target, rtx subtarget, enum machine_mode mode,
4993 enum machine_mode target_mode = GET_MODE (target);
4995 /* If we are converting to a wider mode, first convert to
4996 TARGET_MODE, then normalize. This produces better combining
4997 opportunities on machines that have a SIGN_EXTRACT when we are
4998 testing a single bit. This mostly benefits the 68k.
5000 If STORE_FLAG_VALUE does not have the sign bit set when
5001 interpreted in MODE, we can do this conversion as unsigned, which
5002 is usually more efficient. */
5003 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
/* The third argument selects unsigned extension when STORE_FLAG_VALUE's
   sign bit (in MODE) is clear.  */
5005 convert_move (target, subtarget,
5006 (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5007 && 0 == (STORE_FLAG_VALUE
5008 & ((HOST_WIDE_INT) 1
5009 << (GET_MODE_BITSIZE (mode) -1))));
5016 /* If we want to keep subexpressions around, don't reuse our last
5021 /* Now normalize to the proper value in MODE. Sometimes we don't
5022 have to do anything. */
5023 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5025 /* STORE_FLAG_VALUE might be the most negative number, so write
5026 the comparison this way to avoid a compiler-time warning. */
5027 else if (- normalizep == STORE_FLAG_VALUE)
5028 op0 = expand_unop (mode, neg_optab, op0, subtarget, 0);
5030 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5031 it hard to use a value of just the sign bit due to ANSI integer
5032 constant typing rules. */
5033 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5034 && (STORE_FLAG_VALUE
5035 & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))
/* STORE_FLAG_VALUE is the sign bit: shift it down to bit 0.  */
5036 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5037 size_int (GET_MODE_BITSIZE (mode) - 1), subtarget,
/* Fallback path (conditions elided): requires the low bit of
   STORE_FLAG_VALUE to be set so masking with 1 works.  */
5041 gcc_assert (STORE_FLAG_VALUE & 1);
5043 op0 = expand_and (mode, op0, const1_rtx, subtarget);
5044 if (normalizep == -1)
5045 op0 = expand_unop (mode, neg_optab, op0, op0, 0);
5048 /* If we were converting to a smaller mode, do the conversion now. */
5049 if (target_mode != mode)
5051 convert_move (target, op0, 0);
5058 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5059 and storing in TARGET. Normally return TARGET.
5060 Return 0 if that cannot be done.
5062 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5063 it is VOIDmode, they cannot both be CONST_INT.
5065 UNSIGNEDP is for the case where we have to widen the operands
5066 to perform the operation. It says to use zero-extension.
5068 NORMALIZEP is 1 if we should convert the result to be either zero
5069 or one. Normalize is -1 if we should convert the result to be
5070 either zero or -1. If NORMALIZEP is zero, the result will be left
5071 "raw" out of the scc insn. */
/* NOTE(review): sampled listing — the `rtx' return type, local-variable
   block, many braces, several switch labels and an #endif are elided.
   Comments below describe only what the visible lines establish.  */
5074 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5075 enum machine_mode mode, int unsignedp, int normalizep)
5078 enum insn_code icode;
5079 enum machine_mode compare_mode;
5080 enum machine_mode target_mode = GET_MODE (target);
5082 rtx last = get_last_insn ();
5083 rtx pattern, comparison;
/* Canonicalize: an unsigned comparison code when UNSIGNEDP (guard elided). */
5086 code = unsigned_condition (code);
5088 /* If one operand is constant, make it the second one. Only do this
5089 if the other operand is not constant as well. */
5091 if (swap_commutative_operands_p (op0, op1))
5096 code = swap_condition (code);
5099 if (mode == VOIDmode)
5100 mode = GET_MODE (op0);
5102 /* For some comparisons with 1 and -1, we can convert this to
5103 comparisons with zero. This will often produce more opportunities for
5104 store-flag insns. */
/* The enclosing switch-on-CODE labels are elided; each pair below rewrites
   one comparison against 1/-1 as the equivalent comparison against zero
   (e.g. x < 1 becomes x <= 0).  */
5109 if (op1 == const1_rtx)
5110 op1 = const0_rtx, code = LE;
5113 if (op1 == constm1_rtx)
5114 op1 = const0_rtx, code = LT;
5117 if (op1 == const1_rtx)
5118 op1 = const0_rtx, code = GT;
5121 if (op1 == constm1_rtx)
5122 op1 = const0_rtx, code = GE;
5125 if (op1 == const1_rtx)
5126 op1 = const0_rtx, code = NE;
5129 if (op1 == const1_rtx)
5130 op1 = const0_rtx, code = EQ;
5136 /* If we are comparing a double-word integer with zero or -1, we can
5137 convert the comparison into one involving a single word. */
5138 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5139 && GET_MODE_CLASS (mode) == MODE_INT
5140 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5142 if ((code == EQ || code == NE)
5143 && (op1 == const0_rtx || op1 == constm1_rtx))
5145 rtx op00, op01, op0both;
5147 /* Do a logical OR or AND of the two words and compare the
5149 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5150 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5151 op0both = expand_binop (word_mode,
5152 op1 == const0_rtx ? ior_optab : and_optab,
5153 op00, op01, NULL_RTX, unsignedp,
/* Recurse on the single-word combined value.  */
5157 return emit_store_flag (target, code, op0both, op1, word_mode,
5158 unsignedp, normalizep);
5160 else if ((code == LT || code == GE) && op1 == const0_rtx)
5164 /* If testing the sign bit, can just test on high word. */
5165 op0h = simplify_gen_subreg (word_mode, op0, mode,
5166 subreg_highpart_offset (word_mode,
5168 return emit_store_flag (target, code, op0h, op1, word_mode,
5169 unsignedp, normalizep);
5173 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5174 complement of A (for GE) and shifting the sign bit to the low bit. */
5175 if (op1 == const0_rtx && (code == LT || code == GE)
5176 && GET_MODE_CLASS (mode) == MODE_INT
5177 && (normalizep || STORE_FLAG_VALUE == 1
5178 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5179 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5180 == ((unsigned HOST_WIDE_INT) 1
5181 << (GET_MODE_BITSIZE (mode) - 1))))))
5185 /* If the result is to be wider than OP0, it is best to convert it
5186 first. If it is to be narrower, it is *incorrect* to convert it
5188 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5190 op0 = convert_modes (target_mode, mode, op0, 0);
5194 if (target_mode != mode)
/* For GE, complement first so the sign bit of ~A answers A >= 0.  */
5198 op0 = expand_unop (mode, one_cmpl_optab, op0,
5199 ((STORE_FLAG_VALUE == 1 || normalizep)
5200 ? 0 : subtarget), 0);
5202 if (STORE_FLAG_VALUE == 1 || normalizep)
5203 /* If we are supposed to produce a 0/1 value, we want to do
5204 a logical shift from the sign bit to the low-order bit; for
5205 a -1/0 value, we do an arithmetic shift. */
5206 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5207 size_int (GET_MODE_BITSIZE (mode) - 1),
5208 subtarget, normalizep != -1)
5210 if (mode != target_mode)
5211 op0 = convert_modes (target_mode, mode, op0, 0);
/* Strategy 1: a target scc pattern for this comparison code.  */
5216 icode = setcc_gen_code[(int) code];
5218 if (icode != CODE_FOR_nothing)
5220 insn_operand_predicate_fn pred;
5222 /* We think we may be able to do this with a scc insn. Emit the
5223 comparison and then the scc insn. */
5225 do_pending_stack_adjust ();
5226 last = get_last_insn ();
5229 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
/* compare_from_rtx may fold the comparison to a constant outcome.  */
5230 if (CONSTANT_P (comparison))
5232 switch (GET_CODE (comparison))
5235 if (comparison == const0_rtx)
5239 #ifdef FLOAT_STORE_FLAG_VALUE
5241 if (comparison == CONST0_RTX (GET_MODE (comparison)))
/* Known-true result, normalized per NORMALIZEP.  */
5249 if (normalizep == 1)
5251 if (normalizep == -1)
5253 return const_true_rtx;
5256 /* The code of COMPARISON may not match CODE if compare_from_rtx
5257 decided to swap its operands and reverse the original code.
5259 We know that compare_from_rtx returns either a CONST_INT or
5260 a new comparison code, so it is safe to just extract the
5261 code from COMPARISON. */
5262 code = GET_CODE (comparison);
5264 /* Get a reference to the target in the proper mode for this insn. */
5265 compare_mode = insn_data[(int) icode].operand[0].mode;
5267 pred = insn_data[(int) icode].operand[0].predicate;
5268 if (optimize || ! (*pred) (subtarget, compare_mode))
5269 subtarget = gen_reg_rtx (compare_mode);
5271 pattern = GEN_FCN (icode) (subtarget);
5274 emit_insn (pattern);
/* Normalize the raw scc result via the shared helper.  */
5275 return emit_store_flag_1 (target, subtarget, compare_mode,
5281 /* We don't have an scc insn, so try a cstore insn. */
/* Strategy 2: walk wider modes looking for a cstore pattern.  */
5283 for (compare_mode = mode; compare_mode != VOIDmode;
5284 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5286 icode = optab_handler (cstore_optab, compare_mode)->insn_code;
5287 if (icode != CODE_FOR_nothing)
5291 if (icode != CODE_FOR_nothing)
5293 enum machine_mode result_mode
5294 = insn_data[(int) icode].operand[0].mode;
5295 rtx cstore_op0 = op0;
5296 rtx cstore_op1 = op1;
5298 do_pending_stack_adjust ();
5299 last = get_last_insn ();
/* Widen the operands if the pattern is in a wider mode.  */
5301 if (compare_mode != mode)
5303 cstore_op0 = convert_modes (compare_mode, mode, cstore_op0,
5305 cstore_op1 = convert_modes (compare_mode, mode, cstore_op1,
/* Force operands into registers when the pattern's predicates reject
   them as-is.  */
5309 if (!insn_data[(int) icode].operand[2].predicate (cstore_op0,
5311 cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0);
5313 if (!insn_data[(int) icode].operand[3].predicate (cstore_op1,
5315 cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1);
5317 comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0,
5321 if (optimize || !(insn_data[(int) icode].operand[0].predicate
5322 (subtarget, result_mode)))
5323 subtarget = gen_reg_rtx (result_mode);
5325 pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0,
5330 emit_insn (pattern);
5331 return emit_store_flag_1 (target, subtarget, result_mode,
/* Neither scc nor cstore worked: roll back everything emitted.  */
5337 delete_insns_since (last);
5339 /* If optimizing, use different pseudo registers for each insn, instead
5340 of reusing the same pseudo. This leads to better CSE, but slows
5341 down the compiler, since there are more pseudos */
5342 subtarget = (!optimize
5343 && (target_mode == mode)) ? target : NULL_RTX;
5345 /* If we reached here, we can't do this with a scc insn. However, there
5346 are some comparisons that can be done directly. For example, if
5347 this is an equality comparison of integers, we can try to exclusive-or
5348 (or subtract) the two operands and use a recursive call to try the
5349 comparison with zero. Don't do any of these cases if branches are
5352 if (BRANCH_COST (optimize_insn_for_speed_p (),
5354 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5355 && op1 != const0_rtx)
/* Reduce (a EQ/NE b) to ((a ^ b) EQ/NE 0), falling back to subtraction.  */
5357 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5361 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5364 tem = emit_store_flag (target, code, tem, const0_rtx,
5365 mode, unsignedp, normalizep);
5367 delete_insns_since (last);
5371 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5372 the constant zero. Reject all other comparisons at this point. Only
5373 do LE and GT if branches are expensive since they are expensive on
5374 2-operand machines. */
5376 if (BRANCH_COST (optimize_insn_for_speed_p (),
5378 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5379 || (code != EQ && code != NE
5380 && (BRANCH_COST (optimize_insn_for_speed_p (),
5381 false) <= 1 || (code != LE && code != GT))))
5384 /* See what we need to return. We can only return a 1, -1, or the
5387 if (normalizep == 0)
5389 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5390 normalizep = STORE_FLAG_VALUE;
5392 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5393 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5394 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5400 /* Try to put the result of the comparison in the sign bit. Assume we can't
5401 do the necessary operation below. */
5405 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5406 the sign bit set. */
5410 /* This is destructive, so SUBTARGET can't be OP0. */
5411 if (rtx_equal_p (subtarget, op0))
5414 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5417 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5421 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5422 number of bits in the mode of OP0, minus one. */
5426 if (rtx_equal_p (subtarget, op0))
5429 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5430 size_int (GET_MODE_BITSIZE (mode) - 1),
5432 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5436 if (code == EQ || code == NE)
5438 /* For EQ or NE, one way to do the comparison is to apply an operation
5439 that converts the operand into a positive number if it is nonzero
5440 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5441 for NE we negate. This puts the result in the sign bit. Then we
5442 normalize with a shift, if needed.
5444 Two operations that can do the above actions are ABS and FFS, so try
5445 them. If that doesn't work, and MODE is smaller than a full word,
5446 we can use zero-extension to the wider mode (an unsigned conversion)
5447 as the operation. */
5449 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5450 that is compensated by the subsequent overflow when subtracting
5453 if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5454 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5455 else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5456 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5457 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5459 tem = convert_modes (word_mode, mode, op0, 1);
/* EQ: subtract 1 (0 wraps to all-ones, sign set); NE: negate.  */
5466 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5469 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5472 /* If we couldn't do it that way, for NE we can "or" the two's complement
5473 of the value with itself. For EQ, we take the one's complement of
5474 that "or", which is an extra insn, so we only handle EQ if branches
5479 || BRANCH_COST (optimize_insn_for_speed_p (),
5482 if (rtx_equal_p (subtarget, op0))
5485 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5486 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5489 if (tem && code == EQ)
5490 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
/* Shift the sign-bit result down: logical for 0/1, arithmetic for 0/-1.  */
5494 if (tem && normalizep)
5495 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5496 size_int (GET_MODE_BITSIZE (mode) - 1),
5497 subtarget, normalizep == 1);
5501 if (GET_MODE (tem) != target_mode)
5503 convert_move (target, tem, 0);
5506 else if (!subtarget)
5508 emit_move_insn (target, tem);
/* Failure path (guard elided): discard emitted insns before returning.  */
5513 delete_insns_since (last);
5518 /* Like emit_store_flag, but always succeeds. */
/* NOTE(review): sampled listing — the return type, locals, the trailing
   emit_label/return, and some guard conditions are elided.  */
5521 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5522 enum machine_mode mode, int unsignedp, int normalizep)
5526 /* First see if emit_store_flag can do the job. */
5527 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
/* NORMALIZEP == 0 makes the fallback's 1/0 stores meaningless, so it is
   adjusted here (replacement value elided in this listing).  */
5531 if (normalizep == 0)
5534 /* If this failed, we have to do this with set/compare/jump/set code. */
/* Use a fresh pseudo when TARGET appears in either operand, since TARGET
   is written before the comparison below.  */
5537 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5538 target = gen_reg_rtx (GET_MODE (target));
/* Assume true, then branch over the "false" store if the comparison holds. */
5540 emit_move_insn (target, const1_rtx);
5541 label = gen_label_rtx ();
5542 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5545 emit_move_insn (target, const0_rtx);
5551 /* Perform possibly multi-word comparison and conditional jump to LABEL
5552 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5553 now a thin wrapper around do_compare_rtx_and_jump. */
5556 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5559 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5560 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5561 NULL_RTX, NULL_RTX, label);