1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987-2016 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
34 #include "diagnostic-core.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
40 #include "langhooks.h"
42 struct target_expmed default_target_expmed;
44 struct target_expmed *this_target_expmed = &default_target_expmed;
47 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
52 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
53 unsigned HOST_WIDE_INT,
55 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 unsigned HOST_WIDE_INT,
60 static rtx extract_fixed_bit_field (machine_mode, rtx,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
64 unsigned HOST_WIDE_INT,
65 unsigned HOST_WIDE_INT, rtx, int, bool);
66 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
68 unsigned HOST_WIDE_INT, int, bool);
69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
70 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
71 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
73 /* Return a constant integer mask value of mode MODE with BITSIZE ones
74 followed by BITPOS zeros, or the complement of that if COMPLEMENT.
75 The mask is truncated if necessary to the width of mode MODE. The
76 mask is zero-extended if BITSIZE+BITPOS is too small for MODE. */
79 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
81 return immed_wide_int_const
82 (wi::shifted_mask (bitpos, bitsize, complement,
83 GET_MODE_PRECISION (mode)), mode);
86 /* Test whether a value is zero of a power of two. */
87 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
88 (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
90 struct init_expmed_rtl
111 rtx pow2[MAX_BITS_PER_WORD];
112 rtx cint[MAX_BITS_PER_WORD];
116 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
117 machine_mode from_mode, bool speed)
119 int to_size, from_size;
122 to_size = GET_MODE_PRECISION (to_mode);
123 from_size = GET_MODE_PRECISION (from_mode);
125 /* Most partial integers have a precision less than the "full"
126 integer it requires for storage. In case one doesn't, for
127 comparison purposes here, reduce the bit size by one in that
129 if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
130 && exact_log2 (to_size) != -1)
132 if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
133 && exact_log2 (from_size) != -1)
136 /* Assume cost of zero-extend and sign-extend is the same. */
137 which = (to_size < from_size ? all->trunc : all->zext);
139 PUT_MODE (all->reg, from_mode);
140 set_convert_cost (to_mode, from_mode, speed,
141 set_src_cost (which, to_mode, speed));
145 init_expmed_one_mode (struct init_expmed_rtl *all,
146 machine_mode mode, int speed)
148 int m, n, mode_bitsize;
149 machine_mode mode_from;
151 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
153 PUT_MODE (all->reg, mode);
154 PUT_MODE (all->plus, mode);
155 PUT_MODE (all->neg, mode);
156 PUT_MODE (all->mult, mode);
157 PUT_MODE (all->sdiv, mode);
158 PUT_MODE (all->udiv, mode);
159 PUT_MODE (all->sdiv_32, mode);
160 PUT_MODE (all->smod_32, mode);
161 PUT_MODE (all->wide_trunc, mode);
162 PUT_MODE (all->shift, mode);
163 PUT_MODE (all->shift_mult, mode);
164 PUT_MODE (all->shift_add, mode);
165 PUT_MODE (all->shift_sub0, mode);
166 PUT_MODE (all->shift_sub1, mode);
167 PUT_MODE (all->zext, mode);
168 PUT_MODE (all->trunc, mode);
170 set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
171 set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
172 set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
173 set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
174 set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
176 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
177 <= 2 * add_cost (speed, mode)));
178 set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
179 <= 4 * add_cost (speed, mode)));
181 set_shift_cost (speed, mode, 0, 0);
183 int cost = add_cost (speed, mode);
184 set_shiftadd_cost (speed, mode, 0, cost);
185 set_shiftsub0_cost (speed, mode, 0, cost);
186 set_shiftsub1_cost (speed, mode, 0, cost);
189 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
190 for (m = 1; m < n; m++)
192 XEXP (all->shift, 1) = all->cint[m];
193 XEXP (all->shift_mult, 1) = all->pow2[m];
195 set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
196 set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
198 set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
200 set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
204 if (SCALAR_INT_MODE_P (mode))
206 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
207 mode_from = (machine_mode)(mode_from + 1))
208 init_expmed_one_conv (all, mode, mode_from, speed);
210 if (GET_MODE_CLASS (mode) == MODE_INT)
212 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
213 if (wider_mode != VOIDmode)
215 PUT_MODE (all->zext, wider_mode);
216 PUT_MODE (all->wide_mult, wider_mode);
217 PUT_MODE (all->wide_lshr, wider_mode);
218 XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
220 set_mul_widen_cost (speed, wider_mode,
221 set_src_cost (all->wide_mult, wider_mode, speed));
222 set_mul_highpart_cost (speed, mode,
223 set_src_cost (all->wide_trunc, mode, speed));
231 struct init_expmed_rtl all;
232 machine_mode mode = QImode;
235 memset (&all, 0, sizeof all);
236 for (m = 1; m < MAX_BITS_PER_WORD; m++)
238 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
239 all.cint[m] = GEN_INT (m);
242 /* Avoid using hard regs in ways which may be unsupported. */
243 all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
244 all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
245 all.neg = gen_rtx_NEG (mode, all.reg);
246 all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
247 all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
248 all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
249 all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
250 all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
251 all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
252 all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
253 all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
254 all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
255 all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
256 all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
257 all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
258 all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
259 all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
260 all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
262 for (speed = 0; speed < 2; speed++)
264 crtl->maybe_hot_insn_p = speed;
265 set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
267 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
268 mode = (machine_mode)(mode + 1))
269 init_expmed_one_mode (&all, mode, speed);
271 if (MIN_MODE_PARTIAL_INT != VOIDmode)
272 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
273 mode = (machine_mode)(mode + 1))
274 init_expmed_one_mode (&all, mode, speed);
276 if (MIN_MODE_VECTOR_INT != VOIDmode)
277 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
278 mode = (machine_mode)(mode + 1))
279 init_expmed_one_mode (&all, mode, speed);
282 if (alg_hash_used_p ())
284 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
285 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
288 set_alg_hash_used_p (true);
289 default_rtl_profile ();
291 ggc_free (all.trunc);
292 ggc_free (all.shift_sub1);
293 ggc_free (all.shift_sub0);
294 ggc_free (all.shift_add);
295 ggc_free (all.shift_mult);
296 ggc_free (all.shift);
297 ggc_free (all.wide_trunc);
298 ggc_free (all.wide_lshr);
299 ggc_free (all.wide_mult);
301 ggc_free (all.smod_32);
302 ggc_free (all.sdiv_32);
311 /* Return an rtx representing minus the value of X.
312 MODE is the intended mode of the result,
313 useful if X is a CONST_INT. */
316 negate_rtx (machine_mode mode, rtx x)
318 rtx result = simplify_unary_operation (NEG, mode, x, mode);
321 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
326 /* Whether reverse storage order is supported on the target. */
327 static int reverse_storage_order_supported = -1;
329 /* Check whether reverse storage order is supported on the target. */
332 check_reverse_storage_order_support (void)
334 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
336 reverse_storage_order_supported = 0;
337 sorry ("reverse scalar storage order");
340 reverse_storage_order_supported = 1;
343 /* Whether reverse FP storage order is supported on the target. */
344 static int reverse_float_storage_order_supported = -1;
346 /* Check whether reverse FP storage order is supported on the target. */
349 check_reverse_float_storage_order_support (void)
351 if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
353 reverse_float_storage_order_supported = 0;
354 sorry ("reverse floating-point scalar storage order");
357 reverse_float_storage_order_supported = 1;
360 /* Return an rtx representing value of X with reverse storage order.
361 MODE is the intended mode of the result,
362 useful if X is a CONST_INT. */
365 flip_storage_order (enum machine_mode mode, rtx x)
367 enum machine_mode int_mode;
373 if (COMPLEX_MODE_P (mode))
375 rtx real = read_complex_part (x, false);
376 rtx imag = read_complex_part (x, true);
378 real = flip_storage_order (GET_MODE_INNER (mode), real);
379 imag = flip_storage_order (GET_MODE_INNER (mode), imag);
381 return gen_rtx_CONCAT (mode, real, imag);
384 if (__builtin_expect (reverse_storage_order_supported < 0, 0))
385 check_reverse_storage_order_support ();
387 if (SCALAR_INT_MODE_P (mode))
391 if (FLOAT_MODE_P (mode)
392 && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
393 check_reverse_float_storage_order_support ();
395 int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
396 if (int_mode == BLKmode)
398 sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
401 x = gen_lowpart (int_mode, x);
404 result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
406 result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
408 if (int_mode != mode)
409 result = gen_lowpart (mode, result);
414 /* Adjust bitfield memory MEM so that it points to the first unit of mode
415 MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
416 If MODE is BLKmode, return a reference to every byte in the bitfield.
417 Set *NEW_BITNUM to the bit position of the field within the new memory. */
420 narrow_bit_field_mem (rtx mem, machine_mode mode,
421 unsigned HOST_WIDE_INT bitsize,
422 unsigned HOST_WIDE_INT bitnum,
423 unsigned HOST_WIDE_INT *new_bitnum)
427 *new_bitnum = bitnum % BITS_PER_UNIT;
428 HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
429 HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
431 return adjust_bitfield_address_size (mem, mode, offset, size);
435 unsigned int unit = GET_MODE_BITSIZE (mode);
436 *new_bitnum = bitnum % unit;
437 HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
438 return adjust_bitfield_address (mem, mode, offset);
442 /* The caller wants to perform insertion or extraction PATTERN on a
443 bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
444 BITREGION_START and BITREGION_END are as for store_bit_field
445 and FIELDMODE is the natural mode of the field.
447 Search for a mode that is compatible with the memory access
448 restrictions and (where applicable) with a register insertion or
449 extraction. Return the new memory on success, storing the adjusted
450 bit position in *NEW_BITNUM. Return null otherwise. */
453 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
454 rtx op0, HOST_WIDE_INT bitsize,
455 HOST_WIDE_INT bitnum,
456 unsigned HOST_WIDE_INT bitregion_start,
457 unsigned HOST_WIDE_INT bitregion_end,
458 machine_mode fieldmode,
459 unsigned HOST_WIDE_INT *new_bitnum)
461 bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
462 bitregion_end, MEM_ALIGN (op0),
463 MEM_VOLATILE_P (op0));
464 machine_mode best_mode;
465 if (iter.next_mode (&best_mode))
467 /* We can use a memory in BEST_MODE. See whether this is true for
468 any wider modes. All other things being equal, we prefer to
469 use the widest mode possible because it tends to expose more
470 CSE opportunities. */
471 if (!iter.prefer_smaller_modes ())
473 /* Limit the search to the mode required by the corresponding
474 register insertion or extraction instruction, if any. */
475 machine_mode limit_mode = word_mode;
476 extraction_insn insn;
477 if (get_best_reg_extraction_insn (&insn, pattern,
478 GET_MODE_BITSIZE (best_mode),
480 limit_mode = insn.field_mode;
482 machine_mode wider_mode;
483 while (iter.next_mode (&wider_mode)
484 && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
485 best_mode = wider_mode;
487 return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
493 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
494 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
495 offset is then BITNUM / BITS_PER_UNIT. */
498 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
499 unsigned HOST_WIDE_INT bitsize,
500 machine_mode struct_mode)
502 if (BYTES_BIG_ENDIAN)
503 return (bitnum % BITS_PER_UNIT == 0
504 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
505 || (bitnum + bitsize) % BITS_PER_WORD == 0));
507 return bitnum % BITS_PER_WORD == 0;
510 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
511 containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
512 Return false if the access would touch memory outside the range
513 BITREGION_START to BITREGION_END for conformance to the C++ memory
517 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
518 unsigned HOST_WIDE_INT bitnum,
519 machine_mode fieldmode,
520 unsigned HOST_WIDE_INT bitregion_start,
521 unsigned HOST_WIDE_INT bitregion_end)
523 unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
525 /* -fstrict-volatile-bitfields must be enabled and we must have a
528 || !MEM_VOLATILE_P (op0)
529 || flag_strict_volatile_bitfields <= 0)
532 /* Non-integral modes likely only happen with packed structures.
534 if (!SCALAR_INT_MODE_P (fieldmode))
537 /* The bit size must not be larger than the field mode, and
538 the field mode must not be larger than a word. */
539 if (bitsize > modesize || modesize > BITS_PER_WORD)
542 /* Check for cases of unaligned fields that must be split. */
543 if (bitnum % modesize + bitsize > modesize)
546 /* The memory must be sufficiently aligned for a MODESIZE access.
547 This condition guarantees, that the memory access will not
548 touch anything after the end of the structure. */
549 if (MEM_ALIGN (op0) < modesize)
552 /* Check for cases where the C++ memory model applies. */
553 if (bitregion_end != 0
554 && (bitnum - bitnum % modesize < bitregion_start
555 || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
561 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
562 bit number BITNUM can be treated as a simple value of mode MODE. */
565 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
566 unsigned HOST_WIDE_INT bitnum, machine_mode mode)
569 && bitnum % BITS_PER_UNIT == 0
570 && bitsize == GET_MODE_BITSIZE (mode)
571 && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
572 || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
573 && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
576 /* Try to use instruction INSV to store VALUE into a field of OP0.
577 BITSIZE and BITNUM are as for store_bit_field. */
580 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
581 unsigned HOST_WIDE_INT bitsize,
582 unsigned HOST_WIDE_INT bitnum,
585 struct expand_operand ops[4];
588 rtx_insn *last = get_last_insn ();
589 bool copy_back = false;
591 machine_mode op_mode = insv->field_mode;
592 unsigned int unit = GET_MODE_BITSIZE (op_mode);
593 if (bitsize == 0 || bitsize > unit)
597 /* Get a reference to the first byte of the field. */
598 xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
602 /* Convert from counting within OP0 to counting in OP_MODE. */
603 if (BYTES_BIG_ENDIAN)
604 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
606 /* If xop0 is a register, we need it in OP_MODE
607 to make it acceptable to the format of insv. */
608 if (GET_CODE (xop0) == SUBREG)
609 /* We can't just change the mode, because this might clobber op0,
610 and we will need the original value of op0 if insv fails. */
611 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
612 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
613 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
616 /* If the destination is a paradoxical subreg such that we need a
617 truncate to the inner mode, perform the insertion on a temporary and
618 truncate the result to the original destination. Note that we can't
619 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
620 X) 0)) is (reg:N X). */
621 if (GET_CODE (xop0) == SUBREG
622 && REG_P (SUBREG_REG (xop0))
623 && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
626 rtx tem = gen_reg_rtx (op_mode);
627 emit_move_insn (tem, xop0);
632 /* There are similar overflow check at the start of store_bit_field_1,
633 but that only check the situation where the field lies completely
634 outside the register, while there do have situation where the field
635 lies partialy in the register, we need to adjust bitsize for this
636 partial overflow situation. Without this fix, pr48335-2.c on big-endian
637 will broken on those arch support bit insert instruction, like arm, aarch64
639 if (bitsize + bitnum > unit && bitnum < unit)
641 warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
642 "destination object, data truncated into %wu-bit",
643 bitsize, unit - bitnum);
644 bitsize = unit - bitnum;
647 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
648 "backwards" from the size of the unit we are inserting into.
649 Otherwise, we count bits from the most significant on a
650 BYTES/BITS_BIG_ENDIAN machine. */
652 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
653 bitnum = unit - bitsize - bitnum;
655 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
657 if (GET_MODE (value) != op_mode)
659 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
662 /* Optimization: Don't bother really extending VALUE
663 if it has all the bits we will actually use. However,
664 if we must narrow it, be sure we do it correctly. */
666 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
668 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
670 tmp = simplify_gen_subreg (op_mode,
671 force_reg (GET_MODE (value),
673 GET_MODE (value), 0);
677 tmp = gen_lowpart_if_possible (op_mode, value1);
679 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
684 else if (CONST_INT_P (value))
685 value1 = gen_int_mode (INTVAL (value), op_mode);
687 /* Parse phase is supposed to make VALUE's data type
688 match that of the component reference, which is a type
689 at least as wide as the field; so VALUE should have
690 a mode that corresponds to that type. */
691 gcc_assert (CONSTANT_P (value));
694 create_fixed_operand (&ops[0], xop0);
695 create_integer_operand (&ops[1], bitsize);
696 create_integer_operand (&ops[2], bitnum);
697 create_input_operand (&ops[3], value1, op_mode);
698 if (maybe_expand_insn (insv->icode, 4, ops))
701 convert_move (op0, xop0, true);
704 delete_insns_since (last);
708 /* A subroutine of store_bit_field, with the same arguments. Return true
709 if the operation could be implemented.
711 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
712 no other way of implementing the operation. If FALLBACK_P is false,
713 return false instead. */
716 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
717 unsigned HOST_WIDE_INT bitnum,
718 unsigned HOST_WIDE_INT bitregion_start,
719 unsigned HOST_WIDE_INT bitregion_end,
720 machine_mode fieldmode,
721 rtx value, bool reverse, bool fallback_p)
726 while (GET_CODE (op0) == SUBREG)
728 /* The following line once was done only if WORDS_BIG_ENDIAN,
729 but I think that is a mistake. WORDS_BIG_ENDIAN is
730 meaningful at a much higher level; when structures are copied
731 between memory and regs, the higher-numbered regs
732 always get higher addresses. */
733 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
734 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
737 /* Paradoxical subregs need special handling on big-endian machines. */
738 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
740 int difference = inner_mode_size - outer_mode_size;
742 if (WORDS_BIG_ENDIAN)
743 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
744 if (BYTES_BIG_ENDIAN)
745 byte_offset += difference % UNITS_PER_WORD;
748 byte_offset = SUBREG_BYTE (op0);
750 bitnum += byte_offset * BITS_PER_UNIT;
751 op0 = SUBREG_REG (op0);
754 /* No action is needed if the target is a register and if the field
755 lies completely outside that register. This can occur if the source
756 code contains an out-of-bounds access to a small array. */
757 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
760 /* Use vec_set patterns for inserting parts of vectors whenever
762 if (VECTOR_MODE_P (GET_MODE (op0))
764 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
765 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
766 && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
767 && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
769 struct expand_operand ops[3];
770 machine_mode outermode = GET_MODE (op0);
771 machine_mode innermode = GET_MODE_INNER (outermode);
772 enum insn_code icode = optab_handler (vec_set_optab, outermode);
773 int pos = bitnum / GET_MODE_BITSIZE (innermode);
775 create_fixed_operand (&ops[0], op0);
776 create_input_operand (&ops[1], value, innermode);
777 create_integer_operand (&ops[2], pos);
778 if (maybe_expand_insn (icode, 3, ops))
782 /* If the target is a register, overwriting the entire object, or storing
783 a full-word or multi-word field can be done with just a SUBREG. */
785 && bitsize == GET_MODE_BITSIZE (fieldmode)
786 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
787 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
789 /* Use the subreg machinery either to narrow OP0 to the required
790 words or to cope with mode punning between equal-sized modes.
791 In the latter case, use subreg on the rhs side, not lhs. */
794 if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
796 sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
800 sub = flip_storage_order (GET_MODE (op0), sub);
801 emit_move_insn (op0, sub);
807 sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
808 bitnum / BITS_PER_UNIT);
812 value = flip_storage_order (fieldmode, value);
813 emit_move_insn (sub, value);
819 /* If the target is memory, storing any naturally aligned field can be
820 done with a simple store. For targets that support fast unaligned
821 memory, any naturally sized, unit aligned field can be done directly. */
822 if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
824 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
826 value = flip_storage_order (fieldmode, value);
827 emit_move_insn (op0, value);
831 /* Make sure we are playing with integral modes. Pun with subregs
832 if we aren't. This must come after the entire register case above,
833 since that case is valid for any mode. The following cases are only
834 valid for integral modes. */
836 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
837 if (imode != GET_MODE (op0))
840 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
843 gcc_assert (imode != BLKmode);
844 op0 = gen_lowpart (imode, op0);
849 /* Storing an lsb-aligned field in a register
850 can be done with a movstrict instruction. */
854 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
855 && bitsize == GET_MODE_BITSIZE (fieldmode)
856 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
858 struct expand_operand ops[2];
859 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
861 unsigned HOST_WIDE_INT subreg_off;
863 if (GET_CODE (arg0) == SUBREG)
865 /* Else we've got some float mode source being extracted into
866 a different float mode destination -- this combination of
867 subregs results in Severe Tire Damage. */
868 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
869 || GET_MODE_CLASS (fieldmode) == MODE_INT
870 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
871 arg0 = SUBREG_REG (arg0);
874 subreg_off = bitnum / BITS_PER_UNIT;
875 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
877 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
879 create_fixed_operand (&ops[0], arg0);
880 /* Shrink the source operand to FIELDMODE. */
881 create_convert_operand_to (&ops[1], value, fieldmode, false);
882 if (maybe_expand_insn (icode, 2, ops))
887 /* Handle fields bigger than a word. */
889 if (bitsize > BITS_PER_WORD)
891 /* Here we transfer the words of the field
892 in the order least significant first.
893 This is because the most significant word is the one which may
895 However, only do that if the value is not BLKmode. */
897 const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
898 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
902 /* This is the mode we must force value to, so that there will be enough
903 subwords to extract. Note that fieldmode will often (always?) be
904 VOIDmode, because that is what store_field uses to indicate that this
905 is a bit field, but passing VOIDmode to operand_subword_force
907 fieldmode = GET_MODE (value);
908 if (fieldmode == VOIDmode)
909 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
911 last = get_last_insn ();
912 for (i = 0; i < nwords; i++)
914 /* If I is 0, use the low-order word in both field and target;
915 if I is 1, use the next to lowest word; and so on. */
916 unsigned int wordnum = (backwards
917 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
920 unsigned int bit_offset = (backwards ^ reverse
921 ? MAX ((int) bitsize - ((int) i + 1)
924 : (int) i * BITS_PER_WORD);
925 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
926 unsigned HOST_WIDE_INT new_bitsize =
927 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
929 /* If the remaining chunk doesn't have full wordsize we have
930 to make sure that for big-endian machines the higher order
932 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
933 value_word = simplify_expand_binop (word_mode, lshr_optab,
935 GEN_INT (BITS_PER_WORD
940 if (!store_bit_field_1 (op0, new_bitsize,
942 bitregion_start, bitregion_end,
944 value_word, reverse, fallback_p))
946 delete_insns_since (last);
953 /* If VALUE has a floating-point or complex mode, access it as an
954 integer of the corresponding size. This can occur on a machine
955 with 64 bit registers that uses SFmode for float. It can also
956 occur for unaligned float or complex fields. */
958 if (GET_MODE (value) != VOIDmode
959 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
960 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
962 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
963 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
966 /* If OP0 is a multi-word register, narrow it to the affected word.
967 If the region spans two words, defer to store_split_bit_field. */
968 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
970 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
971 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
973 bitnum %= BITS_PER_WORD;
974 if (bitnum + bitsize > BITS_PER_WORD)
979 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
980 bitregion_end, value, reverse);
985 /* From here on we can assume that the field to be stored in fits
986 within a word. If the destination is a register, it too fits
989 extraction_insn insv;
992 && get_best_reg_extraction_insn (&insv, EP_insv,
993 GET_MODE_BITSIZE (GET_MODE (op0)),
995 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
998 /* If OP0 is a memory, try copying it to a register and seeing if a
999 cheap register alternative is available. */
1000 if (MEM_P (op0) && !reverse)
1002 if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1004 && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1007 rtx_insn *last = get_last_insn ();
1009 /* Try loading part of OP0 into a register, inserting the bitfield
1010 into that, and then copying the result back to OP0. */
1011 unsigned HOST_WIDE_INT bitpos;
1012 rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1013 bitregion_start, bitregion_end,
1014 fieldmode, &bitpos);
1017 rtx tempreg = copy_to_reg (xop0);
1018 if (store_bit_field_1 (tempreg, bitsize, bitpos,
1019 bitregion_start, bitregion_end,
1020 fieldmode, orig_value, reverse, false))
1022 emit_move_insn (xop0, tempreg);
1025 delete_insns_since (last);
1032 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1033 bitregion_end, value, reverse);
1037 /* Generate code to store value from rtx VALUE
1038 into a bit-field within structure STR_RTX
1039 containing BITSIZE bits starting at bit BITNUM.
1041 BITREGION_START is bitpos of the first bitfield in this region.
1042 BITREGION_END is the bitpos of the ending bitfield in this region.
1043 These two fields are 0, if the C++ memory model does not apply,
1044 or we are not interested in keeping track of bitfield regions.
1046 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1048 If REVERSE is true, the store is to be done in reverse order. */
1051 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1052 unsigned HOST_WIDE_INT bitnum,
1053 unsigned HOST_WIDE_INT bitregion_start,
1054 unsigned HOST_WIDE_INT bitregion_end,
1055 machine_mode fieldmode,
1056 rtx value, bool reverse)
1058 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1059 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1060 bitregion_start, bitregion_end))
1062 /* Storing of a full word can be done with a simple store.
1063 We know here that the field can be accessed with one single
1064 instruction. For targets that support unaligned memory,
1065 an unaligned access may be necessary. */
1066 if (bitsize == GET_MODE_BITSIZE (fieldmode))
1068 str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1069 bitnum / BITS_PER_UNIT);
1071 value = flip_storage_order (fieldmode, value);
1072 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1073 emit_move_insn (str_rtx, value);
1079 str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1081 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1082 temp = copy_to_reg (str_rtx);
1083 if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1084 fieldmode, value, reverse, true))
1087 emit_move_insn (str_rtx, temp);
1093 /* Under the C++0x memory model, we must not touch bits outside the
1094 bit region. Adjust the address to start at the beginning of the
1096 if (MEM_P (str_rtx) && bitregion_start > 0)
1098 machine_mode bestmode;
1099 HOST_WIDE_INT offset, size;
1101 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1103 offset = bitregion_start / BITS_PER_UNIT;
1104 bitnum -= bitregion_start;
1105 size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1106 bitregion_end -= bitregion_start;
1107 bitregion_start = 0;
1108 bestmode = get_best_mode (bitsize, bitnum,
1109 bitregion_start, bitregion_end,
1110 MEM_ALIGN (str_rtx), VOIDmode,
1111 MEM_VOLATILE_P (str_rtx));
1112 str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1115 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1116 bitregion_start, bitregion_end,
1117 fieldmode, value, reverse, true))
1121 /* Use shifts and boolean operations to store VALUE into a bit field of
1122 width BITSIZE in OP0, starting at bit BITNUM.
1124 If REVERSE is true, the store is to be done in reverse order. */
1127 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1128 unsigned HOST_WIDE_INT bitnum,
1129 unsigned HOST_WIDE_INT bitregion_start,
1130 unsigned HOST_WIDE_INT bitregion_end,
1131 rtx value, bool reverse)
1133 /* There is a case not handled here:
1134 a structure with a known alignment of just a halfword
1135 and a field split across two aligned halfwords within the structure.
1136 Or likewise a structure with a known alignment of just a byte
1137 and a field split across two bytes.
1138 Such cases are not supposed to be able to occur. */
1142 machine_mode mode = GET_MODE (op0);
1143 if (GET_MODE_BITSIZE (mode) == 0
1144 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1146 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1147 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1149 if (mode == VOIDmode)
1151 /* The only way this should occur is if the field spans word
1153 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1154 bitregion_end, value, reverse);
1158 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1161 store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1164 /* Helper function for store_fixed_bit_field, stores
1165 the bit field always using the MODE of OP0. */
1168 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1169 unsigned HOST_WIDE_INT bitnum,
1170 rtx value, bool reverse)
1177 mode = GET_MODE (op0);
1178 gcc_assert (SCALAR_INT_MODE_P (mode));
1180 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1181 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
1183 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1184 /* BITNUM is the distance between our msb
1185 and that of the containing datum.
1186 Convert it to the distance from the lsb. */
1187 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1189 /* Now BITNUM is always the distance between our lsb
1192 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
1193 we must first convert its mode to MODE. */
1195 if (CONST_INT_P (value))
1197 unsigned HOST_WIDE_INT v = UINTVAL (value);
1199 if (bitsize < HOST_BITS_PER_WIDE_INT)
1200 v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1204 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1205 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1206 || (bitsize == HOST_BITS_PER_WIDE_INT
1207 && v == (unsigned HOST_WIDE_INT) -1))
1210 value = lshift_value (mode, v, bitnum);
1214 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1215 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1217 if (GET_MODE (value) != mode)
1218 value = convert_to_mode (mode, value, 1);
1221 value = expand_binop (mode, and_optab, value,
1222 mask_rtx (mode, 0, bitsize, 0),
1223 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1225 value = expand_shift (LSHIFT_EXPR, mode, value,
1226 bitnum, NULL_RTX, 1);
1230 value = flip_storage_order (mode, value);
1232 /* Now clear the chosen bits in OP0,
1233 except that if VALUE is -1 we need not bother. */
1234 /* We keep the intermediates in registers to allow CSE to combine
1235 consecutive bitfield assignments. */
1237 temp = force_reg (mode, op0);
1241 rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1243 mask = flip_storage_order (mode, mask);
1244 temp = expand_binop (mode, and_optab, temp, mask,
1245 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1246 temp = force_reg (mode, temp);
1249 /* Now logical-or VALUE into OP0, unless it is zero. */
1253 temp = expand_binop (mode, ior_optab, temp, value,
1254 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1255 temp = force_reg (mode, temp);
1260 op0 = copy_rtx (op0);
1261 emit_move_insn (op0, temp);
1265 /* Store a bit field that is split across multiple accessible memory objects.
1267 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1268 BITSIZE is the field width; BITPOS the position of its first bit
1270 VALUE is the value to store.
1272 If REVERSE is true, the store is to be done in reverse order.
1274 This does not yet handle fields wider than BITS_PER_WORD. */
1277 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1278 unsigned HOST_WIDE_INT bitpos,
1279 unsigned HOST_WIDE_INT bitregion_start,
1280 unsigned HOST_WIDE_INT bitregion_end,
1281 rtx value, bool reverse)
1283 unsigned int unit, total_bits, bitsdone = 0;
1285 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1287 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1288 unit = BITS_PER_WORD;
1290 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1292 /* If OP0 is a memory with a mode, then UNIT must not be larger than
1293 OP0's mode as well. Otherwise, store_fixed_bit_field will call us
1294 again, and we will mutually recurse forever. */
1295 if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1296 unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1298 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1299 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1300 that VALUE might be a floating-point constant. */
1301 if (CONSTANT_P (value) && !CONST_INT_P (value))
1303 rtx word = gen_lowpart_common (word_mode, value);
1305 if (word && (value != word))
1308 value = gen_lowpart_common (word_mode,
1309 force_reg (GET_MODE (value) != VOIDmode
1311 : word_mode, value));
1314 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1316 while (bitsdone < bitsize)
1318 unsigned HOST_WIDE_INT thissize;
1319 unsigned HOST_WIDE_INT thispos;
1320 unsigned HOST_WIDE_INT offset;
1323 offset = (bitpos + bitsdone) / unit;
1324 thispos = (bitpos + bitsdone) % unit;
1326 /* When region of bytes we can touch is restricted, decrease
1327 UNIT close to the end of the region as needed. If op0 is a REG
1328 or SUBREG of REG, don't do this, as there can't be data races
1329 on a register and we can expand shorter code in some cases. */
1331 && unit > BITS_PER_UNIT
1332 && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1334 && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1340 /* THISSIZE must not overrun a word boundary. Otherwise,
1341 store_fixed_bit_field will call us again, and we will mutually
1343 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1344 thissize = MIN (thissize, unit - thispos);
1346 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1348 /* Fetch successively less significant portions. */
1349 if (CONST_INT_P (value))
1350 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1351 >> (bitsize - bitsdone - thissize))
1352 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1353 /* Likewise, but the source is little-endian. */
1355 part = extract_fixed_bit_field (word_mode, value, thissize,
1356 bitsize - bitsdone - thissize,
1357 NULL_RTX, 1, false);
1360 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1361 /* The args are chosen so that the last part includes the
1362 lsb. Give extract_bit_field the value it needs (with
1363 endianness compensation) to fetch the piece we want. */
1364 part = extract_fixed_bit_field (word_mode, value, thissize,
1365 total_bits - bitsize + bitsdone,
1366 NULL_RTX, 1, false);
1371 /* Fetch successively more significant portions. */
1372 if (CONST_INT_P (value))
1373 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1375 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1376 /* Likewise, but the source is big-endian. */
1378 part = extract_fixed_bit_field (word_mode, value, thissize,
1379 total_bits - bitsdone - thissize,
1380 NULL_RTX, 1, false);
1382 part = extract_fixed_bit_field (word_mode, value, thissize,
1383 bitsdone, NULL_RTX, 1, false);
1386 /* If OP0 is a register, then handle OFFSET here.
1388 When handling multiword bitfields, extract_bit_field may pass
1389 down a word_mode SUBREG of a larger REG for a bitfield that actually
1390 crosses a word boundary. Thus, for a SUBREG, we must find
1391 the current word starting from the base register. */
1392 if (GET_CODE (op0) == SUBREG)
1394 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1395 + (offset * unit / BITS_PER_WORD);
1396 machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1397 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1398 word = word_offset ? const0_rtx : op0;
1400 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1401 GET_MODE (SUBREG_REG (op0)));
1402 offset &= BITS_PER_WORD / unit - 1;
1404 else if (REG_P (op0))
1406 machine_mode op0_mode = GET_MODE (op0);
1407 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1408 word = offset ? const0_rtx : op0;
1410 word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1412 offset &= BITS_PER_WORD / unit - 1;
1417 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1418 it is just an out-of-bounds access. Ignore it. */
1419 if (word != const0_rtx)
1420 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1421 bitregion_start, bitregion_end, part,
1423 bitsdone += thissize;
1427 /* A subroutine of extract_bit_field_1 that converts return value X
1428 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1429 to extract_bit_field. */
1432 convert_extracted_bit_field (rtx x, machine_mode mode,
1433 machine_mode tmode, bool unsignedp)
1435 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1438 /* If the x mode is not a scalar integral, first convert to the
1439 integer mode of that size and then access it as a floating-point
1440 value via a SUBREG. */
1441 if (!SCALAR_INT_MODE_P (tmode))
1445 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1446 x = convert_to_mode (smode, x, unsignedp);
1447 x = force_reg (smode, x);
1448 return gen_lowpart (tmode, x);
1451 return convert_to_mode (tmode, x, unsignedp);
1454 /* Try to use an ext(z)v pattern to extract a field from OP0.
1455 Return the extracted value on success, otherwise return null.
1456 EXT_MODE is the mode of the extraction and the other arguments
1457 are as for extract_bit_field. */
1460 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1461 unsigned HOST_WIDE_INT bitsize,
1462 unsigned HOST_WIDE_INT bitnum,
1463 int unsignedp, rtx target,
1464 machine_mode mode, machine_mode tmode)
1466 struct expand_operand ops[4];
1467 rtx spec_target = target;
1468 rtx spec_target_subreg = 0;
1469 machine_mode ext_mode = extv->field_mode;
1470 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1472 if (bitsize == 0 || unit < bitsize)
1476 /* Get a reference to the first byte of the field. */
1477 op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1481 /* Convert from counting within OP0 to counting in EXT_MODE. */
1482 if (BYTES_BIG_ENDIAN)
1483 bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1485 /* If op0 is a register, we need it in EXT_MODE to make it
1486 acceptable to the format of ext(z)v. */
1487 if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1489 if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1490 op0 = gen_lowpart_SUBREG (ext_mode, op0);
1493 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1494 "backwards" from the size of the unit we are extracting from.
1495 Otherwise, we count bits from the most significant on a
1496 BYTES/BITS_BIG_ENDIAN machine. */
1498 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1499 bitnum = unit - bitsize - bitnum;
1502 target = spec_target = gen_reg_rtx (tmode);
1504 if (GET_MODE (target) != ext_mode)
1506 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1507 between the mode of the extraction (word_mode) and the target
1508 mode. Instead, create a temporary and use convert_move to set
1511 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1513 target = gen_lowpart (ext_mode, target);
1514 if (GET_MODE_PRECISION (ext_mode)
1515 > GET_MODE_PRECISION (GET_MODE (spec_target)))
1516 spec_target_subreg = target;
1519 target = gen_reg_rtx (ext_mode);
1522 create_output_operand (&ops[0], target, ext_mode);
1523 create_fixed_operand (&ops[1], op0);
1524 create_integer_operand (&ops[2], bitsize);
1525 create_integer_operand (&ops[3], bitnum);
1526 if (maybe_expand_insn (extv->icode, 4, ops))
1528 target = ops[0].value;
1529 if (target == spec_target)
1531 if (target == spec_target_subreg)
1533 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1538 /* A subroutine of extract_bit_field, with the same arguments.
1539 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1540 if we can find no other means of implementing the operation.
1541 if FALLBACK_P is false, return NULL instead. */
1544 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1545 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1546 machine_mode mode, machine_mode tmode,
1547 bool reverse, bool fallback_p)
1550 machine_mode int_mode;
1553 if (tmode == VOIDmode)
1556 while (GET_CODE (op0) == SUBREG)
1558 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1559 op0 = SUBREG_REG (op0);
1562 /* If we have an out-of-bounds access to a register, just return an
1563 uninitialized register of the required mode. This can occur if the
1564 source code contains an out-of-bounds access to a small array. */
1565 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1566 return gen_reg_rtx (tmode);
1569 && mode == GET_MODE (op0)
1571 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1574 op0 = flip_storage_order (mode, op0);
1575 /* We're trying to extract a full register from itself. */
1579 /* See if we can get a better vector mode before extracting. */
1580 if (VECTOR_MODE_P (GET_MODE (op0))
1582 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1584 machine_mode new_mode;
1586 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1587 new_mode = MIN_MODE_VECTOR_FLOAT;
1588 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1589 new_mode = MIN_MODE_VECTOR_FRACT;
1590 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1591 new_mode = MIN_MODE_VECTOR_UFRACT;
1592 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1593 new_mode = MIN_MODE_VECTOR_ACCUM;
1594 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1595 new_mode = MIN_MODE_VECTOR_UACCUM;
1597 new_mode = MIN_MODE_VECTOR_INT;
1599 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1600 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1601 && targetm.vector_mode_supported_p (new_mode))
1603 if (new_mode != VOIDmode)
1604 op0 = gen_lowpart (new_mode, op0);
1607 /* Use vec_extract patterns for extracting parts of vectors whenever
1609 if (VECTOR_MODE_P (GET_MODE (op0))
1611 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1612 && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1613 == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1615 struct expand_operand ops[3];
1616 machine_mode outermode = GET_MODE (op0);
1617 machine_mode innermode = GET_MODE_INNER (outermode);
1618 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1619 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1621 create_output_operand (&ops[0], target, innermode);
1622 create_input_operand (&ops[1], op0, outermode);
1623 create_integer_operand (&ops[2], pos);
1624 if (maybe_expand_insn (icode, 3, ops))
1626 target = ops[0].value;
1627 if (GET_MODE (target) != mode)
1628 return gen_lowpart (tmode, target);
1633 /* Make sure we are playing with integral modes. Pun with subregs
1636 machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1637 if (imode != GET_MODE (op0))
1640 op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1641 else if (imode != BLKmode)
1643 op0 = gen_lowpart (imode, op0);
1645 /* If we got a SUBREG, force it into a register since we
1646 aren't going to be able to do another SUBREG on it. */
1647 if (GET_CODE (op0) == SUBREG)
1648 op0 = force_reg (imode, op0);
1650 else if (REG_P (op0))
1653 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1655 reg = gen_reg_rtx (imode);
1656 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1657 emit_move_insn (subreg, op0);
1659 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1663 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1664 rtx mem = assign_stack_temp (GET_MODE (op0), size);
1665 emit_move_insn (mem, op0);
1666 op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1671 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1672 If that's wrong, the solution is to test for it and set TARGET to 0
1675 /* Get the mode of the field to use for atomic access or subreg
1678 if (SCALAR_INT_MODE_P (tmode))
1680 machine_mode try_mode = mode_for_size (bitsize,
1681 GET_MODE_CLASS (tmode), 0);
1682 if (try_mode != BLKmode)
1685 gcc_assert (mode1 != BLKmode);
1687 /* Extraction of a full MODE1 value can be done with a subreg as long
1688 as the least significant bit of the value is the least significant
1689 bit of either OP0 or a word of OP0. */
1692 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1693 && bitsize == GET_MODE_BITSIZE (mode1)
1694 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1696 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1697 bitnum / BITS_PER_UNIT);
1699 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1702 /* Extraction of a full MODE1 value can be done with a load as long as
1703 the field is on a byte boundary and is sufficiently aligned. */
1704 if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1706 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1708 op0 = flip_storage_order (mode1, op0);
1709 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1712 /* Handle fields bigger than a word. */
1714 if (bitsize > BITS_PER_WORD)
1716 /* Here we transfer the words of the field
1717 in the order least significant first.
1718 This is because the most significant word is the one which may
1719 be less than full. */
1721 const bool backwards = WORDS_BIG_ENDIAN;
1722 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1726 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1727 target = gen_reg_rtx (mode);
1729 /* In case we're about to clobber a base register or something
1730 (see gcc.c-torture/execute/20040625-1.c). */
1731 if (reg_mentioned_p (target, str_rtx))
1732 target = gen_reg_rtx (mode);
1734 /* Indicate for flow that the entire target reg is being set. */
1735 emit_clobber (target);
1737 last = get_last_insn ();
1738 for (i = 0; i < nwords; i++)
1740 /* If I is 0, use the low-order word in both field and target;
1741 if I is 1, use the next to lowest word; and so on. */
1742 /* Word number in TARGET to use. */
1743 unsigned int wordnum
1745 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1747 /* Offset from start of field in OP0. */
1748 unsigned int bit_offset = (backwards ^ reverse
1749 ? MAX ((int) bitsize - ((int) i + 1)
1752 : (int) i * BITS_PER_WORD);
1753 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1755 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1756 bitsize - i * BITS_PER_WORD),
1757 bitnum + bit_offset, 1, target_part,
1758 mode, word_mode, reverse, fallback_p);
1760 gcc_assert (target_part);
1763 delete_insns_since (last);
1767 if (result_part != target_part)
1768 emit_move_insn (target_part, result_part);
1773 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1774 need to be zero'd out. */
1775 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1777 unsigned int i, total_words;
1779 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1780 for (i = nwords; i < total_words; i++)
1782 (operand_subword (target,
1783 backwards ? total_words - i - 1 : i,
1790 /* Signed bit field: sign-extend with two arithmetic shifts. */
1791 target = expand_shift (LSHIFT_EXPR, mode, target,
1792 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1793 return expand_shift (RSHIFT_EXPR, mode, target,
1794 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1797 /* If OP0 is a multi-word register, narrow it to the affected word.
1798 If the region spans two words, defer to extract_split_bit_field. */
1799 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1801 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1802 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1803 bitnum %= BITS_PER_WORD;
1804 if (bitnum + bitsize > BITS_PER_WORD)
1808 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1810 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1814 /* From here on we know the desired field is smaller than a word.
1815 If OP0 is a register, it too fits within a word. */
1816 enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1817 extraction_insn extv;
1820 /* ??? We could limit the structure size to the part of OP0 that
1821 contains the field, with appropriate checks for endianness
1822 and TRULY_NOOP_TRUNCATION. */
1823 && get_best_reg_extraction_insn (&extv, pattern,
1824 GET_MODE_BITSIZE (GET_MODE (op0)),
1827 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1828 unsignedp, target, mode,
1834 /* If OP0 is a memory, try copying it to a register and seeing if a
1835 cheap register alternative is available. */
1836 if (MEM_P (op0) & !reverse)
1838 if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1841 rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1849 rtx_insn *last = get_last_insn ();
1851 /* Try loading part of OP0 into a register and extracting the
1852 bitfield from that. */
1853 unsigned HOST_WIDE_INT bitpos;
1854 rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1855 0, 0, tmode, &bitpos);
1858 xop0 = copy_to_reg (xop0);
1859 rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1861 mode, tmode, reverse, false);
1864 delete_insns_since (last);
1871 /* Find a correspondingly-sized integer field, so we can apply
1872 shifts and masks to it. */
1873 int_mode = int_mode_for_mode (tmode);
1874 if (int_mode == BLKmode)
1875 int_mode = int_mode_for_mode (mode);
1876 /* Should probably push op0 out to memory and then do a load. */
1877 gcc_assert (int_mode != BLKmode);
1879 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1880 unsignedp, reverse);
1882 /* Complex values must be reversed piecewise, so we need to undo the global
1883 reversal, convert to the complex mode and reverse again. */
1884 if (reverse && COMPLEX_MODE_P (tmode))
1886 target = flip_storage_order (int_mode, target);
1887 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1888 target = flip_storage_order (tmode, target);
1891 target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1896 /* Generate code to extract a byte-field from STR_RTX
1897 containing BITSIZE bits, starting at BITNUM,
1898 and put it in TARGET if possible (if TARGET is nonzero).
1899 Regardless of TARGET, we return the rtx for where the value is placed.
1901 STR_RTX is the structure containing the byte (a REG or MEM).
1902 UNSIGNEDP is nonzero if this is an unsigned bit field.
1903 MODE is the natural mode of the field value once extracted.
1904 TMODE is the mode the caller would like the value to have;
1905 but the value may be returned with type MODE instead.
1907 If REVERSE is true, the extraction is to be done in reverse order.
1909 If a TARGET is specified and we can store in it at no extra cost,
1910 we do so, and return TARGET.
1911 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1912 if they are equally easy. */
1915 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1916 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1917 machine_mode mode, machine_mode tmode, bool reverse)
1921 /* Handle -fstrict-volatile-bitfields in the cases where it applies. */
1922 if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1923 mode1 = GET_MODE (str_rtx);
1924 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1925 mode1 = GET_MODE (target);
1929 if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1931 /* Extraction of a full MODE1 value can be done with a simple load.
1932 We know here that the field can be accessed with one single
1933 instruction. For targets that support unaligned memory,
1934 an unaligned access may be necessary. */
1935 if (bitsize == GET_MODE_BITSIZE (mode1))
1937 rtx result = adjust_bitfield_address (str_rtx, mode1,
1938 bitnum / BITS_PER_UNIT);
1940 result = flip_storage_order (mode1, result);
1941 gcc_assert (bitnum % BITS_PER_UNIT == 0);
1942 return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1945 str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1947 gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1948 str_rtx = copy_to_reg (str_rtx);
1951 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1952 target, mode, tmode, reverse, true);
1955 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1956 from bit BITNUM of OP0.
1958 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1959 If REVERSE is true, the extraction is to be done in reverse order.
1961 If TARGET is nonzero, attempts to store the value there
1962 and return TARGET, but this is not guaranteed.
1963 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1966 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1967 unsigned HOST_WIDE_INT bitsize,
1968 unsigned HOST_WIDE_INT bitnum, rtx target,
1969 int unsignedp, bool reverse)
1974 = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1975 MEM_VOLATILE_P (op0));
1977 if (mode == VOIDmode)
1978 /* The only way this should occur is if the field spans word
1980 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1983 op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1986 return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1987 target, unsignedp, reverse);
1990 /* Helper function for extract_fixed_bit_field, extracts
1991 the bit field always using the MODE of OP0. */
1994 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1995 unsigned HOST_WIDE_INT bitsize,
1996 unsigned HOST_WIDE_INT bitnum, rtx target,
1997 int unsignedp, bool reverse)
1999 machine_mode mode = GET_MODE (op0);
2000 gcc_assert (SCALAR_INT_MODE_P (mode));
2002 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2003 for invalid input, such as extract equivalent of f5 from
2004 gcc.dg/pr48335-2.c. */
2006 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2007 /* BITNUM is the distance between our msb and that of OP0.
2008 Convert it to the distance from the lsb. */
2009 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2011 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2012 We have reduced the big-endian case to the little-endian case. */
2014 op0 = flip_storage_order (mode, op0);
2020 /* If the field does not already start at the lsb,
2021 shift it so it does. */
2022 /* Maybe propagate the target for the shift. */
2023 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2026 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2028 /* Convert the value to the desired mode. */
2030 op0 = convert_to_mode (tmode, op0, 1);
2032 /* Unless the msb of the field used to be the msb when we shifted,
2033 mask out the upper bits. */
2035 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2036 return expand_binop (GET_MODE (op0), and_optab, op0,
2037 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2038 target, 1, OPTAB_LIB_WIDEN);
2042 /* To extract a signed bit-field, first shift its msb to the msb of the word,
2043 then arithmetic-shift its lsb to the lsb of the word. */
2044 op0 = force_reg (mode, op0);
2046 /* Find the narrowest integer mode that contains the field. */
2048 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2049 mode = GET_MODE_WIDER_MODE (mode))
2050 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2052 op0 = convert_to_mode (mode, op0, 0);
2059 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2061 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2062 /* Maybe propagate the target for the shift. */
2063 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2064 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2067 return expand_shift (RSHIFT_EXPR, mode, op0,
2068 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2071 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2075 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2078 return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2081 /* Extract a bit field that is split across two words
2082 and return an RTX for the result.
2084 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2085 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2086 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2088 If REVERSE is true, the extraction is to be done in reverse order. */
2091 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2092 unsigned HOST_WIDE_INT bitpos, int unsignedp,
2096 unsigned int bitsdone = 0;
2097 rtx result = NULL_RTX;
2100 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2102 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2103 unit = BITS_PER_WORD;
2105 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2107 while (bitsdone < bitsize)
2109 unsigned HOST_WIDE_INT thissize;
2111 unsigned HOST_WIDE_INT thispos;
2112 unsigned HOST_WIDE_INT offset;
2114 offset = (bitpos + bitsdone) / unit;
2115 thispos = (bitpos + bitsdone) % unit;
2117 /* THISSIZE must not overrun a word boundary. Otherwise,
2118 extract_fixed_bit_field will call us again, and we will mutually
2120 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2121 thissize = MIN (thissize, unit - thispos);
2123 /* If OP0 is a register, then handle OFFSET here.
2125 When handling multiword bitfields, extract_bit_field may pass
2126 down a word_mode SUBREG of a larger REG for a bitfield that actually
2127 crosses a word boundary. Thus, for a SUBREG, we must find
2128 the current word starting from the base register. */
2129 if (GET_CODE (op0) == SUBREG)
2131 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2132 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2133 GET_MODE (SUBREG_REG (op0)));
2136 else if (REG_P (op0))
2138 word = operand_subword_force (op0, offset, GET_MODE (op0));
2144 /* Extract the parts in bit-counting order,
2145 whose meaning is determined by BYTES_PER_UNIT.
2146 OFFSET is in UNITs, and UNIT is in bits. */
2147 part = extract_fixed_bit_field (word_mode, word, thissize,
2148 offset * unit + thispos, 0, 1, reverse);
2149 bitsdone += thissize;
2151 /* Shift this part into place for the result. */
2152 if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2154 if (bitsize != bitsdone)
2155 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2156 bitsize - bitsdone, 0, 1);
2160 if (bitsdone != thissize)
2161 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2162 bitsdone - thissize, 0, 1);
2168 /* Combine the parts with bitwise or. This works
2169 because we extracted each part as an unsigned bit field. */
2170 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2176 /* Unsigned bit field: we are done. */
2179 /* Signed bit field: sign-extend with two arithmetic shifts. */
2180 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2181 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2182 return expand_shift (RSHIFT_EXPR, word_mode, result,
2183 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2186 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2187 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2188 MODE, fill the upper bits with zeros. Fail if the layout of either
2189 mode is unknown (as for CC modes) or if the extraction would involve
2190 unprofitable mode punning. Return the value on success, otherwise
2193 This is different from gen_lowpart* in these respects:
2195 - the returned value must always be considered an rvalue
2197 - when MODE is wider than SRC_MODE, the extraction involves
2200 - when MODE is smaller than SRC_MODE, the extraction involves
2201 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2203 In other words, this routine performs a computation, whereas the
2204 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2208 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2210 machine_mode int_mode, src_int_mode;
2212 if (mode == src_mode)
2215 if (CONSTANT_P (src))
2217 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2218 fails, it will happily create (subreg (symbol_ref)) or similar
2220 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2221 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2225 if (GET_MODE (src) == VOIDmode
2226 || !validate_subreg (mode, src_mode, src, byte))
2229 src = force_reg (GET_MODE (src), src);
2230 return gen_rtx_SUBREG (mode, src, byte);
2233 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2236 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2237 && MODES_TIEABLE_P (mode, src_mode))
2239 rtx x = gen_lowpart_common (mode, src);
2244 src_int_mode = int_mode_for_mode (src_mode);
2245 int_mode = int_mode_for_mode (mode);
2246 if (src_int_mode == BLKmode || int_mode == BLKmode)
2249 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2251 if (!MODES_TIEABLE_P (int_mode, mode))
2254 src = gen_lowpart (src_int_mode, src);
2255 src = convert_modes (int_mode, src_int_mode, src, true);
2256 src = gen_lowpart (mode, src);
2260 /* Add INC into TARGET. */
2263 expand_inc (rtx target, rtx inc)
2265 rtx value = expand_binop (GET_MODE (target), add_optab,
2267 target, 0, OPTAB_LIB_WIDEN);
2268 if (value != target)
2269 emit_move_insn (target, value);
2272 /* Subtract DEC from TARGET. */
2275 expand_dec (rtx target, rtx dec)
2277 rtx value = expand_binop (GET_MODE (target), sub_optab,
2279 target, 0, OPTAB_LIB_WIDEN);
2280 if (value != target)
2281 emit_move_insn (target, value);
2284 /* Output a shift instruction for expression code CODE,
2285 with SHIFTED being the rtx for the value to shift,
2286 and AMOUNT the rtx for the amount to shift by.
2287 Store the result in the rtx TARGET, if that is convenient.
2288 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2289 Return the rtx for where the value is. */
2292 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2293 rtx amount, rtx target, int unsignedp)
2296 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2297 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2298 optab lshift_optab = ashl_optab;
2299 optab rshift_arith_optab = ashr_optab;
2300 optab rshift_uns_optab = lshr_optab;
2301 optab lrotate_optab = rotl_optab;
2302 optab rrotate_optab = rotr_optab;
2303 machine_mode op1_mode;
2304 machine_mode scalar_mode = mode;
2306 bool speed = optimize_insn_for_speed_p ();
2308 if (VECTOR_MODE_P (mode))
2309 scalar_mode = GET_MODE_INNER (mode);
2311 op1_mode = GET_MODE (op1);
2313 /* Determine whether the shift/rotate amount is a vector, or scalar. If the
2314 shift amount is a vector, use the vector/vector shift patterns. */
2315 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2317 lshift_optab = vashl_optab;
2318 rshift_arith_optab = vashr_optab;
2319 rshift_uns_optab = vlshr_optab;
2320 lrotate_optab = vrotl_optab;
2321 rrotate_optab = vrotr_optab;
2324 /* Previously detected shift-counts computed by NEGATE_EXPR
2325 and shifted in the other direction; but that does not work
2328 if (SHIFT_COUNT_TRUNCATED)
2330 if (CONST_INT_P (op1)
2331 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2332 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2333 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2334 % GET_MODE_BITSIZE (scalar_mode));
2335 else if (GET_CODE (op1) == SUBREG
2336 && subreg_lowpart_p (op1)
2337 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2338 && SCALAR_INT_MODE_P (GET_MODE (op1)))
2339 op1 = SUBREG_REG (op1);
2342 /* Canonicalize rotates by constant amount. If op1 is bitsize / 2,
2343 prefer left rotation, if op1 is from bitsize / 2 + 1 to
2344 bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2347 && CONST_INT_P (op1)
2348 && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2349 GET_MODE_BITSIZE (scalar_mode) - 1))
2351 op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2353 code = left ? LROTATE_EXPR : RROTATE_EXPR;
2356 /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2357 Note that this is not the case for bigger values. For instance a rotation
2358 of 0x01020304 by 16 bits gives 0x03040102 which is different from
2359 0x04030201 (bswapsi). */
2361 && CONST_INT_P (op1)
2362 && INTVAL (op1) == BITS_PER_UNIT
2363 && GET_MODE_SIZE (scalar_mode) == 2
2364 && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2365 return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2368 if (op1 == const0_rtx)
2371 /* Check whether its cheaper to implement a left shift by a constant
2372 bit count by a sequence of additions. */
2373 if (code == LSHIFT_EXPR
2374 && CONST_INT_P (op1)
2376 && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2377 && INTVAL (op1) < MAX_BITS_PER_WORD
2378 && (shift_cost (speed, mode, INTVAL (op1))
2379 > INTVAL (op1) * add_cost (speed, mode))
2380 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2383 for (i = 0; i < INTVAL (op1); i++)
2385 temp = force_reg (mode, shifted);
2386 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2387 unsignedp, OPTAB_LIB_WIDEN);
2392 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2394 enum optab_methods methods;
2397 methods = OPTAB_DIRECT;
2398 else if (attempt == 1)
2399 methods = OPTAB_WIDEN;
2401 methods = OPTAB_LIB_WIDEN;
2405 /* Widening does not work for rotation. */
2406 if (methods == OPTAB_WIDEN)
2408 else if (methods == OPTAB_LIB_WIDEN)
2410 /* If we have been unable to open-code this by a rotation,
2411 do it as the IOR of two shifts. I.e., to rotate A
2413 (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2414 where C is the bitsize of A.
2416 It is theoretically possible that the target machine might
2417 not be able to perform either shift and hence we would
2418 be making two libcalls rather than just the one for the
2419 shift (similarly if IOR could not be done). We will allow
2420 this extremely unlikely lossage to avoid complicating the
2423 rtx subtarget = target == shifted ? 0 : target;
2424 rtx new_amount, other_amount;
2428 if (op1 == const0_rtx)
2430 else if (CONST_INT_P (op1))
2431 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2436 = simplify_gen_unary (NEG, GET_MODE (op1),
2437 op1, GET_MODE (op1));
2438 HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2440 = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2441 gen_int_mode (mask, GET_MODE (op1)));
2444 shifted = force_reg (mode, shifted);
2446 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2447 mode, shifted, new_amount, 0, 1);
2448 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2449 mode, shifted, other_amount,
2451 return expand_binop (mode, ior_optab, temp, temp1, target,
2452 unsignedp, methods);
2455 temp = expand_binop (mode,
2456 left ? lrotate_optab : rrotate_optab,
2457 shifted, op1, target, unsignedp, methods);
2460 temp = expand_binop (mode,
2461 left ? lshift_optab : rshift_uns_optab,
2462 shifted, op1, target, unsignedp, methods);
2464 /* Do arithmetic shifts.
2465 Also, if we are going to widen the operand, we can just as well
2466 use an arithmetic right-shift instead of a logical one. */
2467 if (temp == 0 && ! rotate
2468 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2470 enum optab_methods methods1 = methods;
2472 /* If trying to widen a log shift to an arithmetic shift,
2473 don't accept an arithmetic shift of the same size. */
2475 methods1 = OPTAB_MUST_WIDEN;
2477 /* Arithmetic shift */
2479 temp = expand_binop (mode,
2480 left ? lshift_optab : rshift_arith_optab,
2481 shifted, op1, target, unsignedp, methods1);
2484 /* We used to try extzv here for logical right shifts, but that was
2485 only useful for one machine, the VAX, and caused poor code
2486 generation there for lshrdi3, so the code was deleted and a
2487 define_expand for lshrsi3 was added to vax.md. */
2494 /* Output a shift instruction for expression code CODE,
2495 with SHIFTED being the rtx for the value to shift,
2496 and AMOUNT the amount to shift by.
2497 Store the result in the rtx TARGET, if that is convenient.
2498 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2499 Return the rtx for where the value is. */
2502 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2503 int amount, rtx target, int unsignedp)
2505 return expand_shift_1 (code, mode,
2506 shifted, GEN_INT (amount), target, unsignedp);
2509 /* Output a shift instruction for expression code CODE,
2510 with SHIFTED being the rtx for the value to shift,
2511 and AMOUNT the tree for the amount to shift by.
2512 Store the result in the rtx TARGET, if that is convenient.
2513 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2514 Return the rtx for where the value is. */
2517 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2518 tree amount, rtx target, int unsignedp)
2520 return expand_shift_1 (code, mode,
2521 shifted, expand_normal (amount), target, unsignedp);
2525 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2526 const struct mult_cost *, machine_mode mode);
2527 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2528 const struct algorithm *, enum mult_variant);
2529 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2530 static rtx extract_high_half (machine_mode, rtx);
2531 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2532 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2534 /* Compute and return the best algorithm for multiplying by T.
2535 The algorithm must cost less than cost_limit
2536 If retval.cost >= COST_LIMIT, no algorithm was found and all
2537 other field of the returned struct are undefined.
2538 MODE is the machine mode of the multiplication. */
2541 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2542 const struct mult_cost *cost_limit, machine_mode mode)
2545 struct algorithm *alg_in, *best_alg;
2546 struct mult_cost best_cost;
2547 struct mult_cost new_limit;
2548 int op_cost, op_latency;
2549 unsigned HOST_WIDE_INT orig_t = t;
2550 unsigned HOST_WIDE_INT q;
2551 int maxm, hash_index;
2552 bool cache_hit = false;
2553 enum alg_code cache_alg = alg_zero;
2554 bool speed = optimize_insn_for_speed_p ();
2556 struct alg_hash_entry *entry_ptr;
2558 /* Indicate that no algorithm is yet found. If no algorithm
2559 is found, this value will be returned and indicate failure. */
2560 alg_out->cost.cost = cost_limit->cost + 1;
2561 alg_out->cost.latency = cost_limit->latency + 1;
2563 if (cost_limit->cost < 0
2564 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2567 /* Be prepared for vector modes. */
2568 imode = GET_MODE_INNER (mode);
2570 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2572 /* Restrict the bits of "t" to the multiplication's mode. */
2573 t &= GET_MODE_MASK (imode);
2575 /* t == 1 can be done in zero cost. */
2579 alg_out->cost.cost = 0;
2580 alg_out->cost.latency = 0;
2581 alg_out->op[0] = alg_m;
2585 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2589 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2594 alg_out->cost.cost = zero_cost (speed);
2595 alg_out->cost.latency = zero_cost (speed);
2596 alg_out->op[0] = alg_zero;
2601 /* We'll be needing a couple extra algorithm structures now. */
2603 alg_in = XALLOCA (struct algorithm);
2604 best_alg = XALLOCA (struct algorithm);
2605 best_cost = *cost_limit;
2607 /* Compute the hash index. */
2608 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2610 /* See if we already know what to do for T. */
2611 entry_ptr = alg_hash_entry_ptr (hash_index);
2612 if (entry_ptr->t == t
2613 && entry_ptr->mode == mode
2614 && entry_ptr->mode == mode
2615 && entry_ptr->speed == speed
2616 && entry_ptr->alg != alg_unknown)
2618 cache_alg = entry_ptr->alg;
2620 if (cache_alg == alg_impossible)
2622 /* The cache tells us that it's impossible to synthesize
2623 multiplication by T within entry_ptr->cost. */
2624 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2625 /* COST_LIMIT is at least as restrictive as the one
2626 recorded in the hash table, in which case we have no
2627 hope of synthesizing a multiplication. Just
2631 /* If we get here, COST_LIMIT is less restrictive than the
2632 one recorded in the hash table, so we may be able to
2633 synthesize a multiplication. Proceed as if we didn't
2634 have the cache entry. */
2638 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2639 /* The cached algorithm shows that this multiplication
2640 requires more cost than COST_LIMIT. Just return. This
2641 way, we don't clobber this cache entry with
2642 alg_impossible but retain useful information. */
2654 goto do_alg_addsub_t_m2;
2656 case alg_add_factor:
2657 case alg_sub_factor:
2658 goto do_alg_addsub_factor;
2661 goto do_alg_add_t2_m;
2664 goto do_alg_sub_t2_m;
2672 /* If we have a group of zero bits at the low-order part of T, try
2673 multiplying by the remaining bits and then doing a shift. */
2678 m = floor_log2 (t & -t); /* m = number of low zero bits */
2682 /* The function expand_shift will choose between a shift and
2683 a sequence of additions, so the observed cost is given as
2684 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2685 op_cost = m * add_cost (speed, mode);
2686 if (shift_cost (speed, mode, m) < op_cost)
2687 op_cost = shift_cost (speed, mode, m);
2688 new_limit.cost = best_cost.cost - op_cost;
2689 new_limit.latency = best_cost.latency - op_cost;
2690 synth_mult (alg_in, q, &new_limit, mode);
2692 alg_in->cost.cost += op_cost;
2693 alg_in->cost.latency += op_cost;
2694 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2696 best_cost = alg_in->cost;
2697 std::swap (alg_in, best_alg);
2698 best_alg->log[best_alg->ops] = m;
2699 best_alg->op[best_alg->ops] = alg_shift;
2702 /* See if treating ORIG_T as a signed number yields a better
2703 sequence. Try this sequence only for a negative ORIG_T
2704 as it would be useless for a non-negative ORIG_T. */
2705 if ((HOST_WIDE_INT) orig_t < 0)
2707 /* Shift ORIG_T as follows because a right shift of a
2708 negative-valued signed type is implementation
2710 q = ~(~orig_t >> m);
2711 /* The function expand_shift will choose between a shift
2712 and a sequence of additions, so the observed cost is
2713 given as MIN (m * add_cost(speed, mode),
2714 shift_cost(speed, mode, m)). */
2715 op_cost = m * add_cost (speed, mode);
2716 if (shift_cost (speed, mode, m) < op_cost)
2717 op_cost = shift_cost (speed, mode, m);
2718 new_limit.cost = best_cost.cost - op_cost;
2719 new_limit.latency = best_cost.latency - op_cost;
2720 synth_mult (alg_in, q, &new_limit, mode);
2722 alg_in->cost.cost += op_cost;
2723 alg_in->cost.latency += op_cost;
2724 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2726 best_cost = alg_in->cost;
2727 std::swap (alg_in, best_alg);
2728 best_alg->log[best_alg->ops] = m;
2729 best_alg->op[best_alg->ops] = alg_shift;
2737 /* If we have an odd number, add or subtract one. */
2740 unsigned HOST_WIDE_INT w;
2743 for (w = 1; (w & t) != 0; w <<= 1)
2745 /* If T was -1, then W will be zero after the loop. This is another
2746 case where T ends with ...111. Handling this with (T + 1) and
2747 subtract 1 produces slightly better code and results in algorithm
2748 selection much faster than treating it like the ...0111 case
2752 /* Reject the case where t is 3.
2753 Thus we prefer addition in that case. */
2756 /* T ends with ...111. Multiply by (T + 1) and subtract T. */
2758 op_cost = add_cost (speed, mode);
2759 new_limit.cost = best_cost.cost - op_cost;
2760 new_limit.latency = best_cost.latency - op_cost;
2761 synth_mult (alg_in, t + 1, &new_limit, mode);
2763 alg_in->cost.cost += op_cost;
2764 alg_in->cost.latency += op_cost;
2765 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2767 best_cost = alg_in->cost;
2768 std::swap (alg_in, best_alg);
2769 best_alg->log[best_alg->ops] = 0;
2770 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2775 /* T ends with ...01 or ...011. Multiply by (T - 1) and add T. */
2777 op_cost = add_cost (speed, mode);
2778 new_limit.cost = best_cost.cost - op_cost;
2779 new_limit.latency = best_cost.latency - op_cost;
2780 synth_mult (alg_in, t - 1, &new_limit, mode);
2782 alg_in->cost.cost += op_cost;
2783 alg_in->cost.latency += op_cost;
2784 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2786 best_cost = alg_in->cost;
2787 std::swap (alg_in, best_alg);
2788 best_alg->log[best_alg->ops] = 0;
2789 best_alg->op[best_alg->ops] = alg_add_t_m2;
2793 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2794 quickly with a - a * n for some appropriate constant n. */
2795 m = exact_log2 (-orig_t + 1);
2796 if (m >= 0 && m < maxm)
2798 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2799 /* If the target has a cheap shift-and-subtract insn use
2800 that in preference to a shift insn followed by a sub insn.
2801 Assume that the shift-and-sub is "atomic" with a latency
2802 equal to it's cost, otherwise assume that on superscalar
2803 hardware the shift may be executed concurrently with the
2804 earlier steps in the algorithm. */
2805 if (shiftsub1_cost (speed, mode, m) <= op_cost)
2807 op_cost = shiftsub1_cost (speed, mode, m);
2808 op_latency = op_cost;
2811 op_latency = add_cost (speed, mode);
2813 new_limit.cost = best_cost.cost - op_cost;
2814 new_limit.latency = best_cost.latency - op_latency;
2815 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2818 alg_in->cost.cost += op_cost;
2819 alg_in->cost.latency += op_latency;
2820 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2822 best_cost = alg_in->cost;
2823 std::swap (alg_in, best_alg);
2824 best_alg->log[best_alg->ops] = m;
2825 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2833 /* Look for factors of t of the form
2834 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2835 If we find such a factor, we can multiply by t using an algorithm that
2836 multiplies by q, shift the result by m and add/subtract it to itself.
2838 We search for large factors first and loop down, even if large factors
2839 are less probable than small; if we find a large factor we will find a
2840 good sequence quickly, and therefore be able to prune (by decreasing
2841 COST_LIMIT) the search. */
2843 do_alg_addsub_factor:
2844 for (m = floor_log2 (t - 1); m >= 2; m--)
2846 unsigned HOST_WIDE_INT d;
2848 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2849 if (t % d == 0 && t > d && m < maxm
2850 && (!cache_hit || cache_alg == alg_add_factor))
2852 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2853 if (shiftadd_cost (speed, mode, m) <= op_cost)
2854 op_cost = shiftadd_cost (speed, mode, m);
2856 op_latency = op_cost;
2859 new_limit.cost = best_cost.cost - op_cost;
2860 new_limit.latency = best_cost.latency - op_latency;
2861 synth_mult (alg_in, t / d, &new_limit, mode);
2863 alg_in->cost.cost += op_cost;
2864 alg_in->cost.latency += op_latency;
2865 if (alg_in->cost.latency < op_cost)
2866 alg_in->cost.latency = op_cost;
2867 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2869 best_cost = alg_in->cost;
2870 std::swap (alg_in, best_alg);
2871 best_alg->log[best_alg->ops] = m;
2872 best_alg->op[best_alg->ops] = alg_add_factor;
2874 /* Other factors will have been taken care of in the recursion. */
2878 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2879 if (t % d == 0 && t > d && m < maxm
2880 && (!cache_hit || cache_alg == alg_sub_factor))
2882 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2883 if (shiftsub0_cost (speed, mode, m) <= op_cost)
2884 op_cost = shiftsub0_cost (speed, mode, m);
2886 op_latency = op_cost;
2888 new_limit.cost = best_cost.cost - op_cost;
2889 new_limit.latency = best_cost.latency - op_latency;
2890 synth_mult (alg_in, t / d, &new_limit, mode);
2892 alg_in->cost.cost += op_cost;
2893 alg_in->cost.latency += op_latency;
2894 if (alg_in->cost.latency < op_cost)
2895 alg_in->cost.latency = op_cost;
2896 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2898 best_cost = alg_in->cost;
2899 std::swap (alg_in, best_alg);
2900 best_alg->log[best_alg->ops] = m;
2901 best_alg->op[best_alg->ops] = alg_sub_factor;
2909 /* Try shift-and-add (load effective address) instructions,
2910 i.e. do a*3, a*5, a*9. */
2917 if (m >= 0 && m < maxm)
2919 op_cost = shiftadd_cost (speed, mode, m);
2920 new_limit.cost = best_cost.cost - op_cost;
2921 new_limit.latency = best_cost.latency - op_cost;
2922 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2924 alg_in->cost.cost += op_cost;
2925 alg_in->cost.latency += op_cost;
2926 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2928 best_cost = alg_in->cost;
2929 std::swap (alg_in, best_alg);
2930 best_alg->log[best_alg->ops] = m;
2931 best_alg->op[best_alg->ops] = alg_add_t2_m;
2941 if (m >= 0 && m < maxm)
2943 op_cost = shiftsub0_cost (speed, mode, m);
2944 new_limit.cost = best_cost.cost - op_cost;
2945 new_limit.latency = best_cost.latency - op_cost;
2946 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2948 alg_in->cost.cost += op_cost;
2949 alg_in->cost.latency += op_cost;
2950 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2952 best_cost = alg_in->cost;
2953 std::swap (alg_in, best_alg);
2954 best_alg->log[best_alg->ops] = m;
2955 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2963 /* If best_cost has not decreased, we have not found any algorithm. */
2964 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2966 /* We failed to find an algorithm. Record alg_impossible for
2967 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2968 we are asked to find an algorithm for T within the same or
2969 lower COST_LIMIT, we can immediately return to the
2972 entry_ptr->mode = mode;
2973 entry_ptr->speed = speed;
2974 entry_ptr->alg = alg_impossible;
2975 entry_ptr->cost = *cost_limit;
2979 /* Cache the result. */
2983 entry_ptr->mode = mode;
2984 entry_ptr->speed = speed;
2985 entry_ptr->alg = best_alg->op[best_alg->ops];
2986 entry_ptr->cost.cost = best_cost.cost;
2987 entry_ptr->cost.latency = best_cost.latency;
2990 /* If we are getting a too long sequence for `struct algorithm'
2991 to record, make this search fail. */
2992 if (best_alg->ops == MAX_BITS_PER_WORD)
2995 /* Copy the algorithm from temporary space to the space at alg_out.
2996 We avoid using structure assignment because the majority of
2997 best_alg is normally undefined, and this is a critical function. */
2998 alg_out->ops = best_alg->ops + 1;
2999 alg_out->cost = best_cost;
3000 memcpy (alg_out->op, best_alg->op,
3001 alg_out->ops * sizeof *alg_out->op);
3002 memcpy (alg_out->log, best_alg->log,
3003 alg_out->ops * sizeof *alg_out->log);
3006 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3007 Try three variations:
3009 - a shift/add sequence based on VAL itself
3010 - a shift/add sequence based on -VAL, followed by a negation
3011 - a shift/add sequence based on VAL - 1, followed by an addition.
3013 Return true if the cheapest of these cost less than MULT_COST,
3014 describing the algorithm in *ALG and final fixup in *VARIANT. */
3017 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3018 struct algorithm *alg, enum mult_variant *variant,
3021 struct algorithm alg2;
3022 struct mult_cost limit;
3024 bool speed = optimize_insn_for_speed_p ();
3026 /* Fail quickly for impossible bounds. */
3030 /* Ensure that mult_cost provides a reasonable upper bound.
3031 Any constant multiplication can be performed with less
3032 than 2 * bits additions. */
3033 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3034 if (mult_cost > op_cost)
3035 mult_cost = op_cost;
3037 *variant = basic_variant;
3038 limit.cost = mult_cost;
3039 limit.latency = mult_cost;
3040 synth_mult (alg, val, &limit, mode);
3042 /* This works only if the inverted value actually fits in an
3044 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3046 op_cost = neg_cost (speed, mode);
3047 if (MULT_COST_LESS (&alg->cost, mult_cost))
3049 limit.cost = alg->cost.cost - op_cost;
3050 limit.latency = alg->cost.latency - op_cost;
3054 limit.cost = mult_cost - op_cost;
3055 limit.latency = mult_cost - op_cost;
3058 synth_mult (&alg2, -val, &limit, mode);
3059 alg2.cost.cost += op_cost;
3060 alg2.cost.latency += op_cost;
3061 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3062 *alg = alg2, *variant = negate_variant;
3065 /* This proves very useful for division-by-constant. */
3066 op_cost = add_cost (speed, mode);
3067 if (MULT_COST_LESS (&alg->cost, mult_cost))
3069 limit.cost = alg->cost.cost - op_cost;
3070 limit.latency = alg->cost.latency - op_cost;
3074 limit.cost = mult_cost - op_cost;
3075 limit.latency = mult_cost - op_cost;
3078 synth_mult (&alg2, val - 1, &limit, mode);
3079 alg2.cost.cost += op_cost;
3080 alg2.cost.latency += op_cost;
3081 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3082 *alg = alg2, *variant = add_variant;
3084 return MULT_COST_LESS (&alg->cost, mult_cost);
3087 /* A subroutine of expand_mult, used for constant multiplications.
3088 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3089 convenient. Use the shift/add sequence described by ALG and apply
3090 the final fixup specified by VARIANT. */
3093 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3094 rtx target, const struct algorithm *alg,
3095 enum mult_variant variant)
3097 HOST_WIDE_INT val_so_far;
3103 /* Avoid referencing memory over and over and invalid sharing
3105 op0 = force_reg (mode, op0);
3107 /* ACCUM starts out either as OP0 or as a zero, depending on
3108 the first operation. */
3110 if (alg->op[0] == alg_zero)
3112 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3115 else if (alg->op[0] == alg_m)
3117 accum = copy_to_mode_reg (mode, op0);
3123 for (opno = 1; opno < alg->ops; opno++)
3125 int log = alg->log[opno];
3126 rtx shift_subtarget = optimize ? 0 : accum;
3128 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3131 rtx accum_target = optimize ? 0 : accum;
3134 switch (alg->op[opno])
3137 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3138 /* REG_EQUAL note will be attached to the following insn. */
3139 emit_move_insn (accum, tem);
3144 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3145 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3146 add_target ? add_target : accum_target);
3147 val_so_far += (HOST_WIDE_INT) 1 << log;
3151 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3152 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3153 add_target ? add_target : accum_target);
3154 val_so_far -= (HOST_WIDE_INT) 1 << log;
3158 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3159 log, shift_subtarget, 0);
3160 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3161 add_target ? add_target : accum_target);
3162 val_so_far = (val_so_far << log) + 1;
3166 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3167 log, shift_subtarget, 0);
3168 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3169 add_target ? add_target : accum_target);
3170 val_so_far = (val_so_far << log) - 1;
3173 case alg_add_factor:
3174 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3175 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3176 add_target ? add_target : accum_target);
3177 val_so_far += val_so_far << log;
3180 case alg_sub_factor:
3181 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3182 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3184 ? add_target : (optimize ? 0 : tem)));
3185 val_so_far = (val_so_far << log) - val_so_far;
3192 if (SCALAR_INT_MODE_P (mode))
3194 /* Write a REG_EQUAL note on the last insn so that we can cse
3195 multiplication sequences. Note that if ACCUM is a SUBREG,
3196 we've set the inner register and must properly indicate that. */
3197 tem = op0, nmode = mode;
3198 accum_inner = accum;
3199 if (GET_CODE (accum) == SUBREG)
3201 accum_inner = SUBREG_REG (accum);
3202 nmode = GET_MODE (accum_inner);
3203 tem = gen_lowpart (nmode, op0);
3206 insn = get_last_insn ();
3207 set_dst_reg_note (insn, REG_EQUAL,
3208 gen_rtx_MULT (nmode, tem,
3209 gen_int_mode (val_so_far, nmode)),
3214 if (variant == negate_variant)
3216 val_so_far = -val_so_far;
3217 accum = expand_unop (mode, neg_optab, accum, target, 0);
3219 else if (variant == add_variant)
3221 val_so_far = val_so_far + 1;
3222 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3225 /* Compare only the bits of val and val_so_far that are significant
3226 in the result mode, to avoid sign-/zero-extension confusion. */
3227 nmode = GET_MODE_INNER (mode);
3228 val &= GET_MODE_MASK (nmode);
3229 val_so_far &= GET_MODE_MASK (nmode);
3230 gcc_assert (val == val_so_far);
3235 /* Perform a multiplication and return an rtx for the result.
3236 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3237 TARGET is a suggestion for where to store the result (an rtx).
3239 We check specially for a constant integer as OP1.
3240 If you want this check for OP0 as well, then before calling
3241 you should swap the two operands if OP0 would be constant. */
3244 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3247 enum mult_variant variant;
3248 struct algorithm algorithm;
3251 bool speed = optimize_insn_for_speed_p ();
3252 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3254 if (CONSTANT_P (op0))
3255 std::swap (op0, op1);
3257 /* For vectors, there are several simplifications that can be made if
3258 all elements of the vector constant are identical. */
3259 scalar_op1 = unwrap_const_vec_duplicate (op1);
3261 if (INTEGRAL_MODE_P (mode))
3264 HOST_WIDE_INT coeff;
3268 if (op1 == CONST0_RTX (mode))
3270 if (op1 == CONST1_RTX (mode))
3272 if (op1 == CONSTM1_RTX (mode))
3273 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3279 /* If mode is integer vector mode, check if the backend supports
3280 vector lshift (by scalar or vector) at all. If not, we can't use
3281 synthetized multiply. */
3282 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3283 && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3284 && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3287 /* These are the operations that are potentially turned into
3288 a sequence of shifts and additions. */
3289 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3291 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3292 less than or equal in size to `unsigned int' this doesn't matter.
3293 If the mode is larger than `unsigned int', then synth_mult works
3294 only if the constant value exactly fits in an `unsigned int' without
3295 any truncation. This means that multiplying by negative values does
3296 not work; results are off by 2^32 on a 32 bit machine. */
3297 if (CONST_INT_P (scalar_op1))
3299 coeff = INTVAL (scalar_op1);
3302 #if TARGET_SUPPORTS_WIDE_INT
3303 else if (CONST_WIDE_INT_P (scalar_op1))
3305 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3308 int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3309 /* Perfect power of 2 (other than 1, which is handled above). */
3311 return expand_shift (LSHIFT_EXPR, mode, op0,
3312 shift, target, unsignedp);
3319 /* We used to test optimize here, on the grounds that it's better to
3320 produce a smaller program when -O is not used. But this causes
3321 such a terrible slowdown sometimes that it seems better to always
3324 /* Special case powers of two. */
3325 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3326 && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3327 return expand_shift (LSHIFT_EXPR, mode, op0,
3328 floor_log2 (coeff), target, unsignedp);
3330 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3332 /* Attempt to handle multiplication of DImode values by negative
3333 coefficients, by performing the multiplication by a positive
3334 multiplier and then inverting the result. */
3335 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3337 /* Its safe to use -coeff even for INT_MIN, as the
3338 result is interpreted as an unsigned coefficient.
3339 Exclude cost of op0 from max_cost to match the cost
3340 calculation of the synth_mult. */
3341 coeff = -(unsigned HOST_WIDE_INT) coeff;
3342 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3344 - neg_cost (speed, mode));
3348 /* Special case powers of two. */
3349 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3351 rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3352 floor_log2 (coeff), target, unsignedp);
3353 return expand_unop (mode, neg_optab, temp, target, 0);
3356 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3359 rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3360 &algorithm, variant);
3361 return expand_unop (mode, neg_optab, temp, target, 0);
3366 /* Exclude cost of op0 from max_cost to match the cost
3367 calculation of the synth_mult. */
3368 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3369 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3370 return expand_mult_const (mode, op0, coeff, target,
3371 &algorithm, variant);
3375 /* Expand x*2.0 as x+x. */
3376 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3377 && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3379 op0 = force_reg (GET_MODE (op0), op0);
3380 return expand_binop (mode, add_optab, op0, op0,
3381 target, unsignedp, OPTAB_LIB_WIDEN);
3384 /* This used to use umul_optab if unsigned, but for non-widening multiply
3385 there is no difference between signed and unsigned. */
3386 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3387 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3392 /* Return a cost estimate for multiplying a register by the given
3393 COEFFicient in the given MODE and SPEED. */
3396 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3399 struct algorithm algorithm;
3400 enum mult_variant variant;
3402 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3403 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3405 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3406 return algorithm.cost.cost;
3411 /* Perform a widening multiplication and return an rtx for the result.
3412 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3413 TARGET is a suggestion for where to store the result (an rtx).
3414 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3415 or smul_widen_optab.
3417 We check specially for a constant integer as OP1, comparing the
3418 cost of a widening multiply against the cost of a sequence of shifts
3422 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3423 int unsignedp, optab this_optab)
3425 bool speed = optimize_insn_for_speed_p ();
3428 if (CONST_INT_P (op1)
3429 && GET_MODE (op0) != VOIDmode
3430 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3431 this_optab == umul_widen_optab))
3432 && CONST_INT_P (cop1)
3433 && (INTVAL (cop1) >= 0
3434 || HWI_COMPUTABLE_MODE_P (mode)))
3436 HOST_WIDE_INT coeff = INTVAL (cop1);
3438 enum mult_variant variant;
3439 struct algorithm algorithm;
3442 return CONST0_RTX (mode);
3444 /* Special case powers of two. */
3445 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3447 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3448 return expand_shift (LSHIFT_EXPR, mode, op0,
3449 floor_log2 (coeff), target, unsignedp);
3452 /* Exclude cost of op0 from max_cost to match the cost
3453 calculation of the synth_mult. */
3454 max_cost = mul_widen_cost (speed, mode);
3455 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3458 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3459 return expand_mult_const (mode, op0, coeff, target,
3460 &algorithm, variant);
3463 return expand_binop (mode, this_optab, op0, op1, target,
3464 unsignedp, OPTAB_LIB_WIDEN);
3467 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3468 replace division by D, and put the least significant N bits of the result
3469 in *MULTIPLIER_PTR and return the most significant bit.
3471 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3472 needed precision is in PRECISION (should be <= N).
3474 PRECISION should be as small as possible so this function can choose
3475 multiplier more freely.
3477 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3478 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3480 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3481 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3483 unsigned HOST_WIDE_INT
3484 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3485 unsigned HOST_WIDE_INT *multiplier_ptr,
3486 int *post_shift_ptr, int *lgup_ptr)
3488 int lgup, post_shift;
3491 /* lgup = ceil(log2(divisor)); */
3492 lgup = ceil_log2 (d);
3494 gcc_assert (lgup <= n);
3497 pow2 = n + lgup - precision;
3499 /* mlow = 2^(N + lgup)/d */
3500 wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3501 wide_int mlow = wi::udiv_trunc (val, d);
3503 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3504 val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3505 wide_int mhigh = wi::udiv_trunc (val, d);
3507 /* If precision == N, then mlow, mhigh exceed 2^N
3508 (but they do not exceed 2^(N+1)). */
3510 /* Reduce to lowest terms. */
3511 for (post_shift = lgup; post_shift > 0; post_shift--)
3513 unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3514 HOST_BITS_PER_WIDE_INT);
3515 unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3516 HOST_BITS_PER_WIDE_INT);
3520 mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3521 mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3524 *post_shift_ptr = post_shift;
3526 if (n < HOST_BITS_PER_WIDE_INT)
3528 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3529 *multiplier_ptr = mhigh.to_uhwi () & mask;
3530 return mhigh.to_uhwi () >= mask;
3534 *multiplier_ptr = mhigh.to_uhwi ();
3535 return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3539 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3540 congruent to 1 (mod 2**N). */
3542 static unsigned HOST_WIDE_INT
3543 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3545 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3547 /* The algorithm notes that the choice y = x satisfies
3548 x*y == 1 mod 2^3, since x is assumed odd.
3549 Each iteration doubles the number of bits of significance in y. */
3551 unsigned HOST_WIDE_INT mask;
3552 unsigned HOST_WIDE_INT y = x;
3555 mask = (n == HOST_BITS_PER_WIDE_INT
3556 ? ~(unsigned HOST_WIDE_INT) 0
3557 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3561 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3567 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3568 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3569 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3570 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3573 The result is put in TARGET if that is convenient.
3575 MODE is the mode of operation. */
3578 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3579 rtx op1, rtx target, int unsignedp)
3582 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3584 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3585 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3586 tem = expand_and (mode, tem, op1, NULL_RTX);
3588 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3591 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3592 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3593 tem = expand_and (mode, tem, op0, NULL_RTX);
3594 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3600 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3603 extract_high_half (machine_mode mode, rtx op)
3605 machine_mode wider_mode;
3607 if (mode == word_mode)
3608 return gen_highpart (mode, op);
3610 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3612 wider_mode = GET_MODE_WIDER_MODE (mode);
3613 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3614 GET_MODE_BITSIZE (mode), 0, 1);
3615 return convert_modes (mode, wider_mode, op, 0);
3618 /* Like expmed_mult_highpart, but only consider using a multiplication
3619 optab. OP1 is an rtx for the constant operand. */
3622 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3623 rtx target, int unsignedp, int max_cost)
3625 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3626 machine_mode wider_mode;
3630 bool speed = optimize_insn_for_speed_p ();
3632 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3634 wider_mode = GET_MODE_WIDER_MODE (mode);
3635 size = GET_MODE_BITSIZE (mode);
3637 /* Firstly, try using a multiplication insn that only generates the needed
3638 high part of the product, and in the sign flavor of unsignedp. */
3639 if (mul_highpart_cost (speed, mode) < max_cost)
3641 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3642 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3643 unsignedp, OPTAB_DIRECT);
3648 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3649 Need to adjust the result after the multiplication. */
3650 if (size - 1 < BITS_PER_WORD
3651 && (mul_highpart_cost (speed, mode)
3652 + 2 * shift_cost (speed, mode, size-1)
3653 + 4 * add_cost (speed, mode) < max_cost))
3655 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3656 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3657 unsignedp, OPTAB_DIRECT);
3659 /* We used the wrong signedness. Adjust the result. */
3660 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3664 /* Try widening multiplication. */
3665 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3666 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3667 && mul_widen_cost (speed, wider_mode) < max_cost)
3669 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3670 unsignedp, OPTAB_WIDEN);
3672 return extract_high_half (mode, tem);
3675 /* Try widening the mode and perform a non-widening multiplication. */
3676 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3677 && size - 1 < BITS_PER_WORD
3678 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3684 /* We need to widen the operands, for example to ensure the
3685 constant multiplier is correctly sign or zero extended.
3686 Use a sequence to clean-up any instructions emitted by
3687 the conversions if things don't work out. */
3689 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3690 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3691 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3692 unsignedp, OPTAB_WIDEN);
3693 insns = get_insns ();
3699 return extract_high_half (mode, tem);
3703 /* Try widening multiplication of opposite signedness, and adjust. */
3704 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3705 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3706 && size - 1 < BITS_PER_WORD
3707 && (mul_widen_cost (speed, wider_mode)
3708 + 2 * shift_cost (speed, mode, size-1)
3709 + 4 * add_cost (speed, mode) < max_cost))
3711 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3712 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3715 tem = extract_high_half (mode, tem);
3716 /* We used the wrong signedness. Adjust the result. */
3717 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3725 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3726 putting the high half of the result in TARGET if that is convenient,
3727 and return where the result is. If the operation can not be performed,
3730 MODE is the mode of operation and result.
3732 UNSIGNEDP nonzero means unsigned multiply.
3734 MAX_COST is the total allowed cost for the expanded RTL. */
3737 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3738 rtx target, int unsignedp, int max_cost)
3740 machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3741 unsigned HOST_WIDE_INT cnst1;
3743 bool sign_adjust = false;
3744 enum mult_variant variant;
3745 struct algorithm alg;
3747 bool speed = optimize_insn_for_speed_p ();
3749 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3750 /* We can't support modes wider than HOST_BITS_PER_INT. */
3751 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3753 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3755 /* We can't optimize modes wider than BITS_PER_WORD.
3756 ??? We might be able to perform double-word arithmetic if
3757 mode == word_mode, however all the cost calculations in
3758 synth_mult etc. assume single-word operations. */
3759 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3760 return expmed_mult_highpart_optab (mode, op0, op1, target,
3761 unsignedp, max_cost);
3763 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3765 /* Check whether we try to multiply by a negative constant. */
3766 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3769 extra_cost += add_cost (speed, mode);
3772 /* See whether shift/add multiplication is cheap enough. */
3773 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3774 max_cost - extra_cost))
3776 /* See whether the specialized multiplication optabs are
3777 cheaper than the shift/add version. */
3778 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3779 alg.cost.cost + extra_cost);
3783 tem = convert_to_mode (wider_mode, op0, unsignedp);
3784 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3785 tem = extract_high_half (mode, tem);
3787 /* Adjust result for signedness. */
3789 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3793 return expmed_mult_highpart_optab (mode, op0, op1, target,
3794 unsignedp, max_cost);
3798 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3801 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3803 rtx result, temp, shift;
3804 rtx_code_label *label;
3806 int prec = GET_MODE_PRECISION (mode);
3808 logd = floor_log2 (d);
3809 result = gen_reg_rtx (mode);
3811 /* Avoid conditional branches when they're expensive. */
3812 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3813 && optimize_insn_for_speed_p ())
3815 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3819 HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3820 signmask = force_reg (mode, signmask);
3821 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3823 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3824 which instruction sequence to use. If logical right shifts
3825 are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3826 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3828 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3829 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3830 || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3831 > COSTS_N_INSNS (2)))
3833 temp = expand_binop (mode, xor_optab, op0, signmask,
3834 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3835 temp = expand_binop (mode, sub_optab, temp, signmask,
3836 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3837 temp = expand_binop (mode, and_optab, temp,
3838 gen_int_mode (masklow, mode),
3839 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3840 temp = expand_binop (mode, xor_optab, temp, signmask,
3841 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3842 temp = expand_binop (mode, sub_optab, temp, signmask,
3843 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3847 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3848 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3849 signmask = force_reg (mode, signmask);
3851 temp = expand_binop (mode, add_optab, op0, signmask,
3852 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3853 temp = expand_binop (mode, and_optab, temp,
3854 gen_int_mode (masklow, mode),
3855 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3856 temp = expand_binop (mode, sub_optab, temp, signmask,
3857 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3863 /* Mask contains the mode's signbit and the significant bits of the
3864 modulus. By including the signbit in the operation, many targets
3865 can avoid an explicit compare operation in the following comparison
3867 wide_int mask = wi::mask (logd, false, prec);
3868 mask = wi::set_bit (mask, prec - 1);
3870 temp = expand_binop (mode, and_optab, op0,
3871 immed_wide_int_const (mask, mode),
3872 result, 1, OPTAB_LIB_WIDEN);
3874 emit_move_insn (result, temp);
3876 label = gen_label_rtx ();
3877 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3879 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3880 0, OPTAB_LIB_WIDEN);
3882 mask = wi::mask (logd, true, prec);
3883 temp = expand_binop (mode, ior_optab, temp,
3884 immed_wide_int_const (mask, mode),
3885 result, 1, OPTAB_LIB_WIDEN);
3886 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3887 0, OPTAB_LIB_WIDEN);
3889 emit_move_insn (result, temp);
3894 /* Expand signed division of OP0 by a power of two D in mode MODE.
3895 This routine is only called for positive values of D. */
3898 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3901 rtx_code_label *label;
3904 logd = floor_log2 (d);
3907 && BRANCH_COST (optimize_insn_for_speed_p (),
3910 temp = gen_reg_rtx (mode);
3911 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3912 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3913 0, OPTAB_LIB_WIDEN);
3914 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3917 if (HAVE_conditional_move
3918 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3923 temp2 = copy_to_mode_reg (mode, op0);
3924 temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3925 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3926 temp = force_reg (mode, temp);
3928 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3929 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3930 mode, temp, temp2, mode, 0);
3933 rtx_insn *seq = get_insns ();
3936 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3941 if (BRANCH_COST (optimize_insn_for_speed_p (),
3944 int ushift = GET_MODE_BITSIZE (mode) - logd;
3946 temp = gen_reg_rtx (mode);
3947 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3948 if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3949 || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3950 > COSTS_N_INSNS (1))
3951 temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3952 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3954 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3955 ushift, NULL_RTX, 1);
3956 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3957 0, OPTAB_LIB_WIDEN);
3958 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3961 label = gen_label_rtx ();
3962 temp = copy_to_mode_reg (mode, op0);
3963 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3964 expand_inc (temp, gen_int_mode (d - 1, mode));
3966 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3969 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3970 if that is convenient, and returning where the result is.
3971 You may request either the quotient or the remainder as the result;
3972 specify REM_FLAG nonzero to get the remainder.
3974 CODE is the expression code for which kind of division this is;
3975 it controls how rounding is done. MODE is the machine mode to use.
3976 UNSIGNEDP nonzero means do unsigned division. */
3978 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3979 and then correct it by or'ing in missing high bits
3980 if result of ANDI is nonzero.
3981 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3982 This could optimize to a bfexts instruction.
3983 But C doesn't use these operations, so their optimizations are
3985 /* ??? For modulo, we don't actually need the highpart of the first product,
3986 the low part will do nicely. And for small divisors, the second multiply
3987 can also be a low-part only multiply or even be completely left out.
3988 E.g. to calculate the remainder of a division by 3 with a 32 bit
3989 multiply, multiply with 0x55555556 and extract the upper two bits;
3990 the result is exact for inputs up to 0x1fffffff.
3991 The input range can be reduced by using cross-sum rules.
3992 For odd divisors >= 3, the following table gives right shift counts
3993 so that if a number is shifted by an integer multiple of the given
3994 amount, the remainder stays the same:
3995 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3996 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3997 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3998 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3999 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4001 Cross-sum rules for even numbers can be derived by leaving as many bits
4002 to the right alone as the divisor has zeros to the right.
4003 E.g. if x is an unsigned 32 bit number:
4004 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4008 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4009 rtx op0, rtx op1, rtx target, int unsignedp)
4011 machine_mode compute_mode;
4013 rtx quotient = 0, remainder = 0;
4017 optab optab1, optab2;
4018 int op1_is_constant, op1_is_pow2 = 0;
4019 int max_cost, extra_cost;
4020 static HOST_WIDE_INT last_div_const = 0;
4021 bool speed = optimize_insn_for_speed_p ();
4023 op1_is_constant = CONST_INT_P (op1);
4024 if (op1_is_constant)
4026 wide_int ext_op1 = rtx_mode_t (op1, mode);
4027 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4029 && wi::popcount (wi::neg (ext_op1)) == 1));
4033 This is the structure of expand_divmod:
4035 First comes code to fix up the operands so we can perform the operations
4036 correctly and efficiently.
4038 Second comes a switch statement with code specific for each rounding mode.
4039 For some special operands this code emits all RTL for the desired
4040 operation, for other cases, it generates only a quotient and stores it in
4041 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
4042 to indicate that it has not done anything.
4044 Last comes code that finishes the operation. If QUOTIENT is set and
4045 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
4046 QUOTIENT is not set, it is computed using trunc rounding.
4048 We try to generate special code for division and remainder when OP1 is a
4049 constant. If |OP1| = 2**n we can use shifts and some other fast
4050 operations. For other values of OP1, we compute a carefully selected
4051 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4054 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4055 half of the product. Different strategies for generating the product are
4056 implemented in expmed_mult_highpart.
4058 If what we actually want is the remainder, we generate that by another
4059 by-constant multiplication and a subtraction. */
4061 /* We shouldn't be called with OP1 == const1_rtx, but some of the
4062 code below will malfunction if we are, so check here and handle
4063 the special case if so. */
4064 if (op1 == const1_rtx)
4065 return rem_flag ? const0_rtx : op0;
4067 /* When dividing by -1, we could get an overflow.
4068 negv_optab can handle overflows. */
4069 if (! unsignedp && op1 == constm1_rtx)
4073 return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4074 ? negv_optab : neg_optab, op0, target, 0);
4078 /* Don't use the function value register as a target
4079 since we have to read it as well as write it,
4080 and function-inlining gets confused by this. */
4081 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4082 /* Don't clobber an operand while doing a multi-step calculation. */
4083 || ((rem_flag || op1_is_constant)
4084 && (reg_mentioned_p (target, op0)
4085 || (MEM_P (op0) && MEM_P (target))))
4086 || reg_mentioned_p (target, op1)
4087 || (MEM_P (op1) && MEM_P (target))))
4090 /* Get the mode in which to perform this computation. Normally it will
4091 be MODE, but sometimes we can't do the desired operation in MODE.
4092 If so, pick a wider mode in which we can do the operation. Convert
4093 to that mode at the start to avoid repeated conversions.
4095 First see what operations we need. These depend on the expression
4096 we are evaluating. (We assume that divxx3 insns exist under the
4097 same conditions that modxx3 insns and that these insns don't normally
4098 fail. If these assumptions are not correct, we may generate less
4099 efficient code in some cases.)
4101 Then see if we find a mode in which we can open-code that operation
4102 (either a division, modulus, or shift). Finally, check for the smallest
4103 mode for which we can do the operation with a library call. */
4105 /* We might want to refine this now that we have division-by-constant
4106 optimization. Since expmed_mult_highpart tries so many variants, it is
4107 not straightforward to generalize this. Maybe we should make an array
4108 of possible modes in init_expmed? Save this for GCC 2.7. */
4110 optab1 = (op1_is_pow2
4111 ? (unsignedp ? lshr_optab : ashr_optab)
4112 : (unsignedp ? udiv_optab : sdiv_optab));
4113 optab2 = (op1_is_pow2 ? optab1
4114 : (unsignedp ? udivmod_optab : sdivmod_optab));
4116 for (compute_mode = mode; compute_mode != VOIDmode;
4117 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4118 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4119 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4122 if (compute_mode == VOIDmode)
4123 for (compute_mode = mode; compute_mode != VOIDmode;
4124 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4125 if (optab_libfunc (optab1, compute_mode)
4126 || optab_libfunc (optab2, compute_mode))
4129 /* If we still couldn't find a mode, use MODE, but expand_binop will
4131 if (compute_mode == VOIDmode)
4132 compute_mode = mode;
4134 if (target && GET_MODE (target) == compute_mode)
4137 tquotient = gen_reg_rtx (compute_mode);
4139 size = GET_MODE_BITSIZE (compute_mode);
4141 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4142 (mode), and thereby get better code when OP1 is a constant. Do that
4143 later. It will require going over all usages of SIZE below. */
4144 size = GET_MODE_BITSIZE (mode);
4147 /* Only deduct something for a REM if the last divide done was
4148 for a different constant. Then set the constant of the last
4150 max_cost = (unsignedp
4151 ? udiv_cost (speed, compute_mode)
4152 : sdiv_cost (speed, compute_mode));
4153 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4154 && INTVAL (op1) == last_div_const))
4155 max_cost -= (mul_cost (speed, compute_mode)
4156 + add_cost (speed, compute_mode));
4158 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4160 /* Now convert to the best mode to use. */
4161 if (compute_mode != mode)
4163 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4164 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4166 /* convert_modes may have placed op1 into a register, so we
4167 must recompute the following. */
4168 op1_is_constant = CONST_INT_P (op1);
4169 if (op1_is_constant)
4171 wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4172 op1_is_pow2 = (wi::popcount (ext_op1) == 1
4174 && wi::popcount (wi::neg (ext_op1)) == 1));
4180 /* If one of the operands is a volatile MEM, copy it into a register. */
4182 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4183 op0 = force_reg (compute_mode, op0);
4184 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4185 op1 = force_reg (compute_mode, op1);
4187 /* If we need the remainder or if OP1 is constant, we need to
4188 put OP0 in a register in case it has any queued subexpressions. */
4189 if (rem_flag || op1_is_constant)
4190 op0 = force_reg (compute_mode, op0);
4192 last = get_last_insn ();
4194 /* Promote floor rounding to trunc rounding for unsigned operations. */
4197 if (code == FLOOR_DIV_EXPR)
4198 code = TRUNC_DIV_EXPR;
4199 if (code == FLOOR_MOD_EXPR)
4200 code = TRUNC_MOD_EXPR;
4201 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4202 code = TRUNC_DIV_EXPR;
4205 if (op1 != const0_rtx)
4208 case TRUNC_MOD_EXPR:
4209 case TRUNC_DIV_EXPR:
4210 if (op1_is_constant)
4214 unsigned HOST_WIDE_INT mh, ml;
4215 int pre_shift, post_shift;
4217 wide_int wd = rtx_mode_t (op1, compute_mode);
4218 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4220 if (wi::popcount (wd) == 1)
4222 pre_shift = floor_log2 (d);
4225 unsigned HOST_WIDE_INT mask
4226 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4228 = expand_binop (compute_mode, and_optab, op0,
4229 gen_int_mode (mask, compute_mode),
4233 return gen_lowpart (mode, remainder);
4235 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4236 pre_shift, tquotient, 1);
4238 else if (size <= HOST_BITS_PER_WIDE_INT)
4240 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4242 /* Most significant bit of divisor is set; emit an scc
4244 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4245 compute_mode, 1, 1);
4249 /* Find a suitable multiplier and right shift count
4250 instead of multiplying with D. */
4252 mh = choose_multiplier (d, size, size,
4253 &ml, &post_shift, &dummy);
4255 /* If the suggested multiplier is more than SIZE bits,
4256 we can do better for even divisors, using an
4257 initial right shift. */
4258 if (mh != 0 && (d & 1) == 0)
4260 pre_shift = floor_log2 (d & -d);
4261 mh = choose_multiplier (d >> pre_shift, size,
4263 &ml, &post_shift, &dummy);
4273 if (post_shift - 1 >= BITS_PER_WORD)
4277 = (shift_cost (speed, compute_mode, post_shift - 1)
4278 + shift_cost (speed, compute_mode, 1)
4279 + 2 * add_cost (speed, compute_mode));
4280 t1 = expmed_mult_highpart
4282 gen_int_mode (ml, compute_mode),
4283 NULL_RTX, 1, max_cost - extra_cost);
4286 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4289 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4290 t2, 1, NULL_RTX, 1);
4291 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4294 quotient = expand_shift
4295 (RSHIFT_EXPR, compute_mode, t4,
4296 post_shift - 1, tquotient, 1);
4302 if (pre_shift >= BITS_PER_WORD
4303 || post_shift >= BITS_PER_WORD)
4307 (RSHIFT_EXPR, compute_mode, op0,
4308 pre_shift, NULL_RTX, 1);
4310 = (shift_cost (speed, compute_mode, pre_shift)
4311 + shift_cost (speed, compute_mode, post_shift));
4312 t2 = expmed_mult_highpart
4314 gen_int_mode (ml, compute_mode),
4315 NULL_RTX, 1, max_cost - extra_cost);
4318 quotient = expand_shift
4319 (RSHIFT_EXPR, compute_mode, t2,
4320 post_shift, tquotient, 1);
4324 else /* Too wide mode to use tricky code */
4327 insn = get_last_insn ();
4329 set_dst_reg_note (insn, REG_EQUAL,
4330 gen_rtx_UDIV (compute_mode, op0, op1),
4333 else /* TRUNC_DIV, signed */
4335 unsigned HOST_WIDE_INT ml;
4336 int lgup, post_shift;
4338 HOST_WIDE_INT d = INTVAL (op1);
4339 unsigned HOST_WIDE_INT abs_d;
4341 /* Since d might be INT_MIN, we have to cast to
4342 unsigned HOST_WIDE_INT before negating to avoid
4343 undefined signed overflow. */
4345 ? (unsigned HOST_WIDE_INT) d
4346 : - (unsigned HOST_WIDE_INT) d);
4348 /* n rem d = n rem -d */
4349 if (rem_flag && d < 0)
4352 op1 = gen_int_mode (abs_d, compute_mode);
4358 quotient = expand_unop (compute_mode, neg_optab, op0,
4360 else if (size <= HOST_BITS_PER_WIDE_INT
4361 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4363 /* This case is not handled correctly below. */
4364 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4365 compute_mode, 1, 1);
4369 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4370 && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4372 ? smod_pow2_cheap (speed, compute_mode)
4373 : sdiv_pow2_cheap (speed, compute_mode))
4374 /* We assume that cheap metric is true if the
4375 optab has an expander for this mode. */
4376 && ((optab_handler ((rem_flag ? smod_optab
4379 != CODE_FOR_nothing)
4380 || (optab_handler (sdivmod_optab,
4382 != CODE_FOR_nothing)))
4384 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4385 && (size <= HOST_BITS_PER_WIDE_INT
4386 || abs_d != (unsigned HOST_WIDE_INT) d))
4390 remainder = expand_smod_pow2 (compute_mode, op0, d);
4392 return gen_lowpart (mode, remainder);
4395 if (sdiv_pow2_cheap (speed, compute_mode)
4396 && ((optab_handler (sdiv_optab, compute_mode)
4397 != CODE_FOR_nothing)
4398 || (optab_handler (sdivmod_optab, compute_mode)
4399 != CODE_FOR_nothing)))
4400 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4402 gen_int_mode (abs_d,
4406 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4408 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4409 negate the quotient. */
4412 insn = get_last_insn ();
4414 && abs_d < ((unsigned HOST_WIDE_INT) 1
4415 << (HOST_BITS_PER_WIDE_INT - 1)))
4416 set_dst_reg_note (insn, REG_EQUAL,
4417 gen_rtx_DIV (compute_mode, op0,
4423 quotient = expand_unop (compute_mode, neg_optab,
4424 quotient, quotient, 0);
4427 else if (size <= HOST_BITS_PER_WIDE_INT)
4429 choose_multiplier (abs_d, size, size - 1,
4430 &ml, &post_shift, &lgup);
4431 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4435 if (post_shift >= BITS_PER_WORD
4436 || size - 1 >= BITS_PER_WORD)
4439 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4440 + shift_cost (speed, compute_mode, size - 1)
4441 + add_cost (speed, compute_mode));
4442 t1 = expmed_mult_highpart
4443 (compute_mode, op0, gen_int_mode (ml, compute_mode),
4444 NULL_RTX, 0, max_cost - extra_cost);
4448 (RSHIFT_EXPR, compute_mode, t1,
4449 post_shift, NULL_RTX, 0);
4451 (RSHIFT_EXPR, compute_mode, op0,
4452 size - 1, NULL_RTX, 0);
4455 = force_operand (gen_rtx_MINUS (compute_mode,
4460 = force_operand (gen_rtx_MINUS (compute_mode,
4468 if (post_shift >= BITS_PER_WORD
4469 || size - 1 >= BITS_PER_WORD)
4472 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4473 mlr = gen_int_mode (ml, compute_mode);
4474 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4475 + shift_cost (speed, compute_mode, size - 1)
4476 + 2 * add_cost (speed, compute_mode));
4477 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4479 max_cost - extra_cost);
4482 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4486 (RSHIFT_EXPR, compute_mode, t2,
4487 post_shift, NULL_RTX, 0);
4489 (RSHIFT_EXPR, compute_mode, op0,
4490 size - 1, NULL_RTX, 0);
4493 = force_operand (gen_rtx_MINUS (compute_mode,
4498 = force_operand (gen_rtx_MINUS (compute_mode,
4503 else /* Too wide mode to use tricky code */
4506 insn = get_last_insn ();
4508 set_dst_reg_note (insn, REG_EQUAL,
4509 gen_rtx_DIV (compute_mode, op0, op1),
4515 delete_insns_since (last);
4518 case FLOOR_DIV_EXPR:
4519 case FLOOR_MOD_EXPR:
4520 /* We will come here only for signed operations. */
4521 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4523 unsigned HOST_WIDE_INT mh, ml;
4524 int pre_shift, lgup, post_shift;
4525 HOST_WIDE_INT d = INTVAL (op1);
4529 /* We could just as easily deal with negative constants here,
4530 but it does not seem worth the trouble for GCC 2.6. */
4531 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4533 pre_shift = floor_log2 (d);
4536 unsigned HOST_WIDE_INT mask
4537 = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4538 remainder = expand_binop
4539 (compute_mode, and_optab, op0,
4540 gen_int_mode (mask, compute_mode),
4541 remainder, 0, OPTAB_LIB_WIDEN);
4543 return gen_lowpart (mode, remainder);
4545 quotient = expand_shift
4546 (RSHIFT_EXPR, compute_mode, op0,
4547 pre_shift, tquotient, 0);
4553 mh = choose_multiplier (d, size, size - 1,
4554 &ml, &post_shift, &lgup);
4557 if (post_shift < BITS_PER_WORD
4558 && size - 1 < BITS_PER_WORD)
4561 (RSHIFT_EXPR, compute_mode, op0,
4562 size - 1, NULL_RTX, 0);
4563 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4564 NULL_RTX, 0, OPTAB_WIDEN);
4565 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4566 + shift_cost (speed, compute_mode, size - 1)
4567 + 2 * add_cost (speed, compute_mode));
4568 t3 = expmed_mult_highpart
4569 (compute_mode, t2, gen_int_mode (ml, compute_mode),
4570 NULL_RTX, 1, max_cost - extra_cost);
4574 (RSHIFT_EXPR, compute_mode, t3,
4575 post_shift, NULL_RTX, 1);
4576 quotient = expand_binop (compute_mode, xor_optab,
4577 t4, t1, tquotient, 0,
4585 rtx nsign, t1, t2, t3, t4;
4586 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4587 op0, constm1_rtx), NULL_RTX);
4588 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4590 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4591 size - 1, NULL_RTX, 0);
4592 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4594 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4599 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4601 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4610 delete_insns_since (last);
4612 /* Try using an instruction that produces both the quotient and
4613 remainder, using truncation. We can easily compensate the quotient
4614 or remainder to get floor rounding, once we have the remainder.
4615 Notice that we compute also the final remainder value here,
4616 and return the result right away. */
4617 if (target == 0 || GET_MODE (target) != compute_mode)
4618 target = gen_reg_rtx (compute_mode);
4623 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4624 quotient = gen_reg_rtx (compute_mode);
4629 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4630 remainder = gen_reg_rtx (compute_mode);
4633 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4634 quotient, remainder, 0))
4636 /* This could be computed with a branch-less sequence.
4637 Save that for later. */
4639 rtx_code_label *label = gen_label_rtx ();
4640 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4641 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4642 NULL_RTX, 0, OPTAB_WIDEN);
4643 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4644 expand_dec (quotient, const1_rtx);
4645 expand_inc (remainder, op1);
4647 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4650 /* No luck with division elimination or divmod. Have to do it
4651 by conditionally adjusting op0 *and* the result. */
4653 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4657 quotient = gen_reg_rtx (compute_mode);
4658 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4659 label1 = gen_label_rtx ();
4660 label2 = gen_label_rtx ();
4661 label3 = gen_label_rtx ();
4662 label4 = gen_label_rtx ();
4663 label5 = gen_label_rtx ();
4664 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4665 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4666 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4667 quotient, 0, OPTAB_LIB_WIDEN);
4668 if (tem != quotient)
4669 emit_move_insn (quotient, tem);
4670 emit_jump_insn (targetm.gen_jump (label5));
4672 emit_label (label1);
4673 expand_inc (adjusted_op0, const1_rtx);
4674 emit_jump_insn (targetm.gen_jump (label4));
4676 emit_label (label2);
4677 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4678 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4679 quotient, 0, OPTAB_LIB_WIDEN);
4680 if (tem != quotient)
4681 emit_move_insn (quotient, tem);
4682 emit_jump_insn (targetm.gen_jump (label5));
4684 emit_label (label3);
4685 expand_dec (adjusted_op0, const1_rtx);
4686 emit_label (label4);
4687 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4688 quotient, 0, OPTAB_LIB_WIDEN);
4689 if (tem != quotient)
4690 emit_move_insn (quotient, tem);
4691 expand_dec (quotient, const1_rtx);
4692 emit_label (label5);
4701 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4702 && (size <= HOST_BITS_PER_WIDE_INT
4703 || INTVAL (op1) >= 0))
4706 unsigned HOST_WIDE_INT d = INTVAL (op1);
4707 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4708 floor_log2 (d), tquotient, 1);
4709 t2 = expand_binop (compute_mode, and_optab, op0,
4710 gen_int_mode (d - 1, compute_mode),
4711 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4712 t3 = gen_reg_rtx (compute_mode);
4713 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4714 compute_mode, 1, 1);
4717 rtx_code_label *lab;
4718 lab = gen_label_rtx ();
4719 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4720 expand_inc (t1, const1_rtx);
4725 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4731 /* Try using an instruction that produces both the quotient and
4732 remainder, using truncation. We can easily compensate the
4733 quotient or remainder to get ceiling rounding, once we have the
4734 remainder. Notice that we compute also the final remainder
4735 value here, and return the result right away. */
4736 if (target == 0 || GET_MODE (target) != compute_mode)
4737 target = gen_reg_rtx (compute_mode);
4741 remainder = (REG_P (target)
4742 ? target : gen_reg_rtx (compute_mode));
4743 quotient = gen_reg_rtx (compute_mode);
4747 quotient = (REG_P (target)
4748 ? target : gen_reg_rtx (compute_mode));
4749 remainder = gen_reg_rtx (compute_mode);
4752 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4755 /* This could be computed with a branch-less sequence.
4756 Save that for later. */
4757 rtx_code_label *label = gen_label_rtx ();
4758 do_cmp_and_jump (remainder, const0_rtx, EQ,
4759 compute_mode, label);
4760 expand_inc (quotient, const1_rtx);
4761 expand_dec (remainder, op1);
4763 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4766 /* No luck with division elimination or divmod. Have to do it
4767 by conditionally adjusting op0 *and* the result. */
4769 rtx_code_label *label1, *label2;
4770 rtx adjusted_op0, tem;
4772 quotient = gen_reg_rtx (compute_mode);
4773 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4774 label1 = gen_label_rtx ();
4775 label2 = gen_label_rtx ();
4776 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4777 compute_mode, label1);
4778 emit_move_insn (quotient, const0_rtx);
4779 emit_jump_insn (targetm.gen_jump (label2));
4781 emit_label (label1);
4782 expand_dec (adjusted_op0, const1_rtx);
4783 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4784 quotient, 1, OPTAB_LIB_WIDEN);
4785 if (tem != quotient)
4786 emit_move_insn (quotient, tem);
4787 expand_inc (quotient, const1_rtx);
4788 emit_label (label2);
4793 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4794 && INTVAL (op1) >= 0)
4796 /* This is extremely similar to the code for the unsigned case
4797 above. For 2.7 we should merge these variants, but for
4798 2.6.1 I don't want to touch the code for unsigned since that
4799 get used in C. The signed case will only be used by other
4803 unsigned HOST_WIDE_INT d = INTVAL (op1);
4804 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4805 floor_log2 (d), tquotient, 0);
4806 t2 = expand_binop (compute_mode, and_optab, op0,
4807 gen_int_mode (d - 1, compute_mode),
4808 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4809 t3 = gen_reg_rtx (compute_mode);
4810 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4811 compute_mode, 1, 1);
4814 rtx_code_label *lab;
4815 lab = gen_label_rtx ();
4816 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4817 expand_inc (t1, const1_rtx);
4822 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4828 /* Try using an instruction that produces both the quotient and
4829 remainder, using truncation. We can easily compensate the
4830 quotient or remainder to get ceiling rounding, once we have the
4831 remainder. Notice that we compute also the final remainder
4832 value here, and return the result right away. */
4833 if (target == 0 || GET_MODE (target) != compute_mode)
4834 target = gen_reg_rtx (compute_mode);
4837 remainder= (REG_P (target)
4838 ? target : gen_reg_rtx (compute_mode));
4839 quotient = gen_reg_rtx (compute_mode);
4843 quotient = (REG_P (target)
4844 ? target : gen_reg_rtx (compute_mode));
4845 remainder = gen_reg_rtx (compute_mode);
4848 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4851 /* This could be computed with a branch-less sequence.
4852 Save that for later. */
4854 rtx_code_label *label = gen_label_rtx ();
4855 do_cmp_and_jump (remainder, const0_rtx, EQ,
4856 compute_mode, label);
4857 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4858 NULL_RTX, 0, OPTAB_WIDEN);
4859 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4860 expand_inc (quotient, const1_rtx);
4861 expand_dec (remainder, op1);
4863 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4866 /* No luck with division elimination or divmod. Have to do it
4867 by conditionally adjusting op0 *and* the result. */
4869 rtx_code_label *label1, *label2, *label3, *label4, *label5;
4873 quotient = gen_reg_rtx (compute_mode);
4874 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4875 label1 = gen_label_rtx ();
4876 label2 = gen_label_rtx ();
4877 label3 = gen_label_rtx ();
4878 label4 = gen_label_rtx ();
4879 label5 = gen_label_rtx ();
4880 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4881 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4882 compute_mode, label1);
4883 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4884 quotient, 0, OPTAB_LIB_WIDEN);
4885 if (tem != quotient)
4886 emit_move_insn (quotient, tem);
4887 emit_jump_insn (targetm.gen_jump (label5));
4889 emit_label (label1);
4890 expand_dec (adjusted_op0, const1_rtx);
4891 emit_jump_insn (targetm.gen_jump (label4));
4893 emit_label (label2);
4894 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4895 compute_mode, label3);
4896 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4897 quotient, 0, OPTAB_LIB_WIDEN);
4898 if (tem != quotient)
4899 emit_move_insn (quotient, tem);
4900 emit_jump_insn (targetm.gen_jump (label5));
4902 emit_label (label3);
4903 expand_inc (adjusted_op0, const1_rtx);
4904 emit_label (label4);
4905 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4906 quotient, 0, OPTAB_LIB_WIDEN);
4907 if (tem != quotient)
4908 emit_move_insn (quotient, tem);
4909 expand_inc (quotient, const1_rtx);
4910 emit_label (label5);
4915 case EXACT_DIV_EXPR:
4916 if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4918 HOST_WIDE_INT d = INTVAL (op1);
4919 unsigned HOST_WIDE_INT ml;
4923 pre_shift = floor_log2 (d & -d);
4924 ml = invert_mod2n (d >> pre_shift, size);
4925 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4926 pre_shift, NULL_RTX, unsignedp);
4927 quotient = expand_mult (compute_mode, t1,
4928 gen_int_mode (ml, compute_mode),
4931 insn = get_last_insn ();
4932 set_dst_reg_note (insn, REG_EQUAL,
4933 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4934 compute_mode, op0, op1),
4939 case ROUND_DIV_EXPR:
4940 case ROUND_MOD_EXPR:
4944 rtx_code_label *label;
4945 label = gen_label_rtx ();
4946 quotient = gen_reg_rtx (compute_mode);
4947 remainder = gen_reg_rtx (compute_mode);
4948 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4951 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4952 quotient, 1, OPTAB_LIB_WIDEN);
4953 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4954 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4955 remainder, 1, OPTAB_LIB_WIDEN);
4957 tem = plus_constant (compute_mode, op1, -1);
4958 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4959 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4960 expand_inc (quotient, const1_rtx);
4961 expand_dec (remainder, op1);
4966 rtx abs_rem, abs_op1, tem, mask;
4967 rtx_code_label *label;
4968 label = gen_label_rtx ();
4969 quotient = gen_reg_rtx (compute_mode);
4970 remainder = gen_reg_rtx (compute_mode);
4971 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4974 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4975 quotient, 0, OPTAB_LIB_WIDEN);
4976 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4977 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4978 remainder, 0, OPTAB_LIB_WIDEN);
4980 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4981 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4982 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4984 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4985 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4986 NULL_RTX, 0, OPTAB_WIDEN);
4987 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4988 size - 1, NULL_RTX, 0);
4989 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4990 NULL_RTX, 0, OPTAB_WIDEN);
4991 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4992 NULL_RTX, 0, OPTAB_WIDEN);
4993 expand_inc (quotient, tem);
4994 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4995 NULL_RTX, 0, OPTAB_WIDEN);
4996 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4997 NULL_RTX, 0, OPTAB_WIDEN);
4998 expand_dec (remainder, tem);
5001 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5009 if (target && GET_MODE (target) != compute_mode)
5014 /* Try to produce the remainder without producing the quotient.
5015 If we seem to have a divmod pattern that does not require widening,
5016 don't try widening here. We should really have a WIDEN argument
5017 to expand_twoval_binop, since what we'd really like to do here is
5018 1) try a mod insn in compute_mode
5019 2) try a divmod insn in compute_mode
5020 3) try a div insn in compute_mode and multiply-subtract to get
5022 4) try the same things with widening allowed. */
5024 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5027 ((optab_handler (optab2, compute_mode)
5028 != CODE_FOR_nothing)
5029 ? OPTAB_DIRECT : OPTAB_WIDEN));
5032 /* No luck there. Can we do remainder and divide at once
5033 without a library call? */
5034 remainder = gen_reg_rtx (compute_mode);
5035 if (! expand_twoval_binop ((unsignedp
5039 NULL_RTX, remainder, unsignedp))
5044 return gen_lowpart (mode, remainder);
5047 /* Produce the quotient. Try a quotient insn, but not a library call.
5048 If we have a divmod in this mode, use it in preference to widening
5049 the div (for this test we assume it will not fail). Note that optab2
5050 is set to the one of the two optabs that the call below will use. */
5052 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5053 op0, op1, rem_flag ? NULL_RTX : target,
5055 ((optab_handler (optab2, compute_mode)
5056 != CODE_FOR_nothing)
5057 ? OPTAB_DIRECT : OPTAB_WIDEN));
5061 /* No luck there. Try a quotient-and-remainder insn,
5062 keeping the quotient alone. */
5063 quotient = gen_reg_rtx (compute_mode);
5064 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5066 quotient, NULL_RTX, unsignedp))
5070 /* Still no luck. If we are not computing the remainder,
5071 use a library call for the quotient. */
5072 quotient = sign_expand_binop (compute_mode,
5073 udiv_optab, sdiv_optab,
5075 unsignedp, OPTAB_LIB_WIDEN);
5082 if (target && GET_MODE (target) != compute_mode)
5087 /* No divide instruction either. Use library for remainder. */
5088 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5090 unsignedp, OPTAB_LIB_WIDEN);
5091 /* No remainder function. Try a quotient-and-remainder
5092 function, keeping the remainder. */
5095 remainder = gen_reg_rtx (compute_mode);
5096 if (!expand_twoval_binop_libfunc
5097 (unsignedp ? udivmod_optab : sdivmod_optab,
5099 NULL_RTX, remainder,
5100 unsignedp ? UMOD : MOD))
5101 remainder = NULL_RTX;
5106 /* We divided. Now finish doing X - Y * (X / Y). */
5107 remainder = expand_mult (compute_mode, quotient, op1,
5108 NULL_RTX, unsignedp);
5109 remainder = expand_binop (compute_mode, sub_optab, op0,
5110 remainder, target, unsignedp,
5115 return gen_lowpart (mode, rem_flag ? remainder : quotient);
5118 /* Return a tree node with data type TYPE, describing the value of X.
5119 Usually this is an VAR_DECL, if there is no obvious better choice.
5120 X may be an expression, however we only support those expressions
5121 generated by loop.c. */
5124 make_tree (tree type, rtx x)
5128 switch (GET_CODE (x))
5131 case CONST_WIDE_INT:
5132 t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5136 STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5137 if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5138 t = wide_int_to_tree (type,
5139 wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5140 HOST_BITS_PER_WIDE_INT * 2));
5142 t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5148 int units = CONST_VECTOR_NUNITS (x);
5149 tree itype = TREE_TYPE (type);
5153 /* Build a tree with vector elements. */
5154 elts = XALLOCAVEC (tree, units);
5155 for (i = units - 1; i >= 0; --i)
5157 rtx elt = CONST_VECTOR_ELT (x, i);
5158 elts[i] = make_tree (itype, elt);
5161 return build_vector (type, elts);
5165 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5166 make_tree (type, XEXP (x, 1)));
5169 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5170 make_tree (type, XEXP (x, 1)));
5173 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5176 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5177 make_tree (type, XEXP (x, 1)));
5180 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5181 make_tree (type, XEXP (x, 1)));
5184 t = unsigned_type_for (type);
5185 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5186 make_tree (t, XEXP (x, 0)),
5187 make_tree (type, XEXP (x, 1))));
5190 t = signed_type_for (type);
5191 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5192 make_tree (t, XEXP (x, 0)),
5193 make_tree (type, XEXP (x, 1))));
5196 if (TREE_CODE (type) != REAL_TYPE)
5197 t = signed_type_for (type);
5201 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5202 make_tree (t, XEXP (x, 0)),
5203 make_tree (t, XEXP (x, 1))));
5205 t = unsigned_type_for (type);
5206 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5207 make_tree (t, XEXP (x, 0)),
5208 make_tree (t, XEXP (x, 1))));
5212 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5213 GET_CODE (x) == ZERO_EXTEND);
5214 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5217 return make_tree (type, XEXP (x, 0));
5220 t = SYMBOL_REF_DECL (x);
5222 return fold_convert (type, build_fold_addr_expr (t));
5226 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5228 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5229 address mode to pointer mode. */
5230 if (POINTER_TYPE_P (type))
5231 x = convert_memory_address_addr_space
5232 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5234 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5235 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5236 t->decl_with_rtl.rtl = x;
5242 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5243 and returning TARGET.
5245 If TARGET is 0, a pseudo-register or constant is returned. */
5248 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5252 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5253 tem = simplify_binary_operation (AND, mode, op0, op1);
5255 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5259 else if (tem != target)
5260 emit_move_insn (target, tem);
5264 /* Helper function for emit_store_flag. */
5266 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5267 machine_mode mode, machine_mode compare_mode,
5268 int unsignedp, rtx x, rtx y, int normalizep,
5269 machine_mode target_mode)
5271 struct expand_operand ops[4];
5272 rtx op0, comparison, subtarget;
5274 machine_mode result_mode = targetm.cstore_mode (icode);
5276 last = get_last_insn ();
5277 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5278 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5281 delete_insns_since (last);
5285 if (target_mode == VOIDmode)
5286 target_mode = result_mode;
5288 target = gen_reg_rtx (target_mode);
5290 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5292 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5293 create_fixed_operand (&ops[1], comparison);
5294 create_fixed_operand (&ops[2], x);
5295 create_fixed_operand (&ops[3], y);
5296 if (!maybe_expand_insn (icode, 4, ops))
5298 delete_insns_since (last);
5301 subtarget = ops[0].value;
5303 /* If we are converting to a wider mode, first convert to
5304 TARGET_MODE, then normalize. This produces better combining
5305 opportunities on machines that have a SIGN_EXTRACT when we are
5306 testing a single bit. This mostly benefits the 68k.
5308 If STORE_FLAG_VALUE does not have the sign bit set when
5309 interpreted in MODE, we can do this conversion as unsigned, which
5310 is usually more efficient. */
5311 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5313 convert_move (target, subtarget,
5314 val_signbit_known_clear_p (result_mode,
5317 result_mode = target_mode;
5322 /* If we want to keep subexpressions around, don't reuse our last
5327 /* Now normalize to the proper value in MODE. Sometimes we don't
5328 have to do anything. */
5329 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5331 /* STORE_FLAG_VALUE might be the most negative number, so write
5332 the comparison this way to avoid a compiler-time warning. */
5333 else if (- normalizep == STORE_FLAG_VALUE)
5334 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5336 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5337 it hard to use a value of just the sign bit due to ANSI integer
5338 constant typing rules. */
5339 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5340 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5341 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5345 gcc_assert (STORE_FLAG_VALUE & 1);
5347 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5348 if (normalizep == -1)
5349 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5352 /* If we were converting to a smaller mode, do the conversion now. */
5353 if (target_mode != result_mode)
5355 convert_move (target, op0, 0);
5363 /* A subroutine of emit_store_flag only including "tricks" that do not
5364 need a recursive call. These are kept separate to avoid infinite
5368 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5369 machine_mode mode, int unsignedp, int normalizep,
5370 machine_mode target_mode)
5373 enum insn_code icode;
5374 machine_mode compare_mode;
5375 enum mode_class mclass;
5376 enum rtx_code scode;
5379 code = unsigned_condition (code);
5380 scode = swap_condition (code);
5382 /* If one operand is constant, make it the second one. Only do this
5383 if the other operand is not constant as well. */
5385 if (swap_commutative_operands_p (op0, op1))
5387 std::swap (op0, op1);
5388 code = swap_condition (code);
5391 if (mode == VOIDmode)
5392 mode = GET_MODE (op0);
5394 /* For some comparisons with 1 and -1, we can convert this to
5395 comparisons with zero. This will often produce more opportunities for
5396 store-flag insns. */
5401 if (op1 == const1_rtx)
5402 op1 = const0_rtx, code = LE;
5405 if (op1 == constm1_rtx)
5406 op1 = const0_rtx, code = LT;
5409 if (op1 == const1_rtx)
5410 op1 = const0_rtx, code = GT;
5413 if (op1 == constm1_rtx)
5414 op1 = const0_rtx, code = GE;
5417 if (op1 == const1_rtx)
5418 op1 = const0_rtx, code = NE;
5421 if (op1 == const1_rtx)
5422 op1 = const0_rtx, code = EQ;
5428 /* If we are comparing a double-word integer with zero or -1, we can
5429 convert the comparison into one involving a single word. */
5430 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5431 && GET_MODE_CLASS (mode) == MODE_INT
5432 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5435 if ((code == EQ || code == NE)
5436 && (op1 == const0_rtx || op1 == constm1_rtx))
5440 /* Do a logical OR or AND of the two words and compare the
5442 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5443 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5444 tem = expand_binop (word_mode,
5445 op1 == const0_rtx ? ior_optab : and_optab,
5446 op00, op01, NULL_RTX, unsignedp,
5450 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5451 unsignedp, normalizep);
5453 else if ((code == LT || code == GE) && op1 == const0_rtx)
5457 /* If testing the sign bit, can just test on high word. */
5458 op0h = simplify_gen_subreg (word_mode, op0, mode,
5459 subreg_highpart_offset (word_mode,
5461 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5462 unsignedp, normalizep);
5469 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5472 target = gen_reg_rtx (target_mode);
5474 convert_move (target, tem,
5475 !val_signbit_known_set_p (word_mode,
5476 (normalizep ? normalizep
5477 : STORE_FLAG_VALUE)));
5482 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5483 complement of A (for GE) and shifting the sign bit to the low bit. */
5484 if (op1 == const0_rtx && (code == LT || code == GE)
5485 && GET_MODE_CLASS (mode) == MODE_INT
5486 && (normalizep || STORE_FLAG_VALUE == 1
5487 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5494 /* If the result is to be wider than OP0, it is best to convert it
5495 first. If it is to be narrower, it is *incorrect* to convert it
5497 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5499 op0 = convert_modes (target_mode, mode, op0, 0);
5503 if (target_mode != mode)
5507 op0 = expand_unop (mode, one_cmpl_optab, op0,
5508 ((STORE_FLAG_VALUE == 1 || normalizep)
5509 ? 0 : subtarget), 0);
5511 if (STORE_FLAG_VALUE == 1 || normalizep)
5512 /* If we are supposed to produce a 0/1 value, we want to do
5513 a logical shift from the sign bit to the low-order bit; for
5514 a -1/0 value, we do an arithmetic shift. */
5515 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5516 GET_MODE_BITSIZE (mode) - 1,
5517 subtarget, normalizep != -1);
5519 if (mode != target_mode)
5520 op0 = convert_modes (target_mode, mode, op0, 0);
5525 mclass = GET_MODE_CLASS (mode);
5526 for (compare_mode = mode; compare_mode != VOIDmode;
5527 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5529 machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5530 icode = optab_handler (cstore_optab, optab_mode);
5531 if (icode != CODE_FOR_nothing)
5533 do_pending_stack_adjust ();
5534 rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5535 unsignedp, op0, op1, normalizep, target_mode);
5539 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5541 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5542 unsignedp, op1, op0, normalizep, target_mode);
5553 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5554 and storing in TARGET. Normally return TARGET.
5555 Return 0 if that cannot be done.
5557 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5558 it is VOIDmode, they cannot both be CONST_INT.
5560 UNSIGNEDP is for the case where we have to widen the operands
5561 to perform the operation. It says to use zero-extension.
5563 NORMALIZEP is 1 if we should convert the result to be either zero
5564 or one. Normalize is -1 if we should convert the result to be
5565 either zero or -1. If NORMALIZEP is zero, the result will be left
5566 "raw" out of the scc insn. */
5569 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5570 machine_mode mode, int unsignedp, int normalizep)
5572 machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5573 enum rtx_code rcode;
5578 /* If we compare constants, we shouldn't use a store-flag operation,
5579 but a constant load. We can get there via the vanilla route that
5580 usually generates a compare-branch sequence, but will in this case
5581 fold the comparison to a constant, and thus elide the branch. */
5582 if (CONSTANT_P (op0) && CONSTANT_P (op1))
5585 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5590 /* If we reached here, we can't do this with a scc insn, however there
5591 are some comparisons that can be done in other ways. Don't do any
5592 of these cases if branches are very cheap. */
5593 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5596 /* See what we need to return. We can only return a 1, -1, or the
5599 if (normalizep == 0)
5601 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5602 normalizep = STORE_FLAG_VALUE;
5604 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5610 last = get_last_insn ();
5612 /* If optimizing, use different pseudo registers for each insn, instead
5613 of reusing the same pseudo. This leads to better CSE, but slows
5614 down the compiler, since there are more pseudos */
5615 subtarget = (!optimize
5616 && (target_mode == mode)) ? target : NULL_RTX;
5617 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5619 /* For floating-point comparisons, try the reverse comparison or try
5620 changing the "orderedness" of the comparison. */
5621 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5623 enum rtx_code first_code;
5626 rcode = reverse_condition_maybe_unordered (code);
5627 if (can_compare_p (rcode, mode, ccp_store_flag)
5628 && (code == ORDERED || code == UNORDERED
5629 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5630 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5632 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5633 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5635 /* For the reverse comparison, use either an addition or a XOR. */
5637 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5638 optimize_insn_for_speed_p ()) == 0)
5640 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5641 STORE_FLAG_VALUE, target_mode);
5643 return expand_binop (target_mode, add_optab, tem,
5644 gen_int_mode (normalizep, target_mode),
5645 target, 0, OPTAB_WIDEN);
5648 && rtx_cost (trueval, mode, XOR, 1,
5649 optimize_insn_for_speed_p ()) == 0)
5651 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5652 normalizep, target_mode);
5654 return expand_binop (target_mode, xor_optab, tem, trueval,
5655 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5659 delete_insns_since (last);
5661 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5662 if (code == ORDERED || code == UNORDERED)
5665 and_them = split_comparison (code, mode, &first_code, &code);
5667 /* If there are no NaNs, the first comparison should always fall through.
5668 Effectively change the comparison to the other one. */
5669 if (!HONOR_NANS (mode))
5671 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5672 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5676 if (!HAVE_conditional_move)
5679 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5680 conditional move. */
5681 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5682 normalizep, target_mode);
5687 tem = emit_conditional_move (target, code, op0, op1, mode,
5688 tem, const0_rtx, GET_MODE (tem), 0);
5690 tem = emit_conditional_move (target, code, op0, op1, mode,
5691 trueval, tem, GET_MODE (tem), 0);
5694 delete_insns_since (last);
5698 /* The remaining tricks only apply to integer comparisons. */
5700 if (GET_MODE_CLASS (mode) != MODE_INT)
5703 /* If this is an equality comparison of integers, we can try to exclusive-or
5704 (or subtract) the two operands and use a recursive call to try the
5705 comparison with zero. Don't do any of these cases if branches are
5708 if ((code == EQ || code == NE) && op1 != const0_rtx)
5710 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5714 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5717 tem = emit_store_flag (target, code, tem, const0_rtx,
5718 mode, unsignedp, normalizep);
5722 delete_insns_since (last);
5725 /* For integer comparisons, try the reverse comparison. However, for
5726 small X and if we'd have anyway to extend, implementing "X != 0"
5727 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5728 rcode = reverse_condition (code);
5729 if (can_compare_p (rcode, mode, ccp_store_flag)
5730 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5732 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5733 && op1 == const0_rtx))
5735 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5736 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5738 /* Again, for the reverse comparison, use either an addition or a XOR. */
5740 && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5741 optimize_insn_for_speed_p ()) == 0)
5743 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5744 STORE_FLAG_VALUE, target_mode);
5746 tem = expand_binop (target_mode, add_optab, tem,
5747 gen_int_mode (normalizep, target_mode),
5748 target, 0, OPTAB_WIDEN);
5751 && rtx_cost (trueval, mode, XOR, 1,
5752 optimize_insn_for_speed_p ()) == 0)
5754 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5755 normalizep, target_mode);
5757 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5758 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5763 delete_insns_since (last);
5766 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5767 the constant zero. Reject all other comparisons at this point. Only
5768 do LE and GT if branches are expensive since they are expensive on
5769 2-operand machines. */
5771 if (op1 != const0_rtx
5772 || (code != EQ && code != NE
5773 && (BRANCH_COST (optimize_insn_for_speed_p (),
5774 false) <= 1 || (code != LE && code != GT))))
5777 /* Try to put the result of the comparison in the sign bit. Assume we can't
5778 do the necessary operation below. */
5782 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5783 the sign bit set. */
5787 /* This is destructive, so SUBTARGET can't be OP0. */
5788 if (rtx_equal_p (subtarget, op0))
5791 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5794 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5798 /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5799 number of bits in the mode of OP0, minus one. */
5803 if (rtx_equal_p (subtarget, op0))
5806 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5807 GET_MODE_BITSIZE (mode) - 1,
5809 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5813 if (code == EQ || code == NE)
5815 /* For EQ or NE, one way to do the comparison is to apply an operation
5816 that converts the operand into a positive number if it is nonzero
5817 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5818 for NE we negate. This puts the result in the sign bit. Then we
5819 normalize with a shift, if needed.
5821 Two operations that can do the above actions are ABS and FFS, so try
5822 them. If that doesn't work, and MODE is smaller than a full word,
5823 we can use zero-extension to the wider mode (an unsigned conversion)
5824 as the operation. */
5826 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5827 that is compensated by the subsequent overflow when subtracting
5830 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5831 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5832 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5833 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5834 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5836 tem = convert_modes (word_mode, mode, op0, 1);
5843 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5846 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5849 /* If we couldn't do it that way, for NE we can "or" the two's complement
5850 of the value with itself. For EQ, we take the one's complement of
5851 that "or", which is an extra insn, so we only handle EQ if branches
5856 || BRANCH_COST (optimize_insn_for_speed_p (),
5859 if (rtx_equal_p (subtarget, op0))
5862 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5863 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5866 if (tem && code == EQ)
5867 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5871 if (tem && normalizep)
5872 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5873 GET_MODE_BITSIZE (mode) - 1,
5874 subtarget, normalizep == 1);
5880 else if (GET_MODE (tem) != target_mode)
5882 convert_move (target, tem, 0);
5885 else if (!subtarget)
5887 emit_move_insn (target, tem);
5892 delete_insns_since (last);
5897 /* Like emit_store_flag, but always succeeds. */
5900 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5901 machine_mode mode, int unsignedp, int normalizep)
5904 rtx_code_label *label;
5905 rtx trueval, falseval;
5907 /* First see if emit_store_flag can do the job. */
5908 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5913 target = gen_reg_rtx (word_mode);
5915 /* If this failed, we have to do this with set/compare/jump/set code.
5916 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
5917 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5919 && GET_MODE_CLASS (mode) == MODE_INT
5922 && op1 == const0_rtx)
5924 label = gen_label_rtx ();
5925 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5926 NULL_RTX, NULL, label, -1);
5927 emit_move_insn (target, trueval);
5933 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5934 target = gen_reg_rtx (GET_MODE (target));
5936 /* Jump in the right direction if the target cannot implement CODE
5937 but can jump on its reverse condition. */
5938 falseval = const0_rtx;
5939 if (! can_compare_p (code, mode, ccp_jump)
5940 && (! FLOAT_MODE_P (mode)
5941 || code == ORDERED || code == UNORDERED
5942 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5943 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5945 enum rtx_code rcode;
5946 if (FLOAT_MODE_P (mode))
5947 rcode = reverse_condition_maybe_unordered (code);
5949 rcode = reverse_condition (code);
5951 /* Canonicalize to UNORDERED for the libcall. */
5952 if (can_compare_p (rcode, mode, ccp_jump)
5953 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5956 trueval = const0_rtx;
5961 emit_move_insn (target, trueval);
5962 label = gen_label_rtx ();
5963 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5966 emit_move_insn (target, falseval);
5972 /* Perform possibly multi-word comparison and conditional jump to LABEL
5973 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE. This is
5974 now a thin wrapper around do_compare_rtx_and_jump. */
5977 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5978 rtx_code_label *label)
5980 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5981 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,