gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987-2016 Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 3, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "backend.h"
  26 #include "target.h"
  27 #include "rtl.h"
  28 #include "tree.h"
  29 #include "predict.h"
  30 #include "tm_p.h"
  31 #include "expmed.h"
  32 #include "optabs.h"
  33 #include "emit-rtl.h"
  34 #include "diagnostic-core.h"
  35 #include "fold-const.h"
  36 #include "stor-layout.h"
  37 #include "dojump.h"
  38 #include "explow.h"
  39 #include "expr.h"
  40 #include "langhooks.h"
  41
  42 struct target_expmed default_target_expmed;
  43 #if SWITCHABLE_TARGET
  44 struct target_expmed *this_target_expmed = &default_target_expmed;
  45 #endif
  46
  47 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  48                                    unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    rtx, bool);
  52 static void store_fixed_bit_field_1 (rtx, unsigned HOST_WIDE_INT,
  53                                      unsigned HOST_WIDE_INT,
  54                                      rtx, bool);
  55 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  56                                    unsigned HOST_WIDE_INT,
  57                                    unsigned HOST_WIDE_INT,
  58                                    unsigned HOST_WIDE_INT,
  59                                    rtx, bool);
  60 static rtx extract_fixed_bit_field (machine_mode, rtx,
  61                                     unsigned HOST_WIDE_INT,
  62                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  63 static rtx extract_fixed_bit_field_1 (machine_mode, rtx,
  64                                       unsigned HOST_WIDE_INT,
  65                                       unsigned HOST_WIDE_INT, rtx, int, bool);
  66 static rtx lshift_value (machine_mode, unsigned HOST_WIDE_INT, int);
  67 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  68                                     unsigned HOST_WIDE_INT, int, bool);
  69 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, machine_mode, rtx_code_label *);
  70 static rtx expand_smod_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  71 static rtx expand_sdiv_pow2 (machine_mode, rtx, HOST_WIDE_INT);
  72
  73 /* Return a constant integer mask value of mode MODE with BITSIZE ones
  74    followed by BITPOS zeros, or the complement of that if COMPLEMENT.
  75    The mask is truncated if necessary to the width of mode MODE.  The
  76    mask is zero-extended if BITSIZE+BITPOS is too small for MODE.  */
  77
  78 static inline rtx
  79 mask_rtx (machine_mode mode, int bitpos, int bitsize, bool complement)
  80 {
  81   return immed_wide_int_const
  82     (wi::shifted_mask (bitpos, bitsize, complement,
  83                        GET_MODE_PRECISION (mode)), mode);
  84 }
  85
  86 /* Test whether a value is zero of a power of two.  */
  87 #define EXACT_POWER_OF_2_OR_ZERO_P(x) \
  88   (((x) & ((x) - (unsigned HOST_WIDE_INT) 1)) == 0)
  89
  90 struct init_expmed_rtl
  91 {
  92   rtx reg;
  93   rtx plus;
  94   rtx neg;
  95   rtx mult;
  96   rtx sdiv;
  97   rtx udiv;
  98   rtx sdiv_32;
  99   rtx smod_32;
 100   rtx wide_mult;
 101   rtx wide_lshr;
 102   rtx wide_trunc;
 103   rtx shift;
 104   rtx shift_mult;
 105   rtx shift_add;
 106   rtx shift_sub0;
 107   rtx shift_sub1;
 108   rtx zext;
 109   rtx trunc;
 110
 111   rtx pow2[MAX_BITS_PER_WORD];
 112   rtx cint[MAX_BITS_PER_WORD];
 113 };
 114
 115 static void
 116 init_expmed_one_conv (struct init_expmed_rtl *all, machine_mode to_mode,
 117                       machine_mode from_mode, bool speed)
 118 {
 119   int to_size, from_size;
 120   rtx which;
 121
 122   to_size = GET_MODE_PRECISION (to_mode);
 123   from_size = GET_MODE_PRECISION (from_mode);
 124
 125   /* Most partial integers have a precision less than the "full"
 126      integer it requires for storage.  In case one doesn't, for
 127      comparison purposes here, reduce the bit size by one in that
 128      case.  */
 129   if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT
 130       && exact_log2 (to_size) != -1)
 131     to_size --;
 132   if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT
 133       && exact_log2 (from_size) != -1)
 134     from_size --;
 135
 136   /* Assume cost of zero-extend and sign-extend is the same.  */
 137   which = (to_size < from_size ? all->trunc : all->zext);
 138
 139   PUT_MODE (all->reg, from_mode);
 140   set_convert_cost (to_mode, from_mode, speed,
 141                     set_src_cost (which, to_mode, speed));
 142 }
 143
 144 static void
 145 init_expmed_one_mode (struct init_expmed_rtl *all,
 146                       machine_mode mode, int speed)
 147 {
 148   int m, n, mode_bitsize;
 149   machine_mode mode_from;
 150
 151   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 152
 153   PUT_MODE (all->reg, mode);
 154   PUT_MODE (all->plus, mode);
 155   PUT_MODE (all->neg, mode);
 156   PUT_MODE (all->mult, mode);
 157   PUT_MODE (all->sdiv, mode);
 158   PUT_MODE (all->udiv, mode);
 159   PUT_MODE (all->sdiv_32, mode);
 160   PUT_MODE (all->smod_32, mode);
 161   PUT_MODE (all->wide_trunc, mode);
 162   PUT_MODE (all->shift, mode);
 163   PUT_MODE (all->shift_mult, mode);
 164   PUT_MODE (all->shift_add, mode);
 165   PUT_MODE (all->shift_sub0, mode);
 166   PUT_MODE (all->shift_sub1, mode);
 167   PUT_MODE (all->zext, mode);
 168   PUT_MODE (all->trunc, mode);
 169
 170   set_add_cost (speed, mode, set_src_cost (all->plus, mode, speed));
 171   set_neg_cost (speed, mode, set_src_cost (all->neg, mode, speed));
 172   set_mul_cost (speed, mode, set_src_cost (all->mult, mode, speed));
 173   set_sdiv_cost (speed, mode, set_src_cost (all->sdiv, mode, speed));
 174   set_udiv_cost (speed, mode, set_src_cost (all->udiv, mode, speed));
 175
 176   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (all->sdiv_32, mode, speed)
 177                                      <= 2 * add_cost (speed, mode)));
 178   set_smod_pow2_cheap (speed, mode, (set_src_cost (all->smod_32, mode, speed)
 179                                      <= 4 * add_cost (speed, mode)));
 180
 181   set_shift_cost (speed, mode, 0, 0);
 182   {
 183     int cost = add_cost (speed, mode);
 184     set_shiftadd_cost (speed, mode, 0, cost);
 185     set_shiftsub0_cost (speed, mode, 0, cost);
 186     set_shiftsub1_cost (speed, mode, 0, cost);
 187   }
 188
 189   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 190   for (m = 1; m < n; m++)
 191     {
 192       XEXP (all->shift, 1) = all->cint[m];
 193       XEXP (all->shift_mult, 1) = all->pow2[m];
 194
 195       set_shift_cost (speed, mode, m, set_src_cost (all->shift, mode, speed));
 196       set_shiftadd_cost (speed, mode, m, set_src_cost (all->shift_add, mode,
 197                                                        speed));
 198       set_shiftsub0_cost (speed, mode, m, set_src_cost (all->shift_sub0, mode,
 199                                                         speed));
 200       set_shiftsub1_cost (speed, mode, m, set_src_cost (all->shift_sub1, mode,
 201                                                         speed));
 202     }
 203
 204   if (SCALAR_INT_MODE_P (mode))
 205     {
 206       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 207            mode_from = (machine_mode)(mode_from + 1))
 208         init_expmed_one_conv (all, mode, mode_from, speed);
 209     }
 210   if (GET_MODE_CLASS (mode) == MODE_INT)
 211     {
 212       machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 213       if (wider_mode != VOIDmode)
 214         {
 215           PUT_MODE (all->zext, wider_mode);
 216           PUT_MODE (all->wide_mult, wider_mode);
 217           PUT_MODE (all->wide_lshr, wider_mode);
 218           XEXP (all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 219
 220           set_mul_widen_cost (speed, wider_mode,
 221                               set_src_cost (all->wide_mult, wider_mode, speed));
 222           set_mul_highpart_cost (speed, mode,
 223                                  set_src_cost (all->wide_trunc, mode, speed));
 224         }
 225     }
 226 }
 227
 228 void
 229 init_expmed (void)
 230 {
 231   struct init_expmed_rtl all;
 232   machine_mode mode = QImode;
 233   int m, speed;
 234
 235   memset (&all, 0, sizeof all);
 236   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 237     {
 238       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 239       all.cint[m] = GEN_INT (m);
 240     }
 241
 242   /* Avoid using hard regs in ways which may be unsupported.  */
 243   all.reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
 244   all.plus = gen_rtx_PLUS (mode, all.reg, all.reg);
 245   all.neg = gen_rtx_NEG (mode, all.reg);
 246   all.mult = gen_rtx_MULT (mode, all.reg, all.reg);
 247   all.sdiv = gen_rtx_DIV (mode, all.reg, all.reg);
 248   all.udiv = gen_rtx_UDIV (mode, all.reg, all.reg);
 249   all.sdiv_32 = gen_rtx_DIV (mode, all.reg, all.pow2[5]);
 250   all.smod_32 = gen_rtx_MOD (mode, all.reg, all.pow2[5]);
 251   all.zext = gen_rtx_ZERO_EXTEND (mode, all.reg);
 252   all.wide_mult = gen_rtx_MULT (mode, all.zext, all.zext);
 253   all.wide_lshr = gen_rtx_LSHIFTRT (mode, all.wide_mult, all.reg);
 254   all.wide_trunc = gen_rtx_TRUNCATE (mode, all.wide_lshr);
 255   all.shift = gen_rtx_ASHIFT (mode, all.reg, all.reg);
 256   all.shift_mult = gen_rtx_MULT (mode, all.reg, all.reg);
 257   all.shift_add = gen_rtx_PLUS (mode, all.shift_mult, all.reg);
 258   all.shift_sub0 = gen_rtx_MINUS (mode, all.shift_mult, all.reg);
 259   all.shift_sub1 = gen_rtx_MINUS (mode, all.reg, all.shift_mult);
 260   all.trunc = gen_rtx_TRUNCATE (mode, all.reg);
 261
 262   for (speed = 0; speed < 2; speed++)
 263     {
 264       crtl->maybe_hot_insn_p = speed;
 265       set_zero_cost (speed, set_src_cost (const0_rtx, mode, speed));
 266
 267       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 268            mode = (machine_mode)(mode + 1))
 269         init_expmed_one_mode (&all, mode, speed);
 270
 271       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 272         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 273              mode = (machine_mode)(mode + 1))
 274           init_expmed_one_mode (&all, mode, speed);
 275
 276       if (MIN_MODE_VECTOR_INT != VOIDmode)
 277         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 278              mode = (machine_mode)(mode + 1))
 279           init_expmed_one_mode (&all, mode, speed);
 280     }
 281
 282   if (alg_hash_used_p ())
 283     {
 284       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 285       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 286     }
 287   else
 288     set_alg_hash_used_p (true);
 289   default_rtl_profile ();
 290
 291   ggc_free (all.trunc);
 292   ggc_free (all.shift_sub1);
 293   ggc_free (all.shift_sub0);
 294   ggc_free (all.shift_add);
 295   ggc_free (all.shift_mult);
 296   ggc_free (all.shift);
 297   ggc_free (all.wide_trunc);
 298   ggc_free (all.wide_lshr);
 299   ggc_free (all.wide_mult);
 300   ggc_free (all.zext);
 301   ggc_free (all.smod_32);
 302   ggc_free (all.sdiv_32);
 303   ggc_free (all.udiv);
 304   ggc_free (all.sdiv);
 305   ggc_free (all.mult);
 306   ggc_free (all.neg);
 307   ggc_free (all.plus);
 308   ggc_free (all.reg);
 309 }
 310
 311 /* Return an rtx representing minus the value of X.
 312    MODE is the intended mode of the result,
 313    useful if X is a CONST_INT.  */
 314
 315 rtx
 316 negate_rtx (machine_mode mode, rtx x)
 317 {
 318   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 319
 320   if (result == 0)
 321     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 322
 323   return result;
 324 }
 325
 326 /* Whether reverse storage order is supported on the target.  */
 327 static int reverse_storage_order_supported = -1;
 328
 329 /* Check whether reverse storage order is supported on the target.  */
 330
 331 static void
 332 check_reverse_storage_order_support (void)
 333 {
 334   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 335     {
 336       reverse_storage_order_supported = 0;
 337       sorry ("reverse scalar storage order");
 338     }
 339   else
 340     reverse_storage_order_supported = 1;
 341 }
 342
 343 /* Whether reverse FP storage order is supported on the target.  */
 344 static int reverse_float_storage_order_supported = -1;
 345
 346 /* Check whether reverse FP storage order is supported on the target.  */
 347
 348 static void
 349 check_reverse_float_storage_order_support (void)
 350 {
 351   if (FLOAT_WORDS_BIG_ENDIAN != WORDS_BIG_ENDIAN)
 352     {
 353       reverse_float_storage_order_supported = 0;
 354       sorry ("reverse floating-point scalar storage order");
 355     }
 356   else
 357     reverse_float_storage_order_supported = 1;
 358 }
 359
 360 /* Return an rtx representing value of X with reverse storage order.
 361    MODE is the intended mode of the result,
 362    useful if X is a CONST_INT.  */
 363
 364 rtx
 365 flip_storage_order (enum machine_mode mode, rtx x)
 366 {
 367   enum machine_mode int_mode;
 368   rtx result;
 369
 370   if (mode == QImode)
 371     return x;
 372
 373   if (COMPLEX_MODE_P (mode))
 374     {
 375       rtx real = read_complex_part (x, false);
 376       rtx imag = read_complex_part (x, true);
 377
 378       real = flip_storage_order (GET_MODE_INNER (mode), real);
 379       imag = flip_storage_order (GET_MODE_INNER (mode), imag);
 380
 381       return gen_rtx_CONCAT (mode, real, imag);
 382     }
 383
 384   if (__builtin_expect (reverse_storage_order_supported < 0, 0))
 385     check_reverse_storage_order_support ();
 386
 387   if (SCALAR_INT_MODE_P (mode))
 388     int_mode = mode;
 389   else
 390     {
 391       if (FLOAT_MODE_P (mode)
 392           && __builtin_expect (reverse_float_storage_order_supported < 0, 0))
 393         check_reverse_float_storage_order_support ();
 394
 395       int_mode = mode_for_size (GET_MODE_PRECISION (mode), MODE_INT, 0);
 396       if (int_mode == BLKmode)
 397         {
 398           sorry ("reverse storage order for %smode", GET_MODE_NAME (mode));
 399           return x;
 400         }
 401       x = gen_lowpart (int_mode, x);
 402     }
 403
 404   result = simplify_unary_operation (BSWAP, int_mode, x, int_mode);
 405   if (result == 0)
 406     result = expand_unop (int_mode, bswap_optab, x, NULL_RTX, 1);
 407
 408   if (int_mode != mode)
 409     result = gen_lowpart (mode, result);
 410
 411   return result;
 412 }
 413
 414 /* Adjust bitfield memory MEM so that it points to the first unit of mode
 415    MODE that contains a bitfield of size BITSIZE at bit position BITNUM.
 416    If MODE is BLKmode, return a reference to every byte in the bitfield.
 417    Set *NEW_BITNUM to the bit position of the field within the new memory.  */
 418
 419 static rtx
 420 narrow_bit_field_mem (rtx mem, machine_mode mode,
 421                       unsigned HOST_WIDE_INT bitsize,
 422                       unsigned HOST_WIDE_INT bitnum,
 423                       unsigned HOST_WIDE_INT *new_bitnum)
 424 {
 425   if (mode == BLKmode)
 426     {
 427       *new_bitnum = bitnum % BITS_PER_UNIT;
 428       HOST_WIDE_INT offset = bitnum / BITS_PER_UNIT;
 429       HOST_WIDE_INT size = ((*new_bitnum + bitsize + BITS_PER_UNIT - 1)
 430                             / BITS_PER_UNIT);
 431       return adjust_bitfield_address_size (mem, mode, offset, size);
 432     }
 433   else
 434     {
 435       unsigned int unit = GET_MODE_BITSIZE (mode);
 436       *new_bitnum = bitnum % unit;
 437       HOST_WIDE_INT offset = (bitnum - *new_bitnum) / BITS_PER_UNIT;
 438       return adjust_bitfield_address (mem, mode, offset);
 439     }
 440 }
 441
 442 /* The caller wants to perform insertion or extraction PATTERN on a
 443    bitfield of size BITSIZE at BITNUM bits into memory operand OP0.
 444    BITREGION_START and BITREGION_END are as for store_bit_field
 445    and FIELDMODE is the natural mode of the field.
 446
 447    Search for a mode that is compatible with the memory access
 448    restrictions and (where applicable) with a register insertion or
 449    extraction.  Return the new memory on success, storing the adjusted
 450    bit position in *NEW_BITNUM.  Return null otherwise.  */
 451
 452 static rtx
 453 adjust_bit_field_mem_for_reg (enum extraction_pattern pattern,
 454                               rtx op0, HOST_WIDE_INT bitsize,
 455                               HOST_WIDE_INT bitnum,
 456                               unsigned HOST_WIDE_INT bitregion_start,
 457                               unsigned HOST_WIDE_INT bitregion_end,
 458                               machine_mode fieldmode,
 459                               unsigned HOST_WIDE_INT *new_bitnum)
 460 {
 461   bit_field_mode_iterator iter (bitsize, bitnum, bitregion_start,
 462                                 bitregion_end, MEM_ALIGN (op0),
 463                                 MEM_VOLATILE_P (op0));
 464   machine_mode best_mode;
 465   if (iter.next_mode (&best_mode))
 466     {
 467       /* We can use a memory in BEST_MODE.  See whether this is true for
 468          any wider modes.  All other things being equal, we prefer to
 469          use the widest mode possible because it tends to expose more
 470          CSE opportunities.  */
 471       if (!iter.prefer_smaller_modes ())
 472         {
 473           /* Limit the search to the mode required by the corresponding
 474              register insertion or extraction instruction, if any.  */
 475           machine_mode limit_mode = word_mode;
 476           extraction_insn insn;
 477           if (get_best_reg_extraction_insn (&insn, pattern,
 478                                             GET_MODE_BITSIZE (best_mode),
 479                                             fieldmode))
 480             limit_mode = insn.field_mode;
 481
 482           machine_mode wider_mode;
 483           while (iter.next_mode (&wider_mode)
 484                  && GET_MODE_SIZE (wider_mode) <= GET_MODE_SIZE (limit_mode))
 485             best_mode = wider_mode;
 486         }
 487       return narrow_bit_field_mem (op0, best_mode, bitsize, bitnum,
 488                                    new_bitnum);
 489     }
 490   return NULL_RTX;
 491 }
 492
 493 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 494    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 495    offset is then BITNUM / BITS_PER_UNIT.  */
 496
 497 static bool
 498 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 499                      unsigned HOST_WIDE_INT bitsize,
 500                      machine_mode struct_mode)
 501 {
 502   if (BYTES_BIG_ENDIAN)
 503     return (bitnum % BITS_PER_UNIT == 0
 504             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 505                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 506   else
 507     return bitnum % BITS_PER_WORD == 0;
 508 }
 509
 510 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
 511    containing BITSIZE bits starting at BITNUM, with field mode FIELDMODE.
 512    Return false if the access would touch memory outside the range
 513    BITREGION_START to BITREGION_END for conformance to the C++ memory
 514    model.  */
 515
 516 static bool
 517 strict_volatile_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 518                             unsigned HOST_WIDE_INT bitnum,
 519                             machine_mode fieldmode,
 520                             unsigned HOST_WIDE_INT bitregion_start,
 521                             unsigned HOST_WIDE_INT bitregion_end)
 522 {
 523   unsigned HOST_WIDE_INT modesize = GET_MODE_BITSIZE (fieldmode);
 524
 525   /* -fstrict-volatile-bitfields must be enabled and we must have a
 526      volatile MEM.  */
 527   if (!MEM_P (op0)
 528       || !MEM_VOLATILE_P (op0)
 529       || flag_strict_volatile_bitfields <= 0)
 530     return false;
 531
 532   /* Non-integral modes likely only happen with packed structures.
 533      Punt.  */
 534   if (!SCALAR_INT_MODE_P (fieldmode))
 535     return false;
 536
 537   /* The bit size must not be larger than the field mode, and
 538      the field mode must not be larger than a word.  */
 539   if (bitsize > modesize || modesize > BITS_PER_WORD)
 540     return false;
 541
 542   /* Check for cases of unaligned fields that must be split.  */
 543   if (bitnum % modesize + bitsize > modesize)
 544     return false;
 545
 546   /* The memory must be sufficiently aligned for a MODESIZE access.
 547      This condition guarantees, that the memory access will not
 548      touch anything after the end of the structure.  */
 549   if (MEM_ALIGN (op0) < modesize)
 550     return false;
 551
 552   /* Check for cases where the C++ memory model applies.  */
 553   if (bitregion_end != 0
 554       && (bitnum - bitnum % modesize < bitregion_start
 555           || bitnum - bitnum % modesize + modesize - 1 > bitregion_end))
 556     return false;
 557
 558   return true;
 559 }
 560
 561 /* Return true if OP is a memory and if a bitfield of size BITSIZE at
 562    bit number BITNUM can be treated as a simple value of mode MODE.  */
 563
 564 static bool
 565 simple_mem_bitfield_p (rtx op0, unsigned HOST_WIDE_INT bitsize,
 566                        unsigned HOST_WIDE_INT bitnum, machine_mode mode)
 567 {
 568   return (MEM_P (op0)
 569           && bitnum % BITS_PER_UNIT == 0
 570           && bitsize == GET_MODE_BITSIZE (mode)
 571           && (!SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
 572               || (bitnum % GET_MODE_ALIGNMENT (mode) == 0
 573                   && MEM_ALIGN (op0) >= GET_MODE_ALIGNMENT (mode))));
 574 }
 575 \f
 576 /* Try to use instruction INSV to store VALUE into a field of OP0.
 577    BITSIZE and BITNUM are as for store_bit_field.  */
 578
 579 static bool
 580 store_bit_field_using_insv (const extraction_insn *insv, rtx op0,
 581                             unsigned HOST_WIDE_INT bitsize,
 582                             unsigned HOST_WIDE_INT bitnum,
 583                             rtx value)
 584 {
 585   struct expand_operand ops[4];
 586   rtx value1;
 587   rtx xop0 = op0;
 588   rtx_insn *last = get_last_insn ();
 589   bool copy_back = false;
 590
 591   machine_mode op_mode = insv->field_mode;
 592   unsigned int unit = GET_MODE_BITSIZE (op_mode);
 593   if (bitsize == 0 || bitsize > unit)
 594     return false;
 595
 596   if (MEM_P (xop0))
 597     /* Get a reference to the first byte of the field.  */
 598     xop0 = narrow_bit_field_mem (xop0, insv->struct_mode, bitsize, bitnum,
 599                                  &bitnum);
 600   else
 601     {
 602       /* Convert from counting within OP0 to counting in OP_MODE.  */
 603       if (BYTES_BIG_ENDIAN)
 604         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 605
 606       /* If xop0 is a register, we need it in OP_MODE
 607          to make it acceptable to the format of insv.  */
 608       if (GET_CODE (xop0) == SUBREG)
 609         /* We can't just change the mode, because this might clobber op0,
 610            and we will need the original value of op0 if insv fails.  */
 611         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 612       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 613         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 614     }
 615
 616   /* If the destination is a paradoxical subreg such that we need a
 617      truncate to the inner mode, perform the insertion on a temporary and
 618      truncate the result to the original destination.  Note that we can't
 619      just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 620      X) 0)) is (reg:N X).  */
 621   if (GET_CODE (xop0) == SUBREG
 622       && REG_P (SUBREG_REG (xop0))
 623       && !TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 624                                          op_mode))
 625     {
 626       rtx tem = gen_reg_rtx (op_mode);
 627       emit_move_insn (tem, xop0);
 628       xop0 = tem;
 629       copy_back = true;
 630     }
 631
 632   /* There are similar overflow check at the start of store_bit_field_1,
 633      but that only check the situation where the field lies completely
 634      outside the register, while there do have situation where the field
 635      lies partialy in the register, we need to adjust bitsize for this
 636      partial overflow situation.  Without this fix, pr48335-2.c on big-endian
 637      will broken on those arch support bit insert instruction, like arm, aarch64
 638      etc.  */
 639   if (bitsize + bitnum > unit && bitnum < unit)
 640     {
 641       warning (OPT_Wextra, "write of %wu-bit data outside the bound of "
 642                "destination object, data truncated into %wu-bit",
 643                bitsize, unit - bitnum);
 644       bitsize = unit - bitnum;
 645     }
 646
 647   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 648      "backwards" from the size of the unit we are inserting into.
 649      Otherwise, we count bits from the most significant on a
 650      BYTES/BITS_BIG_ENDIAN machine.  */
 651
 652   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 653     bitnum = unit - bitsize - bitnum;
 654
 655   /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 656   value1 = value;
 657   if (GET_MODE (value) != op_mode)
 658     {
 659       if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 660         {
 661           rtx tmp;
 662           /* Optimization: Don't bother really extending VALUE
 663              if it has all the bits we will actually use.  However,
 664              if we must narrow it, be sure we do it correctly.  */
 665
 666           if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 667             {
 668               tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 669               if (! tmp)
 670                 tmp = simplify_gen_subreg (op_mode,
 671                                            force_reg (GET_MODE (value),
 672                                                       value1),
 673                                            GET_MODE (value), 0);
 674             }
 675           else
 676             {
 677               tmp = gen_lowpart_if_possible (op_mode, value1);
 678               if (! tmp)
 679                 tmp = gen_lowpart (op_mode, force_reg (GET_MODE (value),
 680                                                        value1));
 681             }
 682           value1 = tmp;
 683         }
 684       else if (CONST_INT_P (value))
 685         value1 = gen_int_mode (INTVAL (value), op_mode);
 686       else
 687         /* Parse phase is supposed to make VALUE's data type
 688            match that of the component reference, which is a type
 689            at least as wide as the field; so VALUE should have
 690            a mode that corresponds to that type.  */
 691         gcc_assert (CONSTANT_P (value));
 692     }
 693
 694   create_fixed_operand (&ops[0], xop0);
 695   create_integer_operand (&ops[1], bitsize);
 696   create_integer_operand (&ops[2], bitnum);
 697   create_input_operand (&ops[3], value1, op_mode);
 698   if (maybe_expand_insn (insv->icode, 4, ops))
 699     {
 700       if (copy_back)
 701         convert_move (op0, xop0, true);
 702       return true;
 703     }
 704   delete_insns_since (last);
 705   return false;
 706 }
 707
 708 /* A subroutine of store_bit_field, with the same arguments.  Return true
 709    if the operation could be implemented.
 710
 711    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 712    no other way of implementing the operation.  If FALLBACK_P is false,
 713    return false instead.  */
 714
 715 static bool
 716 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 717                    unsigned HOST_WIDE_INT bitnum,
 718                    unsigned HOST_WIDE_INT bitregion_start,
 719                    unsigned HOST_WIDE_INT bitregion_end,
 720                    machine_mode fieldmode,
 721                    rtx value, bool reverse, bool fallback_p)
 722 {
 723   rtx op0 = str_rtx;
 724   rtx orig_value;
 725
 726   while (GET_CODE (op0) == SUBREG)
 727     {
 728       /* The following line once was done only if WORDS_BIG_ENDIAN,
 729          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 730          meaningful at a much higher level; when structures are copied
 731          between memory and regs, the higher-numbered regs
 732          always get higher addresses.  */
 733       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 734       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 735       int byte_offset = 0;
 736
 737       /* Paradoxical subregs need special handling on big-endian machines.  */
 738       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 739         {
 740           int difference = inner_mode_size - outer_mode_size;
 741
 742           if (WORDS_BIG_ENDIAN)
 743             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 744           if (BYTES_BIG_ENDIAN)
 745             byte_offset += difference % UNITS_PER_WORD;
 746         }
 747       else
 748         byte_offset = SUBREG_BYTE (op0);
 749
 750       bitnum += byte_offset * BITS_PER_UNIT;
 751       op0 = SUBREG_REG (op0);
 752     }
 753
 754   /* No action is needed if the target is a register and if the field
 755      lies completely outside that register.  This can occur if the source
 756      code contains an out-of-bounds access to a small array.  */
 757   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 758     return true;
 759
 760   /* Use vec_set patterns for inserting parts of vectors whenever
 761      available.  */
 762   if (VECTOR_MODE_P (GET_MODE (op0))
 763       && !MEM_P (op0)
 764       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 765       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 766       && bitsize == GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
 767       && !(bitnum % GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
 768     {
 769       struct expand_operand ops[3];
 770       machine_mode outermode = GET_MODE (op0);
 771       machine_mode innermode = GET_MODE_INNER (outermode);
 772       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 773       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 774
 775       create_fixed_operand (&ops[0], op0);
 776       create_input_operand (&ops[1], value, innermode);
 777       create_integer_operand (&ops[2], pos);
 778       if (maybe_expand_insn (icode, 3, ops))
 779         return true;
 780     }
 781
 782   /* If the target is a register, overwriting the entire object, or storing
 783      a full-word or multi-word field can be done with just a SUBREG.  */
 784   if (!MEM_P (op0)
 785       && bitsize == GET_MODE_BITSIZE (fieldmode)
 786       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 787           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 788     {
 789       /* Use the subreg machinery either to narrow OP0 to the required
 790          words or to cope with mode punning between equal-sized modes.
 791          In the latter case, use subreg on the rhs side, not lhs.  */
 792       rtx sub;
 793
 794       if (bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
 795         {
 796           sub = simplify_gen_subreg (GET_MODE (op0), value, fieldmode, 0);
 797           if (sub)
 798             {
 799               if (reverse)
 800                 sub = flip_storage_order (GET_MODE (op0), sub);
 801               emit_move_insn (op0, sub);
 802               return true;
 803             }
 804         }
 805       else
 806         {
 807           sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 808                                      bitnum / BITS_PER_UNIT);
 809           if (sub)
 810             {
 811               if (reverse)
 812                 value = flip_storage_order (fieldmode, value);
 813               emit_move_insn (sub, value);
 814               return true;
 815             }
 816         }
 817     }
 818
 819   /* If the target is memory, storing any naturally aligned field can be
 820      done with a simple store.  For targets that support fast unaligned
 821      memory, any naturally sized, unit aligned field can be done directly.  */
 822   if (simple_mem_bitfield_p (op0, bitsize, bitnum, fieldmode))
 823     {
 824       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 825       if (reverse)
 826         value = flip_storage_order (fieldmode, value);
 827       emit_move_insn (op0, value);
 828       return true;
 829     }
 830
 831   /* Make sure we are playing with integral modes.  Pun with subregs
 832      if we aren't.  This must come after the entire register case above,
 833      since that case is valid for any mode.  The following cases are only
 834      valid for integral modes.  */
 835   {
 836     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 837     if (imode != GET_MODE (op0))
 838       {
 839         if (MEM_P (op0))
 840           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
 841         else
 842           {
 843             gcc_assert (imode != BLKmode);
 844             op0 = gen_lowpart (imode, op0);
 845           }
 846       }
 847   }
 848
 849   /* Storing an lsb-aligned field in a register
 850      can be done with a movstrict instruction.  */
 851
 852   if (!MEM_P (op0)
 853       && !reverse
 854       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 855       && bitsize == GET_MODE_BITSIZE (fieldmode)
 856       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 857     {
 858       struct expand_operand ops[2];
 859       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 860       rtx arg0 = op0;
 861       unsigned HOST_WIDE_INT subreg_off;
 862
 863       if (GET_CODE (arg0) == SUBREG)
 864         {
 865           /* Else we've got some float mode source being extracted into
 866              a different float mode destination -- this combination of
 867              subregs results in Severe Tire Damage.  */
 868           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 869                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 870                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 871           arg0 = SUBREG_REG (arg0);
 872         }
 873
 874       subreg_off = bitnum / BITS_PER_UNIT;
 875       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 876         {
 877           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 878
 879           create_fixed_operand (&ops[0], arg0);
 880           /* Shrink the source operand to FIELDMODE.  */
 881           create_convert_operand_to (&ops[1], value, fieldmode, false);
 882           if (maybe_expand_insn (icode, 2, ops))
 883             return true;
 884         }
 885     }
 886
 887   /* Handle fields bigger than a word.  */
 888
 889   if (bitsize > BITS_PER_WORD)
 890     {
 891       /* Here we transfer the words of the field
 892          in the order least significant first.
 893          This is because the most significant word is the one which may
 894          be less than full.
 895          However, only do that if the value is not BLKmode.  */
 896
 897       const bool backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 898       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 899       unsigned int i;
 900       rtx_insn *last;
 901
 902       /* This is the mode we must force value to, so that there will be enough
 903          subwords to extract.  Note that fieldmode will often (always?) be
 904          VOIDmode, because that is what store_field uses to indicate that this
 905          is a bit field, but passing VOIDmode to operand_subword_force
 906          is not allowed.  */
 907       fieldmode = GET_MODE (value);
 908       if (fieldmode == VOIDmode)
 909         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 910
 911       last = get_last_insn ();
 912       for (i = 0; i < nwords; i++)
 913         {
 914           /* If I is 0, use the low-order word in both field and target;
 915              if I is 1, use the next to lowest word; and so on.  */
 916           unsigned int wordnum = (backwards
 917                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 918                                   - i - 1
 919                                   : i);
 920           unsigned int bit_offset = (backwards ^ reverse
 921                                      ? MAX ((int) bitsize - ((int) i + 1)
 922                                             * BITS_PER_WORD,
 923                                             0)
 924                                      : (int) i * BITS_PER_WORD);
 925           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 926           unsigned HOST_WIDE_INT new_bitsize =
 927             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 928
 929           /* If the remaining chunk doesn't have full wordsize we have
 930              to make sure that for big-endian machines the higher order
 931              bits are used.  */
 932           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 933             value_word = simplify_expand_binop (word_mode, lshr_optab,
 934                                                 value_word,
 935                                                 GEN_INT (BITS_PER_WORD
 936                                                          - new_bitsize),
 937                                                 NULL_RTX, true,
 938                                                 OPTAB_LIB_WIDEN);
 939
 940           if (!store_bit_field_1 (op0, new_bitsize,
 941                                   bitnum + bit_offset,
 942                                   bitregion_start, bitregion_end,
 943                                   word_mode,
 944                                   value_word, reverse, fallback_p))
 945             {
 946               delete_insns_since (last);
 947               return false;
 948             }
 949         }
 950       return true;
 951     }
 952
 953   /* If VALUE has a floating-point or complex mode, access it as an
 954      integer of the corresponding size.  This can occur on a machine
 955      with 64 bit registers that uses SFmode for float.  It can also
 956      occur for unaligned float or complex fields.  */
 957   orig_value = value;
 958   if (GET_MODE (value) != VOIDmode
 959       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 960       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 961     {
 962       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 963       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 964     }
 965
 966   /* If OP0 is a multi-word register, narrow it to the affected word.
 967      If the region spans two words, defer to store_split_bit_field.  */
 968   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 969     {
 970       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 971                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 972       gcc_assert (op0);
 973       bitnum %= BITS_PER_WORD;
 974       if (bitnum + bitsize > BITS_PER_WORD)
 975         {
 976           if (!fallback_p)
 977             return false;
 978
 979           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 980                                  bitregion_end, value, reverse);
 981           return true;
 982         }
 983     }
 984
 985   /* From here on we can assume that the field to be stored in fits
 986      within a word.  If the destination is a register, it too fits
 987      in a word.  */
 988
 989   extraction_insn insv;
 990   if (!MEM_P (op0)
 991       && !reverse
 992       && get_best_reg_extraction_insn (&insv, EP_insv,
 993                                        GET_MODE_BITSIZE (GET_MODE (op0)),
 994                                        fieldmode)
 995       && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
 996     return true;
 997
 998   /* If OP0 is a memory, try copying it to a register and seeing if a
 999      cheap register alternative is available.  */
1000   if (MEM_P (op0) && !reverse)
1001     {
1002       if (get_best_mem_extraction_insn (&insv, EP_insv, bitsize, bitnum,
1003                                         fieldmode)
1004           && store_bit_field_using_insv (&insv, op0, bitsize, bitnum, value))
1005         return true;
1006
1007       rtx_insn *last = get_last_insn ();
1008
1009       /* Try loading part of OP0 into a register, inserting the bitfield
1010          into that, and then copying the result back to OP0.  */
1011       unsigned HOST_WIDE_INT bitpos;
1012       rtx xop0 = adjust_bit_field_mem_for_reg (EP_insv, op0, bitsize, bitnum,
1013                                                bitregion_start, bitregion_end,
1014                                                fieldmode, &bitpos);
1015       if (xop0)
1016         {
1017           rtx tempreg = copy_to_reg (xop0);
1018           if (store_bit_field_1 (tempreg, bitsize, bitpos,
1019                                  bitregion_start, bitregion_end,
1020                                  fieldmode, orig_value, reverse, false))
1021             {
1022               emit_move_insn (xop0, tempreg);
1023               return true;
1024             }
1025           delete_insns_since (last);
1026         }
1027     }
1028
1029   if (!fallback_p)
1030     return false;
1031
1032   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
1033                          bitregion_end, value, reverse);
1034   return true;
1035 }
1036
1037 /* Generate code to store value from rtx VALUE
1038    into a bit-field within structure STR_RTX
1039    containing BITSIZE bits starting at bit BITNUM.
1040
1041    BITREGION_START is bitpos of the first bitfield in this region.
1042    BITREGION_END is the bitpos of the ending bitfield in this region.
1043    These two fields are 0, if the C++ memory model does not apply,
1044    or we are not interested in keeping track of bitfield regions.
1045
1046    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
1047
1048    If REVERSE is true, the store is to be done in reverse order.  */
1049
1050 void
1051 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1052                  unsigned HOST_WIDE_INT bitnum,
1053                  unsigned HOST_WIDE_INT bitregion_start,
1054                  unsigned HOST_WIDE_INT bitregion_end,
1055                  machine_mode fieldmode,
1056                  rtx value, bool reverse)
1057 {
1058   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1059   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, fieldmode,
1060                                   bitregion_start, bitregion_end))
1061     {
1062       /* Storing of a full word can be done with a simple store.
1063          We know here that the field can be accessed with one single
1064          instruction.  For targets that support unaligned memory,
1065          an unaligned access may be necessary.  */
1066       if (bitsize == GET_MODE_BITSIZE (fieldmode))
1067         {
1068           str_rtx = adjust_bitfield_address (str_rtx, fieldmode,
1069                                              bitnum / BITS_PER_UNIT);
1070           if (reverse)
1071             value = flip_storage_order (fieldmode, value);
1072           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1073           emit_move_insn (str_rtx, value);
1074         }
1075       else
1076         {
1077           rtx temp;
1078
1079           str_rtx = narrow_bit_field_mem (str_rtx, fieldmode, bitsize, bitnum,
1080                                           &bitnum);
1081           gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (fieldmode));
1082           temp = copy_to_reg (str_rtx);
1083           if (!store_bit_field_1 (temp, bitsize, bitnum, 0, 0,
1084                                   fieldmode, value, reverse, true))
1085             gcc_unreachable ();
1086
1087           emit_move_insn (str_rtx, temp);
1088         }
1089
1090       return;
1091     }
1092
1093   /* Under the C++0x memory model, we must not touch bits outside the
1094      bit region.  Adjust the address to start at the beginning of the
1095      bit region.  */
1096   if (MEM_P (str_rtx) && bitregion_start > 0)
1097     {
1098       machine_mode bestmode;
1099       HOST_WIDE_INT offset, size;
1100
1101       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
1102
1103       offset = bitregion_start / BITS_PER_UNIT;
1104       bitnum -= bitregion_start;
1105       size = (bitnum + bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
1106       bitregion_end -= bitregion_start;
1107       bitregion_start = 0;
1108       bestmode = get_best_mode (bitsize, bitnum,
1109                                 bitregion_start, bitregion_end,
1110                                 MEM_ALIGN (str_rtx), VOIDmode,
1111                                 MEM_VOLATILE_P (str_rtx));
1112       str_rtx = adjust_bitfield_address_size (str_rtx, bestmode, offset, size);
1113     }
1114
1115   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
1116                           bitregion_start, bitregion_end,
1117                           fieldmode, value, reverse, true))
1118     gcc_unreachable ();
1119 }
1120 \f
1121 /* Use shifts and boolean operations to store VALUE into a bit field of
1122    width BITSIZE in OP0, starting at bit BITNUM.
1123
1124    If REVERSE is true, the store is to be done in reverse order.  */
1125
1126 static void
1127 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1128                        unsigned HOST_WIDE_INT bitnum,
1129                        unsigned HOST_WIDE_INT bitregion_start,
1130                        unsigned HOST_WIDE_INT bitregion_end,
1131                        rtx value, bool reverse)
1132 {
1133   /* There is a case not handled here:
1134      a structure with a known alignment of just a halfword
1135      and a field split across two aligned halfwords within the structure.
1136      Or likewise a structure with a known alignment of just a byte
1137      and a field split across two bytes.
1138      Such cases are not supposed to be able to occur.  */
1139
1140   if (MEM_P (op0))
1141     {
1142       machine_mode mode = GET_MODE (op0);
1143       if (GET_MODE_BITSIZE (mode) == 0
1144           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
1145         mode = word_mode;
1146       mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
1147                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
1148
1149       if (mode == VOIDmode)
1150         {
1151           /* The only way this should occur is if the field spans word
1152              boundaries.  */
1153           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
1154                                  bitregion_end, value, reverse);
1155           return;
1156         }
1157
1158       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1159     }
1160
1161   store_fixed_bit_field_1 (op0, bitsize, bitnum, value, reverse);
1162 }
1163
1164 /* Helper function for store_fixed_bit_field, stores
1165    the bit field always using the MODE of OP0.  */
1166
1167 static void
1168 store_fixed_bit_field_1 (rtx op0, unsigned HOST_WIDE_INT bitsize,
1169                          unsigned HOST_WIDE_INT bitnum,
1170                          rtx value, bool reverse)
1171 {
1172   machine_mode mode;
1173   rtx temp;
1174   int all_zero = 0;
1175   int all_one = 0;
1176
1177   mode = GET_MODE (op0);
1178   gcc_assert (SCALAR_INT_MODE_P (mode));
1179
1180   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1181      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
1182
1183   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1184     /* BITNUM is the distance between our msb
1185        and that of the containing datum.
1186        Convert it to the distance from the lsb.  */
1187     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1188
1189   /* Now BITNUM is always the distance between our lsb
1190      and that of OP0.  */
1191
1192   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
1193      we must first convert its mode to MODE.  */
1194
1195   if (CONST_INT_P (value))
1196     {
1197       unsigned HOST_WIDE_INT v = UINTVAL (value);
1198
1199       if (bitsize < HOST_BITS_PER_WIDE_INT)
1200         v &= ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1;
1201
1202       if (v == 0)
1203         all_zero = 1;
1204       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1205                 && v == ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1)
1206                || (bitsize == HOST_BITS_PER_WIDE_INT
1207                    && v == (unsigned HOST_WIDE_INT) -1))
1208         all_one = 1;
1209
1210       value = lshift_value (mode, v, bitnum);
1211     }
1212   else
1213     {
1214       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1215                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1216
1217       if (GET_MODE (value) != mode)
1218         value = convert_to_mode (mode, value, 1);
1219
1220       if (must_and)
1221         value = expand_binop (mode, and_optab, value,
1222                               mask_rtx (mode, 0, bitsize, 0),
1223                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1224       if (bitnum > 0)
1225         value = expand_shift (LSHIFT_EXPR, mode, value,
1226                               bitnum, NULL_RTX, 1);
1227     }
1228
1229   if (reverse)
1230     value = flip_storage_order (mode, value);
1231
1232   /* Now clear the chosen bits in OP0,
1233      except that if VALUE is -1 we need not bother.  */
1234   /* We keep the intermediates in registers to allow CSE to combine
1235      consecutive bitfield assignments.  */
1236
1237   temp = force_reg (mode, op0);
1238
1239   if (! all_one)
1240     {
1241       rtx mask = mask_rtx (mode, bitnum, bitsize, 1);
1242       if (reverse)
1243         mask = flip_storage_order (mode, mask);
1244       temp = expand_binop (mode, and_optab, temp, mask,
1245                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1246       temp = force_reg (mode, temp);
1247     }
1248
1249   /* Now logical-or VALUE into OP0, unless it is zero.  */
1250
1251   if (! all_zero)
1252     {
1253       temp = expand_binop (mode, ior_optab, temp, value,
1254                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1255       temp = force_reg (mode, temp);
1256     }
1257
1258   if (op0 != temp)
1259     {
1260       op0 = copy_rtx (op0);
1261       emit_move_insn (op0, temp);
1262     }
1263 }
1264 \f
1265 /* Store a bit field that is split across multiple accessible memory objects.
1266
1267    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1268    BITSIZE is the field width; BITPOS the position of its first bit
1269    (within the word).
1270    VALUE is the value to store.
1271
1272    If REVERSE is true, the store is to be done in reverse order.
1273
1274    This does not yet handle fields wider than BITS_PER_WORD.  */
1275
1276 static void
1277 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1278                        unsigned HOST_WIDE_INT bitpos,
1279                        unsigned HOST_WIDE_INT bitregion_start,
1280                        unsigned HOST_WIDE_INT bitregion_end,
1281                        rtx value, bool reverse)
1282 {
1283   unsigned int unit, total_bits, bitsdone = 0;
1284
1285   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1286      much at a time.  */
1287   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1288     unit = BITS_PER_WORD;
1289   else
1290     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1291
1292   /* If OP0 is a memory with a mode, then UNIT must not be larger than
1293      OP0's mode as well.  Otherwise, store_fixed_bit_field will call us
1294      again, and we will mutually recurse forever.  */
1295   if (MEM_P (op0) && GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1296     unit = MIN (unit, GET_MODE_BITSIZE (GET_MODE (op0)));
1297
1298   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1299      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1300      that VALUE might be a floating-point constant.  */
1301   if (CONSTANT_P (value) && !CONST_INT_P (value))
1302     {
1303       rtx word = gen_lowpart_common (word_mode, value);
1304
1305       if (word && (value != word))
1306         value = word;
1307       else
1308         value = gen_lowpart_common (word_mode,
1309                                     force_reg (GET_MODE (value) != VOIDmode
1310                                                ? GET_MODE (value)
1311                                                : word_mode, value));
1312     }
1313
1314   total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1315
1316   while (bitsdone < bitsize)
1317     {
1318       unsigned HOST_WIDE_INT thissize;
1319       unsigned HOST_WIDE_INT thispos;
1320       unsigned HOST_WIDE_INT offset;
1321       rtx part, word;
1322
1323       offset = (bitpos + bitsdone) / unit;
1324       thispos = (bitpos + bitsdone) % unit;
1325
1326       /* When region of bytes we can touch is restricted, decrease
1327          UNIT close to the end of the region as needed.  If op0 is a REG
1328          or SUBREG of REG, don't do this, as there can't be data races
1329          on a register and we can expand shorter code in some cases.  */
1330       if (bitregion_end
1331           && unit > BITS_PER_UNIT
1332           && bitpos + bitsdone - thispos + unit > bitregion_end + 1
1333           && !REG_P (op0)
1334           && (GET_CODE (op0) != SUBREG || !REG_P (SUBREG_REG (op0))))
1335         {
1336           unit = unit / 2;
1337           continue;
1338         }
1339
1340       /* THISSIZE must not overrun a word boundary.  Otherwise,
1341          store_fixed_bit_field will call us again, and we will mutually
1342          recurse forever.  */
1343       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1344       thissize = MIN (thissize, unit - thispos);
1345
1346       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
1347         {
1348           /* Fetch successively less significant portions.  */
1349           if (CONST_INT_P (value))
1350             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1351                              >> (bitsize - bitsdone - thissize))
1352                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1353           /* Likewise, but the source is little-endian.  */
1354           else if (reverse)
1355             part = extract_fixed_bit_field (word_mode, value, thissize,
1356                                             bitsize - bitsdone - thissize,
1357                                             NULL_RTX, 1, false);
1358           else
1359             {
1360               int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1361               /* The args are chosen so that the last part includes the
1362                  lsb.  Give extract_bit_field the value it needs (with
1363                  endianness compensation) to fetch the piece we want.  */
1364               part = extract_fixed_bit_field (word_mode, value, thissize,
1365                                               total_bits - bitsize + bitsdone,
1366                                               NULL_RTX, 1, false);
1367             }
1368         }
1369       else
1370         {
1371           /* Fetch successively more significant portions.  */
1372           if (CONST_INT_P (value))
1373             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1374                              >> bitsdone)
1375                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1376           /* Likewise, but the source is big-endian.  */
1377           else if (reverse)
1378             part = extract_fixed_bit_field (word_mode, value, thissize,
1379                                             total_bits - bitsdone - thissize,
1380                                             NULL_RTX, 1, false);
1381           else
1382             part = extract_fixed_bit_field (word_mode, value, thissize,
1383                                             bitsdone, NULL_RTX, 1, false);
1384         }
1385
1386       /* If OP0 is a register, then handle OFFSET here.
1387
1388          When handling multiword bitfields, extract_bit_field may pass
1389          down a word_mode SUBREG of a larger REG for a bitfield that actually
1390          crosses a word boundary.  Thus, for a SUBREG, we must find
1391          the current word starting from the base register.  */
1392       if (GET_CODE (op0) == SUBREG)
1393         {
1394           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD)
1395                             + (offset * unit / BITS_PER_WORD);
1396           machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1397           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1398             word = word_offset ? const0_rtx : op0;
1399           else
1400             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1401                                           GET_MODE (SUBREG_REG (op0)));
1402           offset &= BITS_PER_WORD / unit - 1;
1403         }
1404       else if (REG_P (op0))
1405         {
1406           machine_mode op0_mode = GET_MODE (op0);
1407           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1408             word = offset ? const0_rtx : op0;
1409           else
1410             word = operand_subword_force (op0, offset * unit / BITS_PER_WORD,
1411                                           GET_MODE (op0));
1412           offset &= BITS_PER_WORD / unit - 1;
1413         }
1414       else
1415         word = op0;
1416
1417       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1418          it is just an out-of-bounds access.  Ignore it.  */
1419       if (word != const0_rtx)
1420         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1421                                bitregion_start, bitregion_end, part,
1422                                reverse);
1423       bitsdone += thissize;
1424     }
1425 }
1426 \f
1427 /* A subroutine of extract_bit_field_1 that converts return value X
1428    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1429    to extract_bit_field.  */
1430
1431 static rtx
1432 convert_extracted_bit_field (rtx x, machine_mode mode,
1433                              machine_mode tmode, bool unsignedp)
1434 {
1435   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1436     return x;
1437
1438   /* If the x mode is not a scalar integral, first convert to the
1439      integer mode of that size and then access it as a floating-point
1440      value via a SUBREG.  */
1441   if (!SCALAR_INT_MODE_P (tmode))
1442     {
1443       machine_mode smode;
1444
1445       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1446       x = convert_to_mode (smode, x, unsignedp);
1447       x = force_reg (smode, x);
1448       return gen_lowpart (tmode, x);
1449     }
1450
1451   return convert_to_mode (tmode, x, unsignedp);
1452 }
1453
1454 /* Try to use an ext(z)v pattern to extract a field from OP0.
1455    Return the extracted value on success, otherwise return null.
1456    EXT_MODE is the mode of the extraction and the other arguments
1457    are as for extract_bit_field.  */
1458
1459 static rtx
1460 extract_bit_field_using_extv (const extraction_insn *extv, rtx op0,
1461                               unsigned HOST_WIDE_INT bitsize,
1462                               unsigned HOST_WIDE_INT bitnum,
1463                               int unsignedp, rtx target,
1464                               machine_mode mode, machine_mode tmode)
1465 {
1466   struct expand_operand ops[4];
1467   rtx spec_target = target;
1468   rtx spec_target_subreg = 0;
1469   machine_mode ext_mode = extv->field_mode;
1470   unsigned unit = GET_MODE_BITSIZE (ext_mode);
1471
1472   if (bitsize == 0 || unit < bitsize)
1473     return NULL_RTX;
1474
1475   if (MEM_P (op0))
1476     /* Get a reference to the first byte of the field.  */
1477     op0 = narrow_bit_field_mem (op0, extv->struct_mode, bitsize, bitnum,
1478                                 &bitnum);
1479   else
1480     {
1481       /* Convert from counting within OP0 to counting in EXT_MODE.  */
1482       if (BYTES_BIG_ENDIAN)
1483         bitnum += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1484
1485       /* If op0 is a register, we need it in EXT_MODE to make it
1486          acceptable to the format of ext(z)v.  */
1487       if (GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1488         return NULL_RTX;
1489       if (REG_P (op0) && GET_MODE (op0) != ext_mode)
1490         op0 = gen_lowpart_SUBREG (ext_mode, op0);
1491     }
1492
1493   /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1494      "backwards" from the size of the unit we are extracting from.
1495      Otherwise, we count bits from the most significant on a
1496      BYTES/BITS_BIG_ENDIAN machine.  */
1497
1498   if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1499     bitnum = unit - bitsize - bitnum;
1500
1501   if (target == 0)
1502     target = spec_target = gen_reg_rtx (tmode);
1503
1504   if (GET_MODE (target) != ext_mode)
1505     {
1506       /* Don't use LHS paradoxical subreg if explicit truncation is needed
1507          between the mode of the extraction (word_mode) and the target
1508          mode.  Instead, create a temporary and use convert_move to set
1509          the target.  */
1510       if (REG_P (target)
1511           && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (target), ext_mode))
1512         {
1513           target = gen_lowpart (ext_mode, target);
1514           if (GET_MODE_PRECISION (ext_mode)
1515               > GET_MODE_PRECISION (GET_MODE (spec_target)))
1516             spec_target_subreg = target;
1517         }
1518       else
1519         target = gen_reg_rtx (ext_mode);
1520     }
1521
1522   create_output_operand (&ops[0], target, ext_mode);
1523   create_fixed_operand (&ops[1], op0);
1524   create_integer_operand (&ops[2], bitsize);
1525   create_integer_operand (&ops[3], bitnum);
1526   if (maybe_expand_insn (extv->icode, 4, ops))
1527     {
1528       target = ops[0].value;
1529       if (target == spec_target)
1530         return target;
1531       if (target == spec_target_subreg)
1532         return spec_target;
1533       return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1534     }
1535   return NULL_RTX;
1536 }
1537
1538 /* A subroutine of extract_bit_field, with the same arguments.
1539    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1540    if we can find no other means of implementing the operation.
1541    if FALLBACK_P is false, return NULL instead.  */
1542
1543 static rtx
1544 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1545                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1546                      machine_mode mode, machine_mode tmode,
1547                      bool reverse, bool fallback_p)
1548 {
1549   rtx op0 = str_rtx;
1550   machine_mode int_mode;
1551   machine_mode mode1;
1552
1553   if (tmode == VOIDmode)
1554     tmode = mode;
1555
1556   while (GET_CODE (op0) == SUBREG)
1557     {
1558       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1559       op0 = SUBREG_REG (op0);
1560     }
1561
1562   /* If we have an out-of-bounds access to a register, just return an
1563      uninitialized register of the required mode.  This can occur if the
1564      source code contains an out-of-bounds access to a small array.  */
1565   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1566     return gen_reg_rtx (tmode);
1567
1568   if (REG_P (op0)
1569       && mode == GET_MODE (op0)
1570       && bitnum == 0
1571       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1572     {
1573       if (reverse)
1574         op0 = flip_storage_order (mode, op0);
1575       /* We're trying to extract a full register from itself.  */
1576       return op0;
1577     }
1578
1579   /* See if we can get a better vector mode before extracting.  */
1580   if (VECTOR_MODE_P (GET_MODE (op0))
1581       && !MEM_P (op0)
1582       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1583     {
1584       machine_mode new_mode;
1585
1586       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1587         new_mode = MIN_MODE_VECTOR_FLOAT;
1588       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1589         new_mode = MIN_MODE_VECTOR_FRACT;
1590       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1591         new_mode = MIN_MODE_VECTOR_UFRACT;
1592       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1593         new_mode = MIN_MODE_VECTOR_ACCUM;
1594       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1595         new_mode = MIN_MODE_VECTOR_UACCUM;
1596       else
1597         new_mode = MIN_MODE_VECTOR_INT;
1598
1599       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1600         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1601             && targetm.vector_mode_supported_p (new_mode))
1602           break;
1603       if (new_mode != VOIDmode)
1604         op0 = gen_lowpart (new_mode, op0);
1605     }
1606
1607   /* Use vec_extract patterns for extracting parts of vectors whenever
1608      available.  */
1609   if (VECTOR_MODE_P (GET_MODE (op0))
1610       && !MEM_P (op0)
1611       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1612       && ((bitnum + bitsize - 1) / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))
1613           == bitnum / GET_MODE_UNIT_BITSIZE (GET_MODE (op0))))
1614     {
1615       struct expand_operand ops[3];
1616       machine_mode outermode = GET_MODE (op0);
1617       machine_mode innermode = GET_MODE_INNER (outermode);
1618       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1619       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1620
1621       create_output_operand (&ops[0], target, innermode);
1622       create_input_operand (&ops[1], op0, outermode);
1623       create_integer_operand (&ops[2], pos);
1624       if (maybe_expand_insn (icode, 3, ops))
1625         {
1626           target = ops[0].value;
1627           if (GET_MODE (target) != mode)
1628             return gen_lowpart (tmode, target);
1629           return target;
1630         }
1631     }
1632
1633   /* Make sure we are playing with integral modes.  Pun with subregs
1634      if we aren't.  */
1635   {
1636     machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1637     if (imode != GET_MODE (op0))
1638       {
1639         if (MEM_P (op0))
1640           op0 = adjust_bitfield_address_size (op0, imode, 0, MEM_SIZE (op0));
1641         else if (imode != BLKmode)
1642           {
1643             op0 = gen_lowpart (imode, op0);
1644
1645             /* If we got a SUBREG, force it into a register since we
1646                aren't going to be able to do another SUBREG on it.  */
1647             if (GET_CODE (op0) == SUBREG)
1648               op0 = force_reg (imode, op0);
1649           }
1650         else if (REG_P (op0))
1651           {
1652             rtx reg, subreg;
1653             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1654                                             MODE_INT);
1655             reg = gen_reg_rtx (imode);
1656             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1657             emit_move_insn (subreg, op0);
1658             op0 = reg;
1659             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1660           }
1661         else
1662           {
1663             HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (op0));
1664             rtx mem = assign_stack_temp (GET_MODE (op0), size);
1665             emit_move_insn (mem, op0);
1666             op0 = adjust_bitfield_address_size (mem, BLKmode, 0, size);
1667           }
1668       }
1669   }
1670
1671   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1672      If that's wrong, the solution is to test for it and set TARGET to 0
1673      if needed.  */
1674
1675   /* Get the mode of the field to use for atomic access or subreg
1676      conversion.  */
1677   mode1 = mode;
1678   if (SCALAR_INT_MODE_P (tmode))
1679     {
1680       machine_mode try_mode = mode_for_size (bitsize,
1681                                                   GET_MODE_CLASS (tmode), 0);
1682       if (try_mode != BLKmode)
1683         mode1 = try_mode;
1684     }
1685   gcc_assert (mode1 != BLKmode);
1686
1687   /* Extraction of a full MODE1 value can be done with a subreg as long
1688      as the least significant bit of the value is the least significant
1689      bit of either OP0 or a word of OP0.  */
1690   if (!MEM_P (op0)
1691       && !reverse
1692       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1693       && bitsize == GET_MODE_BITSIZE (mode1)
1694       && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1695     {
1696       rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1697                                      bitnum / BITS_PER_UNIT);
1698       if (sub)
1699         return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1700     }
1701
1702   /* Extraction of a full MODE1 value can be done with a load as long as
1703      the field is on a byte boundary and is sufficiently aligned.  */
1704   if (simple_mem_bitfield_p (op0, bitsize, bitnum, mode1))
1705     {
1706       op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1707       if (reverse)
1708         op0 = flip_storage_order (mode1, op0);
1709       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1710     }
1711
1712   /* Handle fields bigger than a word.  */
1713
1714   if (bitsize > BITS_PER_WORD)
1715     {
1716       /* Here we transfer the words of the field
1717          in the order least significant first.
1718          This is because the most significant word is the one which may
1719          be less than full.  */
1720
1721       const bool backwards = WORDS_BIG_ENDIAN;
1722       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1723       unsigned int i;
1724       rtx_insn *last;
1725
1726       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1727         target = gen_reg_rtx (mode);
1728
1729       /* In case we're about to clobber a base register or something
1730          (see gcc.c-torture/execute/20040625-1.c).   */
1731       if (reg_mentioned_p (target, str_rtx))
1732         target = gen_reg_rtx (mode);
1733
1734       /* Indicate for flow that the entire target reg is being set.  */
1735       emit_clobber (target);
1736
1737       last = get_last_insn ();
1738       for (i = 0; i < nwords; i++)
1739         {
1740           /* If I is 0, use the low-order word in both field and target;
1741              if I is 1, use the next to lowest word; and so on.  */
1742           /* Word number in TARGET to use.  */
1743           unsigned int wordnum
1744             = (backwards
1745                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1746                : i);
1747           /* Offset from start of field in OP0.  */
1748           unsigned int bit_offset = (backwards ^ reverse
1749                                      ? MAX ((int) bitsize - ((int) i + 1)
1750                                             * BITS_PER_WORD,
1751                                             0)
1752                                      : (int) i * BITS_PER_WORD);
1753           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1754           rtx result_part
1755             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1756                                              bitsize - i * BITS_PER_WORD),
1757                                    bitnum + bit_offset, 1, target_part,
1758                                    mode, word_mode, reverse, fallback_p);
1759
1760           gcc_assert (target_part);
1761           if (!result_part)
1762             {
1763               delete_insns_since (last);
1764               return NULL;
1765             }
1766
1767           if (result_part != target_part)
1768             emit_move_insn (target_part, result_part);
1769         }
1770
1771       if (unsignedp)
1772         {
1773           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1774              need to be zero'd out.  */
1775           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1776             {
1777               unsigned int i, total_words;
1778
1779               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1780               for (i = nwords; i < total_words; i++)
1781                 emit_move_insn
1782                   (operand_subword (target,
1783                                     backwards ? total_words - i - 1 : i,
1784                                     1, VOIDmode),
1785                    const0_rtx);
1786             }
1787           return target;
1788         }
1789
1790       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1791       target = expand_shift (LSHIFT_EXPR, mode, target,
1792                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1793       return expand_shift (RSHIFT_EXPR, mode, target,
1794                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1795     }
1796
1797   /* If OP0 is a multi-word register, narrow it to the affected word.
1798      If the region spans two words, defer to extract_split_bit_field.  */
1799   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1800     {
1801       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1802                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1803       bitnum %= BITS_PER_WORD;
1804       if (bitnum + bitsize > BITS_PER_WORD)
1805         {
1806           if (!fallback_p)
1807             return NULL_RTX;
1808           target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1809                                             reverse);
1810           return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1811         }
1812     }
1813
1814   /* From here on we know the desired field is smaller than a word.
1815      If OP0 is a register, it too fits within a word.  */
1816   enum extraction_pattern pattern = unsignedp ? EP_extzv : EP_extv;
1817   extraction_insn extv;
1818   if (!MEM_P (op0)
1819       && !reverse
1820       /* ??? We could limit the structure size to the part of OP0 that
1821          contains the field, with appropriate checks for endianness
1822          and TRULY_NOOP_TRUNCATION.  */
1823       && get_best_reg_extraction_insn (&extv, pattern,
1824                                        GET_MODE_BITSIZE (GET_MODE (op0)),
1825                                        tmode))
1826     {
1827       rtx result = extract_bit_field_using_extv (&extv, op0, bitsize, bitnum,
1828                                                  unsignedp, target, mode,
1829                                                  tmode);
1830       if (result)
1831         return result;
1832     }
1833
1834   /* If OP0 is a memory, try copying it to a register and seeing if a
1835      cheap register alternative is available.  */
1836   if (MEM_P (op0) & !reverse)
1837     {
1838       if (get_best_mem_extraction_insn (&extv, pattern, bitsize, bitnum,
1839                                         tmode))
1840         {
1841           rtx result = extract_bit_field_using_extv (&extv, op0, bitsize,
1842                                                      bitnum, unsignedp,
1843                                                      target, mode,
1844                                                      tmode);
1845           if (result)
1846             return result;
1847         }
1848
1849       rtx_insn *last = get_last_insn ();
1850
1851       /* Try loading part of OP0 into a register and extracting the
1852          bitfield from that.  */
1853       unsigned HOST_WIDE_INT bitpos;
1854       rtx xop0 = adjust_bit_field_mem_for_reg (pattern, op0, bitsize, bitnum,
1855                                                0, 0, tmode, &bitpos);
1856       if (xop0)
1857         {
1858           xop0 = copy_to_reg (xop0);
1859           rtx result = extract_bit_field_1 (xop0, bitsize, bitpos,
1860                                             unsignedp, target,
1861                                             mode, tmode, reverse, false);
1862           if (result)
1863             return result;
1864           delete_insns_since (last);
1865         }
1866     }
1867
1868   if (!fallback_p)
1869     return NULL;
1870
1871   /* Find a correspondingly-sized integer field, so we can apply
1872      shifts and masks to it.  */
1873   int_mode = int_mode_for_mode (tmode);
1874   if (int_mode == BLKmode)
1875     int_mode = int_mode_for_mode (mode);
1876   /* Should probably push op0 out to memory and then do a load.  */
1877   gcc_assert (int_mode != BLKmode);
1878
1879   target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum, target,
1880                                     unsignedp, reverse);
1881
1882   /* Complex values must be reversed piecewise, so we need to undo the global
1883      reversal, convert to the complex mode and reverse again.  */
1884   if (reverse && COMPLEX_MODE_P (tmode))
1885     {
1886       target = flip_storage_order (int_mode, target);
1887       target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1888       target = flip_storage_order (tmode, target);
1889     }
1890   else
1891     target = convert_extracted_bit_field (target, mode, tmode, unsignedp);
1892
1893   return target;
1894 }
1895
1896 /* Generate code to extract a byte-field from STR_RTX
1897    containing BITSIZE bits, starting at BITNUM,
1898    and put it in TARGET if possible (if TARGET is nonzero).
1899    Regardless of TARGET, we return the rtx for where the value is placed.
1900
1901    STR_RTX is the structure containing the byte (a REG or MEM).
1902    UNSIGNEDP is nonzero if this is an unsigned bit field.
1903    MODE is the natural mode of the field value once extracted.
1904    TMODE is the mode the caller would like the value to have;
1905    but the value may be returned with type MODE instead.
1906
1907    If REVERSE is true, the extraction is to be done in reverse order.
1908
1909    If a TARGET is specified and we can store in it at no extra cost,
1910    we do so, and return TARGET.
1911    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1912    if they are equally easy.  */
1913
1914 rtx
1915 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1916                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1917                    machine_mode mode, machine_mode tmode, bool reverse)
1918 {
1919   machine_mode mode1;
1920
1921   /* Handle -fstrict-volatile-bitfields in the cases where it applies.  */
1922   if (GET_MODE_BITSIZE (GET_MODE (str_rtx)) > 0)
1923     mode1 = GET_MODE (str_rtx);
1924   else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1925     mode1 = GET_MODE (target);
1926   else
1927     mode1 = tmode;
1928
1929   if (strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, mode1, 0, 0))
1930     {
1931       /* Extraction of a full MODE1 value can be done with a simple load.
1932          We know here that the field can be accessed with one single
1933          instruction.  For targets that support unaligned memory,
1934          an unaligned access may be necessary.  */
1935       if (bitsize == GET_MODE_BITSIZE (mode1))
1936         {
1937           rtx result = adjust_bitfield_address (str_rtx, mode1,
1938                                                 bitnum / BITS_PER_UNIT);
1939           if (reverse)
1940             result = flip_storage_order (mode1, result);
1941           gcc_assert (bitnum % BITS_PER_UNIT == 0);
1942           return convert_extracted_bit_field (result, mode, tmode, unsignedp);
1943         }
1944
1945       str_rtx = narrow_bit_field_mem (str_rtx, mode1, bitsize, bitnum,
1946                                       &bitnum);
1947       gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (mode1));
1948       str_rtx = copy_to_reg (str_rtx);
1949     }
1950
1951   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1952                               target, mode, tmode, reverse, true);
1953 }
1954 \f
1955 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1956    from bit BITNUM of OP0.
1957
1958    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1959    If REVERSE is true, the extraction is to be done in reverse order.
1960
1961    If TARGET is nonzero, attempts to store the value there
1962    and return TARGET, but this is not guaranteed.
1963    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1964
1965 static rtx
1966 extract_fixed_bit_field (machine_mode tmode, rtx op0,
1967                          unsigned HOST_WIDE_INT bitsize,
1968                          unsigned HOST_WIDE_INT bitnum, rtx target,
1969                          int unsignedp, bool reverse)
1970 {
1971   if (MEM_P (op0))
1972     {
1973       machine_mode mode
1974         = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0), word_mode,
1975                          MEM_VOLATILE_P (op0));
1976
1977       if (mode == VOIDmode)
1978         /* The only way this should occur is if the field spans word
1979            boundaries.  */
1980         return extract_split_bit_field (op0, bitsize, bitnum, unsignedp,
1981                                         reverse);
1982
1983       op0 = narrow_bit_field_mem (op0, mode, bitsize, bitnum, &bitnum);
1984     }
1985
1986   return extract_fixed_bit_field_1 (tmode, op0, bitsize, bitnum,
1987                                     target, unsignedp, reverse);
1988 }
1989
1990 /* Helper function for extract_fixed_bit_field, extracts
1991    the bit field always using the MODE of OP0.  */
1992
1993 static rtx
1994 extract_fixed_bit_field_1 (machine_mode tmode, rtx op0,
1995                            unsigned HOST_WIDE_INT bitsize,
1996                            unsigned HOST_WIDE_INT bitnum, rtx target,
1997                            int unsignedp, bool reverse)
1998 {
1999   machine_mode mode = GET_MODE (op0);
2000   gcc_assert (SCALAR_INT_MODE_P (mode));
2001
2002   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
2003      for invalid input, such as extract equivalent of f5 from
2004      gcc.dg/pr48335-2.c.  */
2005
2006   if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2007     /* BITNUM is the distance between our msb and that of OP0.
2008        Convert it to the distance from the lsb.  */
2009     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
2010
2011   /* Now BITNUM is always the distance between the field's lsb and that of OP0.
2012      We have reduced the big-endian case to the little-endian case.  */
2013   if (reverse)
2014     op0 = flip_storage_order (mode, op0);
2015
2016   if (unsignedp)
2017     {
2018       if (bitnum)
2019         {
2020           /* If the field does not already start at the lsb,
2021              shift it so it does.  */
2022           /* Maybe propagate the target for the shift.  */
2023           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2024           if (tmode != mode)
2025             subtarget = 0;
2026           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
2027         }
2028       /* Convert the value to the desired mode.  */
2029       if (mode != tmode)
2030         op0 = convert_to_mode (tmode, op0, 1);
2031
2032       /* Unless the msb of the field used to be the msb when we shifted,
2033          mask out the upper bits.  */
2034
2035       if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
2036         return expand_binop (GET_MODE (op0), and_optab, op0,
2037                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
2038                              target, 1, OPTAB_LIB_WIDEN);
2039       return op0;
2040     }
2041
2042   /* To extract a signed bit-field, first shift its msb to the msb of the word,
2043      then arithmetic-shift its lsb to the lsb of the word.  */
2044   op0 = force_reg (mode, op0);
2045
2046   /* Find the narrowest integer mode that contains the field.  */
2047
2048   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
2049        mode = GET_MODE_WIDER_MODE (mode))
2050     if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
2051       {
2052         op0 = convert_to_mode (mode, op0, 0);
2053         break;
2054       }
2055
2056   if (mode != tmode)
2057     target = 0;
2058
2059   if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
2060     {
2061       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
2062       /* Maybe propagate the target for the shift.  */
2063       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
2064       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
2065     }
2066
2067   return expand_shift (RSHIFT_EXPR, mode, op0,
2068                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
2069 }
2070
2071 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
2072    VALUE << BITPOS.  */
2073
2074 static rtx
2075 lshift_value (machine_mode mode, unsigned HOST_WIDE_INT value,
2076               int bitpos)
2077 {
2078   return immed_wide_int_const (wi::lshift (value, bitpos), mode);
2079 }
2080 \f
2081 /* Extract a bit field that is split across two words
2082    and return an RTX for the result.
2083
2084    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2085    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2086    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
2087
2088    If REVERSE is true, the extraction is to be done in reverse order.  */
2089
2090 static rtx
2091 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2092                          unsigned HOST_WIDE_INT bitpos, int unsignedp,
2093                          bool reverse)
2094 {
2095   unsigned int unit;
2096   unsigned int bitsdone = 0;
2097   rtx result = NULL_RTX;
2098   int first = 1;
2099
2100   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
2101      much at a time.  */
2102   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2103     unit = BITS_PER_WORD;
2104   else
2105     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2106
2107   while (bitsdone < bitsize)
2108     {
2109       unsigned HOST_WIDE_INT thissize;
2110       rtx part, word;
2111       unsigned HOST_WIDE_INT thispos;
2112       unsigned HOST_WIDE_INT offset;
2113
2114       offset = (bitpos + bitsdone) / unit;
2115       thispos = (bitpos + bitsdone) % unit;
2116
2117       /* THISSIZE must not overrun a word boundary.  Otherwise,
2118          extract_fixed_bit_field will call us again, and we will mutually
2119          recurse forever.  */
2120       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2121       thissize = MIN (thissize, unit - thispos);
2122
2123       /* If OP0 is a register, then handle OFFSET here.
2124
2125          When handling multiword bitfields, extract_bit_field may pass
2126          down a word_mode SUBREG of a larger REG for a bitfield that actually
2127          crosses a word boundary.  Thus, for a SUBREG, we must find
2128          the current word starting from the base register.  */
2129       if (GET_CODE (op0) == SUBREG)
2130         {
2131           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2132           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2133                                         GET_MODE (SUBREG_REG (op0)));
2134           offset = 0;
2135         }
2136       else if (REG_P (op0))
2137         {
2138           word = operand_subword_force (op0, offset, GET_MODE (op0));
2139           offset = 0;
2140         }
2141       else
2142         word = op0;
2143
2144       /* Extract the parts in bit-counting order,
2145          whose meaning is determined by BYTES_PER_UNIT.
2146          OFFSET is in UNITs, and UNIT is in bits.  */
2147       part = extract_fixed_bit_field (word_mode, word, thissize,
2148                                       offset * unit + thispos, 0, 1, reverse);
2149       bitsdone += thissize;
2150
2151       /* Shift this part into place for the result.  */
2152       if (reverse ? !BYTES_BIG_ENDIAN : BYTES_BIG_ENDIAN)
2153         {
2154           if (bitsize != bitsdone)
2155             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2156                                  bitsize - bitsdone, 0, 1);
2157         }
2158       else
2159         {
2160           if (bitsdone != thissize)
2161             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2162                                  bitsdone - thissize, 0, 1);
2163         }
2164
2165       if (first)
2166         result = part;
2167       else
2168         /* Combine the parts with bitwise or.  This works
2169            because we extracted each part as an unsigned bit field.  */
2170         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2171                                OPTAB_LIB_WIDEN);
2172
2173       first = 0;
2174     }
2175
2176   /* Unsigned bit field: we are done.  */
2177   if (unsignedp)
2178     return result;
2179   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2180   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2181                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2182   return expand_shift (RSHIFT_EXPR, word_mode, result,
2183                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2184 }
2185 \f
2186 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2187    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2188    MODE, fill the upper bits with zeros.  Fail if the layout of either
2189    mode is unknown (as for CC modes) or if the extraction would involve
2190    unprofitable mode punning.  Return the value on success, otherwise
2191    return null.
2192
2193    This is different from gen_lowpart* in these respects:
2194
2195      - the returned value must always be considered an rvalue
2196
2197      - when MODE is wider than SRC_MODE, the extraction involves
2198        a zero extension
2199
2200      - when MODE is smaller than SRC_MODE, the extraction involves
2201        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2202
2203    In other words, this routine performs a computation, whereas the
2204    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2205    operations.  */
2206
2207 rtx
2208 extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
2209 {
2210   machine_mode int_mode, src_int_mode;
2211
2212   if (mode == src_mode)
2213     return src;
2214
2215   if (CONSTANT_P (src))
2216     {
2217       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2218          fails, it will happily create (subreg (symbol_ref)) or similar
2219          invalid SUBREGs.  */
2220       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2221       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2222       if (ret)
2223         return ret;
2224
2225       if (GET_MODE (src) == VOIDmode
2226           || !validate_subreg (mode, src_mode, src, byte))
2227         return NULL_RTX;
2228
2229       src = force_reg (GET_MODE (src), src);
2230       return gen_rtx_SUBREG (mode, src, byte);
2231     }
2232
2233   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2234     return NULL_RTX;
2235
2236   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2237       && MODES_TIEABLE_P (mode, src_mode))
2238     {
2239       rtx x = gen_lowpart_common (mode, src);
2240       if (x)
2241         return x;
2242     }
2243
2244   src_int_mode = int_mode_for_mode (src_mode);
2245   int_mode = int_mode_for_mode (mode);
2246   if (src_int_mode == BLKmode || int_mode == BLKmode)
2247     return NULL_RTX;
2248
2249   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2250     return NULL_RTX;
2251   if (!MODES_TIEABLE_P (int_mode, mode))
2252     return NULL_RTX;
2253
2254   src = gen_lowpart (src_int_mode, src);
2255   src = convert_modes (int_mode, src_int_mode, src, true);
2256   src = gen_lowpart (mode, src);
2257   return src;
2258 }
2259 \f
2260 /* Add INC into TARGET.  */
2261
2262 void
2263 expand_inc (rtx target, rtx inc)
2264 {
2265   rtx value = expand_binop (GET_MODE (target), add_optab,
2266                             target, inc,
2267                             target, 0, OPTAB_LIB_WIDEN);
2268   if (value != target)
2269     emit_move_insn (target, value);
2270 }
2271
2272 /* Subtract DEC from TARGET.  */
2273
2274 void
2275 expand_dec (rtx target, rtx dec)
2276 {
2277   rtx value = expand_binop (GET_MODE (target), sub_optab,
2278                             target, dec,
2279                             target, 0, OPTAB_LIB_WIDEN);
2280   if (value != target)
2281     emit_move_insn (target, value);
2282 }
2283 \f
2284 /* Output a shift instruction for expression code CODE,
2285    with SHIFTED being the rtx for the value to shift,
2286    and AMOUNT the rtx for the amount to shift by.
2287    Store the result in the rtx TARGET, if that is convenient.
2288    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2289    Return the rtx for where the value is.  */
2290
2291 static rtx
2292 expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted,
2293                 rtx amount, rtx target, int unsignedp)
2294 {
2295   rtx op1, temp = 0;
2296   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2297   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2298   optab lshift_optab = ashl_optab;
2299   optab rshift_arith_optab = ashr_optab;
2300   optab rshift_uns_optab = lshr_optab;
2301   optab lrotate_optab = rotl_optab;
2302   optab rrotate_optab = rotr_optab;
2303   machine_mode op1_mode;
2304   machine_mode scalar_mode = mode;
2305   int attempt;
2306   bool speed = optimize_insn_for_speed_p ();
2307
2308   if (VECTOR_MODE_P (mode))
2309     scalar_mode = GET_MODE_INNER (mode);
2310   op1 = amount;
2311   op1_mode = GET_MODE (op1);
2312
2313   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2314      shift amount is a vector, use the vector/vector shift patterns.  */
2315   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2316     {
2317       lshift_optab = vashl_optab;
2318       rshift_arith_optab = vashr_optab;
2319       rshift_uns_optab = vlshr_optab;
2320       lrotate_optab = vrotl_optab;
2321       rrotate_optab = vrotr_optab;
2322     }
2323
2324   /* Previously detected shift-counts computed by NEGATE_EXPR
2325      and shifted in the other direction; but that does not work
2326      on all machines.  */
2327
2328   if (SHIFT_COUNT_TRUNCATED)
2329     {
2330       if (CONST_INT_P (op1)
2331           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2332               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (scalar_mode)))
2333         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2334                        % GET_MODE_BITSIZE (scalar_mode));
2335       else if (GET_CODE (op1) == SUBREG
2336                && subreg_lowpart_p (op1)
2337                && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (op1)))
2338                && SCALAR_INT_MODE_P (GET_MODE (op1)))
2339         op1 = SUBREG_REG (op1);
2340     }
2341
2342   /* Canonicalize rotates by constant amount.  If op1 is bitsize / 2,
2343      prefer left rotation, if op1 is from bitsize / 2 + 1 to
2344      bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1
2345      amount instead.  */
2346   if (rotate
2347       && CONST_INT_P (op1)
2348       && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left,
2349                    GET_MODE_BITSIZE (scalar_mode) - 1))
2350     {
2351       op1 = GEN_INT (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1));
2352       left = !left;
2353       code = left ? LROTATE_EXPR : RROTATE_EXPR;
2354     }
2355
2356   /* Rotation of 16bit values by 8 bits is effectively equivalent to a bswaphi.
2357      Note that this is not the case for bigger values.  For instance a rotation
2358      of 0x01020304 by 16 bits gives 0x03040102 which is different from
2359      0x04030201 (bswapsi).  */
2360   if (rotate
2361       && CONST_INT_P (op1)
2362       && INTVAL (op1) == BITS_PER_UNIT
2363       && GET_MODE_SIZE (scalar_mode) == 2
2364       && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing)
2365     return expand_unop (HImode, bswap_optab, shifted, NULL_RTX,
2366                                   unsignedp);
2367
2368   if (op1 == const0_rtx)
2369     return shifted;
2370
2371   /* Check whether its cheaper to implement a left shift by a constant
2372      bit count by a sequence of additions.  */
2373   if (code == LSHIFT_EXPR
2374       && CONST_INT_P (op1)
2375       && INTVAL (op1) > 0
2376       && INTVAL (op1) < GET_MODE_PRECISION (scalar_mode)
2377       && INTVAL (op1) < MAX_BITS_PER_WORD
2378       && (shift_cost (speed, mode, INTVAL (op1))
2379           > INTVAL (op1) * add_cost (speed, mode))
2380       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2381     {
2382       int i;
2383       for (i = 0; i < INTVAL (op1); i++)
2384         {
2385           temp = force_reg (mode, shifted);
2386           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2387                                   unsignedp, OPTAB_LIB_WIDEN);
2388         }
2389       return shifted;
2390     }
2391
2392   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2393     {
2394       enum optab_methods methods;
2395
2396       if (attempt == 0)
2397         methods = OPTAB_DIRECT;
2398       else if (attempt == 1)
2399         methods = OPTAB_WIDEN;
2400       else
2401         methods = OPTAB_LIB_WIDEN;
2402
2403       if (rotate)
2404         {
2405           /* Widening does not work for rotation.  */
2406           if (methods == OPTAB_WIDEN)
2407             continue;
2408           else if (methods == OPTAB_LIB_WIDEN)
2409             {
2410               /* If we have been unable to open-code this by a rotation,
2411                  do it as the IOR of two shifts.  I.e., to rotate A
2412                  by N bits, compute
2413                  (A << N) | ((unsigned) A >> ((-N) & (C - 1)))
2414                  where C is the bitsize of A.
2415
2416                  It is theoretically possible that the target machine might
2417                  not be able to perform either shift and hence we would
2418                  be making two libcalls rather than just the one for the
2419                  shift (similarly if IOR could not be done).  We will allow
2420                  this extremely unlikely lossage to avoid complicating the
2421                  code below.  */
2422
2423               rtx subtarget = target == shifted ? 0 : target;
2424               rtx new_amount, other_amount;
2425               rtx temp1;
2426
2427               new_amount = op1;
2428               if (op1 == const0_rtx)
2429                 return shifted;
2430               else if (CONST_INT_P (op1))
2431                 other_amount = GEN_INT (GET_MODE_BITSIZE (scalar_mode)
2432                                         - INTVAL (op1));
2433               else
2434                 {
2435                   other_amount
2436                     = simplify_gen_unary (NEG, GET_MODE (op1),
2437                                           op1, GET_MODE (op1));
2438                   HOST_WIDE_INT mask = GET_MODE_PRECISION (scalar_mode) - 1;
2439                   other_amount
2440                     = simplify_gen_binary (AND, GET_MODE (op1), other_amount,
2441                                            gen_int_mode (mask, GET_MODE (op1)));
2442                 }
2443
2444               shifted = force_reg (mode, shifted);
2445
2446               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2447                                      mode, shifted, new_amount, 0, 1);
2448               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2449                                       mode, shifted, other_amount,
2450                                       subtarget, 1);
2451               return expand_binop (mode, ior_optab, temp, temp1, target,
2452                                    unsignedp, methods);
2453             }
2454
2455           temp = expand_binop (mode,
2456                                left ? lrotate_optab : rrotate_optab,
2457                                shifted, op1, target, unsignedp, methods);
2458         }
2459       else if (unsignedp)
2460         temp = expand_binop (mode,
2461                              left ? lshift_optab : rshift_uns_optab,
2462                              shifted, op1, target, unsignedp, methods);
2463
2464       /* Do arithmetic shifts.
2465          Also, if we are going to widen the operand, we can just as well
2466          use an arithmetic right-shift instead of a logical one.  */
2467       if (temp == 0 && ! rotate
2468           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2469         {
2470           enum optab_methods methods1 = methods;
2471
2472           /* If trying to widen a log shift to an arithmetic shift,
2473              don't accept an arithmetic shift of the same size.  */
2474           if (unsignedp)
2475             methods1 = OPTAB_MUST_WIDEN;
2476
2477           /* Arithmetic shift */
2478
2479           temp = expand_binop (mode,
2480                                left ? lshift_optab : rshift_arith_optab,
2481                                shifted, op1, target, unsignedp, methods1);
2482         }
2483
2484       /* We used to try extzv here for logical right shifts, but that was
2485          only useful for one machine, the VAX, and caused poor code
2486          generation there for lshrdi3, so the code was deleted and a
2487          define_expand for lshrsi3 was added to vax.md.  */
2488     }
2489
2490   gcc_assert (temp);
2491   return temp;
2492 }
2493
2494 /* Output a shift instruction for expression code CODE,
2495    with SHIFTED being the rtx for the value to shift,
2496    and AMOUNT the amount to shift by.
2497    Store the result in the rtx TARGET, if that is convenient.
2498    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2499    Return the rtx for where the value is.  */
2500
2501 rtx
2502 expand_shift (enum tree_code code, machine_mode mode, rtx shifted,
2503               int amount, rtx target, int unsignedp)
2504 {
2505   return expand_shift_1 (code, mode,
2506                          shifted, GEN_INT (amount), target, unsignedp);
2507 }
2508
2509 /* Output a shift instruction for expression code CODE,
2510    with SHIFTED being the rtx for the value to shift,
2511    and AMOUNT the tree for the amount to shift by.
2512    Store the result in the rtx TARGET, if that is convenient.
2513    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2514    Return the rtx for where the value is.  */
2515
2516 rtx
2517 expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted,
2518                        tree amount, rtx target, int unsignedp)
2519 {
2520   return expand_shift_1 (code, mode,
2521                          shifted, expand_normal (amount), target, unsignedp);
2522 }
2523
2524 \f
2525 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2526                         const struct mult_cost *, machine_mode mode);
2527 static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx,
2528                               const struct algorithm *, enum mult_variant);
2529 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2530 static rtx extract_high_half (machine_mode, rtx);
2531 static rtx expmed_mult_highpart (machine_mode, rtx, rtx, rtx, int, int);
2532 static rtx expmed_mult_highpart_optab (machine_mode, rtx, rtx, rtx,
2533                                        int, int);
2534 /* Compute and return the best algorithm for multiplying by T.
2535    The algorithm must cost less than cost_limit
2536    If retval.cost >= COST_LIMIT, no algorithm was found and all
2537    other field of the returned struct are undefined.
2538    MODE is the machine mode of the multiplication.  */
2539
2540 static void
2541 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2542             const struct mult_cost *cost_limit, machine_mode mode)
2543 {
2544   int m;
2545   struct algorithm *alg_in, *best_alg;
2546   struct mult_cost best_cost;
2547   struct mult_cost new_limit;
2548   int op_cost, op_latency;
2549   unsigned HOST_WIDE_INT orig_t = t;
2550   unsigned HOST_WIDE_INT q;
2551   int maxm, hash_index;
2552   bool cache_hit = false;
2553   enum alg_code cache_alg = alg_zero;
2554   bool speed = optimize_insn_for_speed_p ();
2555   machine_mode imode;
2556   struct alg_hash_entry *entry_ptr;
2557
2558   /* Indicate that no algorithm is yet found.  If no algorithm
2559      is found, this value will be returned and indicate failure.  */
2560   alg_out->cost.cost = cost_limit->cost + 1;
2561   alg_out->cost.latency = cost_limit->latency + 1;
2562
2563   if (cost_limit->cost < 0
2564       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2565     return;
2566
2567   /* Be prepared for vector modes.  */
2568   imode = GET_MODE_INNER (mode);
2569
2570   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2571
2572   /* Restrict the bits of "t" to the multiplication's mode.  */
2573   t &= GET_MODE_MASK (imode);
2574
2575   /* t == 1 can be done in zero cost.  */
2576   if (t == 1)
2577     {
2578       alg_out->ops = 1;
2579       alg_out->cost.cost = 0;
2580       alg_out->cost.latency = 0;
2581       alg_out->op[0] = alg_m;
2582       return;
2583     }
2584
2585   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2586      fail now.  */
2587   if (t == 0)
2588     {
2589       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2590         return;
2591       else
2592         {
2593           alg_out->ops = 1;
2594           alg_out->cost.cost = zero_cost (speed);
2595           alg_out->cost.latency = zero_cost (speed);
2596           alg_out->op[0] = alg_zero;
2597           return;
2598         }
2599     }
2600
2601   /* We'll be needing a couple extra algorithm structures now.  */
2602
2603   alg_in = XALLOCA (struct algorithm);
2604   best_alg = XALLOCA (struct algorithm);
2605   best_cost = *cost_limit;
2606
2607   /* Compute the hash index.  */
2608   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2609
2610   /* See if we already know what to do for T.  */
2611   entry_ptr = alg_hash_entry_ptr (hash_index);
2612   if (entry_ptr->t == t
2613       && entry_ptr->mode == mode
2614       && entry_ptr->mode == mode
2615       && entry_ptr->speed == speed
2616       && entry_ptr->alg != alg_unknown)
2617     {
2618       cache_alg = entry_ptr->alg;
2619
2620       if (cache_alg == alg_impossible)
2621         {
2622           /* The cache tells us that it's impossible to synthesize
2623              multiplication by T within entry_ptr->cost.  */
2624           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2625             /* COST_LIMIT is at least as restrictive as the one
2626                recorded in the hash table, in which case we have no
2627                hope of synthesizing a multiplication.  Just
2628                return.  */
2629             return;
2630
2631           /* If we get here, COST_LIMIT is less restrictive than the
2632              one recorded in the hash table, so we may be able to
2633              synthesize a multiplication.  Proceed as if we didn't
2634              have the cache entry.  */
2635         }
2636       else
2637         {
2638           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2639             /* The cached algorithm shows that this multiplication
2640                requires more cost than COST_LIMIT.  Just return.  This
2641                way, we don't clobber this cache entry with
2642                alg_impossible but retain useful information.  */
2643             return;
2644
2645           cache_hit = true;
2646
2647           switch (cache_alg)
2648             {
2649             case alg_shift:
2650               goto do_alg_shift;
2651
2652             case alg_add_t_m2:
2653             case alg_sub_t_m2:
2654               goto do_alg_addsub_t_m2;
2655
2656             case alg_add_factor:
2657             case alg_sub_factor:
2658               goto do_alg_addsub_factor;
2659
2660             case alg_add_t2_m:
2661               goto do_alg_add_t2_m;
2662
2663             case alg_sub_t2_m:
2664               goto do_alg_sub_t2_m;
2665
2666             default:
2667               gcc_unreachable ();
2668             }
2669         }
2670     }
2671
2672   /* If we have a group of zero bits at the low-order part of T, try
2673      multiplying by the remaining bits and then doing a shift.  */
2674
2675   if ((t & 1) == 0)
2676     {
2677     do_alg_shift:
2678       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2679       if (m < maxm)
2680         {
2681           q = t >> m;
2682           /* The function expand_shift will choose between a shift and
2683              a sequence of additions, so the observed cost is given as
2684              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2685           op_cost = m * add_cost (speed, mode);
2686           if (shift_cost (speed, mode, m) < op_cost)
2687             op_cost = shift_cost (speed, mode, m);
2688           new_limit.cost = best_cost.cost - op_cost;
2689           new_limit.latency = best_cost.latency - op_cost;
2690           synth_mult (alg_in, q, &new_limit, mode);
2691
2692           alg_in->cost.cost += op_cost;
2693           alg_in->cost.latency += op_cost;
2694           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2695             {
2696               best_cost = alg_in->cost;
2697               std::swap (alg_in, best_alg);
2698               best_alg->log[best_alg->ops] = m;
2699               best_alg->op[best_alg->ops] = alg_shift;
2700             }
2701
2702           /* See if treating ORIG_T as a signed number yields a better
2703              sequence.  Try this sequence only for a negative ORIG_T
2704              as it would be useless for a non-negative ORIG_T.  */
2705           if ((HOST_WIDE_INT) orig_t < 0)
2706             {
2707               /* Shift ORIG_T as follows because a right shift of a
2708                  negative-valued signed type is implementation
2709                  defined.  */
2710               q = ~(~orig_t >> m);
2711               /* The function expand_shift will choose between a shift
2712                  and a sequence of additions, so the observed cost is
2713                  given as MIN (m * add_cost(speed, mode),
2714                  shift_cost(speed, mode, m)).  */
2715               op_cost = m * add_cost (speed, mode);
2716               if (shift_cost (speed, mode, m) < op_cost)
2717                 op_cost = shift_cost (speed, mode, m);
2718               new_limit.cost = best_cost.cost - op_cost;
2719               new_limit.latency = best_cost.latency - op_cost;
2720               synth_mult (alg_in, q, &new_limit, mode);
2721
2722               alg_in->cost.cost += op_cost;
2723               alg_in->cost.latency += op_cost;
2724               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2725                 {
2726                   best_cost = alg_in->cost;
2727                   std::swap (alg_in, best_alg);
2728                   best_alg->log[best_alg->ops] = m;
2729                   best_alg->op[best_alg->ops] = alg_shift;
2730                 }
2731             }
2732         }
2733       if (cache_hit)
2734         goto done;
2735     }
2736
2737   /* If we have an odd number, add or subtract one.  */
2738   if ((t & 1) != 0)
2739     {
2740       unsigned HOST_WIDE_INT w;
2741
2742     do_alg_addsub_t_m2:
2743       for (w = 1; (w & t) != 0; w <<= 1)
2744         ;
2745       /* If T was -1, then W will be zero after the loop.  This is another
2746          case where T ends with ...111.  Handling this with (T + 1) and
2747          subtract 1 produces slightly better code and results in algorithm
2748          selection much faster than treating it like the ...0111 case
2749          below.  */
2750       if (w == 0
2751           || (w > 2
2752               /* Reject the case where t is 3.
2753                  Thus we prefer addition in that case.  */
2754               && t != 3))
2755         {
2756           /* T ends with ...111.  Multiply by (T + 1) and subtract T.  */
2757
2758           op_cost = add_cost (speed, mode);
2759           new_limit.cost = best_cost.cost - op_cost;
2760           new_limit.latency = best_cost.latency - op_cost;
2761           synth_mult (alg_in, t + 1, &new_limit, mode);
2762
2763           alg_in->cost.cost += op_cost;
2764           alg_in->cost.latency += op_cost;
2765           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2766             {
2767               best_cost = alg_in->cost;
2768               std::swap (alg_in, best_alg);
2769               best_alg->log[best_alg->ops] = 0;
2770               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2771             }
2772         }
2773       else
2774         {
2775           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add T.  */
2776
2777           op_cost = add_cost (speed, mode);
2778           new_limit.cost = best_cost.cost - op_cost;
2779           new_limit.latency = best_cost.latency - op_cost;
2780           synth_mult (alg_in, t - 1, &new_limit, mode);
2781
2782           alg_in->cost.cost += op_cost;
2783           alg_in->cost.latency += op_cost;
2784           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2785             {
2786               best_cost = alg_in->cost;
2787               std::swap (alg_in, best_alg);
2788               best_alg->log[best_alg->ops] = 0;
2789               best_alg->op[best_alg->ops] = alg_add_t_m2;
2790             }
2791         }
2792
2793       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2794          quickly with a - a * n for some appropriate constant n.  */
2795       m = exact_log2 (-orig_t + 1);
2796       if (m >= 0 && m < maxm)
2797         {
2798           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2799           /* If the target has a cheap shift-and-subtract insn use
2800              that in preference to a shift insn followed by a sub insn.
2801              Assume that the shift-and-sub is "atomic" with a latency
2802              equal to it's cost, otherwise assume that on superscalar
2803              hardware the shift may be executed concurrently with the
2804              earlier steps in the algorithm.  */
2805           if (shiftsub1_cost (speed, mode, m) <= op_cost)
2806             {
2807               op_cost = shiftsub1_cost (speed, mode, m);
2808               op_latency = op_cost;
2809             }
2810           else
2811             op_latency = add_cost (speed, mode);
2812
2813           new_limit.cost = best_cost.cost - op_cost;
2814           new_limit.latency = best_cost.latency - op_latency;
2815           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2816                       &new_limit, mode);
2817
2818           alg_in->cost.cost += op_cost;
2819           alg_in->cost.latency += op_latency;
2820           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2821             {
2822               best_cost = alg_in->cost;
2823               std::swap (alg_in, best_alg);
2824               best_alg->log[best_alg->ops] = m;
2825               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2826             }
2827         }
2828
2829       if (cache_hit)
2830         goto done;
2831     }
2832
2833   /* Look for factors of t of the form
2834      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2835      If we find such a factor, we can multiply by t using an algorithm that
2836      multiplies by q, shift the result by m and add/subtract it to itself.
2837
2838      We search for large factors first and loop down, even if large factors
2839      are less probable than small; if we find a large factor we will find a
2840      good sequence quickly, and therefore be able to prune (by decreasing
2841      COST_LIMIT) the search.  */
2842
2843  do_alg_addsub_factor:
2844   for (m = floor_log2 (t - 1); m >= 2; m--)
2845     {
2846       unsigned HOST_WIDE_INT d;
2847
2848       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2849       if (t % d == 0 && t > d && m < maxm
2850           && (!cache_hit || cache_alg == alg_add_factor))
2851         {
2852           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2853           if (shiftadd_cost (speed, mode, m) <= op_cost)
2854             op_cost = shiftadd_cost (speed, mode, m);
2855
2856           op_latency = op_cost;
2857
2858
2859           new_limit.cost = best_cost.cost - op_cost;
2860           new_limit.latency = best_cost.latency - op_latency;
2861           synth_mult (alg_in, t / d, &new_limit, mode);
2862
2863           alg_in->cost.cost += op_cost;
2864           alg_in->cost.latency += op_latency;
2865           if (alg_in->cost.latency < op_cost)
2866             alg_in->cost.latency = op_cost;
2867           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2868             {
2869               best_cost = alg_in->cost;
2870               std::swap (alg_in, best_alg);
2871               best_alg->log[best_alg->ops] = m;
2872               best_alg->op[best_alg->ops] = alg_add_factor;
2873             }
2874           /* Other factors will have been taken care of in the recursion.  */
2875           break;
2876         }
2877
2878       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2879       if (t % d == 0 && t > d && m < maxm
2880           && (!cache_hit || cache_alg == alg_sub_factor))
2881         {
2882           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2883           if (shiftsub0_cost (speed, mode, m) <= op_cost)
2884             op_cost = shiftsub0_cost (speed, mode, m);
2885
2886           op_latency = op_cost;
2887
2888           new_limit.cost = best_cost.cost - op_cost;
2889           new_limit.latency = best_cost.latency - op_latency;
2890           synth_mult (alg_in, t / d, &new_limit, mode);
2891
2892           alg_in->cost.cost += op_cost;
2893           alg_in->cost.latency += op_latency;
2894           if (alg_in->cost.latency < op_cost)
2895             alg_in->cost.latency = op_cost;
2896           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2897             {
2898               best_cost = alg_in->cost;
2899               std::swap (alg_in, best_alg);
2900               best_alg->log[best_alg->ops] = m;
2901               best_alg->op[best_alg->ops] = alg_sub_factor;
2902             }
2903           break;
2904         }
2905     }
2906   if (cache_hit)
2907     goto done;
2908
2909   /* Try shift-and-add (load effective address) instructions,
2910      i.e. do a*3, a*5, a*9.  */
2911   if ((t & 1) != 0)
2912     {
2913     do_alg_add_t2_m:
2914       q = t - 1;
2915       q = q & -q;
2916       m = exact_log2 (q);
2917       if (m >= 0 && m < maxm)
2918         {
2919           op_cost = shiftadd_cost (speed, mode, m);
2920           new_limit.cost = best_cost.cost - op_cost;
2921           new_limit.latency = best_cost.latency - op_cost;
2922           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2923
2924           alg_in->cost.cost += op_cost;
2925           alg_in->cost.latency += op_cost;
2926           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2927             {
2928               best_cost = alg_in->cost;
2929               std::swap (alg_in, best_alg);
2930               best_alg->log[best_alg->ops] = m;
2931               best_alg->op[best_alg->ops] = alg_add_t2_m;
2932             }
2933         }
2934       if (cache_hit)
2935         goto done;
2936
2937     do_alg_sub_t2_m:
2938       q = t + 1;
2939       q = q & -q;
2940       m = exact_log2 (q);
2941       if (m >= 0 && m < maxm)
2942         {
2943           op_cost = shiftsub0_cost (speed, mode, m);
2944           new_limit.cost = best_cost.cost - op_cost;
2945           new_limit.latency = best_cost.latency - op_cost;
2946           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2947
2948           alg_in->cost.cost += op_cost;
2949           alg_in->cost.latency += op_cost;
2950           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2951             {
2952               best_cost = alg_in->cost;
2953               std::swap (alg_in, best_alg);
2954               best_alg->log[best_alg->ops] = m;
2955               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2956             }
2957         }
2958       if (cache_hit)
2959         goto done;
2960     }
2961
2962  done:
2963   /* If best_cost has not decreased, we have not found any algorithm.  */
2964   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2965     {
2966       /* We failed to find an algorithm.  Record alg_impossible for
2967          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2968          we are asked to find an algorithm for T within the same or
2969          lower COST_LIMIT, we can immediately return to the
2970          caller.  */
2971       entry_ptr->t = t;
2972       entry_ptr->mode = mode;
2973       entry_ptr->speed = speed;
2974       entry_ptr->alg = alg_impossible;
2975       entry_ptr->cost = *cost_limit;
2976       return;
2977     }
2978
2979   /* Cache the result.  */
2980   if (!cache_hit)
2981     {
2982       entry_ptr->t = t;
2983       entry_ptr->mode = mode;
2984       entry_ptr->speed = speed;
2985       entry_ptr->alg = best_alg->op[best_alg->ops];
2986       entry_ptr->cost.cost = best_cost.cost;
2987       entry_ptr->cost.latency = best_cost.latency;
2988     }
2989
2990   /* If we are getting a too long sequence for `struct algorithm'
2991      to record, make this search fail.  */
2992   if (best_alg->ops == MAX_BITS_PER_WORD)
2993     return;
2994
2995   /* Copy the algorithm from temporary space to the space at alg_out.
2996      We avoid using structure assignment because the majority of
2997      best_alg is normally undefined, and this is a critical function.  */
2998   alg_out->ops = best_alg->ops + 1;
2999   alg_out->cost = best_cost;
3000   memcpy (alg_out->op, best_alg->op,
3001           alg_out->ops * sizeof *alg_out->op);
3002   memcpy (alg_out->log, best_alg->log,
3003           alg_out->ops * sizeof *alg_out->log);
3004 }
3005 \f
3006 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
3007    Try three variations:
3008
3009        - a shift/add sequence based on VAL itself
3010        - a shift/add sequence based on -VAL, followed by a negation
3011        - a shift/add sequence based on VAL - 1, followed by an addition.
3012
3013    Return true if the cheapest of these cost less than MULT_COST,
3014    describing the algorithm in *ALG and final fixup in *VARIANT.  */
3015
3016 bool
3017 choose_mult_variant (machine_mode mode, HOST_WIDE_INT val,
3018                      struct algorithm *alg, enum mult_variant *variant,
3019                      int mult_cost)
3020 {
3021   struct algorithm alg2;
3022   struct mult_cost limit;
3023   int op_cost;
3024   bool speed = optimize_insn_for_speed_p ();
3025
3026   /* Fail quickly for impossible bounds.  */
3027   if (mult_cost < 0)
3028     return false;
3029
3030   /* Ensure that mult_cost provides a reasonable upper bound.
3031      Any constant multiplication can be performed with less
3032      than 2 * bits additions.  */
3033   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
3034   if (mult_cost > op_cost)
3035     mult_cost = op_cost;
3036
3037   *variant = basic_variant;
3038   limit.cost = mult_cost;
3039   limit.latency = mult_cost;
3040   synth_mult (alg, val, &limit, mode);
3041
3042   /* This works only if the inverted value actually fits in an
3043      `unsigned int' */
3044   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
3045     {
3046       op_cost = neg_cost (speed, mode);
3047       if (MULT_COST_LESS (&alg->cost, mult_cost))
3048         {
3049           limit.cost = alg->cost.cost - op_cost;
3050           limit.latency = alg->cost.latency - op_cost;
3051         }
3052       else
3053         {
3054           limit.cost = mult_cost - op_cost;
3055           limit.latency = mult_cost - op_cost;
3056         }
3057
3058       synth_mult (&alg2, -val, &limit, mode);
3059       alg2.cost.cost += op_cost;
3060       alg2.cost.latency += op_cost;
3061       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3062         *alg = alg2, *variant = negate_variant;
3063     }
3064
3065   /* This proves very useful for division-by-constant.  */
3066   op_cost = add_cost (speed, mode);
3067   if (MULT_COST_LESS (&alg->cost, mult_cost))
3068     {
3069       limit.cost = alg->cost.cost - op_cost;
3070       limit.latency = alg->cost.latency - op_cost;
3071     }
3072   else
3073     {
3074       limit.cost = mult_cost - op_cost;
3075       limit.latency = mult_cost - op_cost;
3076     }
3077
3078   synth_mult (&alg2, val - 1, &limit, mode);
3079   alg2.cost.cost += op_cost;
3080   alg2.cost.latency += op_cost;
3081   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
3082     *alg = alg2, *variant = add_variant;
3083
3084   return MULT_COST_LESS (&alg->cost, mult_cost);
3085 }
3086
3087 /* A subroutine of expand_mult, used for constant multiplications.
3088    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
3089    convenient.  Use the shift/add sequence described by ALG and apply
3090    the final fixup specified by VARIANT.  */
3091
3092 static rtx
3093 expand_mult_const (machine_mode mode, rtx op0, HOST_WIDE_INT val,
3094                    rtx target, const struct algorithm *alg,
3095                    enum mult_variant variant)
3096 {
3097   HOST_WIDE_INT val_so_far;
3098   rtx_insn *insn;
3099   rtx accum, tem;
3100   int opno;
3101   machine_mode nmode;
3102
3103   /* Avoid referencing memory over and over and invalid sharing
3104      on SUBREGs.  */
3105   op0 = force_reg (mode, op0);
3106
3107   /* ACCUM starts out either as OP0 or as a zero, depending on
3108      the first operation.  */
3109
3110   if (alg->op[0] == alg_zero)
3111     {
3112       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
3113       val_so_far = 0;
3114     }
3115   else if (alg->op[0] == alg_m)
3116     {
3117       accum = copy_to_mode_reg (mode, op0);
3118       val_so_far = 1;
3119     }
3120   else
3121     gcc_unreachable ();
3122
3123   for (opno = 1; opno < alg->ops; opno++)
3124     {
3125       int log = alg->log[opno];
3126       rtx shift_subtarget = optimize ? 0 : accum;
3127       rtx add_target
3128         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
3129            && !optimize)
3130           ? target : 0;
3131       rtx accum_target = optimize ? 0 : accum;
3132       rtx accum_inner;
3133
3134       switch (alg->op[opno])
3135         {
3136         case alg_shift:
3137           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3138           /* REG_EQUAL note will be attached to the following insn.  */
3139           emit_move_insn (accum, tem);
3140           val_so_far <<= log;
3141           break;
3142
3143         case alg_add_t_m2:
3144           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3145           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3146                                  add_target ? add_target : accum_target);
3147           val_so_far += (HOST_WIDE_INT) 1 << log;
3148           break;
3149
3150         case alg_sub_t_m2:
3151           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3152           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3153                                  add_target ? add_target : accum_target);
3154           val_so_far -= (HOST_WIDE_INT) 1 << log;
3155           break;
3156
3157         case alg_add_t2_m:
3158           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3159                                 log, shift_subtarget, 0);
3160           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3161                                  add_target ? add_target : accum_target);
3162           val_so_far = (val_so_far << log) + 1;
3163           break;
3164
3165         case alg_sub_t2_m:
3166           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3167                                 log, shift_subtarget, 0);
3168           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3169                                  add_target ? add_target : accum_target);
3170           val_so_far = (val_so_far << log) - 1;
3171           break;
3172
3173         case alg_add_factor:
3174           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3175           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3176                                  add_target ? add_target : accum_target);
3177           val_so_far += val_so_far << log;
3178           break;
3179
3180         case alg_sub_factor:
3181           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3182           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3183                                  (add_target
3184                                   ? add_target : (optimize ? 0 : tem)));
3185           val_so_far = (val_so_far << log) - val_so_far;
3186           break;
3187
3188         default:
3189           gcc_unreachable ();
3190         }
3191
3192       if (SCALAR_INT_MODE_P (mode))
3193         {
3194           /* Write a REG_EQUAL note on the last insn so that we can cse
3195              multiplication sequences.  Note that if ACCUM is a SUBREG,
3196              we've set the inner register and must properly indicate that.  */
3197           tem = op0, nmode = mode;
3198           accum_inner = accum;
3199           if (GET_CODE (accum) == SUBREG)
3200             {
3201               accum_inner = SUBREG_REG (accum);
3202               nmode = GET_MODE (accum_inner);
3203               tem = gen_lowpart (nmode, op0);
3204             }
3205
3206           insn = get_last_insn ();
3207           set_dst_reg_note (insn, REG_EQUAL,
3208                             gen_rtx_MULT (nmode, tem,
3209                                           gen_int_mode (val_so_far, nmode)),
3210                             accum_inner);
3211         }
3212     }
3213
3214   if (variant == negate_variant)
3215     {
3216       val_so_far = -val_so_far;
3217       accum = expand_unop (mode, neg_optab, accum, target, 0);
3218     }
3219   else if (variant == add_variant)
3220     {
3221       val_so_far = val_so_far + 1;
3222       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3223     }
3224
3225   /* Compare only the bits of val and val_so_far that are significant
3226      in the result mode, to avoid sign-/zero-extension confusion.  */
3227   nmode = GET_MODE_INNER (mode);
3228   val &= GET_MODE_MASK (nmode);
3229   val_so_far &= GET_MODE_MASK (nmode);
3230   gcc_assert (val == val_so_far);
3231
3232   return accum;
3233 }
3234
3235 /* Perform a multiplication and return an rtx for the result.
3236    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3237    TARGET is a suggestion for where to store the result (an rtx).
3238
3239    We check specially for a constant integer as OP1.
3240    If you want this check for OP0 as well, then before calling
3241    you should swap the two operands if OP0 would be constant.  */
3242
3243 rtx
3244 expand_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3245              int unsignedp)
3246 {
3247   enum mult_variant variant;
3248   struct algorithm algorithm;
3249   rtx scalar_op1;
3250   int max_cost;
3251   bool speed = optimize_insn_for_speed_p ();
3252   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3253
3254   if (CONSTANT_P (op0))
3255     std::swap (op0, op1);
3256
3257   /* For vectors, there are several simplifications that can be made if
3258      all elements of the vector constant are identical.  */
3259   scalar_op1 = unwrap_const_vec_duplicate (op1);
3260
3261   if (INTEGRAL_MODE_P (mode))
3262     {
3263       rtx fake_reg;
3264       HOST_WIDE_INT coeff;
3265       bool is_neg;
3266       int mode_bitsize;
3267
3268       if (op1 == CONST0_RTX (mode))
3269         return op1;
3270       if (op1 == CONST1_RTX (mode))
3271         return op0;
3272       if (op1 == CONSTM1_RTX (mode))
3273         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3274                             op0, target, 0);
3275
3276       if (do_trapv)
3277         goto skip_synth;
3278
3279       /* If mode is integer vector mode, check if the backend supports
3280          vector lshift (by scalar or vector) at all.  If not, we can't use
3281          synthetized multiply.  */
3282       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
3283           && optab_handler (vashl_optab, mode) == CODE_FOR_nothing
3284           && optab_handler (ashl_optab, mode) == CODE_FOR_nothing)
3285         goto skip_synth;
3286
3287       /* These are the operations that are potentially turned into
3288          a sequence of shifts and additions.  */
3289       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3290
3291       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3292          less than or equal in size to `unsigned int' this doesn't matter.
3293          If the mode is larger than `unsigned int', then synth_mult works
3294          only if the constant value exactly fits in an `unsigned int' without
3295          any truncation.  This means that multiplying by negative values does
3296          not work; results are off by 2^32 on a 32 bit machine.  */
3297       if (CONST_INT_P (scalar_op1))
3298         {
3299           coeff = INTVAL (scalar_op1);
3300           is_neg = coeff < 0;
3301         }
3302 #if TARGET_SUPPORTS_WIDE_INT
3303       else if (CONST_WIDE_INT_P (scalar_op1))
3304 #else
3305       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3306 #endif
3307         {
3308           int shift = wi::exact_log2 (std::make_pair (scalar_op1, mode));
3309           /* Perfect power of 2 (other than 1, which is handled above).  */
3310           if (shift > 0)
3311             return expand_shift (LSHIFT_EXPR, mode, op0,
3312                                  shift, target, unsignedp);
3313           else
3314             goto skip_synth;
3315         }
3316       else
3317         goto skip_synth;
3318
3319       /* We used to test optimize here, on the grounds that it's better to
3320          produce a smaller program when -O is not used.  But this causes
3321          such a terrible slowdown sometimes that it seems better to always
3322          use synth_mult.  */
3323
3324       /* Special case powers of two.  */
3325       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff)
3326           && !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
3327         return expand_shift (LSHIFT_EXPR, mode, op0,
3328                              floor_log2 (coeff), target, unsignedp);
3329
3330       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3331
3332       /* Attempt to handle multiplication of DImode values by negative
3333          coefficients, by performing the multiplication by a positive
3334          multiplier and then inverting the result.  */
3335       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3336         {
3337           /* Its safe to use -coeff even for INT_MIN, as the
3338              result is interpreted as an unsigned coefficient.
3339              Exclude cost of op0 from max_cost to match the cost
3340              calculation of the synth_mult.  */
3341           coeff = -(unsigned HOST_WIDE_INT) coeff;
3342           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1),
3343                                     mode, speed)
3344                       - neg_cost (speed, mode));
3345           if (max_cost <= 0)
3346             goto skip_synth;
3347
3348           /* Special case powers of two.  */
3349           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3350             {
3351               rtx temp = expand_shift (LSHIFT_EXPR, mode, op0,
3352                                        floor_log2 (coeff), target, unsignedp);
3353               return expand_unop (mode, neg_optab, temp, target, 0);
3354             }
3355
3356           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3357                                    max_cost))
3358             {
3359               rtx temp = expand_mult_const (mode, op0, coeff, NULL_RTX,
3360                                             &algorithm, variant);
3361               return expand_unop (mode, neg_optab, temp, target, 0);
3362             }
3363           goto skip_synth;
3364         }
3365
3366       /* Exclude cost of op0 from max_cost to match the cost
3367          calculation of the synth_mult.  */
3368       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), mode, speed);
3369       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3370         return expand_mult_const (mode, op0, coeff, target,
3371                                   &algorithm, variant);
3372     }
3373  skip_synth:
3374
3375   /* Expand x*2.0 as x+x.  */
3376   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1)
3377       && real_equal (CONST_DOUBLE_REAL_VALUE (scalar_op1), &dconst2))
3378     {
3379       op0 = force_reg (GET_MODE (op0), op0);
3380       return expand_binop (mode, add_optab, op0, op0,
3381                            target, unsignedp, OPTAB_LIB_WIDEN);
3382     }
3383
3384   /* This used to use umul_optab if unsigned, but for non-widening multiply
3385      there is no difference between signed and unsigned.  */
3386   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3387                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3388   gcc_assert (op0);
3389   return op0;
3390 }
3391
3392 /* Return a cost estimate for multiplying a register by the given
3393    COEFFicient in the given MODE and SPEED.  */
3394
3395 int
3396 mult_by_coeff_cost (HOST_WIDE_INT coeff, machine_mode mode, bool speed)
3397 {
3398   int max_cost;
3399   struct algorithm algorithm;
3400   enum mult_variant variant;
3401
3402   rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3403   max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg),
3404                            mode, speed);
3405   if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3406     return algorithm.cost.cost;
3407   else
3408     return max_cost;
3409 }
3410
3411 /* Perform a widening multiplication and return an rtx for the result.
3412    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3413    TARGET is a suggestion for where to store the result (an rtx).
3414    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3415    or smul_widen_optab.
3416
3417    We check specially for a constant integer as OP1, comparing the
3418    cost of a widening multiply against the cost of a sequence of shifts
3419    and adds.  */
3420
3421 rtx
3422 expand_widening_mult (machine_mode mode, rtx op0, rtx op1, rtx target,
3423                       int unsignedp, optab this_optab)
3424 {
3425   bool speed = optimize_insn_for_speed_p ();
3426   rtx cop1;
3427
3428   if (CONST_INT_P (op1)
3429       && GET_MODE (op0) != VOIDmode
3430       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3431                                 this_optab == umul_widen_optab))
3432       && CONST_INT_P (cop1)
3433       && (INTVAL (cop1) >= 0
3434           || HWI_COMPUTABLE_MODE_P (mode)))
3435     {
3436       HOST_WIDE_INT coeff = INTVAL (cop1);
3437       int max_cost;
3438       enum mult_variant variant;
3439       struct algorithm algorithm;
3440
3441       if (coeff == 0)
3442         return CONST0_RTX (mode);
3443
3444       /* Special case powers of two.  */
3445       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3446         {
3447           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3448           return expand_shift (LSHIFT_EXPR, mode, op0,
3449                                floor_log2 (coeff), target, unsignedp);
3450         }
3451
3452       /* Exclude cost of op0 from max_cost to match the cost
3453          calculation of the synth_mult.  */
3454       max_cost = mul_widen_cost (speed, mode);
3455       if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3456                                max_cost))
3457         {
3458           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3459           return expand_mult_const (mode, op0, coeff, target,
3460                                     &algorithm, variant);
3461         }
3462     }
3463   return expand_binop (mode, this_optab, op0, op1, target,
3464                        unsignedp, OPTAB_LIB_WIDEN);
3465 }
3466 \f
3467 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3468    replace division by D, and put the least significant N bits of the result
3469    in *MULTIPLIER_PTR and return the most significant bit.
3470
3471    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3472    needed precision is in PRECISION (should be <= N).
3473
3474    PRECISION should be as small as possible so this function can choose
3475    multiplier more freely.
3476
3477    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3478    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3479
3480    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3481    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3482
3483 unsigned HOST_WIDE_INT
3484 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3485                    unsigned HOST_WIDE_INT *multiplier_ptr,
3486                    int *post_shift_ptr, int *lgup_ptr)
3487 {
3488   int lgup, post_shift;
3489   int pow, pow2;
3490
3491   /* lgup = ceil(log2(divisor)); */
3492   lgup = ceil_log2 (d);
3493
3494   gcc_assert (lgup <= n);
3495
3496   pow = n + lgup;
3497   pow2 = n + lgup - precision;
3498
3499   /* mlow = 2^(N + lgup)/d */
3500   wide_int val = wi::set_bit_in_zero (pow, HOST_BITS_PER_DOUBLE_INT);
3501   wide_int mlow = wi::udiv_trunc (val, d);
3502
3503   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3504   val |= wi::set_bit_in_zero (pow2, HOST_BITS_PER_DOUBLE_INT);
3505   wide_int mhigh = wi::udiv_trunc (val, d);
3506
3507   /* If precision == N, then mlow, mhigh exceed 2^N
3508      (but they do not exceed 2^(N+1)).  */
3509
3510   /* Reduce to lowest terms.  */
3511   for (post_shift = lgup; post_shift > 0; post_shift--)
3512     {
3513       unsigned HOST_WIDE_INT ml_lo = wi::extract_uhwi (mlow, 1,
3514                                                        HOST_BITS_PER_WIDE_INT);
3515       unsigned HOST_WIDE_INT mh_lo = wi::extract_uhwi (mhigh, 1,
3516                                                        HOST_BITS_PER_WIDE_INT);
3517       if (ml_lo >= mh_lo)
3518         break;
3519
3520       mlow = wi::uhwi (ml_lo, HOST_BITS_PER_DOUBLE_INT);
3521       mhigh = wi::uhwi (mh_lo, HOST_BITS_PER_DOUBLE_INT);
3522     }
3523
3524   *post_shift_ptr = post_shift;
3525   *lgup_ptr = lgup;
3526   if (n < HOST_BITS_PER_WIDE_INT)
3527     {
3528       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3529       *multiplier_ptr = mhigh.to_uhwi () & mask;
3530       return mhigh.to_uhwi () >= mask;
3531     }
3532   else
3533     {
3534       *multiplier_ptr = mhigh.to_uhwi ();
3535       return wi::extract_uhwi (mhigh, HOST_BITS_PER_WIDE_INT, 1);
3536     }
3537 }
3538
3539 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3540    congruent to 1 (mod 2**N).  */
3541
3542 static unsigned HOST_WIDE_INT
3543 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3544 {
3545   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3546
3547   /* The algorithm notes that the choice y = x satisfies
3548      x*y == 1 mod 2^3, since x is assumed odd.
3549      Each iteration doubles the number of bits of significance in y.  */
3550
3551   unsigned HOST_WIDE_INT mask;
3552   unsigned HOST_WIDE_INT y = x;
3553   int nbit = 3;
3554
3555   mask = (n == HOST_BITS_PER_WIDE_INT
3556           ? ~(unsigned HOST_WIDE_INT) 0
3557           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3558
3559   while (nbit < n)
3560     {
3561       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3562       nbit *= 2;
3563     }
3564   return y;
3565 }
3566
3567 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3568    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3569    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3570    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3571    become signed.
3572
3573    The result is put in TARGET if that is convenient.
3574
3575    MODE is the mode of operation.  */
3576
3577 rtx
3578 expand_mult_highpart_adjust (machine_mode mode, rtx adj_operand, rtx op0,
3579                              rtx op1, rtx target, int unsignedp)
3580 {
3581   rtx tem;
3582   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3583
3584   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3585                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3586   tem = expand_and (mode, tem, op1, NULL_RTX);
3587   adj_operand
3588     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3589                      adj_operand);
3590
3591   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3592                       GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3593   tem = expand_and (mode, tem, op0, NULL_RTX);
3594   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3595                           target);
3596
3597   return target;
3598 }
3599
3600 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
3601
3602 static rtx
3603 extract_high_half (machine_mode mode, rtx op)
3604 {
3605   machine_mode wider_mode;
3606
3607   if (mode == word_mode)
3608     return gen_highpart (mode, op);
3609
3610   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3611
3612   wider_mode = GET_MODE_WIDER_MODE (mode);
3613   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3614                      GET_MODE_BITSIZE (mode), 0, 1);
3615   return convert_modes (mode, wider_mode, op, 0);
3616 }
3617
3618 /* Like expmed_mult_highpart, but only consider using a multiplication
3619    optab.  OP1 is an rtx for the constant operand.  */
3620
3621 static rtx
3622 expmed_mult_highpart_optab (machine_mode mode, rtx op0, rtx op1,
3623                             rtx target, int unsignedp, int max_cost)
3624 {
3625   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3626   machine_mode wider_mode;
3627   optab moptab;
3628   rtx tem;
3629   int size;
3630   bool speed = optimize_insn_for_speed_p ();
3631
3632   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3633
3634   wider_mode = GET_MODE_WIDER_MODE (mode);
3635   size = GET_MODE_BITSIZE (mode);
3636
3637   /* Firstly, try using a multiplication insn that only generates the needed
3638      high part of the product, and in the sign flavor of unsignedp.  */
3639   if (mul_highpart_cost (speed, mode) < max_cost)
3640     {
3641       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3642       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3643                           unsignedp, OPTAB_DIRECT);
3644       if (tem)
3645         return tem;
3646     }
3647
3648   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3649      Need to adjust the result after the multiplication.  */
3650   if (size - 1 < BITS_PER_WORD
3651       && (mul_highpart_cost (speed, mode)
3652           + 2 * shift_cost (speed, mode, size-1)
3653           + 4 * add_cost (speed, mode) < max_cost))
3654     {
3655       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3656       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3657                           unsignedp, OPTAB_DIRECT);
3658       if (tem)
3659         /* We used the wrong signedness.  Adjust the result.  */
3660         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3661                                             tem, unsignedp);
3662     }
3663
3664   /* Try widening multiplication.  */
3665   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3666   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3667       && mul_widen_cost (speed, wider_mode) < max_cost)
3668     {
3669       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3670                           unsignedp, OPTAB_WIDEN);
3671       if (tem)
3672         return extract_high_half (mode, tem);
3673     }
3674
3675   /* Try widening the mode and perform a non-widening multiplication.  */
3676   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3677       && size - 1 < BITS_PER_WORD
3678       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3679           < max_cost))
3680     {
3681       rtx_insn *insns;
3682       rtx wop0, wop1;
3683
3684       /* We need to widen the operands, for example to ensure the
3685          constant multiplier is correctly sign or zero extended.
3686          Use a sequence to clean-up any instructions emitted by
3687          the conversions if things don't work out.  */
3688       start_sequence ();
3689       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3690       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3691       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3692                           unsignedp, OPTAB_WIDEN);
3693       insns = get_insns ();
3694       end_sequence ();
3695
3696       if (tem)
3697         {
3698           emit_insn (insns);
3699           return extract_high_half (mode, tem);
3700         }
3701     }
3702
3703   /* Try widening multiplication of opposite signedness, and adjust.  */
3704   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3705   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3706       && size - 1 < BITS_PER_WORD
3707       && (mul_widen_cost (speed, wider_mode)
3708           + 2 * shift_cost (speed, mode, size-1)
3709           + 4 * add_cost (speed, mode) < max_cost))
3710     {
3711       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3712                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3713       if (tem != 0)
3714         {
3715           tem = extract_high_half (mode, tem);
3716           /* We used the wrong signedness.  Adjust the result.  */
3717           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3718                                               target, unsignedp);
3719         }
3720     }
3721
3722   return 0;
3723 }
3724
3725 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3726    putting the high half of the result in TARGET if that is convenient,
3727    and return where the result is.  If the operation can not be performed,
3728    0 is returned.
3729
3730    MODE is the mode of operation and result.
3731
3732    UNSIGNEDP nonzero means unsigned multiply.
3733
3734    MAX_COST is the total allowed cost for the expanded RTL.  */
3735
3736 static rtx
3737 expmed_mult_highpart (machine_mode mode, rtx op0, rtx op1,
3738                       rtx target, int unsignedp, int max_cost)
3739 {
3740   machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3741   unsigned HOST_WIDE_INT cnst1;
3742   int extra_cost;
3743   bool sign_adjust = false;
3744   enum mult_variant variant;
3745   struct algorithm alg;
3746   rtx tem;
3747   bool speed = optimize_insn_for_speed_p ();
3748
3749   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3750   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3751   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3752
3753   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3754
3755   /* We can't optimize modes wider than BITS_PER_WORD.
3756      ??? We might be able to perform double-word arithmetic if
3757      mode == word_mode, however all the cost calculations in
3758      synth_mult etc. assume single-word operations.  */
3759   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3760     return expmed_mult_highpart_optab (mode, op0, op1, target,
3761                                        unsignedp, max_cost);
3762
3763   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3764
3765   /* Check whether we try to multiply by a negative constant.  */
3766   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3767     {
3768       sign_adjust = true;
3769       extra_cost += add_cost (speed, mode);
3770     }
3771
3772   /* See whether shift/add multiplication is cheap enough.  */
3773   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3774                            max_cost - extra_cost))
3775     {
3776       /* See whether the specialized multiplication optabs are
3777          cheaper than the shift/add version.  */
3778       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3779                                         alg.cost.cost + extra_cost);
3780       if (tem)
3781         return tem;
3782
3783       tem = convert_to_mode (wider_mode, op0, unsignedp);
3784       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3785       tem = extract_high_half (mode, tem);
3786
3787       /* Adjust result for signedness.  */
3788       if (sign_adjust)
3789         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3790
3791       return tem;
3792     }
3793   return expmed_mult_highpart_optab (mode, op0, op1, target,
3794                                      unsignedp, max_cost);
3795 }
3796
3797
3798 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3799
3800 static rtx
3801 expand_smod_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3802 {
3803   rtx result, temp, shift;
3804   rtx_code_label *label;
3805   int logd;
3806   int prec = GET_MODE_PRECISION (mode);
3807
3808   logd = floor_log2 (d);
3809   result = gen_reg_rtx (mode);
3810
3811   /* Avoid conditional branches when they're expensive.  */
3812   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3813       && optimize_insn_for_speed_p ())
3814     {
3815       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3816                                       mode, 0, -1);
3817       if (signmask)
3818         {
3819           HOST_WIDE_INT masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3820           signmask = force_reg (mode, signmask);
3821           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3822
3823           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3824              which instruction sequence to use.  If logical right shifts
3825              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3826              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3827
3828           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3829           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3830               || (set_src_cost (temp, mode, optimize_insn_for_speed_p ())
3831                   > COSTS_N_INSNS (2)))
3832             {
3833               temp = expand_binop (mode, xor_optab, op0, signmask,
3834                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3835               temp = expand_binop (mode, sub_optab, temp, signmask,
3836                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3837               temp = expand_binop (mode, and_optab, temp,
3838                                    gen_int_mode (masklow, mode),
3839                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3840               temp = expand_binop (mode, xor_optab, temp, signmask,
3841                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3842               temp = expand_binop (mode, sub_optab, temp, signmask,
3843                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3844             }
3845           else
3846             {
3847               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3848                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3849               signmask = force_reg (mode, signmask);
3850
3851               temp = expand_binop (mode, add_optab, op0, signmask,
3852                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3853               temp = expand_binop (mode, and_optab, temp,
3854                                    gen_int_mode (masklow, mode),
3855                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3856               temp = expand_binop (mode, sub_optab, temp, signmask,
3857                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3858             }
3859           return temp;
3860         }
3861     }
3862
3863   /* Mask contains the mode's signbit and the significant bits of the
3864      modulus.  By including the signbit in the operation, many targets
3865      can avoid an explicit compare operation in the following comparison
3866      against zero.  */
3867   wide_int mask = wi::mask (logd, false, prec);
3868   mask = wi::set_bit (mask, prec - 1);
3869
3870   temp = expand_binop (mode, and_optab, op0,
3871                        immed_wide_int_const (mask, mode),
3872                        result, 1, OPTAB_LIB_WIDEN);
3873   if (temp != result)
3874     emit_move_insn (result, temp);
3875
3876   label = gen_label_rtx ();
3877   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3878
3879   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3880                        0, OPTAB_LIB_WIDEN);
3881
3882   mask = wi::mask (logd, true, prec);
3883   temp = expand_binop (mode, ior_optab, temp,
3884                        immed_wide_int_const (mask, mode),
3885                        result, 1, OPTAB_LIB_WIDEN);
3886   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3887                        0, OPTAB_LIB_WIDEN);
3888   if (temp != result)
3889     emit_move_insn (result, temp);
3890   emit_label (label);
3891   return result;
3892 }
3893
3894 /* Expand signed division of OP0 by a power of two D in mode MODE.
3895    This routine is only called for positive values of D.  */
3896
3897 static rtx
3898 expand_sdiv_pow2 (machine_mode mode, rtx op0, HOST_WIDE_INT d)
3899 {
3900   rtx temp;
3901   rtx_code_label *label;
3902   int logd;
3903
3904   logd = floor_log2 (d);
3905
3906   if (d == 2
3907       && BRANCH_COST (optimize_insn_for_speed_p (),
3908                       false) >= 1)
3909     {
3910       temp = gen_reg_rtx (mode);
3911       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3912       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3913                            0, OPTAB_LIB_WIDEN);
3914       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3915     }
3916
3917   if (HAVE_conditional_move
3918       && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3919     {
3920       rtx temp2;
3921
3922       start_sequence ();
3923       temp2 = copy_to_mode_reg (mode, op0);
3924       temp = expand_binop (mode, add_optab, temp2, gen_int_mode (d - 1, mode),
3925                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3926       temp = force_reg (mode, temp);
3927
3928       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3929       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3930                                      mode, temp, temp2, mode, 0);
3931       if (temp2)
3932         {
3933           rtx_insn *seq = get_insns ();
3934           end_sequence ();
3935           emit_insn (seq);
3936           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3937         }
3938       end_sequence ();
3939     }
3940
3941   if (BRANCH_COST (optimize_insn_for_speed_p (),
3942                    false) >= 2)
3943     {
3944       int ushift = GET_MODE_BITSIZE (mode) - logd;
3945
3946       temp = gen_reg_rtx (mode);
3947       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3948       if (GET_MODE_BITSIZE (mode) >= BITS_PER_WORD
3949           || shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3950              > COSTS_N_INSNS (1))
3951         temp = expand_binop (mode, and_optab, temp, gen_int_mode (d - 1, mode),
3952                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3953       else
3954         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3955                              ushift, NULL_RTX, 1);
3956       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3957                            0, OPTAB_LIB_WIDEN);
3958       return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3959     }
3960
3961   label = gen_label_rtx ();
3962   temp = copy_to_mode_reg (mode, op0);
3963   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3964   expand_inc (temp, gen_int_mode (d - 1, mode));
3965   emit_label (label);
3966   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3967 }
3968 \f
3969 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3970    if that is convenient, and returning where the result is.
3971    You may request either the quotient or the remainder as the result;
3972    specify REM_FLAG nonzero to get the remainder.
3973
3974    CODE is the expression code for which kind of division this is;
3975    it controls how rounding is done.  MODE is the machine mode to use.
3976    UNSIGNEDP nonzero means do unsigned division.  */
3977
3978 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3979    and then correct it by or'ing in missing high bits
3980    if result of ANDI is nonzero.
3981    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3982    This could optimize to a bfexts instruction.
3983    But C doesn't use these operations, so their optimizations are
3984    left for later.  */
3985 /* ??? For modulo, we don't actually need the highpart of the first product,
3986    the low part will do nicely.  And for small divisors, the second multiply
3987    can also be a low-part only multiply or even be completely left out.
3988    E.g. to calculate the remainder of a division by 3 with a 32 bit
3989    multiply, multiply with 0x55555556 and extract the upper two bits;
3990    the result is exact for inputs up to 0x1fffffff.
3991    The input range can be reduced by using cross-sum rules.
3992    For odd divisors >= 3, the following table gives right shift counts
3993    so that if a number is shifted by an integer multiple of the given
3994    amount, the remainder stays the same:
3995    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3996    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3997    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3998    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3999    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
4000
4001    Cross-sum rules for even numbers can be derived by leaving as many bits
4002    to the right alone as the divisor has zeros to the right.
4003    E.g. if x is an unsigned 32 bit number:
4004    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
4005    */
4006
4007 rtx
4008 expand_divmod (int rem_flag, enum tree_code code, machine_mode mode,
4009                rtx op0, rtx op1, rtx target, int unsignedp)
4010 {
4011   machine_mode compute_mode;
4012   rtx tquotient;
4013   rtx quotient = 0, remainder = 0;
4014   rtx_insn *last;
4015   int size;
4016   rtx_insn *insn;
4017   optab optab1, optab2;
4018   int op1_is_constant, op1_is_pow2 = 0;
4019   int max_cost, extra_cost;
4020   static HOST_WIDE_INT last_div_const = 0;
4021   bool speed = optimize_insn_for_speed_p ();
4022
4023   op1_is_constant = CONST_INT_P (op1);
4024   if (op1_is_constant)
4025     {
4026       wide_int ext_op1 = rtx_mode_t (op1, mode);
4027       op1_is_pow2 = (wi::popcount (ext_op1) == 1
4028                      || (! unsignedp
4029                          && wi::popcount (wi::neg (ext_op1)) == 1));
4030     }
4031
4032   /*
4033      This is the structure of expand_divmod:
4034
4035      First comes code to fix up the operands so we can perform the operations
4036      correctly and efficiently.
4037
4038      Second comes a switch statement with code specific for each rounding mode.
4039      For some special operands this code emits all RTL for the desired
4040      operation, for other cases, it generates only a quotient and stores it in
4041      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
4042      to indicate that it has not done anything.
4043
4044      Last comes code that finishes the operation.  If QUOTIENT is set and
4045      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
4046      QUOTIENT is not set, it is computed using trunc rounding.
4047
4048      We try to generate special code for division and remainder when OP1 is a
4049      constant.  If |OP1| = 2**n we can use shifts and some other fast
4050      operations.  For other values of OP1, we compute a carefully selected
4051      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
4052      by m.
4053
4054      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
4055      half of the product.  Different strategies for generating the product are
4056      implemented in expmed_mult_highpart.
4057
4058      If what we actually want is the remainder, we generate that by another
4059      by-constant multiplication and a subtraction.  */
4060
4061   /* We shouldn't be called with OP1 == const1_rtx, but some of the
4062      code below will malfunction if we are, so check here and handle
4063      the special case if so.  */
4064   if (op1 == const1_rtx)
4065     return rem_flag ? const0_rtx : op0;
4066
4067     /* When dividing by -1, we could get an overflow.
4068      negv_optab can handle overflows.  */
4069   if (! unsignedp && op1 == constm1_rtx)
4070     {
4071       if (rem_flag)
4072         return const0_rtx;
4073       return expand_unop (mode, flag_trapv && GET_MODE_CLASS (mode) == MODE_INT
4074                           ? negv_optab : neg_optab, op0, target, 0);
4075     }
4076
4077   if (target
4078       /* Don't use the function value register as a target
4079          since we have to read it as well as write it,
4080          and function-inlining gets confused by this.  */
4081       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
4082           /* Don't clobber an operand while doing a multi-step calculation.  */
4083           || ((rem_flag || op1_is_constant)
4084               && (reg_mentioned_p (target, op0)
4085                   || (MEM_P (op0) && MEM_P (target))))
4086           || reg_mentioned_p (target, op1)
4087           || (MEM_P (op1) && MEM_P (target))))
4088     target = 0;
4089
4090   /* Get the mode in which to perform this computation.  Normally it will
4091      be MODE, but sometimes we can't do the desired operation in MODE.
4092      If so, pick a wider mode in which we can do the operation.  Convert
4093      to that mode at the start to avoid repeated conversions.
4094
4095      First see what operations we need.  These depend on the expression
4096      we are evaluating.  (We assume that divxx3 insns exist under the
4097      same conditions that modxx3 insns and that these insns don't normally
4098      fail.  If these assumptions are not correct, we may generate less
4099      efficient code in some cases.)
4100
4101      Then see if we find a mode in which we can open-code that operation
4102      (either a division, modulus, or shift).  Finally, check for the smallest
4103      mode for which we can do the operation with a library call.  */
4104
4105   /* We might want to refine this now that we have division-by-constant
4106      optimization.  Since expmed_mult_highpart tries so many variants, it is
4107      not straightforward to generalize this.  Maybe we should make an array
4108      of possible modes in init_expmed?  Save this for GCC 2.7.  */
4109
4110   optab1 = (op1_is_pow2
4111             ? (unsignedp ? lshr_optab : ashr_optab)
4112             : (unsignedp ? udiv_optab : sdiv_optab));
4113   optab2 = (op1_is_pow2 ? optab1
4114             : (unsignedp ? udivmod_optab : sdivmod_optab));
4115
4116   for (compute_mode = mode; compute_mode != VOIDmode;
4117        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4118     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4119         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4120       break;
4121
4122   if (compute_mode == VOIDmode)
4123     for (compute_mode = mode; compute_mode != VOIDmode;
4124          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4125       if (optab_libfunc (optab1, compute_mode)
4126           || optab_libfunc (optab2, compute_mode))
4127         break;
4128
4129   /* If we still couldn't find a mode, use MODE, but expand_binop will
4130      probably die.  */
4131   if (compute_mode == VOIDmode)
4132     compute_mode = mode;
4133
4134   if (target && GET_MODE (target) == compute_mode)
4135     tquotient = target;
4136   else
4137     tquotient = gen_reg_rtx (compute_mode);
4138
4139   size = GET_MODE_BITSIZE (compute_mode);
4140 #if 0
4141   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4142      (mode), and thereby get better code when OP1 is a constant.  Do that
4143      later.  It will require going over all usages of SIZE below.  */
4144   size = GET_MODE_BITSIZE (mode);
4145 #endif
4146
4147   /* Only deduct something for a REM if the last divide done was
4148      for a different constant.   Then set the constant of the last
4149      divide.  */
4150   max_cost = (unsignedp
4151               ? udiv_cost (speed, compute_mode)
4152               : sdiv_cost (speed, compute_mode));
4153   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4154                      && INTVAL (op1) == last_div_const))
4155     max_cost -= (mul_cost (speed, compute_mode)
4156                  + add_cost (speed, compute_mode));
4157
4158   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4159
4160   /* Now convert to the best mode to use.  */
4161   if (compute_mode != mode)
4162     {
4163       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4164       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4165
4166       /* convert_modes may have placed op1 into a register, so we
4167          must recompute the following.  */
4168       op1_is_constant = CONST_INT_P (op1);
4169       if (op1_is_constant)
4170         {
4171           wide_int ext_op1 = rtx_mode_t (op1, compute_mode);
4172           op1_is_pow2 = (wi::popcount (ext_op1) == 1
4173                          || (! unsignedp
4174                              && wi::popcount (wi::neg (ext_op1)) == 1));
4175         }
4176       else
4177         op1_is_pow2 = 0;
4178     }
4179
4180   /* If one of the operands is a volatile MEM, copy it into a register.  */
4181
4182   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4183     op0 = force_reg (compute_mode, op0);
4184   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4185     op1 = force_reg (compute_mode, op1);
4186
4187   /* If we need the remainder or if OP1 is constant, we need to
4188      put OP0 in a register in case it has any queued subexpressions.  */
4189   if (rem_flag || op1_is_constant)
4190     op0 = force_reg (compute_mode, op0);
4191
4192   last = get_last_insn ();
4193
4194   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4195   if (unsignedp)
4196     {
4197       if (code == FLOOR_DIV_EXPR)
4198         code = TRUNC_DIV_EXPR;
4199       if (code == FLOOR_MOD_EXPR)
4200         code = TRUNC_MOD_EXPR;
4201       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4202         code = TRUNC_DIV_EXPR;
4203     }
4204
4205   if (op1 != const0_rtx)
4206     switch (code)
4207       {
4208       case TRUNC_MOD_EXPR:
4209       case TRUNC_DIV_EXPR:
4210         if (op1_is_constant)
4211           {
4212             if (unsignedp)
4213               {
4214                 unsigned HOST_WIDE_INT mh, ml;
4215                 int pre_shift, post_shift;
4216                 int dummy;
4217                 wide_int wd = rtx_mode_t (op1, compute_mode);
4218                 unsigned HOST_WIDE_INT d = wd.to_uhwi ();
4219
4220                 if (wi::popcount (wd) == 1)
4221                   {
4222                     pre_shift = floor_log2 (d);
4223                     if (rem_flag)
4224                       {
4225                         unsigned HOST_WIDE_INT mask
4226                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4227                         remainder
4228                           = expand_binop (compute_mode, and_optab, op0,
4229                                           gen_int_mode (mask, compute_mode),
4230                                           remainder, 1,
4231                                           OPTAB_LIB_WIDEN);
4232                         if (remainder)
4233                           return gen_lowpart (mode, remainder);
4234                       }
4235                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4236                                              pre_shift, tquotient, 1);
4237                   }
4238                 else if (size <= HOST_BITS_PER_WIDE_INT)
4239                   {
4240                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4241                       {
4242                         /* Most significant bit of divisor is set; emit an scc
4243                            insn.  */
4244                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4245                                                           compute_mode, 1, 1);
4246                       }
4247                     else
4248                       {
4249                         /* Find a suitable multiplier and right shift count
4250                            instead of multiplying with D.  */
4251
4252                         mh = choose_multiplier (d, size, size,
4253                                                 &ml, &post_shift, &dummy);
4254
4255                         /* If the suggested multiplier is more than SIZE bits,
4256                            we can do better for even divisors, using an
4257                            initial right shift.  */
4258                         if (mh != 0 && (d & 1) == 0)
4259                           {
4260                             pre_shift = floor_log2 (d & -d);
4261                             mh = choose_multiplier (d >> pre_shift, size,
4262                                                     size - pre_shift,
4263                                                     &ml, &post_shift, &dummy);
4264                             gcc_assert (!mh);
4265                           }
4266                         else
4267                           pre_shift = 0;
4268
4269                         if (mh != 0)
4270                           {
4271                             rtx t1, t2, t3, t4;
4272
4273                             if (post_shift - 1 >= BITS_PER_WORD)
4274                               goto fail1;
4275
4276                             extra_cost
4277                               = (shift_cost (speed, compute_mode, post_shift - 1)
4278                                  + shift_cost (speed, compute_mode, 1)
4279                                  + 2 * add_cost (speed, compute_mode));
4280                             t1 = expmed_mult_highpart
4281                               (compute_mode, op0,
4282                                gen_int_mode (ml, compute_mode),
4283                                NULL_RTX, 1, max_cost - extra_cost);
4284                             if (t1 == 0)
4285                               goto fail1;
4286                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4287                                                                op0, t1),
4288                                                 NULL_RTX);
4289                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4290                                                t2, 1, NULL_RTX, 1);
4291                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4292                                                               t1, t3),
4293                                                 NULL_RTX);
4294                             quotient = expand_shift
4295                               (RSHIFT_EXPR, compute_mode, t4,
4296                                post_shift - 1, tquotient, 1);
4297                           }
4298                         else
4299                           {
4300                             rtx t1, t2;
4301
4302                             if (pre_shift >= BITS_PER_WORD
4303                                 || post_shift >= BITS_PER_WORD)
4304                               goto fail1;
4305
4306                             t1 = expand_shift
4307                               (RSHIFT_EXPR, compute_mode, op0,
4308                                pre_shift, NULL_RTX, 1);
4309                             extra_cost
4310                               = (shift_cost (speed, compute_mode, pre_shift)
4311                                  + shift_cost (speed, compute_mode, post_shift));
4312                             t2 = expmed_mult_highpart
4313                               (compute_mode, t1,
4314                                gen_int_mode (ml, compute_mode),
4315                                NULL_RTX, 1, max_cost - extra_cost);
4316                             if (t2 == 0)
4317                               goto fail1;
4318                             quotient = expand_shift
4319                               (RSHIFT_EXPR, compute_mode, t2,
4320                                post_shift, tquotient, 1);
4321                           }
4322                       }
4323                   }
4324                 else            /* Too wide mode to use tricky code */
4325                   break;
4326
4327                 insn = get_last_insn ();
4328                 if (insn != last)
4329                   set_dst_reg_note (insn, REG_EQUAL,
4330                                     gen_rtx_UDIV (compute_mode, op0, op1),
4331                                     quotient);
4332               }
4333             else                /* TRUNC_DIV, signed */
4334               {
4335                 unsigned HOST_WIDE_INT ml;
4336                 int lgup, post_shift;
4337                 rtx mlr;
4338                 HOST_WIDE_INT d = INTVAL (op1);
4339                 unsigned HOST_WIDE_INT abs_d;
4340
4341                 /* Since d might be INT_MIN, we have to cast to
4342                    unsigned HOST_WIDE_INT before negating to avoid
4343                    undefined signed overflow.  */
4344                 abs_d = (d >= 0
4345                          ? (unsigned HOST_WIDE_INT) d
4346                          : - (unsigned HOST_WIDE_INT) d);
4347
4348                 /* n rem d = n rem -d */
4349                 if (rem_flag && d < 0)
4350                   {
4351                     d = abs_d;
4352                     op1 = gen_int_mode (abs_d, compute_mode);
4353                   }
4354
4355                 if (d == 1)
4356                   quotient = op0;
4357                 else if (d == -1)
4358                   quotient = expand_unop (compute_mode, neg_optab, op0,
4359                                           tquotient, 0);
4360                 else if (size <= HOST_BITS_PER_WIDE_INT
4361                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4362                   {
4363                     /* This case is not handled correctly below.  */
4364                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4365                                                 compute_mode, 1, 1);
4366                     if (quotient == 0)
4367                       goto fail1;
4368                   }
4369                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4370                          && (size <= HOST_BITS_PER_WIDE_INT || d >= 0)
4371                          && (rem_flag
4372                              ? smod_pow2_cheap (speed, compute_mode)
4373                              : sdiv_pow2_cheap (speed, compute_mode))
4374                          /* We assume that cheap metric is true if the
4375                             optab has an expander for this mode.  */
4376                          && ((optab_handler ((rem_flag ? smod_optab
4377                                               : sdiv_optab),
4378                                              compute_mode)
4379                               != CODE_FOR_nothing)
4380                              || (optab_handler (sdivmod_optab,
4381                                                 compute_mode)
4382                                  != CODE_FOR_nothing)))
4383                   ;
4384                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d)
4385                          && (size <= HOST_BITS_PER_WIDE_INT
4386                              || abs_d != (unsigned HOST_WIDE_INT) d))
4387                   {
4388                     if (rem_flag)
4389                       {
4390                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4391                         if (remainder)
4392                           return gen_lowpart (mode, remainder);
4393                       }
4394
4395                     if (sdiv_pow2_cheap (speed, compute_mode)
4396                         && ((optab_handler (sdiv_optab, compute_mode)
4397                              != CODE_FOR_nothing)
4398                             || (optab_handler (sdivmod_optab, compute_mode)
4399                                 != CODE_FOR_nothing)))
4400                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4401                                                 compute_mode, op0,
4402                                                 gen_int_mode (abs_d,
4403                                                               compute_mode),
4404                                                 NULL_RTX, 0);
4405                     else
4406                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4407
4408                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4409                        negate the quotient.  */
4410                     if (d < 0)
4411                       {
4412                         insn = get_last_insn ();
4413                         if (insn != last
4414                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4415                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4416                           set_dst_reg_note (insn, REG_EQUAL,
4417                                             gen_rtx_DIV (compute_mode, op0,
4418                                                          gen_int_mode
4419                                                            (abs_d,
4420                                                             compute_mode)),
4421                                             quotient);
4422
4423                         quotient = expand_unop (compute_mode, neg_optab,
4424                                                 quotient, quotient, 0);
4425                       }
4426                   }
4427                 else if (size <= HOST_BITS_PER_WIDE_INT)
4428                   {
4429                     choose_multiplier (abs_d, size, size - 1,
4430                                        &ml, &post_shift, &lgup);
4431                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4432                       {
4433                         rtx t1, t2, t3;
4434
4435                         if (post_shift >= BITS_PER_WORD
4436                             || size - 1 >= BITS_PER_WORD)
4437                           goto fail1;
4438
4439                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4440                                       + shift_cost (speed, compute_mode, size - 1)
4441                                       + add_cost (speed, compute_mode));
4442                         t1 = expmed_mult_highpart
4443                           (compute_mode, op0, gen_int_mode (ml, compute_mode),
4444                            NULL_RTX, 0, max_cost - extra_cost);
4445                         if (t1 == 0)
4446                           goto fail1;
4447                         t2 = expand_shift
4448                           (RSHIFT_EXPR, compute_mode, t1,
4449                            post_shift, NULL_RTX, 0);
4450                         t3 = expand_shift
4451                           (RSHIFT_EXPR, compute_mode, op0,
4452                            size - 1, NULL_RTX, 0);
4453                         if (d < 0)
4454                           quotient
4455                             = force_operand (gen_rtx_MINUS (compute_mode,
4456                                                             t3, t2),
4457                                              tquotient);
4458                         else
4459                           quotient
4460                             = force_operand (gen_rtx_MINUS (compute_mode,
4461                                                             t2, t3),
4462                                              tquotient);
4463                       }
4464                     else
4465                       {
4466                         rtx t1, t2, t3, t4;
4467
4468                         if (post_shift >= BITS_PER_WORD
4469                             || size - 1 >= BITS_PER_WORD)
4470                           goto fail1;
4471
4472                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4473                         mlr = gen_int_mode (ml, compute_mode);
4474                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4475                                       + shift_cost (speed, compute_mode, size - 1)
4476                                       + 2 * add_cost (speed, compute_mode));
4477                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4478                                                    NULL_RTX, 0,
4479                                                    max_cost - extra_cost);
4480                         if (t1 == 0)
4481                           goto fail1;
4482                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4483                                                           t1, op0),
4484                                             NULL_RTX);
4485                         t3 = expand_shift
4486                           (RSHIFT_EXPR, compute_mode, t2,
4487                            post_shift, NULL_RTX, 0);
4488                         t4 = expand_shift
4489                           (RSHIFT_EXPR, compute_mode, op0,
4490                            size - 1, NULL_RTX, 0);
4491                         if (d < 0)
4492                           quotient
4493                             = force_operand (gen_rtx_MINUS (compute_mode,
4494                                                             t4, t3),
4495                                              tquotient);
4496                         else
4497                           quotient
4498                             = force_operand (gen_rtx_MINUS (compute_mode,
4499                                                             t3, t4),
4500                                              tquotient);
4501                       }
4502                   }
4503                 else            /* Too wide mode to use tricky code */
4504                   break;
4505
4506                 insn = get_last_insn ();
4507                 if (insn != last)
4508                   set_dst_reg_note (insn, REG_EQUAL,
4509                                     gen_rtx_DIV (compute_mode, op0, op1),
4510                                     quotient);
4511               }
4512             break;
4513           }
4514       fail1:
4515         delete_insns_since (last);
4516         break;
4517
4518       case FLOOR_DIV_EXPR:
4519       case FLOOR_MOD_EXPR:
4520       /* We will come here only for signed operations.  */
4521         if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4522           {
4523             unsigned HOST_WIDE_INT mh, ml;
4524             int pre_shift, lgup, post_shift;
4525             HOST_WIDE_INT d = INTVAL (op1);
4526
4527             if (d > 0)
4528               {
4529                 /* We could just as easily deal with negative constants here,
4530                    but it does not seem worth the trouble for GCC 2.6.  */
4531                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4532                   {
4533                     pre_shift = floor_log2 (d);
4534                     if (rem_flag)
4535                       {
4536                         unsigned HOST_WIDE_INT mask
4537                           = ((unsigned HOST_WIDE_INT) 1 << pre_shift) - 1;
4538                         remainder = expand_binop
4539                           (compute_mode, and_optab, op0,
4540                            gen_int_mode (mask, compute_mode),
4541                            remainder, 0, OPTAB_LIB_WIDEN);
4542                         if (remainder)
4543                           return gen_lowpart (mode, remainder);
4544                       }
4545                     quotient = expand_shift
4546                       (RSHIFT_EXPR, compute_mode, op0,
4547                        pre_shift, tquotient, 0);
4548                   }
4549                 else
4550                   {
4551                     rtx t1, t2, t3, t4;
4552
4553                     mh = choose_multiplier (d, size, size - 1,
4554                                             &ml, &post_shift, &lgup);
4555                     gcc_assert (!mh);
4556
4557                     if (post_shift < BITS_PER_WORD
4558                         && size - 1 < BITS_PER_WORD)
4559                       {
4560                         t1 = expand_shift
4561                           (RSHIFT_EXPR, compute_mode, op0,
4562                            size - 1, NULL_RTX, 0);
4563                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4564                                            NULL_RTX, 0, OPTAB_WIDEN);
4565                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4566                                       + shift_cost (speed, compute_mode, size - 1)
4567                                       + 2 * add_cost (speed, compute_mode));
4568                         t3 = expmed_mult_highpart
4569                           (compute_mode, t2, gen_int_mode (ml, compute_mode),
4570                            NULL_RTX, 1, max_cost - extra_cost);
4571                         if (t3 != 0)
4572                           {
4573                             t4 = expand_shift
4574                               (RSHIFT_EXPR, compute_mode, t3,
4575                                post_shift, NULL_RTX, 1);
4576                             quotient = expand_binop (compute_mode, xor_optab,
4577                                                      t4, t1, tquotient, 0,
4578                                                      OPTAB_WIDEN);
4579                           }
4580                       }
4581                   }
4582               }
4583             else
4584               {
4585                 rtx nsign, t1, t2, t3, t4;
4586                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4587                                                   op0, constm1_rtx), NULL_RTX);
4588                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4589                                    0, OPTAB_WIDEN);
4590                 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
4591                                       size - 1, NULL_RTX, 0);
4592                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4593                                     NULL_RTX);
4594                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4595                                     NULL_RTX, 0);
4596                 if (t4)
4597                   {
4598                     rtx t5;
4599                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4600                                       NULL_RTX, 0);
4601                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4602                                                             t4, t5),
4603                                               tquotient);
4604                   }
4605               }
4606           }
4607
4608         if (quotient != 0)
4609           break;
4610         delete_insns_since (last);
4611
4612         /* Try using an instruction that produces both the quotient and
4613            remainder, using truncation.  We can easily compensate the quotient
4614            or remainder to get floor rounding, once we have the remainder.
4615            Notice that we compute also the final remainder value here,
4616            and return the result right away.  */
4617         if (target == 0 || GET_MODE (target) != compute_mode)
4618           target = gen_reg_rtx (compute_mode);
4619
4620         if (rem_flag)
4621           {
4622             remainder
4623               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4624             quotient = gen_reg_rtx (compute_mode);
4625           }
4626         else
4627           {
4628             quotient
4629               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4630             remainder = gen_reg_rtx (compute_mode);
4631           }
4632
4633         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4634                                  quotient, remainder, 0))
4635           {
4636             /* This could be computed with a branch-less sequence.
4637                Save that for later.  */
4638             rtx tem;
4639             rtx_code_label *label = gen_label_rtx ();
4640             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4641             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4642                                 NULL_RTX, 0, OPTAB_WIDEN);
4643             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4644             expand_dec (quotient, const1_rtx);
4645             expand_inc (remainder, op1);
4646             emit_label (label);
4647             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4648           }
4649
4650         /* No luck with division elimination or divmod.  Have to do it
4651            by conditionally adjusting op0 *and* the result.  */
4652         {
4653           rtx_code_label *label1, *label2, *label3, *label4, *label5;
4654           rtx adjusted_op0;
4655           rtx tem;
4656
4657           quotient = gen_reg_rtx (compute_mode);
4658           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4659           label1 = gen_label_rtx ();
4660           label2 = gen_label_rtx ();
4661           label3 = gen_label_rtx ();
4662           label4 = gen_label_rtx ();
4663           label5 = gen_label_rtx ();
4664           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4665           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4666           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4667                               quotient, 0, OPTAB_LIB_WIDEN);
4668           if (tem != quotient)
4669             emit_move_insn (quotient, tem);
4670           emit_jump_insn (targetm.gen_jump (label5));
4671           emit_barrier ();
4672           emit_label (label1);
4673           expand_inc (adjusted_op0, const1_rtx);
4674           emit_jump_insn (targetm.gen_jump (label4));
4675           emit_barrier ();
4676           emit_label (label2);
4677           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4678           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4679                               quotient, 0, OPTAB_LIB_WIDEN);
4680           if (tem != quotient)
4681             emit_move_insn (quotient, tem);
4682           emit_jump_insn (targetm.gen_jump (label5));
4683           emit_barrier ();
4684           emit_label (label3);
4685           expand_dec (adjusted_op0, const1_rtx);
4686           emit_label (label4);
4687           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4688                               quotient, 0, OPTAB_LIB_WIDEN);
4689           if (tem != quotient)
4690             emit_move_insn (quotient, tem);
4691           expand_dec (quotient, const1_rtx);
4692           emit_label (label5);
4693         }
4694         break;
4695
4696       case CEIL_DIV_EXPR:
4697       case CEIL_MOD_EXPR:
4698         if (unsignedp)
4699           {
4700             if (op1_is_constant
4701                 && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4702                 && (size <= HOST_BITS_PER_WIDE_INT
4703                     || INTVAL (op1) >= 0))
4704               {
4705                 rtx t1, t2, t3;
4706                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4707                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4708                                    floor_log2 (d), tquotient, 1);
4709                 t2 = expand_binop (compute_mode, and_optab, op0,
4710                                    gen_int_mode (d - 1, compute_mode),
4711                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4712                 t3 = gen_reg_rtx (compute_mode);
4713                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4714                                       compute_mode, 1, 1);
4715                 if (t3 == 0)
4716                   {
4717                     rtx_code_label *lab;
4718                     lab = gen_label_rtx ();
4719                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4720                     expand_inc (t1, const1_rtx);
4721                     emit_label (lab);
4722                     quotient = t1;
4723                   }
4724                 else
4725                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4726                                                           t1, t3),
4727                                             tquotient);
4728                 break;
4729               }
4730
4731             /* Try using an instruction that produces both the quotient and
4732                remainder, using truncation.  We can easily compensate the
4733                quotient or remainder to get ceiling rounding, once we have the
4734                remainder.  Notice that we compute also the final remainder
4735                value here, and return the result right away.  */
4736             if (target == 0 || GET_MODE (target) != compute_mode)
4737               target = gen_reg_rtx (compute_mode);
4738
4739             if (rem_flag)
4740               {
4741                 remainder = (REG_P (target)
4742                              ? target : gen_reg_rtx (compute_mode));
4743                 quotient = gen_reg_rtx (compute_mode);
4744               }
4745             else
4746               {
4747                 quotient = (REG_P (target)
4748                             ? target : gen_reg_rtx (compute_mode));
4749                 remainder = gen_reg_rtx (compute_mode);
4750               }
4751
4752             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4753                                      remainder, 1))
4754               {
4755                 /* This could be computed with a branch-less sequence.
4756                    Save that for later.  */
4757                 rtx_code_label *label = gen_label_rtx ();
4758                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4759                                  compute_mode, label);
4760                 expand_inc (quotient, const1_rtx);
4761                 expand_dec (remainder, op1);
4762                 emit_label (label);
4763                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4764               }
4765
4766             /* No luck with division elimination or divmod.  Have to do it
4767                by conditionally adjusting op0 *and* the result.  */
4768             {
4769               rtx_code_label *label1, *label2;
4770               rtx adjusted_op0, tem;
4771
4772               quotient = gen_reg_rtx (compute_mode);
4773               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4774               label1 = gen_label_rtx ();
4775               label2 = gen_label_rtx ();
4776               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4777                                compute_mode, label1);
4778               emit_move_insn  (quotient, const0_rtx);
4779               emit_jump_insn (targetm.gen_jump (label2));
4780               emit_barrier ();
4781               emit_label (label1);
4782               expand_dec (adjusted_op0, const1_rtx);
4783               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4784                                   quotient, 1, OPTAB_LIB_WIDEN);
4785               if (tem != quotient)
4786                 emit_move_insn (quotient, tem);
4787               expand_inc (quotient, const1_rtx);
4788               emit_label (label2);
4789             }
4790           }
4791         else /* signed */
4792           {
4793             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4794                 && INTVAL (op1) >= 0)
4795               {
4796                 /* This is extremely similar to the code for the unsigned case
4797                    above.  For 2.7 we should merge these variants, but for
4798                    2.6.1 I don't want to touch the code for unsigned since that
4799                    get used in C.  The signed case will only be used by other
4800                    languages (Ada).  */
4801
4802                 rtx t1, t2, t3;
4803                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4804                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4805                                    floor_log2 (d), tquotient, 0);
4806                 t2 = expand_binop (compute_mode, and_optab, op0,
4807                                    gen_int_mode (d - 1, compute_mode),
4808                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4809                 t3 = gen_reg_rtx (compute_mode);
4810                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4811                                       compute_mode, 1, 1);
4812                 if (t3 == 0)
4813                   {
4814                     rtx_code_label *lab;
4815                     lab = gen_label_rtx ();
4816                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4817                     expand_inc (t1, const1_rtx);
4818                     emit_label (lab);
4819                     quotient = t1;
4820                   }
4821                 else
4822                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4823                                                           t1, t3),
4824                                             tquotient);
4825                 break;
4826               }
4827
4828             /* Try using an instruction that produces both the quotient and
4829                remainder, using truncation.  We can easily compensate the
4830                quotient or remainder to get ceiling rounding, once we have the
4831                remainder.  Notice that we compute also the final remainder
4832                value here, and return the result right away.  */
4833             if (target == 0 || GET_MODE (target) != compute_mode)
4834               target = gen_reg_rtx (compute_mode);
4835             if (rem_flag)
4836               {
4837                 remainder= (REG_P (target)
4838                             ? target : gen_reg_rtx (compute_mode));
4839                 quotient = gen_reg_rtx (compute_mode);
4840               }
4841             else
4842               {
4843                 quotient = (REG_P (target)
4844                             ? target : gen_reg_rtx (compute_mode));
4845                 remainder = gen_reg_rtx (compute_mode);
4846               }
4847
4848             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4849                                      remainder, 0))
4850               {
4851                 /* This could be computed with a branch-less sequence.
4852                    Save that for later.  */
4853                 rtx tem;
4854                 rtx_code_label *label = gen_label_rtx ();
4855                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4856                                  compute_mode, label);
4857                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4858                                     NULL_RTX, 0, OPTAB_WIDEN);
4859                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4860                 expand_inc (quotient, const1_rtx);
4861                 expand_dec (remainder, op1);
4862                 emit_label (label);
4863                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4864               }
4865
4866             /* No luck with division elimination or divmod.  Have to do it
4867                by conditionally adjusting op0 *and* the result.  */
4868             {
4869               rtx_code_label *label1, *label2, *label3, *label4, *label5;
4870               rtx adjusted_op0;
4871               rtx tem;
4872
4873               quotient = gen_reg_rtx (compute_mode);
4874               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4875               label1 = gen_label_rtx ();
4876               label2 = gen_label_rtx ();
4877               label3 = gen_label_rtx ();
4878               label4 = gen_label_rtx ();
4879               label5 = gen_label_rtx ();
4880               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4881               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4882                                compute_mode, label1);
4883               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4884                                   quotient, 0, OPTAB_LIB_WIDEN);
4885               if (tem != quotient)
4886                 emit_move_insn (quotient, tem);
4887               emit_jump_insn (targetm.gen_jump (label5));
4888               emit_barrier ();
4889               emit_label (label1);
4890               expand_dec (adjusted_op0, const1_rtx);
4891               emit_jump_insn (targetm.gen_jump (label4));
4892               emit_barrier ();
4893               emit_label (label2);
4894               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4895                                compute_mode, label3);
4896               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4897                                   quotient, 0, OPTAB_LIB_WIDEN);
4898               if (tem != quotient)
4899                 emit_move_insn (quotient, tem);
4900               emit_jump_insn (targetm.gen_jump (label5));
4901               emit_barrier ();
4902               emit_label (label3);
4903               expand_inc (adjusted_op0, const1_rtx);
4904               emit_label (label4);
4905               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4906                                   quotient, 0, OPTAB_LIB_WIDEN);
4907               if (tem != quotient)
4908                 emit_move_insn (quotient, tem);
4909               expand_inc (quotient, const1_rtx);
4910               emit_label (label5);
4911             }
4912           }
4913         break;
4914
4915       case EXACT_DIV_EXPR:
4916         if (op1_is_constant && size <= HOST_BITS_PER_WIDE_INT)
4917           {
4918             HOST_WIDE_INT d = INTVAL (op1);
4919             unsigned HOST_WIDE_INT ml;
4920             int pre_shift;
4921             rtx t1;
4922
4923             pre_shift = floor_log2 (d & -d);
4924             ml = invert_mod2n (d >> pre_shift, size);
4925             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4926                                pre_shift, NULL_RTX, unsignedp);
4927             quotient = expand_mult (compute_mode, t1,
4928                                     gen_int_mode (ml, compute_mode),
4929                                     NULL_RTX, 1);
4930
4931             insn = get_last_insn ();
4932             set_dst_reg_note (insn, REG_EQUAL,
4933                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4934                                               compute_mode, op0, op1),
4935                               quotient);
4936           }
4937         break;
4938
4939       case ROUND_DIV_EXPR:
4940       case ROUND_MOD_EXPR:
4941         if (unsignedp)
4942           {
4943             rtx tem;
4944             rtx_code_label *label;
4945             label = gen_label_rtx ();
4946             quotient = gen_reg_rtx (compute_mode);
4947             remainder = gen_reg_rtx (compute_mode);
4948             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4949               {
4950                 rtx tem;
4951                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4952                                          quotient, 1, OPTAB_LIB_WIDEN);
4953                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4954                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4955                                           remainder, 1, OPTAB_LIB_WIDEN);
4956               }
4957             tem = plus_constant (compute_mode, op1, -1);
4958             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4959             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4960             expand_inc (quotient, const1_rtx);
4961             expand_dec (remainder, op1);
4962             emit_label (label);
4963           }
4964         else
4965           {
4966             rtx abs_rem, abs_op1, tem, mask;
4967             rtx_code_label *label;
4968             label = gen_label_rtx ();
4969             quotient = gen_reg_rtx (compute_mode);
4970             remainder = gen_reg_rtx (compute_mode);
4971             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4972               {
4973                 rtx tem;
4974                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4975                                          quotient, 0, OPTAB_LIB_WIDEN);
4976                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4977                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4978                                           remainder, 0, OPTAB_LIB_WIDEN);
4979               }
4980             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4981             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4982             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4983                                 1, NULL_RTX, 1);
4984             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4985             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4986                                 NULL_RTX, 0, OPTAB_WIDEN);
4987             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4988                                  size - 1, NULL_RTX, 0);
4989             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4990                                 NULL_RTX, 0, OPTAB_WIDEN);
4991             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4992                                 NULL_RTX, 0, OPTAB_WIDEN);
4993             expand_inc (quotient, tem);
4994             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4995                                 NULL_RTX, 0, OPTAB_WIDEN);
4996             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4997                                 NULL_RTX, 0, OPTAB_WIDEN);
4998             expand_dec (remainder, tem);
4999             emit_label (label);
5000           }
5001         return gen_lowpart (mode, rem_flag ? remainder : quotient);
5002
5003       default:
5004         gcc_unreachable ();
5005       }
5006
5007   if (quotient == 0)
5008     {
5009       if (target && GET_MODE (target) != compute_mode)
5010         target = 0;
5011
5012       if (rem_flag)
5013         {
5014           /* Try to produce the remainder without producing the quotient.
5015              If we seem to have a divmod pattern that does not require widening,
5016              don't try widening here.  We should really have a WIDEN argument
5017              to expand_twoval_binop, since what we'd really like to do here is
5018              1) try a mod insn in compute_mode
5019              2) try a divmod insn in compute_mode
5020              3) try a div insn in compute_mode and multiply-subtract to get
5021                 remainder
5022              4) try the same things with widening allowed.  */
5023           remainder
5024             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5025                                  op0, op1, target,
5026                                  unsignedp,
5027                                  ((optab_handler (optab2, compute_mode)
5028                                    != CODE_FOR_nothing)
5029                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
5030           if (remainder == 0)
5031             {
5032               /* No luck there.  Can we do remainder and divide at once
5033                  without a library call?  */
5034               remainder = gen_reg_rtx (compute_mode);
5035               if (! expand_twoval_binop ((unsignedp
5036                                           ? udivmod_optab
5037                                           : sdivmod_optab),
5038                                          op0, op1,
5039                                          NULL_RTX, remainder, unsignedp))
5040                 remainder = 0;
5041             }
5042
5043           if (remainder)
5044             return gen_lowpart (mode, remainder);
5045         }
5046
5047       /* Produce the quotient.  Try a quotient insn, but not a library call.
5048          If we have a divmod in this mode, use it in preference to widening
5049          the div (for this test we assume it will not fail). Note that optab2
5050          is set to the one of the two optabs that the call below will use.  */
5051       quotient
5052         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
5053                              op0, op1, rem_flag ? NULL_RTX : target,
5054                              unsignedp,
5055                              ((optab_handler (optab2, compute_mode)
5056                                != CODE_FOR_nothing)
5057                               ? OPTAB_DIRECT : OPTAB_WIDEN));
5058
5059       if (quotient == 0)
5060         {
5061           /* No luck there.  Try a quotient-and-remainder insn,
5062              keeping the quotient alone.  */
5063           quotient = gen_reg_rtx (compute_mode);
5064           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
5065                                      op0, op1,
5066                                      quotient, NULL_RTX, unsignedp))
5067             {
5068               quotient = 0;
5069               if (! rem_flag)
5070                 /* Still no luck.  If we are not computing the remainder,
5071                    use a library call for the quotient.  */
5072                 quotient = sign_expand_binop (compute_mode,
5073                                               udiv_optab, sdiv_optab,
5074                                               op0, op1, target,
5075                                               unsignedp, OPTAB_LIB_WIDEN);
5076             }
5077         }
5078     }
5079
5080   if (rem_flag)
5081     {
5082       if (target && GET_MODE (target) != compute_mode)
5083         target = 0;
5084
5085       if (quotient == 0)
5086         {
5087           /* No divide instruction either.  Use library for remainder.  */
5088           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
5089                                          op0, op1, target,
5090                                          unsignedp, OPTAB_LIB_WIDEN);
5091           /* No remainder function.  Try a quotient-and-remainder
5092              function, keeping the remainder.  */
5093           if (!remainder)
5094             {
5095               remainder = gen_reg_rtx (compute_mode);
5096               if (!expand_twoval_binop_libfunc
5097                   (unsignedp ? udivmod_optab : sdivmod_optab,
5098                    op0, op1,
5099                    NULL_RTX, remainder,
5100                    unsignedp ? UMOD : MOD))
5101                 remainder = NULL_RTX;
5102             }
5103         }
5104       else
5105         {
5106           /* We divided.  Now finish doing X - Y * (X / Y).  */
5107           remainder = expand_mult (compute_mode, quotient, op1,
5108                                    NULL_RTX, unsignedp);
5109           remainder = expand_binop (compute_mode, sub_optab, op0,
5110                                     remainder, target, unsignedp,
5111                                     OPTAB_LIB_WIDEN);
5112         }
5113     }
5114
5115   return gen_lowpart (mode, rem_flag ? remainder : quotient);
5116 }
5117 \f
5118 /* Return a tree node with data type TYPE, describing the value of X.
5119    Usually this is an VAR_DECL, if there is no obvious better choice.
5120    X may be an expression, however we only support those expressions
5121    generated by loop.c.  */
5122
5123 tree
5124 make_tree (tree type, rtx x)
5125 {
5126   tree t;
5127
5128   switch (GET_CODE (x))
5129     {
5130     case CONST_INT:
5131     case CONST_WIDE_INT:
5132       t = wide_int_to_tree (type, std::make_pair (x, TYPE_MODE (type)));
5133       return t;
5134
5135     case CONST_DOUBLE:
5136       STATIC_ASSERT (HOST_BITS_PER_WIDE_INT * 2 <= MAX_BITSIZE_MODE_ANY_INT);
5137       if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (x) == VOIDmode)
5138         t = wide_int_to_tree (type,
5139                               wide_int::from_array (&CONST_DOUBLE_LOW (x), 2,
5140                                                     HOST_BITS_PER_WIDE_INT * 2));
5141       else
5142         t = build_real (type, *CONST_DOUBLE_REAL_VALUE (x));
5143
5144       return t;
5145
5146     case CONST_VECTOR:
5147       {
5148         int units = CONST_VECTOR_NUNITS (x);
5149         tree itype = TREE_TYPE (type);
5150         tree *elts;
5151         int i;
5152
5153         /* Build a tree with vector elements.  */
5154         elts = XALLOCAVEC (tree, units);
5155         for (i = units - 1; i >= 0; --i)
5156           {
5157             rtx elt = CONST_VECTOR_ELT (x, i);
5158             elts[i] = make_tree (itype, elt);
5159           }
5160
5161         return build_vector (type, elts);
5162       }
5163
5164     case PLUS:
5165       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5166                           make_tree (type, XEXP (x, 1)));
5167
5168     case MINUS:
5169       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5170                           make_tree (type, XEXP (x, 1)));
5171
5172     case NEG:
5173       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5174
5175     case MULT:
5176       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5177                           make_tree (type, XEXP (x, 1)));
5178
5179     case ASHIFT:
5180       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5181                           make_tree (type, XEXP (x, 1)));
5182
5183     case LSHIFTRT:
5184       t = unsigned_type_for (type);
5185       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5186                                          make_tree (t, XEXP (x, 0)),
5187                                          make_tree (type, XEXP (x, 1))));
5188
5189     case ASHIFTRT:
5190       t = signed_type_for (type);
5191       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5192                                          make_tree (t, XEXP (x, 0)),
5193                                          make_tree (type, XEXP (x, 1))));
5194
5195     case DIV:
5196       if (TREE_CODE (type) != REAL_TYPE)
5197         t = signed_type_for (type);
5198       else
5199         t = type;
5200
5201       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5202                                          make_tree (t, XEXP (x, 0)),
5203                                          make_tree (t, XEXP (x, 1))));
5204     case UDIV:
5205       t = unsigned_type_for (type);
5206       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5207                                          make_tree (t, XEXP (x, 0)),
5208                                          make_tree (t, XEXP (x, 1))));
5209
5210     case SIGN_EXTEND:
5211     case ZERO_EXTEND:
5212       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5213                                           GET_CODE (x) == ZERO_EXTEND);
5214       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5215
5216     case CONST:
5217       return make_tree (type, XEXP (x, 0));
5218
5219     case SYMBOL_REF:
5220       t = SYMBOL_REF_DECL (x);
5221       if (t)
5222         return fold_convert (type, build_fold_addr_expr (t));
5223       /* fall through.  */
5224
5225     default:
5226       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5227
5228       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5229          address mode to pointer mode.  */
5230       if (POINTER_TYPE_P (type))
5231         x = convert_memory_address_addr_space
5232               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5233
5234       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5235          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5236       t->decl_with_rtl.rtl = x;
5237
5238       return t;
5239     }
5240 }
5241 \f
5242 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5243    and returning TARGET.
5244
5245    If TARGET is 0, a pseudo-register or constant is returned.  */
5246
5247 rtx
5248 expand_and (machine_mode mode, rtx op0, rtx op1, rtx target)
5249 {
5250   rtx tem = 0;
5251
5252   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5253     tem = simplify_binary_operation (AND, mode, op0, op1);
5254   if (tem == 0)
5255     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5256
5257   if (target == 0)
5258     target = tem;
5259   else if (tem != target)
5260     emit_move_insn (target, tem);
5261   return target;
5262 }
5263
5264 /* Helper function for emit_store_flag.  */
5265 rtx
5266 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5267              machine_mode mode, machine_mode compare_mode,
5268              int unsignedp, rtx x, rtx y, int normalizep,
5269              machine_mode target_mode)
5270 {
5271   struct expand_operand ops[4];
5272   rtx op0, comparison, subtarget;
5273   rtx_insn *last;
5274   machine_mode result_mode = targetm.cstore_mode (icode);
5275
5276   last = get_last_insn ();
5277   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5278   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5279   if (!x || !y)
5280     {
5281       delete_insns_since (last);
5282       return NULL_RTX;
5283     }
5284
5285   if (target_mode == VOIDmode)
5286     target_mode = result_mode;
5287   if (!target)
5288     target = gen_reg_rtx (target_mode);
5289
5290   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5291
5292   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5293   create_fixed_operand (&ops[1], comparison);
5294   create_fixed_operand (&ops[2], x);
5295   create_fixed_operand (&ops[3], y);
5296   if (!maybe_expand_insn (icode, 4, ops))
5297     {
5298       delete_insns_since (last);
5299       return NULL_RTX;
5300     }
5301   subtarget = ops[0].value;
5302
5303   /* If we are converting to a wider mode, first convert to
5304      TARGET_MODE, then normalize.  This produces better combining
5305      opportunities on machines that have a SIGN_EXTRACT when we are
5306      testing a single bit.  This mostly benefits the 68k.
5307
5308      If STORE_FLAG_VALUE does not have the sign bit set when
5309      interpreted in MODE, we can do this conversion as unsigned, which
5310      is usually more efficient.  */
5311   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5312     {
5313       convert_move (target, subtarget,
5314                     val_signbit_known_clear_p (result_mode,
5315                                                STORE_FLAG_VALUE));
5316       op0 = target;
5317       result_mode = target_mode;
5318     }
5319   else
5320     op0 = subtarget;
5321
5322   /* If we want to keep subexpressions around, don't reuse our last
5323      target.  */
5324   if (optimize)
5325     subtarget = 0;
5326
5327   /* Now normalize to the proper value in MODE.  Sometimes we don't
5328      have to do anything.  */
5329   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5330     ;
5331   /* STORE_FLAG_VALUE might be the most negative number, so write
5332      the comparison this way to avoid a compiler-time warning.  */
5333   else if (- normalizep == STORE_FLAG_VALUE)
5334     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5335
5336   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5337      it hard to use a value of just the sign bit due to ANSI integer
5338      constant typing rules.  */
5339   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5340     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5341                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5342                         normalizep == 1);
5343   else
5344     {
5345       gcc_assert (STORE_FLAG_VALUE & 1);
5346
5347       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5348       if (normalizep == -1)
5349         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5350     }
5351
5352   /* If we were converting to a smaller mode, do the conversion now.  */
5353   if (target_mode != result_mode)
5354     {
5355       convert_move (target, op0, 0);
5356       return target;
5357     }
5358   else
5359     return op0;
5360 }
5361
5362
5363 /* A subroutine of emit_store_flag only including "tricks" that do not
5364    need a recursive call.  These are kept separate to avoid infinite
5365    loops.  */
5366
5367 static rtx
5368 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5369                    machine_mode mode, int unsignedp, int normalizep,
5370                    machine_mode target_mode)
5371 {
5372   rtx subtarget;
5373   enum insn_code icode;
5374   machine_mode compare_mode;
5375   enum mode_class mclass;
5376   enum rtx_code scode;
5377
5378   if (unsignedp)
5379     code = unsigned_condition (code);
5380   scode = swap_condition (code);
5381
5382   /* If one operand is constant, make it the second one.  Only do this
5383      if the other operand is not constant as well.  */
5384
5385   if (swap_commutative_operands_p (op0, op1))
5386     {
5387       std::swap (op0, op1);
5388       code = swap_condition (code);
5389     }
5390
5391   if (mode == VOIDmode)
5392     mode = GET_MODE (op0);
5393
5394   /* For some comparisons with 1 and -1, we can convert this to
5395      comparisons with zero.  This will often produce more opportunities for
5396      store-flag insns.  */
5397
5398   switch (code)
5399     {
5400     case LT:
5401       if (op1 == const1_rtx)
5402         op1 = const0_rtx, code = LE;
5403       break;
5404     case LE:
5405       if (op1 == constm1_rtx)
5406         op1 = const0_rtx, code = LT;
5407       break;
5408     case GE:
5409       if (op1 == const1_rtx)
5410         op1 = const0_rtx, code = GT;
5411       break;
5412     case GT:
5413       if (op1 == constm1_rtx)
5414         op1 = const0_rtx, code = GE;
5415       break;
5416     case GEU:
5417       if (op1 == const1_rtx)
5418         op1 = const0_rtx, code = NE;
5419       break;
5420     case LTU:
5421       if (op1 == const1_rtx)
5422         op1 = const0_rtx, code = EQ;
5423       break;
5424     default:
5425       break;
5426     }
5427
5428   /* If we are comparing a double-word integer with zero or -1, we can
5429      convert the comparison into one involving a single word.  */
5430   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5431       && GET_MODE_CLASS (mode) == MODE_INT
5432       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5433     {
5434       rtx tem;
5435       if ((code == EQ || code == NE)
5436           && (op1 == const0_rtx || op1 == constm1_rtx))
5437         {
5438           rtx op00, op01;
5439
5440           /* Do a logical OR or AND of the two words and compare the
5441              result.  */
5442           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5443           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5444           tem = expand_binop (word_mode,
5445                               op1 == const0_rtx ? ior_optab : and_optab,
5446                               op00, op01, NULL_RTX, unsignedp,
5447                               OPTAB_DIRECT);
5448
5449           if (tem != 0)
5450             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5451                                    unsignedp, normalizep);
5452         }
5453       else if ((code == LT || code == GE) && op1 == const0_rtx)
5454         {
5455           rtx op0h;
5456
5457           /* If testing the sign bit, can just test on high word.  */
5458           op0h = simplify_gen_subreg (word_mode, op0, mode,
5459                                       subreg_highpart_offset (word_mode,
5460                                                               mode));
5461           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5462                                  unsignedp, normalizep);
5463         }
5464       else
5465         tem = NULL_RTX;
5466
5467       if (tem)
5468         {
5469           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5470             return tem;
5471           if (!target)
5472             target = gen_reg_rtx (target_mode);
5473
5474           convert_move (target, tem,
5475                         !val_signbit_known_set_p (word_mode,
5476                                                   (normalizep ? normalizep
5477                                                    : STORE_FLAG_VALUE)));
5478           return target;
5479         }
5480     }
5481
5482   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5483      complement of A (for GE) and shifting the sign bit to the low bit.  */
5484   if (op1 == const0_rtx && (code == LT || code == GE)
5485       && GET_MODE_CLASS (mode) == MODE_INT
5486       && (normalizep || STORE_FLAG_VALUE == 1
5487           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5488     {
5489       subtarget = target;
5490
5491       if (!target)
5492         target_mode = mode;
5493
5494       /* If the result is to be wider than OP0, it is best to convert it
5495          first.  If it is to be narrower, it is *incorrect* to convert it
5496          first.  */
5497       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5498         {
5499           op0 = convert_modes (target_mode, mode, op0, 0);
5500           mode = target_mode;
5501         }
5502
5503       if (target_mode != mode)
5504         subtarget = 0;
5505
5506       if (code == GE)
5507         op0 = expand_unop (mode, one_cmpl_optab, op0,
5508                            ((STORE_FLAG_VALUE == 1 || normalizep)
5509                             ? 0 : subtarget), 0);
5510
5511       if (STORE_FLAG_VALUE == 1 || normalizep)
5512         /* If we are supposed to produce a 0/1 value, we want to do
5513            a logical shift from the sign bit to the low-order bit; for
5514            a -1/0 value, we do an arithmetic shift.  */
5515         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5516                             GET_MODE_BITSIZE (mode) - 1,
5517                             subtarget, normalizep != -1);
5518
5519       if (mode != target_mode)
5520         op0 = convert_modes (target_mode, mode, op0, 0);
5521
5522       return op0;
5523     }
5524
5525   mclass = GET_MODE_CLASS (mode);
5526   for (compare_mode = mode; compare_mode != VOIDmode;
5527        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5528     {
5529      machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5530      icode = optab_handler (cstore_optab, optab_mode);
5531      if (icode != CODE_FOR_nothing)
5532         {
5533           do_pending_stack_adjust ();
5534           rtx tem = emit_cstore (target, icode, code, mode, compare_mode,
5535                                  unsignedp, op0, op1, normalizep, target_mode);
5536           if (tem)
5537             return tem;
5538
5539           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5540             {
5541               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5542                                  unsignedp, op1, op0, normalizep, target_mode);
5543               if (tem)
5544                 return tem;
5545             }
5546           break;
5547         }
5548     }
5549
5550   return 0;
5551 }
5552
5553 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5554    and storing in TARGET.  Normally return TARGET.
5555    Return 0 if that cannot be done.
5556
5557    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5558    it is VOIDmode, they cannot both be CONST_INT.
5559
5560    UNSIGNEDP is for the case where we have to widen the operands
5561    to perform the operation.  It says to use zero-extension.
5562
5563    NORMALIZEP is 1 if we should convert the result to be either zero
5564    or one.  Normalize is -1 if we should convert the result to be
5565    either zero or -1.  If NORMALIZEP is zero, the result will be left
5566    "raw" out of the scc insn.  */
5567
5568 rtx
5569 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5570                  machine_mode mode, int unsignedp, int normalizep)
5571 {
5572   machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5573   enum rtx_code rcode;
5574   rtx subtarget;
5575   rtx tem, trueval;
5576   rtx_insn *last;
5577
5578   /* If we compare constants, we shouldn't use a store-flag operation,
5579      but a constant load.  We can get there via the vanilla route that
5580      usually generates a compare-branch sequence, but will in this case
5581      fold the comparison to a constant, and thus elide the branch.  */
5582   if (CONSTANT_P (op0) && CONSTANT_P (op1))
5583     return NULL_RTX;
5584
5585   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5586                            target_mode);
5587   if (tem)
5588     return tem;
5589
5590   /* If we reached here, we can't do this with a scc insn, however there
5591      are some comparisons that can be done in other ways.  Don't do any
5592      of these cases if branches are very cheap.  */
5593   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5594     return 0;
5595
5596   /* See what we need to return.  We can only return a 1, -1, or the
5597      sign bit.  */
5598
5599   if (normalizep == 0)
5600     {
5601       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5602         normalizep = STORE_FLAG_VALUE;
5603
5604       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5605         ;
5606       else
5607         return 0;
5608     }
5609
5610   last = get_last_insn ();
5611
5612   /* If optimizing, use different pseudo registers for each insn, instead
5613      of reusing the same pseudo.  This leads to better CSE, but slows
5614      down the compiler, since there are more pseudos */
5615   subtarget = (!optimize
5616                && (target_mode == mode)) ? target : NULL_RTX;
5617   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5618
5619   /* For floating-point comparisons, try the reverse comparison or try
5620      changing the "orderedness" of the comparison.  */
5621   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5622     {
5623       enum rtx_code first_code;
5624       bool and_them;
5625
5626       rcode = reverse_condition_maybe_unordered (code);
5627       if (can_compare_p (rcode, mode, ccp_store_flag)
5628           && (code == ORDERED || code == UNORDERED
5629               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5630               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5631         {
5632           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5633                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5634
5635           /* For the reverse comparison, use either an addition or a XOR.  */
5636           if (want_add
5637               && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5638                            optimize_insn_for_speed_p ()) == 0)
5639             {
5640               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5641                                        STORE_FLAG_VALUE, target_mode);
5642               if (tem)
5643                 return expand_binop (target_mode, add_optab, tem,
5644                                      gen_int_mode (normalizep, target_mode),
5645                                      target, 0, OPTAB_WIDEN);
5646             }
5647           else if (!want_add
5648                    && rtx_cost (trueval, mode, XOR, 1,
5649                                 optimize_insn_for_speed_p ()) == 0)
5650             {
5651               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5652                                        normalizep, target_mode);
5653               if (tem)
5654                 return expand_binop (target_mode, xor_optab, tem, trueval,
5655                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5656             }
5657         }
5658
5659       delete_insns_since (last);
5660
5661       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5662       if (code == ORDERED || code == UNORDERED)
5663         return 0;
5664
5665       and_them = split_comparison (code, mode, &first_code, &code);
5666
5667       /* If there are no NaNs, the first comparison should always fall through.
5668          Effectively change the comparison to the other one.  */
5669       if (!HONOR_NANS (mode))
5670         {
5671           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5672           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5673                                     target_mode);
5674         }
5675
5676       if (!HAVE_conditional_move)
5677         return 0;
5678
5679       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5680          conditional move.  */
5681       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5682                                normalizep, target_mode);
5683       if (tem == 0)
5684         return 0;
5685
5686       if (and_them)
5687         tem = emit_conditional_move (target, code, op0, op1, mode,
5688                                      tem, const0_rtx, GET_MODE (tem), 0);
5689       else
5690         tem = emit_conditional_move (target, code, op0, op1, mode,
5691                                      trueval, tem, GET_MODE (tem), 0);
5692
5693       if (tem == 0)
5694         delete_insns_since (last);
5695       return tem;
5696     }
5697
5698   /* The remaining tricks only apply to integer comparisons.  */
5699
5700   if (GET_MODE_CLASS (mode) != MODE_INT)
5701     return 0;
5702
5703   /* If this is an equality comparison of integers, we can try to exclusive-or
5704      (or subtract) the two operands and use a recursive call to try the
5705      comparison with zero.  Don't do any of these cases if branches are
5706      very cheap.  */
5707
5708   if ((code == EQ || code == NE) && op1 != const0_rtx)
5709     {
5710       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5711                           OPTAB_WIDEN);
5712
5713       if (tem == 0)
5714         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5715                             OPTAB_WIDEN);
5716       if (tem != 0)
5717         tem = emit_store_flag (target, code, tem, const0_rtx,
5718                                mode, unsignedp, normalizep);
5719       if (tem != 0)
5720         return tem;
5721
5722       delete_insns_since (last);
5723     }
5724
5725   /* For integer comparisons, try the reverse comparison.  However, for
5726      small X and if we'd have anyway to extend, implementing "X != 0"
5727      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5728   rcode = reverse_condition (code);
5729   if (can_compare_p (rcode, mode, ccp_store_flag)
5730       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5731             && code == NE
5732             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5733             && op1 == const0_rtx))
5734     {
5735       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5736                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5737
5738       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5739       if (want_add
5740           && rtx_cost (GEN_INT (normalizep), mode, PLUS, 1,
5741                        optimize_insn_for_speed_p ()) == 0)
5742         {
5743           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5744                                    STORE_FLAG_VALUE, target_mode);
5745           if (tem != 0)
5746             tem = expand_binop (target_mode, add_optab, tem,
5747                                 gen_int_mode (normalizep, target_mode),
5748                                 target, 0, OPTAB_WIDEN);
5749         }
5750       else if (!want_add
5751                && rtx_cost (trueval, mode, XOR, 1,
5752                             optimize_insn_for_speed_p ()) == 0)
5753         {
5754           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5755                                    normalizep, target_mode);
5756           if (tem != 0)
5757             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5758                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5759         }
5760
5761       if (tem != 0)
5762         return tem;
5763       delete_insns_since (last);
5764     }
5765
5766   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5767      the constant zero.  Reject all other comparisons at this point.  Only
5768      do LE and GT if branches are expensive since they are expensive on
5769      2-operand machines.  */
5770
5771   if (op1 != const0_rtx
5772       || (code != EQ && code != NE
5773           && (BRANCH_COST (optimize_insn_for_speed_p (),
5774                            false) <= 1 || (code != LE && code != GT))))
5775     return 0;
5776
5777   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5778      do the necessary operation below.  */
5779
5780   tem = 0;
5781
5782   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5783      the sign bit set.  */
5784
5785   if (code == LE)
5786     {
5787       /* This is destructive, so SUBTARGET can't be OP0.  */
5788       if (rtx_equal_p (subtarget, op0))
5789         subtarget = 0;
5790
5791       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5792                           OPTAB_WIDEN);
5793       if (tem)
5794         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5795                             OPTAB_WIDEN);
5796     }
5797
5798   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5799      number of bits in the mode of OP0, minus one.  */
5800
5801   if (code == GT)
5802     {
5803       if (rtx_equal_p (subtarget, op0))
5804         subtarget = 0;
5805
5806       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5807                           GET_MODE_BITSIZE (mode) - 1,
5808                           subtarget, 0);
5809       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5810                           OPTAB_WIDEN);
5811     }
5812
5813   if (code == EQ || code == NE)
5814     {
5815       /* For EQ or NE, one way to do the comparison is to apply an operation
5816          that converts the operand into a positive number if it is nonzero
5817          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5818          for NE we negate.  This puts the result in the sign bit.  Then we
5819          normalize with a shift, if needed.
5820
5821          Two operations that can do the above actions are ABS and FFS, so try
5822          them.  If that doesn't work, and MODE is smaller than a full word,
5823          we can use zero-extension to the wider mode (an unsigned conversion)
5824          as the operation.  */
5825
5826       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5827          that is compensated by the subsequent overflow when subtracting
5828          one / negating.  */
5829
5830       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5831         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5832       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5833         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5834       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5835         {
5836           tem = convert_modes (word_mode, mode, op0, 1);
5837           mode = word_mode;
5838         }
5839
5840       if (tem != 0)
5841         {
5842           if (code == EQ)
5843             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5844                                 0, OPTAB_WIDEN);
5845           else
5846             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5847         }
5848
5849       /* If we couldn't do it that way, for NE we can "or" the two's complement
5850          of the value with itself.  For EQ, we take the one's complement of
5851          that "or", which is an extra insn, so we only handle EQ if branches
5852          are expensive.  */
5853
5854       if (tem == 0
5855           && (code == NE
5856               || BRANCH_COST (optimize_insn_for_speed_p (),
5857                               false) > 1))
5858         {
5859           if (rtx_equal_p (subtarget, op0))
5860             subtarget = 0;
5861
5862           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5863           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5864                               OPTAB_WIDEN);
5865
5866           if (tem && code == EQ)
5867             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5868         }
5869     }
5870
5871   if (tem && normalizep)
5872     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5873                         GET_MODE_BITSIZE (mode) - 1,
5874                         subtarget, normalizep == 1);
5875
5876   if (tem)
5877     {
5878       if (!target)
5879         ;
5880       else if (GET_MODE (tem) != target_mode)
5881         {
5882           convert_move (target, tem, 0);
5883           tem = target;
5884         }
5885       else if (!subtarget)
5886         {
5887           emit_move_insn (target, tem);
5888           tem = target;
5889         }
5890     }
5891   else
5892     delete_insns_since (last);
5893
5894   return tem;
5895 }
5896
5897 /* Like emit_store_flag, but always succeeds.  */
5898
5899 rtx
5900 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5901                        machine_mode mode, int unsignedp, int normalizep)
5902 {
5903   rtx tem;
5904   rtx_code_label *label;
5905   rtx trueval, falseval;
5906
5907   /* First see if emit_store_flag can do the job.  */
5908   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5909   if (tem != 0)
5910     return tem;
5911
5912   if (!target)
5913     target = gen_reg_rtx (word_mode);
5914
5915   /* If this failed, we have to do this with set/compare/jump/set code.
5916      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5917   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5918   if (code == NE
5919       && GET_MODE_CLASS (mode) == MODE_INT
5920       && REG_P (target)
5921       && op0 == target
5922       && op1 == const0_rtx)
5923     {
5924       label = gen_label_rtx ();
5925       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp, mode,
5926                                NULL_RTX, NULL, label, -1);
5927       emit_move_insn (target, trueval);
5928       emit_label (label);
5929       return target;
5930     }
5931
5932   if (!REG_P (target)
5933       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5934     target = gen_reg_rtx (GET_MODE (target));
5935
5936   /* Jump in the right direction if the target cannot implement CODE
5937      but can jump on its reverse condition.  */
5938   falseval = const0_rtx;
5939   if (! can_compare_p (code, mode, ccp_jump)
5940       && (! FLOAT_MODE_P (mode)
5941           || code == ORDERED || code == UNORDERED
5942           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5943           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5944     {
5945       enum rtx_code rcode;
5946       if (FLOAT_MODE_P (mode))
5947         rcode = reverse_condition_maybe_unordered (code);
5948       else
5949         rcode = reverse_condition (code);
5950
5951       /* Canonicalize to UNORDERED for the libcall.  */
5952       if (can_compare_p (rcode, mode, ccp_jump)
5953           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5954         {
5955           falseval = trueval;
5956           trueval = const0_rtx;
5957           code = rcode;
5958         }
5959     }
5960
5961   emit_move_insn (target, trueval);
5962   label = gen_label_rtx ();
5963   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, NULL,
5964                            label, -1);
5965
5966   emit_move_insn (target, falseval);
5967   emit_label (label);
5968
5969   return target;
5970 }
5971 \f
5972 /* Perform possibly multi-word comparison and conditional jump to LABEL
5973    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5974    now a thin wrapper around do_compare_rtx_and_jump.  */
5975
5976 static void
5977 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, machine_mode mode,
5978                  rtx_code_label *label)
5979 {
5980   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5981   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode, NULL_RTX,
5982                            NULL, label, -1);
5983 }