gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
   5    Free Software Foundation, Inc.
   6
   7 This file is part of GCC.
   8
   9 GCC is free software; you can redistribute it and/or modify it under
  10 the terms of the GNU General Public License as published by the Free
  11 Software Foundation; either version 3, or (at your option) any later
  12 version.
  13
  14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  17 for more details.
  18
  19 You should have received a copy of the GNU General Public License
  20 along with GCC; see the file COPYING3.  If not see
  21 <http://www.gnu.org/licenses/>.  */
  22
  23
  24 #include "config.h"
  25 #include "system.h"
  26 #include "coretypes.h"
  27 #include "tm.h"
  28 #include "toplev.h"
  29 #include "rtl.h"
  30 #include "tree.h"
  31 #include "tm_p.h"
  32 #include "flags.h"
  33 #include "insn-config.h"
  34 #include "expr.h"
  35 #include "optabs.h"
  36 #include "real.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41
  42 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  43                                    unsigned HOST_WIDE_INT,
  44                                    unsigned HOST_WIDE_INT, rtx);
  45 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  46                                    unsigned HOST_WIDE_INT, rtx);
  47 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  48                                     unsigned HOST_WIDE_INT,
  49                                     unsigned HOST_WIDE_INT,
  50                                     unsigned HOST_WIDE_INT, rtx, int);
  51 static rtx mask_rtx (enum machine_mode, int, int, int);
  52 static rtx lshift_value (enum machine_mode, rtx, int, int);
  53 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  54                                     unsigned HOST_WIDE_INT, int);
  55 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  56 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  57 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  58
  59 /* Test whether a value is zero of a power of two.  */
  60 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
  61
  62 /* Nonzero means divides or modulus operations are relatively cheap for
  63    powers of two, so don't use branches; emit the operation instead.
  64    Usually, this will mean that the MD file will emit non-branch
  65    sequences.  */
  66
  67 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
  68 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
  69
  70 #ifndef SLOW_UNALIGNED_ACCESS
  71 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  72 #endif
  73
  74 /* For compilers that support multiple targets with different word sizes,
  75    MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
  76    is the H8/300(H) compiler.  */
  77
  78 #ifndef MAX_BITS_PER_WORD
  79 #define MAX_BITS_PER_WORD BITS_PER_WORD
  80 #endif
  81
  82 /* Reduce conditional compilation elsewhere.  */
  83 #ifndef HAVE_insv
  84 #define HAVE_insv       0
  85 #define CODE_FOR_insv   CODE_FOR_nothing
  86 #define gen_insv(a,b,c,d) NULL_RTX
  87 #endif
  88 #ifndef HAVE_extv
  89 #define HAVE_extv       0
  90 #define CODE_FOR_extv   CODE_FOR_nothing
  91 #define gen_extv(a,b,c,d) NULL_RTX
  92 #endif
  93 #ifndef HAVE_extzv
  94 #define HAVE_extzv      0
  95 #define CODE_FOR_extzv  CODE_FOR_nothing
  96 #define gen_extzv(a,b,c,d) NULL_RTX
  97 #endif
  98
  99 /* Cost of various pieces of RTL.  Note that some of these are indexed by
 100    shift count and some by mode.  */
 101 static int zero_cost;
 102 static int add_cost[NUM_MACHINE_MODES];
 103 static int neg_cost[NUM_MACHINE_MODES];
 104 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 105 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 106 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
 107 static int mul_cost[NUM_MACHINE_MODES];
 108 static int sdiv_cost[NUM_MACHINE_MODES];
 109 static int udiv_cost[NUM_MACHINE_MODES];
 110 static int mul_widen_cost[NUM_MACHINE_MODES];
 111 static int mul_highpart_cost[NUM_MACHINE_MODES];
 112
 113 void
 114 init_expmed (void)
 115 {
 116   struct
 117   {
 118     struct rtx_def reg;         rtunion reg_fld[2];
 119     struct rtx_def plus;        rtunion plus_fld1;
 120     struct rtx_def neg;
 121     struct rtx_def mult;        rtunion mult_fld1;
 122     struct rtx_def sdiv;        rtunion sdiv_fld1;
 123     struct rtx_def udiv;        rtunion udiv_fld1;
 124     struct rtx_def zext;
 125     struct rtx_def sdiv_32;     rtunion sdiv_32_fld1;
 126     struct rtx_def smod_32;     rtunion smod_32_fld1;
 127     struct rtx_def wide_mult;   rtunion wide_mult_fld1;
 128     struct rtx_def wide_lshr;   rtunion wide_lshr_fld1;
 129     struct rtx_def wide_trunc;
 130     struct rtx_def shift;       rtunion shift_fld1;
 131     struct rtx_def shift_mult;  rtunion shift_mult_fld1;
 132     struct rtx_def shift_add;   rtunion shift_add_fld1;
 133     struct rtx_def shift_sub;   rtunion shift_sub_fld1;
 134   } all;
 135
 136   rtx pow2[MAX_BITS_PER_WORD];
 137   rtx cint[MAX_BITS_PER_WORD];
 138   int m, n;
 139   enum machine_mode mode, wider_mode;
 140
 141   zero_cost = rtx_cost (const0_rtx, 0);
 142
 143   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 144     {
 145       pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 146       cint[m] = GEN_INT (m);
 147     }
 148
 149   memset (&all, 0, sizeof all);
 150
 151   PUT_CODE (&all.reg, REG);
 152   /* Avoid using hard regs in ways which may be unsupported.  */
 153   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 154
 155   PUT_CODE (&all.plus, PLUS);
 156   XEXP (&all.plus, 0) = &all.reg;
 157   XEXP (&all.plus, 1) = &all.reg;
 158
 159   PUT_CODE (&all.neg, NEG);
 160   XEXP (&all.neg, 0) = &all.reg;
 161
 162   PUT_CODE (&all.mult, MULT);
 163   XEXP (&all.mult, 0) = &all.reg;
 164   XEXP (&all.mult, 1) = &all.reg;
 165
 166   PUT_CODE (&all.sdiv, DIV);
 167   XEXP (&all.sdiv, 0) = &all.reg;
 168   XEXP (&all.sdiv, 1) = &all.reg;
 169
 170   PUT_CODE (&all.udiv, UDIV);
 171   XEXP (&all.udiv, 0) = &all.reg;
 172   XEXP (&all.udiv, 1) = &all.reg;
 173
 174   PUT_CODE (&all.sdiv_32, DIV);
 175   XEXP (&all.sdiv_32, 0) = &all.reg;
 176   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
 177
 178   PUT_CODE (&all.smod_32, MOD);
 179   XEXP (&all.smod_32, 0) = &all.reg;
 180   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 181
 182   PUT_CODE (&all.zext, ZERO_EXTEND);
 183   XEXP (&all.zext, 0) = &all.reg;
 184
 185   PUT_CODE (&all.wide_mult, MULT);
 186   XEXP (&all.wide_mult, 0) = &all.zext;
 187   XEXP (&all.wide_mult, 1) = &all.zext;
 188
 189   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 190   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 191
 192   PUT_CODE (&all.wide_trunc, TRUNCATE);
 193   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 194
 195   PUT_CODE (&all.shift, ASHIFT);
 196   XEXP (&all.shift, 0) = &all.reg;
 197
 198   PUT_CODE (&all.shift_mult, MULT);
 199   XEXP (&all.shift_mult, 0) = &all.reg;
 200
 201   PUT_CODE (&all.shift_add, PLUS);
 202   XEXP (&all.shift_add, 0) = &all.shift_mult;
 203   XEXP (&all.shift_add, 1) = &all.reg;
 204
 205   PUT_CODE (&all.shift_sub, MINUS);
 206   XEXP (&all.shift_sub, 0) = &all.shift_mult;
 207   XEXP (&all.shift_sub, 1) = &all.reg;
 208
 209   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
 210        mode != VOIDmode;
 211        mode = GET_MODE_WIDER_MODE (mode))
 212     {
 213       PUT_MODE (&all.reg, mode);
 214       PUT_MODE (&all.plus, mode);
 215       PUT_MODE (&all.neg, mode);
 216       PUT_MODE (&all.mult, mode);
 217       PUT_MODE (&all.sdiv, mode);
 218       PUT_MODE (&all.udiv, mode);
 219       PUT_MODE (&all.sdiv_32, mode);
 220       PUT_MODE (&all.smod_32, mode);
 221       PUT_MODE (&all.wide_trunc, mode);
 222       PUT_MODE (&all.shift, mode);
 223       PUT_MODE (&all.shift_mult, mode);
 224       PUT_MODE (&all.shift_add, mode);
 225       PUT_MODE (&all.shift_sub, mode);
 226
 227       add_cost[mode] = rtx_cost (&all.plus, SET);
 228       neg_cost[mode] = rtx_cost (&all.neg, SET);
 229       mul_cost[mode] = rtx_cost (&all.mult, SET);
 230       sdiv_cost[mode] = rtx_cost (&all.sdiv, SET);
 231       udiv_cost[mode] = rtx_cost (&all.udiv, SET);
 232
 233       sdiv_pow2_cheap[mode] = (rtx_cost (&all.sdiv_32, SET)
 234                                <= 2 * add_cost[mode]);
 235       smod_pow2_cheap[mode] = (rtx_cost (&all.smod_32, SET)
 236                                <= 4 * add_cost[mode]);
 237
 238       wider_mode = GET_MODE_WIDER_MODE (mode);
 239       if (wider_mode != VOIDmode)
 240         {
 241           PUT_MODE (&all.zext, wider_mode);
 242           PUT_MODE (&all.wide_mult, wider_mode);
 243           PUT_MODE (&all.wide_lshr, wider_mode);
 244           XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
 245
 246           mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
 247           mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
 248         }
 249
 250       shift_cost[mode][0] = 0;
 251       shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
 252
 253       n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
 254       for (m = 1; m < n; m++)
 255         {
 256           XEXP (&all.shift, 1) = cint[m];
 257           XEXP (&all.shift_mult, 1) = pow2[m];
 258
 259           shift_cost[mode][m] = rtx_cost (&all.shift, SET);
 260           shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
 261           shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
 262         }
 263     }
 264 }
 265
 266 /* Return an rtx representing minus the value of X.
 267    MODE is the intended mode of the result,
 268    useful if X is a CONST_INT.  */
 269
 270 rtx
 271 negate_rtx (enum machine_mode mode, rtx x)
 272 {
 273   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 274
 275   if (result == 0)
 276     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 277
 278   return result;
 279 }
 280
 281 /* Report on the availability of insv/extv/extzv and the desired mode
 282    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 283    is false; else the mode of the specified operand.  If OPNO is -1,
 284    all the caller cares about is whether the insn is available.  */
 285 enum machine_mode
 286 mode_for_extraction (enum extraction_pattern pattern, int opno)
 287 {
 288   const struct insn_data *data;
 289
 290   switch (pattern)
 291     {
 292     case EP_insv:
 293       if (HAVE_insv)
 294         {
 295           data = &insn_data[CODE_FOR_insv];
 296           break;
 297         }
 298       return MAX_MACHINE_MODE;
 299
 300     case EP_extv:
 301       if (HAVE_extv)
 302         {
 303           data = &insn_data[CODE_FOR_extv];
 304           break;
 305         }
 306       return MAX_MACHINE_MODE;
 307
 308     case EP_extzv:
 309       if (HAVE_extzv)
 310         {
 311           data = &insn_data[CODE_FOR_extzv];
 312           break;
 313         }
 314       return MAX_MACHINE_MODE;
 315
 316     default:
 317       gcc_unreachable ();
 318     }
 319
 320   if (opno == -1)
 321     return VOIDmode;
 322
 323   /* Everyone who uses this function used to follow it with
 324      if (result == VOIDmode) result = word_mode; */
 325   if (data->operand[opno].mode == VOIDmode)
 326     return word_mode;
 327   return data->operand[opno].mode;
 328 }
 329
 330 /* Return true if X, of mode MODE, matches the predicate for operand
 331    OPNO of instruction ICODE.  Allow volatile memories, regardless of
 332    the ambient volatile_ok setting.  */
 333
 334 static bool
 335 check_predicate_volatile_ok (enum insn_code icode, int opno,
 336                              rtx x, enum machine_mode mode)
 337 {
 338   bool save_volatile_ok, result;
 339
 340   save_volatile_ok = volatile_ok;
 341   result = insn_data[(int) icode].operand[opno].predicate (x, mode);
 342   volatile_ok = save_volatile_ok;
 343   return result;
 344 }
 345 \f
 346 /* A subroutine of store_bit_field, with the same arguments.  Return true
 347    if the operation could be implemented.
 348
 349    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 350    no other way of implementing the operation.  If FALLBACK_P is false,
 351    return false instead.  */
 352
 353 static bool
 354 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 355                    unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 356                    rtx value, bool fallback_p)
 357 {
 358   unsigned int unit
 359     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
 360   unsigned HOST_WIDE_INT offset, bitpos;
 361   rtx op0 = str_rtx;
 362   int byte_offset;
 363   rtx orig_value;
 364
 365   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 366
 367   while (GET_CODE (op0) == SUBREG)
 368     {
 369       /* The following line once was done only if WORDS_BIG_ENDIAN,
 370          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 371          meaningful at a much higher level; when structures are copied
 372          between memory and regs, the higher-numbered regs
 373          always get higher addresses.  */
 374       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 375       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 376
 377       byte_offset = 0;
 378
 379       /* Paradoxical subregs need special handling on big endian machines.  */
 380       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 381         {
 382           int difference = inner_mode_size - outer_mode_size;
 383
 384           if (WORDS_BIG_ENDIAN)
 385             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 386           if (BYTES_BIG_ENDIAN)
 387             byte_offset += difference % UNITS_PER_WORD;
 388         }
 389       else
 390         byte_offset = SUBREG_BYTE (op0);
 391
 392       bitnum += byte_offset * BITS_PER_UNIT;
 393       op0 = SUBREG_REG (op0);
 394     }
 395
 396   /* No action is needed if the target is a register and if the field
 397      lies completely outside that register.  This can occur if the source
 398      code contains an out-of-bounds access to a small array.  */
 399   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 400     return true;
 401
 402   /* Use vec_set patterns for inserting parts of vectors whenever
 403      available.  */
 404   if (VECTOR_MODE_P (GET_MODE (op0))
 405       && !MEM_P (op0)
 406       && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code
 407           != CODE_FOR_nothing)
 408       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 409       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 410       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 411     {
 412       enum machine_mode outermode = GET_MODE (op0);
 413       enum machine_mode innermode = GET_MODE_INNER (outermode);
 414       int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code;
 415       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 416       rtx rtxpos = GEN_INT (pos);
 417       rtx src = value;
 418       rtx dest = op0;
 419       rtx pat, seq;
 420       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
 421       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
 422       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
 423
 424       start_sequence ();
 425
 426       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
 427         src = copy_to_mode_reg (mode1, src);
 428
 429       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
 430         rtxpos = copy_to_mode_reg (mode1, rtxpos);
 431
 432       /* We could handle this, but we should always be called with a pseudo
 433          for our targets and all insns should take them as outputs.  */
 434       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
 435                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
 436                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
 437       pat = GEN_FCN (icode) (dest, src, rtxpos);
 438       seq = get_insns ();
 439       end_sequence ();
 440       if (pat)
 441         {
 442           emit_insn (seq);
 443           emit_insn (pat);
 444           return true;
 445         }
 446     }
 447
 448   /* If the target is a register, overwriting the entire object, or storing
 449      a full-word or multi-word field can be done with just a SUBREG.
 450
 451      If the target is memory, storing any naturally aligned field can be
 452      done with a simple store.  For targets that support fast unaligned
 453      memory, any naturally sized, unit aligned field can be done directly.  */
 454
 455   offset = bitnum / unit;
 456   bitpos = bitnum % unit;
 457   byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 458                 + (offset * UNITS_PER_WORD);
 459
 460   if (bitpos == 0
 461       && bitsize == GET_MODE_BITSIZE (fieldmode)
 462       && (!MEM_P (op0)
 463           ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
 464              || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
 465              && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
 466           : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 467              || (offset * BITS_PER_UNIT % bitsize == 0
 468                  && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
 469     {
 470       if (MEM_P (op0))
 471         op0 = adjust_address (op0, fieldmode, offset);
 472       else if (GET_MODE (op0) != fieldmode)
 473         op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 474                                    byte_offset);
 475       emit_move_insn (op0, value);
 476       return true;
 477     }
 478
 479   /* Make sure we are playing with integral modes.  Pun with subregs
 480      if we aren't.  This must come after the entire register case above,
 481      since that case is valid for any mode.  The following cases are only
 482      valid for integral modes.  */
 483   {
 484     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 485     if (imode != GET_MODE (op0))
 486       {
 487         if (MEM_P (op0))
 488           op0 = adjust_address (op0, imode, 0);
 489         else
 490           {
 491             gcc_assert (imode != BLKmode);
 492             op0 = gen_lowpart (imode, op0);
 493           }
 494       }
 495   }
 496
 497   /* We may be accessing data outside the field, which means
 498      we can alias adjacent data.  */
 499   if (MEM_P (op0))
 500     {
 501       op0 = shallow_copy_rtx (op0);
 502       set_mem_alias_set (op0, 0);
 503       set_mem_expr (op0, 0);
 504     }
 505
 506   /* If OP0 is a register, BITPOS must count within a word.
 507      But as we have it, it counts within whatever size OP0 now has.
 508      On a bigendian machine, these are not the same, so convert.  */
 509   if (BYTES_BIG_ENDIAN
 510       && !MEM_P (op0)
 511       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
 512     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 513
 514   /* Storing an lsb-aligned field in a register
 515      can be done with a movestrict instruction.  */
 516
 517   if (!MEM_P (op0)
 518       && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
 519       && bitsize == GET_MODE_BITSIZE (fieldmode)
 520       && (optab_handler (movstrict_optab, fieldmode)->insn_code
 521           != CODE_FOR_nothing))
 522     {
 523       int icode = optab_handler (movstrict_optab, fieldmode)->insn_code;
 524       rtx insn;
 525       rtx start = get_last_insn ();
 526
 527       /* Get appropriate low part of the value being stored.  */
 528       if (GET_CODE (value) == CONST_INT || REG_P (value))
 529         value = gen_lowpart (fieldmode, value);
 530       else if (!(GET_CODE (value) == SYMBOL_REF
 531                  || GET_CODE (value) == LABEL_REF
 532                  || GET_CODE (value) == CONST))
 533         value = convert_to_mode (fieldmode, value, 0);
 534
 535       if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
 536         value = copy_to_mode_reg (fieldmode, value);
 537
 538       if (GET_CODE (op0) == SUBREG)
 539         {
 540           /* Else we've got some float mode source being extracted into
 541              a different float mode destination -- this combination of
 542              subregs results in Severe Tire Damage.  */
 543           gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
 544                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 545                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 546           op0 = SUBREG_REG (op0);
 547         }
 548
 549       insn = (GEN_FCN (icode)
 550                  (gen_rtx_SUBREG (fieldmode, op0,
 551                                   (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
 552                                   + (offset * UNITS_PER_WORD)),
 553                                   value));
 554       if (insn)
 555         {
 556           emit_insn (insn);
 557           return true;
 558         }
 559       delete_insns_since (start);
 560     }
 561
 562   /* Handle fields bigger than a word.  */
 563
 564   if (bitsize > BITS_PER_WORD)
 565     {
 566       /* Here we transfer the words of the field
 567          in the order least significant first.
 568          This is because the most significant word is the one which may
 569          be less than full.
 570          However, only do that if the value is not BLKmode.  */
 571
 572       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 573       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 574       unsigned int i;
 575       rtx last;
 576
 577       /* This is the mode we must force value to, so that there will be enough
 578          subwords to extract.  Note that fieldmode will often (always?) be
 579          VOIDmode, because that is what store_field uses to indicate that this
 580          is a bit field, but passing VOIDmode to operand_subword_force
 581          is not allowed.  */
 582       fieldmode = GET_MODE (value);
 583       if (fieldmode == VOIDmode)
 584         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 585
 586       last = get_last_insn ();
 587       for (i = 0; i < nwords; i++)
 588         {
 589           /* If I is 0, use the low-order word in both field and target;
 590              if I is 1, use the next to lowest word; and so on.  */
 591           unsigned int wordnum = (backwards ? nwords - i - 1 : i);
 592           unsigned int bit_offset = (backwards
 593                                      ? MAX ((int) bitsize - ((int) i + 1)
 594                                             * BITS_PER_WORD,
 595                                             0)
 596                                      : (int) i * BITS_PER_WORD);
 597           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 598
 599           if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
 600                                             bitsize - i * BITS_PER_WORD),
 601                                   bitnum + bit_offset, word_mode,
 602                                   value_word, fallback_p))
 603             {
 604               delete_insns_since (last);
 605               return false;
 606             }
 607         }
 608       return true;
 609     }
 610
 611   /* From here on we can assume that the field to be stored in is
 612      a full-word (whatever type that is), since it is shorter than a word.  */
 613
 614   /* OFFSET is the number of words or bytes (UNIT says which)
 615      from STR_RTX to the first word or byte containing part of the field.  */
 616
 617   if (!MEM_P (op0))
 618     {
 619       if (offset != 0
 620           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 621         {
 622           if (!REG_P (op0))
 623             {
 624               /* Since this is a destination (lvalue), we can't copy
 625                  it to a pseudo.  We can remove a SUBREG that does not
 626                  change the size of the operand.  Such a SUBREG may
 627                  have been added above.  */
 628               gcc_assert (GET_CODE (op0) == SUBREG
 629                           && (GET_MODE_SIZE (GET_MODE (op0))
 630                               == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
 631               op0 = SUBREG_REG (op0);
 632             }
 633           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
 634                                 op0, (offset * UNITS_PER_WORD));
 635         }
 636       offset = 0;
 637     }
 638
 639   /* If VALUE has a floating-point or complex mode, access it as an
 640      integer of the corresponding size.  This can occur on a machine
 641      with 64 bit registers that uses SFmode for float.  It can also
 642      occur for unaligned float or complex fields.  */
 643   orig_value = value;
 644   if (GET_MODE (value) != VOIDmode
 645       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 646       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 647     {
 648       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 649       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 650     }
 651
 652   /* Now OFFSET is nonzero only if OP0 is memory
 653      and is therefore always measured in bytes.  */
 654
 655   if (HAVE_insv
 656       && GET_MODE (value) != BLKmode
 657       && bitsize > 0
 658       && GET_MODE_BITSIZE (op_mode) >= bitsize
 659       && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
 660             && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
 661       && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
 662                                                         VOIDmode)
 663       && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
 664     {
 665       int xbitpos = bitpos;
 666       rtx value1;
 667       rtx xop0 = op0;
 668       rtx last = get_last_insn ();
 669       rtx pat;
 670
 671       /* Add OFFSET into OP0's address.  */
 672       if (MEM_P (xop0))
 673         xop0 = adjust_address (xop0, byte_mode, offset);
 674
 675       /* If xop0 is a register, we need it in OP_MODE
 676          to make it acceptable to the format of insv.  */
 677       if (GET_CODE (xop0) == SUBREG)
 678         /* We can't just change the mode, because this might clobber op0,
 679            and we will need the original value of op0 if insv fails.  */
 680         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 681       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 682         xop0 = gen_rtx_SUBREG (op_mode, xop0, 0);
 683
 684       /* On big-endian machines, we count bits from the most significant.
 685          If the bit field insn does not, we must invert.  */
 686
 687       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 688         xbitpos = unit - bitsize - xbitpos;
 689
 690       /* We have been counting XBITPOS within UNIT.
 691          Count instead within the size of the register.  */
 692       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
 693         xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
 694
 695       unit = GET_MODE_BITSIZE (op_mode);
 696
 697       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 698       value1 = value;
 699       if (GET_MODE (value) != op_mode)
 700         {
 701           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 702             {
 703               /* Optimization: Don't bother really extending VALUE
 704                  if it has all the bits we will actually use.  However,
 705                  if we must narrow it, be sure we do it correctly.  */
 706
 707               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 708                 {
 709                   rtx tmp;
 710
 711                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 712                   if (! tmp)
 713                     tmp = simplify_gen_subreg (op_mode,
 714                                                force_reg (GET_MODE (value),
 715                                                           value1),
 716                                                GET_MODE (value), 0);
 717                   value1 = tmp;
 718                 }
 719               else
 720                 value1 = gen_lowpart (op_mode, value1);
 721             }
 722           else if (GET_CODE (value) == CONST_INT)
 723             value1 = gen_int_mode (INTVAL (value), op_mode);
 724           else
 725             /* Parse phase is supposed to make VALUE's data type
 726                match that of the component reference, which is a type
 727                at least as wide as the field; so VALUE should have
 728                a mode that corresponds to that type.  */
 729             gcc_assert (CONSTANT_P (value));
 730         }
 731
 732       /* If this machine's insv insists on a register,
 733          get VALUE1 into a register.  */
 734       if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
 735              (value1, op_mode)))
 736         value1 = force_reg (op_mode, value1);
 737
 738       pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
 739       if (pat)
 740         {
 741           emit_insn (pat);
 742           return true;
 743         }
 744       delete_insns_since (last);
 745     }
 746
 747   /* If OP0 is a memory, try copying it to a register and seeing if a
 748      cheap register alternative is available.  */
 749   if (HAVE_insv && MEM_P (op0))
 750     {
 751       enum machine_mode bestmode;
 752
 753       /* Get the mode to use for inserting into this field.  If OP0 is
 754          BLKmode, get the smallest mode consistent with the alignment. If
 755          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 756          mode. Otherwise, use the smallest mode containing the field.  */
 757
 758       if (GET_MODE (op0) == BLKmode
 759           || (op_mode != MAX_MACHINE_MODE
 760               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 761         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
 762                                   (op_mode == MAX_MACHINE_MODE
 763                                    ? VOIDmode : op_mode),
 764                                   MEM_VOLATILE_P (op0));
 765       else
 766         bestmode = GET_MODE (op0);
 767
 768       if (bestmode != VOIDmode
 769           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 770           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 771                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 772         {
 773           rtx last, tempreg, xop0;
 774           unsigned HOST_WIDE_INT xoffset, xbitpos;
 775
 776           last = get_last_insn ();
 777
 778           /* Adjust address to point to the containing unit of
 779              that mode.  Compute the offset as a multiple of this unit,
 780              counting in bytes.  */
 781           unit = GET_MODE_BITSIZE (bestmode);
 782           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 783           xbitpos = bitnum % unit;
 784           xop0 = adjust_address (op0, bestmode, xoffset);
 785
 786           /* Fetch that unit, store the bitfield in it, then store
 787              the unit.  */
 788           tempreg = copy_to_reg (xop0);
 789           if (store_bit_field_1 (tempreg, bitsize, xbitpos,
 790                                  fieldmode, orig_value, false))
 791             {
 792               emit_move_insn (xop0, tempreg);
 793               return true;
 794             }
 795           delete_insns_since (last);
 796         }
 797     }
 798
 799   if (!fallback_p)
 800     return false;
 801
 802   store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
 803   return true;
 804 }
 805
 806 /* Generate code to store value from rtx VALUE
 807    into a bit-field within structure STR_RTX
 808    containing BITSIZE bits starting at bit BITNUM.
 809    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.  */
 810
 811 void
 812 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 813                  unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
 814                  rtx value)
 815 {
 816   if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
 817     gcc_unreachable ();
 818 }
 819 \f
 820 /* Use shifts and boolean operations to store VALUE
 821    into a bit field of width BITSIZE
 822    in a memory location specified by OP0 except offset by OFFSET bytes.
 823      (OFFSET must be 0 if OP0 is a register.)
 824    The field starts at position BITPOS within the byte.
 825     (If OP0 is a register, it may be a full word or a narrower mode,
 826      but BITPOS still counts within a full word,
 827      which is significant on bigendian machines.)  */
 828
 829 static void
 830 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
 831                        unsigned HOST_WIDE_INT bitsize,
 832                        unsigned HOST_WIDE_INT bitpos, rtx value)
 833 {
 834   enum machine_mode mode;
 835   unsigned int total_bits = BITS_PER_WORD;
 836   rtx temp;
 837   int all_zero = 0;
 838   int all_one = 0;
 839
 840   /* There is a case not handled here:
 841      a structure with a known alignment of just a halfword
 842      and a field split across two aligned halfwords within the structure.
 843      Or likewise a structure with a known alignment of just a byte
 844      and a field split across two bytes.
 845      Such cases are not supposed to be able to occur.  */
 846
 847   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
 848     {
 849       gcc_assert (!offset);
 850       /* Special treatment for a bit field split across two registers.  */
 851       if (bitsize + bitpos > BITS_PER_WORD)
 852         {
 853           store_split_bit_field (op0, bitsize, bitpos, value);
 854           return;
 855         }
 856     }
 857   else
 858     {
 859       /* Get the proper mode to use for this field.  We want a mode that
 860          includes the entire field.  If such a mode would be larger than
 861          a word, we won't be doing the extraction the normal way.
 862          We don't want a mode bigger than the destination.  */
 863
 864       mode = GET_MODE (op0);
 865       if (GET_MODE_BITSIZE (mode) == 0
 866           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 867         mode = word_mode;
 868       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
 869                             MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 870
 871       if (mode == VOIDmode)
 872         {
 873           /* The only way this should occur is if the field spans word
 874              boundaries.  */
 875           store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
 876                                  value);
 877           return;
 878         }
 879
 880       total_bits = GET_MODE_BITSIZE (mode);
 881
 882       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
 883          be in the range 0 to total_bits-1, and put any excess bytes in
 884          OFFSET.  */
 885       if (bitpos >= total_bits)
 886         {
 887           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
 888           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
 889                      * BITS_PER_UNIT);
 890         }
 891
 892       /* Get ref to an aligned byte, halfword, or word containing the field.
 893          Adjust BITPOS to be position within a word,
 894          and OFFSET to be the offset of that word.
 895          Then alter OP0 to refer to that word.  */
 896       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
 897       offset -= (offset % (total_bits / BITS_PER_UNIT));
 898       op0 = adjust_address (op0, mode, offset);
 899     }
 900
 901   mode = GET_MODE (op0);
 902
 903   /* Now MODE is either some integral mode for a MEM as OP0,
 904      or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
 905      The bit field is contained entirely within OP0.
 906      BITPOS is the starting bit number within OP0.
 907      (OP0's mode may actually be narrower than MODE.)  */
 908
 909   if (BYTES_BIG_ENDIAN)
 910       /* BITPOS is the distance between our msb
 911          and that of the containing datum.
 912          Convert it to the distance from the lsb.  */
 913       bitpos = total_bits - bitsize - bitpos;
 914
 915   /* Now BITPOS is always the distance between our lsb
 916      and that of OP0.  */
 917
 918   /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
 919      we must first convert its mode to MODE.  */
 920
 921   if (GET_CODE (value) == CONST_INT)
 922     {
 923       HOST_WIDE_INT v = INTVAL (value);
 924
 925       if (bitsize < HOST_BITS_PER_WIDE_INT)
 926         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
 927
 928       if (v == 0)
 929         all_zero = 1;
 930       else if ((bitsize < HOST_BITS_PER_WIDE_INT
 931                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
 932                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
 933         all_one = 1;
 934
 935       value = lshift_value (mode, value, bitpos, bitsize);
 936     }
 937   else
 938     {
 939       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
 940                       && bitpos + bitsize != GET_MODE_BITSIZE (mode));
 941
 942       if (GET_MODE (value) != mode)
 943         {
 944           if ((REG_P (value) || GET_CODE (value) == SUBREG)
 945               && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
 946             value = gen_lowpart (mode, value);
 947           else
 948             value = convert_to_mode (mode, value, 1);
 949         }
 950
 951       if (must_and)
 952         value = expand_binop (mode, and_optab, value,
 953                               mask_rtx (mode, 0, bitsize, 0),
 954                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
 955       if (bitpos > 0)
 956         value = expand_shift (LSHIFT_EXPR, mode, value,
 957                               build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
 958     }
 959
 960   /* Now clear the chosen bits in OP0,
 961      except that if VALUE is -1 we need not bother.  */
 962   /* We keep the intermediates in registers to allow CSE to combine
 963      consecutive bitfield assignments.  */
 964
 965   temp = force_reg (mode, op0);
 966
 967   if (! all_one)
 968     {
 969       temp = expand_binop (mode, and_optab, temp,
 970                            mask_rtx (mode, bitpos, bitsize, 1),
 971                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 972       temp = force_reg (mode, temp);
 973     }
 974
 975   /* Now logical-or VALUE into OP0, unless it is zero.  */
 976
 977   if (! all_zero)
 978     {
 979       temp = expand_binop (mode, ior_optab, temp, value,
 980                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
 981       temp = force_reg (mode, temp);
 982     }
 983
 984   if (op0 != temp)
 985     {
 986       op0 = copy_rtx (op0);
 987       emit_move_insn (op0, temp);
 988     }
 989 }
 990 \f
 991 /* Store a bit field that is split across multiple accessible memory objects.
 992
 993    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
 994    BITSIZE is the field width; BITPOS the position of its first bit
 995    (within the word).
 996    VALUE is the value to store.
 997
 998    This does not yet handle fields wider than BITS_PER_WORD.  */
 999
1000 static void
1001 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1002                        unsigned HOST_WIDE_INT bitpos, rtx value)
1003 {
1004   unsigned int unit;
1005   unsigned int bitsdone = 0;
1006
1007   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1008      much at a time.  */
1009   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1010     unit = BITS_PER_WORD;
1011   else
1012     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1013
1014   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1015      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1016      that VALUE might be a floating-point constant.  */
1017   if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
1018     {
1019       rtx word = gen_lowpart_common (word_mode, value);
1020
1021       if (word && (value != word))
1022         value = word;
1023       else
1024         value = gen_lowpart_common (word_mode,
1025                                     force_reg (GET_MODE (value) != VOIDmode
1026                                                ? GET_MODE (value)
1027                                                : word_mode, value));
1028     }
1029
1030   while (bitsdone < bitsize)
1031     {
1032       unsigned HOST_WIDE_INT thissize;
1033       rtx part, word;
1034       unsigned HOST_WIDE_INT thispos;
1035       unsigned HOST_WIDE_INT offset;
1036
1037       offset = (bitpos + bitsdone) / unit;
1038       thispos = (bitpos + bitsdone) % unit;
1039
1040       /* THISSIZE must not overrun a word boundary.  Otherwise,
1041          store_fixed_bit_field will call us again, and we will mutually
1042          recurse forever.  */
1043       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1044       thissize = MIN (thissize, unit - thispos);
1045
1046       if (BYTES_BIG_ENDIAN)
1047         {
1048           int total_bits;
1049
1050           /* We must do an endian conversion exactly the same way as it is
1051              done in extract_bit_field, so that the two calls to
1052              extract_fixed_bit_field will have comparable arguments.  */
1053           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1054             total_bits = BITS_PER_WORD;
1055           else
1056             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1057
1058           /* Fetch successively less significant portions.  */
1059           if (GET_CODE (value) == CONST_INT)
1060             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1061                              >> (bitsize - bitsdone - thissize))
1062                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1063           else
1064             /* The args are chosen so that the last part includes the
1065                lsb.  Give extract_bit_field the value it needs (with
1066                endianness compensation) to fetch the piece we want.  */
1067             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1068                                             total_bits - bitsize + bitsdone,
1069                                             NULL_RTX, 1);
1070         }
1071       else
1072         {
1073           /* Fetch successively more significant portions.  */
1074           if (GET_CODE (value) == CONST_INT)
1075             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1076                              >> bitsdone)
1077                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1078           else
1079             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1080                                             bitsdone, NULL_RTX, 1);
1081         }
1082
1083       /* If OP0 is a register, then handle OFFSET here.
1084
1085          When handling multiword bitfields, extract_bit_field may pass
1086          down a word_mode SUBREG of a larger REG for a bitfield that actually
1087          crosses a word boundary.  Thus, for a SUBREG, we must find
1088          the current word starting from the base register.  */
1089       if (GET_CODE (op0) == SUBREG)
1090         {
1091           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1092           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1093                                         GET_MODE (SUBREG_REG (op0)));
1094           offset = 0;
1095         }
1096       else if (REG_P (op0))
1097         {
1098           word = operand_subword_force (op0, offset, GET_MODE (op0));
1099           offset = 0;
1100         }
1101       else
1102         word = op0;
1103
1104       /* OFFSET is in UNITs, and UNIT is in bits.
1105          store_fixed_bit_field wants offset in bytes.  */
1106       store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1107                              thispos, part);
1108       bitsdone += thissize;
1109     }
1110 }
1111 \f
1112 /* A subroutine of extract_bit_field_1 that converts return value X
1113    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1114    to extract_bit_field.  */
1115
1116 static rtx
1117 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1118                              enum machine_mode tmode, bool unsignedp)
1119 {
1120   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1121     return x;
1122
1123   /* If the x mode is not a scalar integral, first convert to the
1124      integer mode of that size and then access it as a floating-point
1125      value via a SUBREG.  */
1126   if (!SCALAR_INT_MODE_P (tmode))
1127     {
1128       enum machine_mode smode;
1129
1130       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1131       x = convert_to_mode (smode, x, unsignedp);
1132       x = force_reg (smode, x);
1133       return gen_lowpart (tmode, x);
1134     }
1135
1136   return convert_to_mode (tmode, x, unsignedp);
1137 }
1138
1139 /* A subroutine of extract_bit_field, with the same arguments.
1140    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1141    if we can find no other means of implementing the operation.
1142    if FALLBACK_P is false, return NULL instead.  */
1143
1144 static rtx
1145 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1146                      unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1147                      enum machine_mode mode, enum machine_mode tmode,
1148                      bool fallback_p)
1149 {
1150   unsigned int unit
1151     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1152   unsigned HOST_WIDE_INT offset, bitpos;
1153   rtx op0 = str_rtx;
1154   enum machine_mode int_mode;
1155   enum machine_mode ext_mode;
1156   enum machine_mode mode1;
1157   enum insn_code icode;
1158   int byte_offset;
1159
1160   if (tmode == VOIDmode)
1161     tmode = mode;
1162
1163   while (GET_CODE (op0) == SUBREG)
1164     {
1165       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1166       op0 = SUBREG_REG (op0);
1167     }
1168
1169   /* If we have an out-of-bounds access to a register, just return an
1170      uninitialized register of the required mode.  This can occur if the
1171      source code contains an out-of-bounds access to a small array.  */
1172   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1173     return gen_reg_rtx (tmode);
1174
1175   if (REG_P (op0)
1176       && mode == GET_MODE (op0)
1177       && bitnum == 0
1178       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1179     {
1180       /* We're trying to extract a full register from itself.  */
1181       return op0;
1182     }
1183
1184   /* See if we can get a better vector mode before extracting.  */
1185   if (VECTOR_MODE_P (GET_MODE (op0))
1186       && !MEM_P (op0)
1187       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1188     {
1189       enum machine_mode new_mode;
1190       int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1191
1192       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1193         new_mode = MIN_MODE_VECTOR_FLOAT;
1194       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1195         new_mode = MIN_MODE_VECTOR_FRACT;
1196       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1197         new_mode = MIN_MODE_VECTOR_UFRACT;
1198       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1199         new_mode = MIN_MODE_VECTOR_ACCUM;
1200       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1201         new_mode = MIN_MODE_VECTOR_UACCUM;
1202       else
1203         new_mode = MIN_MODE_VECTOR_INT;
1204
1205       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1206         if (GET_MODE_NUNITS (new_mode) == nunits
1207             && GET_MODE_INNER (new_mode) == tmode
1208             && targetm.vector_mode_supported_p (new_mode))
1209           break;
1210       if (new_mode != VOIDmode)
1211         op0 = gen_lowpart (new_mode, op0);
1212     }
1213
1214   /* Use vec_extract patterns for extracting parts of vectors whenever
1215      available.  */
1216   if (VECTOR_MODE_P (GET_MODE (op0))
1217       && !MEM_P (op0)
1218       && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code
1219           != CODE_FOR_nothing)
1220       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1221           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1222     {
1223       enum machine_mode outermode = GET_MODE (op0);
1224       enum machine_mode innermode = GET_MODE_INNER (outermode);
1225       int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code;
1226       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1227       rtx rtxpos = GEN_INT (pos);
1228       rtx src = op0;
1229       rtx dest = NULL, pat, seq;
1230       enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1231       enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1232       enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1233
1234       if (innermode == tmode || innermode == mode)
1235         dest = target;
1236
1237       if (!dest)
1238         dest = gen_reg_rtx (innermode);
1239
1240       start_sequence ();
1241
1242       if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1243         dest = copy_to_mode_reg (mode0, dest);
1244
1245       if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1246         src = copy_to_mode_reg (mode1, src);
1247
1248       if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1249         rtxpos = copy_to_mode_reg (mode1, rtxpos);
1250
1251       /* We could handle this, but we should always be called with a pseudo
1252          for our targets and all insns should take them as outputs.  */
1253       gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1254                   && (*insn_data[icode].operand[1].predicate) (src, mode1)
1255                   && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1256
1257       pat = GEN_FCN (icode) (dest, src, rtxpos);
1258       seq = get_insns ();
1259       end_sequence ();
1260       if (pat)
1261         {
1262           emit_insn (seq);
1263           emit_insn (pat);
1264           if (mode0 != mode)
1265             return gen_lowpart (tmode, dest);
1266           return dest;
1267         }
1268     }
1269
1270   /* Make sure we are playing with integral modes.  Pun with subregs
1271      if we aren't.  */
1272   {
1273     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1274     if (imode != GET_MODE (op0))
1275       {
1276         if (MEM_P (op0))
1277           op0 = adjust_address (op0, imode, 0);
1278         else
1279           {
1280             gcc_assert (imode != BLKmode);
1281             op0 = gen_lowpart (imode, op0);
1282
1283             /* If we got a SUBREG, force it into a register since we
1284                aren't going to be able to do another SUBREG on it.  */
1285             if (GET_CODE (op0) == SUBREG)
1286               op0 = force_reg (imode, op0);
1287           }
1288       }
1289   }
1290
1291   /* We may be accessing data outside the field, which means
1292      we can alias adjacent data.  */
1293   if (MEM_P (op0))
1294     {
1295       op0 = shallow_copy_rtx (op0);
1296       set_mem_alias_set (op0, 0);
1297       set_mem_expr (op0, 0);
1298     }
1299
1300   /* Extraction of a full-word or multi-word value from a structure
1301      in a register or aligned memory can be done with just a SUBREG.
1302      A subword value in the least significant part of a register
1303      can also be extracted with a SUBREG.  For this, we need the
1304      byte offset of the value in op0.  */
1305
1306   bitpos = bitnum % unit;
1307   offset = bitnum / unit;
1308   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1309
1310   /* If OP0 is a register, BITPOS must count within a word.
1311      But as we have it, it counts within whatever size OP0 now has.
1312      On a bigendian machine, these are not the same, so convert.  */
1313   if (BYTES_BIG_ENDIAN
1314       && !MEM_P (op0)
1315       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1316     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1317
1318   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1319      If that's wrong, the solution is to test for it and set TARGET to 0
1320      if needed.  */
1321
1322   /* Only scalar integer modes can be converted via subregs.  There is an
1323      additional problem for FP modes here in that they can have a precision
1324      which is different from the size.  mode_for_size uses precision, but
1325      we want a mode based on the size, so we must avoid calling it for FP
1326      modes.  */
1327   mode1  = (SCALAR_INT_MODE_P (tmode)
1328             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1329             : mode);
1330
1331   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1332         && bitpos % BITS_PER_WORD == 0)
1333        || (mode1 != BLKmode
1334            /* ??? The big endian test here is wrong.  This is correct
1335               if the value is in a register, and if mode_for_size is not
1336               the same mode as op0.  This causes us to get unnecessarily
1337               inefficient code from the Thumb port when -mbig-endian.  */
1338            && (BYTES_BIG_ENDIAN
1339                ? bitpos + bitsize == BITS_PER_WORD
1340                : bitpos == 0)))
1341       && ((!MEM_P (op0)
1342            && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1343                                      GET_MODE_BITSIZE (GET_MODE (op0)))
1344            && GET_MODE_SIZE (mode1) != 0
1345            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1346           || (MEM_P (op0)
1347               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1348                   || (offset * BITS_PER_UNIT % bitsize == 0
1349                       && MEM_ALIGN (op0) % bitsize == 0)))))
1350     {
1351       if (MEM_P (op0))
1352         op0 = adjust_address (op0, mode1, offset);
1353       else if (mode1 != GET_MODE (op0))
1354         {
1355           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1356                                          byte_offset);
1357           if (sub == NULL)
1358             goto no_subreg_mode_swap;
1359           op0 = sub;
1360         }
1361       if (mode1 != mode)
1362         return convert_to_mode (tmode, op0, unsignedp);
1363       return op0;
1364     }
1365  no_subreg_mode_swap:
1366
1367   /* Handle fields bigger than a word.  */
1368
1369   if (bitsize > BITS_PER_WORD)
1370     {
1371       /* Here we transfer the words of the field
1372          in the order least significant first.
1373          This is because the most significant word is the one which may
1374          be less than full.  */
1375
1376       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1377       unsigned int i;
1378
1379       if (target == 0 || !REG_P (target))
1380         target = gen_reg_rtx (mode);
1381
1382       /* Indicate for flow that the entire target reg is being set.  */
1383       emit_clobber (target);
1384
1385       for (i = 0; i < nwords; i++)
1386         {
1387           /* If I is 0, use the low-order word in both field and target;
1388              if I is 1, use the next to lowest word; and so on.  */
1389           /* Word number in TARGET to use.  */
1390           unsigned int wordnum
1391             = (WORDS_BIG_ENDIAN
1392                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1393                : i);
1394           /* Offset from start of field in OP0.  */
1395           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1396                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1397                                                 * (int) BITS_PER_WORD))
1398                                      : (int) i * BITS_PER_WORD);
1399           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1400           rtx result_part
1401             = extract_bit_field (op0, MIN (BITS_PER_WORD,
1402                                            bitsize - i * BITS_PER_WORD),
1403                                  bitnum + bit_offset, 1, target_part, mode,
1404                                  word_mode);
1405
1406           gcc_assert (target_part);
1407
1408           if (result_part != target_part)
1409             emit_move_insn (target_part, result_part);
1410         }
1411
1412       if (unsignedp)
1413         {
1414           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1415              need to be zero'd out.  */
1416           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1417             {
1418               unsigned int i, total_words;
1419
1420               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1421               for (i = nwords; i < total_words; i++)
1422                 emit_move_insn
1423                   (operand_subword (target,
1424                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1425                                     1, VOIDmode),
1426                    const0_rtx);
1427             }
1428           return target;
1429         }
1430
1431       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1432       target = expand_shift (LSHIFT_EXPR, mode, target,
1433                              build_int_cst (NULL_TREE,
1434                                             GET_MODE_BITSIZE (mode) - bitsize),
1435                              NULL_RTX, 0);
1436       return expand_shift (RSHIFT_EXPR, mode, target,
1437                            build_int_cst (NULL_TREE,
1438                                           GET_MODE_BITSIZE (mode) - bitsize),
1439                            NULL_RTX, 0);
1440     }
1441
1442   /* From here on we know the desired field is smaller than a word.  */
1443
1444   /* Check if there is a correspondingly-sized integer field, so we can
1445      safely extract it as one size of integer, if necessary; then
1446      truncate or extend to the size that is wanted; then use SUBREGs or
1447      convert_to_mode to get one of the modes we really wanted.  */
1448
1449   int_mode = int_mode_for_mode (tmode);
1450   if (int_mode == BLKmode)
1451     int_mode = int_mode_for_mode (mode);
1452   /* Should probably push op0 out to memory and then do a load.  */
1453   gcc_assert (int_mode != BLKmode);
1454
1455   /* OFFSET is the number of words or bytes (UNIT says which)
1456      from STR_RTX to the first word or byte containing part of the field.  */
1457   if (!MEM_P (op0))
1458     {
1459       if (offset != 0
1460           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1461         {
1462           if (!REG_P (op0))
1463             op0 = copy_to_reg (op0);
1464           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1465                                 op0, (offset * UNITS_PER_WORD));
1466         }
1467       offset = 0;
1468     }
1469
1470   /* Now OFFSET is nonzero only for memory operands.  */
1471   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1472   icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1473   if (ext_mode != MAX_MACHINE_MODE
1474       && bitsize > 0
1475       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1476       /* If op0 is a register, we need it in EXT_MODE to make it
1477          acceptable to the format of ext(z)v.  */
1478       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1479       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1480            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1481       && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1482     {
1483       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1484       rtx bitsize_rtx, bitpos_rtx;
1485       rtx last = get_last_insn ();
1486       rtx xop0 = op0;
1487       rtx xtarget = target;
1488       rtx xspec_target = target;
1489       rtx xspec_target_subreg = 0;
1490       rtx pat;
1491
1492       /* If op0 is a register, we need it in EXT_MODE to make it
1493          acceptable to the format of ext(z)v.  */
1494       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1495         xop0 = gen_rtx_SUBREG (ext_mode, xop0, 0);
1496       if (MEM_P (xop0))
1497         /* Get ref to first byte containing part of the field.  */
1498         xop0 = adjust_address (xop0, byte_mode, xoffset);
1499
1500       /* On big-endian machines, we count bits from the most significant.
1501          If the bit field insn does not, we must invert.  */
1502       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1503         xbitpos = unit - bitsize - xbitpos;
1504
1505       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1506       if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1507         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1508
1509       unit = GET_MODE_BITSIZE (ext_mode);
1510
1511       if (xtarget == 0)
1512         xtarget = xspec_target = gen_reg_rtx (tmode);
1513
1514       if (GET_MODE (xtarget) != ext_mode)
1515         {
1516           if (REG_P (xtarget))
1517             {
1518               xtarget = gen_lowpart (ext_mode, xtarget);
1519               if (GET_MODE_SIZE (ext_mode)
1520                   > GET_MODE_SIZE (GET_MODE (xspec_target)))
1521                 xspec_target_subreg = xtarget;
1522             }
1523           else
1524             xtarget = gen_reg_rtx (ext_mode);
1525         }
1526
1527       /* If this machine's ext(z)v insists on a register target,
1528          make sure we have one.  */
1529       if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1530         xtarget = gen_reg_rtx (ext_mode);
1531
1532       bitsize_rtx = GEN_INT (bitsize);
1533       bitpos_rtx = GEN_INT (xbitpos);
1534
1535       pat = (unsignedp
1536              ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1537              : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1538       if (pat)
1539         {
1540           emit_insn (pat);
1541           if (xtarget == xspec_target)
1542             return xtarget;
1543           if (xtarget == xspec_target_subreg)
1544             return xspec_target;
1545           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1546         }
1547       delete_insns_since (last);
1548     }
1549
1550   /* If OP0 is a memory, try copying it to a register and seeing if a
1551      cheap register alternative is available.  */
1552   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1553     {
1554       enum machine_mode bestmode;
1555
1556       /* Get the mode to use for inserting into this field.  If
1557          OP0 is BLKmode, get the smallest mode consistent with the
1558          alignment. If OP0 is a non-BLKmode object that is no
1559          wider than EXT_MODE, use its mode. Otherwise, use the
1560          smallest mode containing the field.  */
1561
1562       if (GET_MODE (op0) == BLKmode
1563           || (ext_mode != MAX_MACHINE_MODE
1564               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1565         bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1566                                   (ext_mode == MAX_MACHINE_MODE
1567                                    ? VOIDmode : ext_mode),
1568                                   MEM_VOLATILE_P (op0));
1569       else
1570         bestmode = GET_MODE (op0);
1571
1572       if (bestmode != VOIDmode
1573           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1574                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1575         {
1576           unsigned HOST_WIDE_INT xoffset, xbitpos;
1577
1578           /* Compute the offset as a multiple of this unit,
1579              counting in bytes.  */
1580           unit = GET_MODE_BITSIZE (bestmode);
1581           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1582           xbitpos = bitnum % unit;
1583
1584           /* Make sure the register is big enough for the whole field.  */
1585           if (xoffset * BITS_PER_UNIT + unit
1586               >= offset * BITS_PER_UNIT + bitsize)
1587             {
1588               rtx last, result, xop0;
1589
1590               last = get_last_insn ();
1591
1592               /* Fetch it to a register in that size.  */
1593               xop0 = adjust_address (op0, bestmode, xoffset);
1594               xop0 = force_reg (bestmode, xop0);
1595               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1596                                             unsignedp, target,
1597                                             mode, tmode, false);
1598               if (result)
1599                 return result;
1600
1601               delete_insns_since (last);
1602             }
1603         }
1604     }
1605
1606   if (!fallback_p)
1607     return NULL;
1608
1609   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1610                                     bitpos, target, unsignedp);
1611   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1612 }
1613
1614 /* Generate code to extract a byte-field from STR_RTX
1615    containing BITSIZE bits, starting at BITNUM,
1616    and put it in TARGET if possible (if TARGET is nonzero).
1617    Regardless of TARGET, we return the rtx for where the value is placed.
1618
1619    STR_RTX is the structure containing the byte (a REG or MEM).
1620    UNSIGNEDP is nonzero if this is an unsigned bit field.
1621    MODE is the natural mode of the field value once extracted.
1622    TMODE is the mode the caller would like the value to have;
1623    but the value may be returned with type MODE instead.
1624
1625    If a TARGET is specified and we can store in it at no extra cost,
1626    we do so, and return TARGET.
1627    Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1628    if they are equally easy.  */
1629
1630 rtx
1631 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1632                    unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1633                    enum machine_mode mode, enum machine_mode tmode)
1634 {
1635   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1636                               target, mode, tmode, true);
1637 }
1638 \f
1639 /* Extract a bit field using shifts and boolean operations
1640    Returns an rtx to represent the value.
1641    OP0 addresses a register (word) or memory (byte).
1642    BITPOS says which bit within the word or byte the bit field starts in.
1643    OFFSET says how many bytes farther the bit field starts;
1644     it is 0 if OP0 is a register.
1645    BITSIZE says how many bits long the bit field is.
1646     (If OP0 is a register, it may be narrower than a full word,
1647      but BITPOS still counts within a full word,
1648      which is significant on bigendian machines.)
1649
1650    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1651    If TARGET is nonzero, attempts to store the value there
1652    and return TARGET, but this is not guaranteed.
1653    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.  */
1654
1655 static rtx
1656 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1657                          unsigned HOST_WIDE_INT offset,
1658                          unsigned HOST_WIDE_INT bitsize,
1659                          unsigned HOST_WIDE_INT bitpos, rtx target,
1660                          int unsignedp)
1661 {
1662   unsigned int total_bits = BITS_PER_WORD;
1663   enum machine_mode mode;
1664
1665   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1666     {
1667       /* Special treatment for a bit field split across two registers.  */
1668       if (bitsize + bitpos > BITS_PER_WORD)
1669         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1670     }
1671   else
1672     {
1673       /* Get the proper mode to use for this field.  We want a mode that
1674          includes the entire field.  If such a mode would be larger than
1675          a word, we won't be doing the extraction the normal way.  */
1676
1677       mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1678                             MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1679
1680       if (mode == VOIDmode)
1681         /* The only way this should occur is if the field spans word
1682            boundaries.  */
1683         return extract_split_bit_field (op0, bitsize,
1684                                         bitpos + offset * BITS_PER_UNIT,
1685                                         unsignedp);
1686
1687       total_bits = GET_MODE_BITSIZE (mode);
1688
1689       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1690          be in the range 0 to total_bits-1, and put any excess bytes in
1691          OFFSET.  */
1692       if (bitpos >= total_bits)
1693         {
1694           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1695           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1696                      * BITS_PER_UNIT);
1697         }
1698
1699       /* Get ref to an aligned byte, halfword, or word containing the field.
1700          Adjust BITPOS to be position within a word,
1701          and OFFSET to be the offset of that word.
1702          Then alter OP0 to refer to that word.  */
1703       bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1704       offset -= (offset % (total_bits / BITS_PER_UNIT));
1705       op0 = adjust_address (op0, mode, offset);
1706     }
1707
1708   mode = GET_MODE (op0);
1709
1710   if (BYTES_BIG_ENDIAN)
1711     /* BITPOS is the distance between our msb and that of OP0.
1712        Convert it to the distance from the lsb.  */
1713     bitpos = total_bits - bitsize - bitpos;
1714
1715   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1716      We have reduced the big-endian case to the little-endian case.  */
1717
1718   if (unsignedp)
1719     {
1720       if (bitpos)
1721         {
1722           /* If the field does not already start at the lsb,
1723              shift it so it does.  */
1724           tree amount = build_int_cst (NULL_TREE, bitpos);
1725           /* Maybe propagate the target for the shift.  */
1726           /* But not if we will return it--could confuse integrate.c.  */
1727           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1728           if (tmode != mode) subtarget = 0;
1729           op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1730         }
1731       /* Convert the value to the desired mode.  */
1732       if (mode != tmode)
1733         op0 = convert_to_mode (tmode, op0, 1);
1734
1735       /* Unless the msb of the field used to be the msb when we shifted,
1736          mask out the upper bits.  */
1737
1738       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1739         return expand_binop (GET_MODE (op0), and_optab, op0,
1740                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1741                              target, 1, OPTAB_LIB_WIDEN);
1742       return op0;
1743     }
1744
1745   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1746      then arithmetic-shift its lsb to the lsb of the word.  */
1747   op0 = force_reg (mode, op0);
1748   if (mode != tmode)
1749     target = 0;
1750
1751   /* Find the narrowest integer mode that contains the field.  */
1752
1753   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1754        mode = GET_MODE_WIDER_MODE (mode))
1755     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1756       {
1757         op0 = convert_to_mode (mode, op0, 0);
1758         break;
1759       }
1760
1761   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1762     {
1763       tree amount
1764         = build_int_cst (NULL_TREE,
1765                          GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1766       /* Maybe propagate the target for the shift.  */
1767       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1768       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1769     }
1770
1771   return expand_shift (RSHIFT_EXPR, mode, op0,
1772                        build_int_cst (NULL_TREE,
1773                                       GET_MODE_BITSIZE (mode) - bitsize),
1774                        target, 0);
1775 }
1776 \f
1777 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1778    of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1779    complement of that if COMPLEMENT.  The mask is truncated if
1780    necessary to the width of mode MODE.  The mask is zero-extended if
1781    BITSIZE+BITPOS is too small for MODE.  */
1782
1783 static rtx
1784 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1785 {
1786   HOST_WIDE_INT masklow, maskhigh;
1787
1788   if (bitsize == 0)
1789     masklow = 0;
1790   else if (bitpos < HOST_BITS_PER_WIDE_INT)
1791     masklow = (HOST_WIDE_INT) -1 << bitpos;
1792   else
1793     masklow = 0;
1794
1795   if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1796     masklow &= ((unsigned HOST_WIDE_INT) -1
1797                 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1798
1799   if (bitpos <= HOST_BITS_PER_WIDE_INT)
1800     maskhigh = -1;
1801   else
1802     maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1803
1804   if (bitsize == 0)
1805     maskhigh = 0;
1806   else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1807     maskhigh &= ((unsigned HOST_WIDE_INT) -1
1808                  >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1809   else
1810     maskhigh = 0;
1811
1812   if (complement)
1813     {
1814       maskhigh = ~maskhigh;
1815       masklow = ~masklow;
1816     }
1817
1818   return immed_double_const (masklow, maskhigh, mode);
1819 }
1820
1821 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1822    VALUE truncated to BITSIZE bits and then shifted left BITPOS bits.  */
1823
1824 static rtx
1825 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1826 {
1827   unsigned HOST_WIDE_INT v = INTVAL (value);
1828   HOST_WIDE_INT low, high;
1829
1830   if (bitsize < HOST_BITS_PER_WIDE_INT)
1831     v &= ~((HOST_WIDE_INT) -1 << bitsize);
1832
1833   if (bitpos < HOST_BITS_PER_WIDE_INT)
1834     {
1835       low = v << bitpos;
1836       high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1837     }
1838   else
1839     {
1840       low = 0;
1841       high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1842     }
1843
1844   return immed_double_const (low, high, mode);
1845 }
1846 \f
1847 /* Extract a bit field that is split across two words
1848    and return an RTX for the result.
1849
1850    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1851    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1852    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.  */
1853
1854 static rtx
1855 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1856                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1857 {
1858   unsigned int unit;
1859   unsigned int bitsdone = 0;
1860   rtx result = NULL_RTX;
1861   int first = 1;
1862
1863   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1864      much at a time.  */
1865   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1866     unit = BITS_PER_WORD;
1867   else
1868     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1869
1870   while (bitsdone < bitsize)
1871     {
1872       unsigned HOST_WIDE_INT thissize;
1873       rtx part, word;
1874       unsigned HOST_WIDE_INT thispos;
1875       unsigned HOST_WIDE_INT offset;
1876
1877       offset = (bitpos + bitsdone) / unit;
1878       thispos = (bitpos + bitsdone) % unit;
1879
1880       /* THISSIZE must not overrun a word boundary.  Otherwise,
1881          extract_fixed_bit_field will call us again, and we will mutually
1882          recurse forever.  */
1883       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1884       thissize = MIN (thissize, unit - thispos);
1885
1886       /* If OP0 is a register, then handle OFFSET here.
1887
1888          When handling multiword bitfields, extract_bit_field may pass
1889          down a word_mode SUBREG of a larger REG for a bitfield that actually
1890          crosses a word boundary.  Thus, for a SUBREG, we must find
1891          the current word starting from the base register.  */
1892       if (GET_CODE (op0) == SUBREG)
1893         {
1894           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1895           word = operand_subword_force (SUBREG_REG (op0), word_offset,
1896                                         GET_MODE (SUBREG_REG (op0)));
1897           offset = 0;
1898         }
1899       else if (REG_P (op0))
1900         {
1901           word = operand_subword_force (op0, offset, GET_MODE (op0));
1902           offset = 0;
1903         }
1904       else
1905         word = op0;
1906
1907       /* Extract the parts in bit-counting order,
1908          whose meaning is determined by BYTES_PER_UNIT.
1909          OFFSET is in UNITs, and UNIT is in bits.
1910          extract_fixed_bit_field wants offset in bytes.  */
1911       part = extract_fixed_bit_field (word_mode, word,
1912                                       offset * unit / BITS_PER_UNIT,
1913                                       thissize, thispos, 0, 1);
1914       bitsdone += thissize;
1915
1916       /* Shift this part into place for the result.  */
1917       if (BYTES_BIG_ENDIAN)
1918         {
1919           if (bitsize != bitsdone)
1920             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1921                                  build_int_cst (NULL_TREE, bitsize - bitsdone),
1922                                  0, 1);
1923         }
1924       else
1925         {
1926           if (bitsdone != thissize)
1927             part = expand_shift (LSHIFT_EXPR, word_mode, part,
1928                                  build_int_cst (NULL_TREE,
1929                                                 bitsdone - thissize), 0, 1);
1930         }
1931
1932       if (first)
1933         result = part;
1934       else
1935         /* Combine the parts with bitwise or.  This works
1936            because we extracted each part as an unsigned bit field.  */
1937         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1938                                OPTAB_LIB_WIDEN);
1939
1940       first = 0;
1941     }
1942
1943   /* Unsigned bit field: we are done.  */
1944   if (unsignedp)
1945     return result;
1946   /* Signed bit field: sign-extend with two arithmetic shifts.  */
1947   result = expand_shift (LSHIFT_EXPR, word_mode, result,
1948                          build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1949                          NULL_RTX, 0);
1950   return expand_shift (RSHIFT_EXPR, word_mode, result,
1951                        build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
1952                        NULL_RTX, 0);
1953 }
1954 \f
1955 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1956    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
1957    MODE, fill the upper bits with zeros.  Fail if the layout of either
1958    mode is unknown (as for CC modes) or if the extraction would involve
1959    unprofitable mode punning.  Return the value on success, otherwise
1960    return null.
1961
1962    This is different from gen_lowpart* in these respects:
1963
1964      - the returned value must always be considered an rvalue
1965
1966      - when MODE is wider than SRC_MODE, the extraction involves
1967        a zero extension
1968
1969      - when MODE is smaller than SRC_MODE, the extraction involves
1970        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
1971
1972    In other words, this routine performs a computation, whereas the
1973    gen_lowpart* routines are conceptually lvalue or rvalue subreg
1974    operations.  */
1975
1976 rtx
1977 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
1978 {
1979   enum machine_mode int_mode, src_int_mode;
1980
1981   if (mode == src_mode)
1982     return src;
1983
1984   if (CONSTANT_P (src))
1985     return simplify_gen_subreg (mode, src, src_mode,
1986                                 subreg_lowpart_offset (mode, src_mode));
1987
1988   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
1989     return NULL_RTX;
1990
1991   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
1992       && MODES_TIEABLE_P (mode, src_mode))
1993     {
1994       rtx x = gen_lowpart_common (mode, src);
1995       if (x)
1996         return x;
1997     }
1998
1999   src_int_mode = int_mode_for_mode (src_mode);
2000   int_mode = int_mode_for_mode (mode);
2001   if (src_int_mode == BLKmode || int_mode == BLKmode)
2002     return NULL_RTX;
2003
2004   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2005     return NULL_RTX;
2006   if (!MODES_TIEABLE_P (int_mode, mode))
2007     return NULL_RTX;
2008
2009   src = gen_lowpart (src_int_mode, src);
2010   src = convert_modes (int_mode, src_int_mode, src, true);
2011   src = gen_lowpart (mode, src);
2012   return src;
2013 }
2014 \f
2015 /* Add INC into TARGET.  */
2016
2017 void
2018 expand_inc (rtx target, rtx inc)
2019 {
2020   rtx value = expand_binop (GET_MODE (target), add_optab,
2021                             target, inc,
2022                             target, 0, OPTAB_LIB_WIDEN);
2023   if (value != target)
2024     emit_move_insn (target, value);
2025 }
2026
2027 /* Subtract DEC from TARGET.  */
2028
2029 void
2030 expand_dec (rtx target, rtx dec)
2031 {
2032   rtx value = expand_binop (GET_MODE (target), sub_optab,
2033                             target, dec,
2034                             target, 0, OPTAB_LIB_WIDEN);
2035   if (value != target)
2036     emit_move_insn (target, value);
2037 }
2038 \f
2039 /* Output a shift instruction for expression code CODE,
2040    with SHIFTED being the rtx for the value to shift,
2041    and AMOUNT the tree for the amount to shift by.
2042    Store the result in the rtx TARGET, if that is convenient.
2043    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2044    Return the rtx for where the value is.  */
2045
2046 rtx
2047 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2048               tree amount, rtx target, int unsignedp)
2049 {
2050   rtx op1, temp = 0;
2051   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2052   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2053   optab lshift_optab = ashl_optab;
2054   optab rshift_arith_optab = ashr_optab;
2055   optab rshift_uns_optab = lshr_optab;
2056   optab lrotate_optab = rotl_optab;
2057   optab rrotate_optab = rotr_optab;
2058   enum machine_mode op1_mode;
2059   int attempt;
2060
2061   op1 = expand_normal (amount);
2062   op1_mode = GET_MODE (op1);
2063
2064   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2065      shift amount is a vector, use the vector/vector shift patterns.  */
2066   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2067     {
2068       lshift_optab = vashl_optab;
2069       rshift_arith_optab = vashr_optab;
2070       rshift_uns_optab = vlshr_optab;
2071       lrotate_optab = vrotl_optab;
2072       rrotate_optab = vrotr_optab;
2073     }
2074
2075   /* Previously detected shift-counts computed by NEGATE_EXPR
2076      and shifted in the other direction; but that does not work
2077      on all machines.  */
2078
2079   if (SHIFT_COUNT_TRUNCATED)
2080     {
2081       if (GET_CODE (op1) == CONST_INT
2082           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2083               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2084         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2085                        % GET_MODE_BITSIZE (mode));
2086       else if (GET_CODE (op1) == SUBREG
2087                && subreg_lowpart_p (op1))
2088         op1 = SUBREG_REG (op1);
2089     }
2090
2091   if (op1 == const0_rtx)
2092     return shifted;
2093
2094   /* Check whether its cheaper to implement a left shift by a constant
2095      bit count by a sequence of additions.  */
2096   if (code == LSHIFT_EXPR
2097       && GET_CODE (op1) == CONST_INT
2098       && INTVAL (op1) > 0
2099       && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2100       && INTVAL (op1) < MAX_BITS_PER_WORD
2101       && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode]
2102       && shift_cost[mode][INTVAL (op1)] != MAX_COST)
2103     {
2104       int i;
2105       for (i = 0; i < INTVAL (op1); i++)
2106         {
2107           temp = force_reg (mode, shifted);
2108           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2109                                   unsignedp, OPTAB_LIB_WIDEN);
2110         }
2111       return shifted;
2112     }
2113
2114   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2115     {
2116       enum optab_methods methods;
2117
2118       if (attempt == 0)
2119         methods = OPTAB_DIRECT;
2120       else if (attempt == 1)
2121         methods = OPTAB_WIDEN;
2122       else
2123         methods = OPTAB_LIB_WIDEN;
2124
2125       if (rotate)
2126         {
2127           /* Widening does not work for rotation.  */
2128           if (methods == OPTAB_WIDEN)
2129             continue;
2130           else if (methods == OPTAB_LIB_WIDEN)
2131             {
2132               /* If we have been unable to open-code this by a rotation,
2133                  do it as the IOR of two shifts.  I.e., to rotate A
2134                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2135                  where C is the bitsize of A.
2136
2137                  It is theoretically possible that the target machine might
2138                  not be able to perform either shift and hence we would
2139                  be making two libcalls rather than just the one for the
2140                  shift (similarly if IOR could not be done).  We will allow
2141                  this extremely unlikely lossage to avoid complicating the
2142                  code below.  */
2143
2144               rtx subtarget = target == shifted ? 0 : target;
2145               tree new_amount, other_amount;
2146               rtx temp1;
2147               tree type = TREE_TYPE (amount);
2148               if (GET_MODE (op1) != TYPE_MODE (type)
2149                   && GET_MODE (op1) != VOIDmode)
2150                 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2151               new_amount = make_tree (type, op1);
2152               other_amount
2153                 = fold_build2 (MINUS_EXPR, type,
2154                                build_int_cst (type, GET_MODE_BITSIZE (mode)),
2155                                new_amount);
2156
2157               shifted = force_reg (mode, shifted);
2158
2159               temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2160                                    mode, shifted, new_amount, 0, 1);
2161               temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2162                                     mode, shifted, other_amount, subtarget, 1);
2163               return expand_binop (mode, ior_optab, temp, temp1, target,
2164                                    unsignedp, methods);
2165             }
2166
2167           temp = expand_binop (mode,
2168                                left ? lrotate_optab : rrotate_optab,
2169                                shifted, op1, target, unsignedp, methods);
2170         }
2171       else if (unsignedp)
2172         temp = expand_binop (mode,
2173                              left ? lshift_optab : rshift_uns_optab,
2174                              shifted, op1, target, unsignedp, methods);
2175
2176       /* Do arithmetic shifts.
2177          Also, if we are going to widen the operand, we can just as well
2178          use an arithmetic right-shift instead of a logical one.  */
2179       if (temp == 0 && ! rotate
2180           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2181         {
2182           enum optab_methods methods1 = methods;
2183
2184           /* If trying to widen a log shift to an arithmetic shift,
2185              don't accept an arithmetic shift of the same size.  */
2186           if (unsignedp)
2187             methods1 = OPTAB_MUST_WIDEN;
2188
2189           /* Arithmetic shift */
2190
2191           temp = expand_binop (mode,
2192                                left ? lshift_optab : rshift_arith_optab,
2193                                shifted, op1, target, unsignedp, methods1);
2194         }
2195
2196       /* We used to try extzv here for logical right shifts, but that was
2197          only useful for one machine, the VAX, and caused poor code
2198          generation there for lshrdi3, so the code was deleted and a
2199          define_expand for lshrsi3 was added to vax.md.  */
2200     }
2201
2202   gcc_assert (temp);
2203   return temp;
2204 }
2205 \f
2206 enum alg_code {
2207   alg_unknown,
2208   alg_zero,
2209   alg_m, alg_shift,
2210   alg_add_t_m2,
2211   alg_sub_t_m2,
2212   alg_add_factor,
2213   alg_sub_factor,
2214   alg_add_t2_m,
2215   alg_sub_t2_m,
2216   alg_impossible
2217 };
2218
2219 /* This structure holds the "cost" of a multiply sequence.  The
2220    "cost" field holds the total rtx_cost of every operator in the
2221    synthetic multiplication sequence, hence cost(a op b) is defined
2222    as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2223    The "latency" field holds the minimum possible latency of the
2224    synthetic multiply, on a hypothetical infinitely parallel CPU.
2225    This is the critical path, or the maximum height, of the expression
2226    tree which is the sum of rtx_costs on the most expensive path from
2227    any leaf to the root.  Hence latency(a op b) is defined as zero for
2228    leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise.  */
2229
2230 struct mult_cost {
2231   short cost;     /* Total rtx_cost of the multiplication sequence.  */
2232   short latency;  /* The latency of the multiplication sequence.  */
2233 };
2234
2235 /* This macro is used to compare a pointer to a mult_cost against an
2236    single integer "rtx_cost" value.  This is equivalent to the macro
2237    CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}.  */
2238 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y)    \
2239                              || ((X)->cost == (Y) && (X)->latency < (Y)))
2240
2241 /* This macro is used to compare two pointers to mult_costs against
2242    each other.  The macro returns true if X is cheaper than Y.
2243    Currently, the cheaper of two mult_costs is the one with the
2244    lower "cost".  If "cost"s are tied, the lower latency is cheaper.  */
2245 #define CHEAPER_MULT_COST(X,Y)  ((X)->cost < (Y)->cost          \
2246                                  || ((X)->cost == (Y)->cost     \
2247                                      && (X)->latency < (Y)->latency))
2248
2249 /* This structure records a sequence of operations.
2250    `ops' is the number of operations recorded.
2251    `cost' is their total cost.
2252    The operations are stored in `op' and the corresponding
2253    logarithms of the integer coefficients in `log'.
2254
2255    These are the operations:
2256    alg_zero             total := 0;
2257    alg_m                total := multiplicand;
2258    alg_shift            total := total * coeff
2259    alg_add_t_m2         total := total + multiplicand * coeff;
2260    alg_sub_t_m2         total := total - multiplicand * coeff;
2261    alg_add_factor       total := total * coeff + total;
2262    alg_sub_factor       total := total * coeff - total;
2263    alg_add_t2_m         total := total * coeff + multiplicand;
2264    alg_sub_t2_m         total := total * coeff - multiplicand;
2265
2266    The first operand must be either alg_zero or alg_m.  */
2267
2268 struct algorithm
2269 {
2270   struct mult_cost cost;
2271   short ops;
2272   /* The size of the OP and LOG fields are not directly related to the
2273      word size, but the worst-case algorithms will be if we have few
2274      consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2275      In that case we will generate shift-by-2, add, shift-by-2, add,...,
2276      in total wordsize operations.  */
2277   enum alg_code op[MAX_BITS_PER_WORD];
2278   char log[MAX_BITS_PER_WORD];
2279 };
2280
2281 /* The entry for our multiplication cache/hash table.  */
2282 struct alg_hash_entry {
2283   /* The number we are multiplying by.  */
2284   unsigned HOST_WIDE_INT t;
2285
2286   /* The mode in which we are multiplying something by T.  */
2287   enum machine_mode mode;
2288
2289   /* The best multiplication algorithm for t.  */
2290   enum alg_code alg;
2291
2292   /* The cost of multiplication if ALG_CODE is not alg_impossible.
2293      Otherwise, the cost within which multiplication by T is
2294      impossible.  */
2295   struct mult_cost cost;
2296 };
2297
2298 /* The number of cache/hash entries.  */
2299 #if HOST_BITS_PER_WIDE_INT == 64
2300 #define NUM_ALG_HASH_ENTRIES 1031
2301 #else
2302 #define NUM_ALG_HASH_ENTRIES 307
2303 #endif
2304
2305 /* Each entry of ALG_HASH caches alg_code for some integer.  This is
2306    actually a hash table.  If we have a collision, that the older
2307    entry is kicked out.  */
2308 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2309
2310 /* Indicates the type of fixup needed after a constant multiplication.
2311    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2312    the result should be negated, and ADD_VARIANT means that the
2313    multiplicand should be added to the result.  */
2314 enum mult_variant {basic_variant, negate_variant, add_variant};
2315
2316 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2317                         const struct mult_cost *, enum machine_mode mode);
2318 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2319                                  struct algorithm *, enum mult_variant *, int);
2320 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2321                               const struct algorithm *, enum mult_variant);
2322 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2323                                                  int, rtx *, int *, int *);
2324 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2325 static rtx extract_high_half (enum machine_mode, rtx);
2326 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2327 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2328                                        int, int);
2329 /* Compute and return the best algorithm for multiplying by T.
2330    The algorithm must cost less than cost_limit
2331    If retval.cost >= COST_LIMIT, no algorithm was found and all
2332    other field of the returned struct are undefined.
2333    MODE is the machine mode of the multiplication.  */
2334
2335 static void
2336 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2337             const struct mult_cost *cost_limit, enum machine_mode mode)
2338 {
2339   int m;
2340   struct algorithm *alg_in, *best_alg;
2341   struct mult_cost best_cost;
2342   struct mult_cost new_limit;
2343   int op_cost, op_latency;
2344   unsigned HOST_WIDE_INT q;
2345   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2346   int hash_index;
2347   bool cache_hit = false;
2348   enum alg_code cache_alg = alg_zero;
2349
2350   /* Indicate that no algorithm is yet found.  If no algorithm
2351      is found, this value will be returned and indicate failure.  */
2352   alg_out->cost.cost = cost_limit->cost + 1;
2353   alg_out->cost.latency = cost_limit->latency + 1;
2354
2355   if (cost_limit->cost < 0
2356       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2357     return;
2358
2359   /* Restrict the bits of "t" to the multiplication's mode.  */
2360   t &= GET_MODE_MASK (mode);
2361
2362   /* t == 1 can be done in zero cost.  */
2363   if (t == 1)
2364     {
2365       alg_out->ops = 1;
2366       alg_out->cost.cost = 0;
2367       alg_out->cost.latency = 0;
2368       alg_out->op[0] = alg_m;
2369       return;
2370     }
2371
2372   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2373      fail now.  */
2374   if (t == 0)
2375     {
2376       if (MULT_COST_LESS (cost_limit, zero_cost))
2377         return;
2378       else
2379         {
2380           alg_out->ops = 1;
2381           alg_out->cost.cost = zero_cost;
2382           alg_out->cost.latency = zero_cost;
2383           alg_out->op[0] = alg_zero;
2384           return;
2385         }
2386     }
2387
2388   /* We'll be needing a couple extra algorithm structures now.  */
2389
2390   alg_in = XALLOCA (struct algorithm);
2391   best_alg = XALLOCA (struct algorithm);
2392   best_cost = *cost_limit;
2393
2394   /* Compute the hash index.  */
2395   hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2396
2397   /* See if we already know what to do for T.  */
2398   if (alg_hash[hash_index].t == t
2399       && alg_hash[hash_index].mode == mode
2400       && alg_hash[hash_index].alg != alg_unknown)
2401     {
2402       cache_alg = alg_hash[hash_index].alg;
2403
2404       if (cache_alg == alg_impossible)
2405         {
2406           /* The cache tells us that it's impossible to synthesize
2407              multiplication by T within alg_hash[hash_index].cost.  */
2408           if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2409             /* COST_LIMIT is at least as restrictive as the one
2410                recorded in the hash table, in which case we have no
2411                hope of synthesizing a multiplication.  Just
2412                return.  */
2413             return;
2414
2415           /* If we get here, COST_LIMIT is less restrictive than the
2416              one recorded in the hash table, so we may be able to
2417              synthesize a multiplication.  Proceed as if we didn't
2418              have the cache entry.  */
2419         }
2420       else
2421         {
2422           if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2423             /* The cached algorithm shows that this multiplication
2424                requires more cost than COST_LIMIT.  Just return.  This
2425                way, we don't clobber this cache entry with
2426                alg_impossible but retain useful information.  */
2427             return;
2428
2429           cache_hit = true;
2430
2431           switch (cache_alg)
2432             {
2433             case alg_shift:
2434               goto do_alg_shift;
2435
2436             case alg_add_t_m2:
2437             case alg_sub_t_m2:
2438               goto do_alg_addsub_t_m2;
2439
2440             case alg_add_factor:
2441             case alg_sub_factor:
2442               goto do_alg_addsub_factor;
2443
2444             case alg_add_t2_m:
2445               goto do_alg_add_t2_m;
2446
2447             case alg_sub_t2_m:
2448               goto do_alg_sub_t2_m;
2449
2450             default:
2451               gcc_unreachable ();
2452             }
2453         }
2454     }
2455
2456   /* If we have a group of zero bits at the low-order part of T, try
2457      multiplying by the remaining bits and then doing a shift.  */
2458
2459   if ((t & 1) == 0)
2460     {
2461     do_alg_shift:
2462       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2463       if (m < maxm)
2464         {
2465           q = t >> m;
2466           /* The function expand_shift will choose between a shift and
2467              a sequence of additions, so the observed cost is given as
2468              MIN (m * add_cost[mode], shift_cost[mode][m]).  */
2469           op_cost = m * add_cost[mode];
2470           if (shift_cost[mode][m] < op_cost)
2471             op_cost = shift_cost[mode][m];
2472           new_limit.cost = best_cost.cost - op_cost;
2473           new_limit.latency = best_cost.latency - op_cost;
2474           synth_mult (alg_in, q, &new_limit, mode);
2475
2476           alg_in->cost.cost += op_cost;
2477           alg_in->cost.latency += op_cost;
2478           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2479             {
2480               struct algorithm *x;
2481               best_cost = alg_in->cost;
2482               x = alg_in, alg_in = best_alg, best_alg = x;
2483               best_alg->log[best_alg->ops] = m;
2484               best_alg->op[best_alg->ops] = alg_shift;
2485             }
2486         }
2487       if (cache_hit)
2488         goto done;
2489     }
2490
2491   /* If we have an odd number, add or subtract one.  */
2492   if ((t & 1) != 0)
2493     {
2494       unsigned HOST_WIDE_INT w;
2495
2496     do_alg_addsub_t_m2:
2497       for (w = 1; (w & t) != 0; w <<= 1)
2498         ;
2499       /* If T was -1, then W will be zero after the loop.  This is another
2500          case where T ends with ...111.  Handling this with (T + 1) and
2501          subtract 1 produces slightly better code and results in algorithm
2502          selection much faster than treating it like the ...0111 case
2503          below.  */
2504       if (w == 0
2505           || (w > 2
2506               /* Reject the case where t is 3.
2507                  Thus we prefer addition in that case.  */
2508               && t != 3))
2509         {
2510           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2511
2512           op_cost = add_cost[mode];
2513           new_limit.cost = best_cost.cost - op_cost;
2514           new_limit.latency = best_cost.latency - op_cost;
2515           synth_mult (alg_in, t + 1, &new_limit, mode);
2516
2517           alg_in->cost.cost += op_cost;
2518           alg_in->cost.latency += op_cost;
2519           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2520             {
2521               struct algorithm *x;
2522               best_cost = alg_in->cost;
2523               x = alg_in, alg_in = best_alg, best_alg = x;
2524               best_alg->log[best_alg->ops] = 0;
2525               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2526             }
2527         }
2528       else
2529         {
2530           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2531
2532           op_cost = add_cost[mode];
2533           new_limit.cost = best_cost.cost - op_cost;
2534           new_limit.latency = best_cost.latency - op_cost;
2535           synth_mult (alg_in, t - 1, &new_limit, mode);
2536
2537           alg_in->cost.cost += op_cost;
2538           alg_in->cost.latency += op_cost;
2539           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2540             {
2541               struct algorithm *x;
2542               best_cost = alg_in->cost;
2543               x = alg_in, alg_in = best_alg, best_alg = x;
2544               best_alg->log[best_alg->ops] = 0;
2545               best_alg->op[best_alg->ops] = alg_add_t_m2;
2546             }
2547         }
2548       if (cache_hit)
2549         goto done;
2550     }
2551
2552   /* Look for factors of t of the form
2553      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2554      If we find such a factor, we can multiply by t using an algorithm that
2555      multiplies by q, shift the result by m and add/subtract it to itself.
2556
2557      We search for large factors first and loop down, even if large factors
2558      are less probable than small; if we find a large factor we will find a
2559      good sequence quickly, and therefore be able to prune (by decreasing
2560      COST_LIMIT) the search.  */
2561
2562  do_alg_addsub_factor:
2563   for (m = floor_log2 (t - 1); m >= 2; m--)
2564     {
2565       unsigned HOST_WIDE_INT d;
2566
2567       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2568       if (t % d == 0 && t > d && m < maxm
2569           && (!cache_hit || cache_alg == alg_add_factor))
2570         {
2571           /* If the target has a cheap shift-and-add instruction use
2572              that in preference to a shift insn followed by an add insn.
2573              Assume that the shift-and-add is "atomic" with a latency
2574              equal to its cost, otherwise assume that on superscalar
2575              hardware the shift may be executed concurrently with the
2576              earlier steps in the algorithm.  */
2577           op_cost = add_cost[mode] + shift_cost[mode][m];
2578           if (shiftadd_cost[mode][m] < op_cost)
2579             {
2580               op_cost = shiftadd_cost[mode][m];
2581               op_latency = op_cost;
2582             }
2583           else
2584             op_latency = add_cost[mode];
2585
2586           new_limit.cost = best_cost.cost - op_cost;
2587           new_limit.latency = best_cost.latency - op_latency;
2588           synth_mult (alg_in, t / d, &new_limit, mode);
2589
2590           alg_in->cost.cost += op_cost;
2591           alg_in->cost.latency += op_latency;
2592           if (alg_in->cost.latency < op_cost)
2593             alg_in->cost.latency = op_cost;
2594           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2595             {
2596               struct algorithm *x;
2597               best_cost = alg_in->cost;
2598               x = alg_in, alg_in = best_alg, best_alg = x;
2599               best_alg->log[best_alg->ops] = m;
2600               best_alg->op[best_alg->ops] = alg_add_factor;
2601             }
2602           /* Other factors will have been taken care of in the recursion.  */
2603           break;
2604         }
2605
2606       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2607       if (t % d == 0 && t > d && m < maxm
2608           && (!cache_hit || cache_alg == alg_sub_factor))
2609         {
2610           /* If the target has a cheap shift-and-subtract insn use
2611              that in preference to a shift insn followed by a sub insn.
2612              Assume that the shift-and-sub is "atomic" with a latency
2613              equal to it's cost, otherwise assume that on superscalar
2614              hardware the shift may be executed concurrently with the
2615              earlier steps in the algorithm.  */
2616           op_cost = add_cost[mode] + shift_cost[mode][m];
2617           if (shiftsub_cost[mode][m] < op_cost)
2618             {
2619               op_cost = shiftsub_cost[mode][m];
2620               op_latency = op_cost;
2621             }
2622           else
2623             op_latency = add_cost[mode];
2624
2625           new_limit.cost = best_cost.cost - op_cost;
2626           new_limit.latency = best_cost.latency - op_latency;
2627           synth_mult (alg_in, t / d, &new_limit, mode);
2628
2629           alg_in->cost.cost += op_cost;
2630           alg_in->cost.latency += op_latency;
2631           if (alg_in->cost.latency < op_cost)
2632             alg_in->cost.latency = op_cost;
2633           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2634             {
2635               struct algorithm *x;
2636               best_cost = alg_in->cost;
2637               x = alg_in, alg_in = best_alg, best_alg = x;
2638               best_alg->log[best_alg->ops] = m;
2639               best_alg->op[best_alg->ops] = alg_sub_factor;
2640             }
2641           break;
2642         }
2643     }
2644   if (cache_hit)
2645     goto done;
2646
2647   /* Try shift-and-add (load effective address) instructions,
2648      i.e. do a*3, a*5, a*9.  */
2649   if ((t & 1) != 0)
2650     {
2651     do_alg_add_t2_m:
2652       q = t - 1;
2653       q = q & -q;
2654       m = exact_log2 (q);
2655       if (m >= 0 && m < maxm)
2656         {
2657           op_cost = shiftadd_cost[mode][m];
2658           new_limit.cost = best_cost.cost - op_cost;
2659           new_limit.latency = best_cost.latency - op_cost;
2660           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2661
2662           alg_in->cost.cost += op_cost;
2663           alg_in->cost.latency += op_cost;
2664           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2665             {
2666               struct algorithm *x;
2667               best_cost = alg_in->cost;
2668               x = alg_in, alg_in = best_alg, best_alg = x;
2669               best_alg->log[best_alg->ops] = m;
2670               best_alg->op[best_alg->ops] = alg_add_t2_m;
2671             }
2672         }
2673       if (cache_hit)
2674         goto done;
2675
2676     do_alg_sub_t2_m:
2677       q = t + 1;
2678       q = q & -q;
2679       m = exact_log2 (q);
2680       if (m >= 0 && m < maxm)
2681         {
2682           op_cost = shiftsub_cost[mode][m];
2683           new_limit.cost = best_cost.cost - op_cost;
2684           new_limit.latency = best_cost.latency - op_cost;
2685           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2686
2687           alg_in->cost.cost += op_cost;
2688           alg_in->cost.latency += op_cost;
2689           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2690             {
2691               struct algorithm *x;
2692               best_cost = alg_in->cost;
2693               x = alg_in, alg_in = best_alg, best_alg = x;
2694               best_alg->log[best_alg->ops] = m;
2695               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2696             }
2697         }
2698       if (cache_hit)
2699         goto done;
2700     }
2701
2702  done:
2703   /* If best_cost has not decreased, we have not found any algorithm.  */
2704   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2705     {
2706       /* We failed to find an algorithm.  Record alg_impossible for
2707          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2708          we are asked to find an algorithm for T within the same or
2709          lower COST_LIMIT, we can immediately return to the
2710          caller.  */
2711       alg_hash[hash_index].t = t;
2712       alg_hash[hash_index].mode = mode;
2713       alg_hash[hash_index].alg = alg_impossible;
2714       alg_hash[hash_index].cost = *cost_limit;
2715       return;
2716     }
2717
2718   /* Cache the result.  */
2719   if (!cache_hit)
2720     {
2721       alg_hash[hash_index].t = t;
2722       alg_hash[hash_index].mode = mode;
2723       alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2724       alg_hash[hash_index].cost.cost = best_cost.cost;
2725       alg_hash[hash_index].cost.latency = best_cost.latency;
2726     }
2727
2728   /* If we are getting a too long sequence for `struct algorithm'
2729      to record, make this search fail.  */
2730   if (best_alg->ops == MAX_BITS_PER_WORD)
2731     return;
2732
2733   /* Copy the algorithm from temporary space to the space at alg_out.
2734      We avoid using structure assignment because the majority of
2735      best_alg is normally undefined, and this is a critical function.  */
2736   alg_out->ops = best_alg->ops + 1;
2737   alg_out->cost = best_cost;
2738   memcpy (alg_out->op, best_alg->op,
2739           alg_out->ops * sizeof *alg_out->op);
2740   memcpy (alg_out->log, best_alg->log,
2741           alg_out->ops * sizeof *alg_out->log);
2742 }
2743 \f
2744 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2745    Try three variations:
2746
2747        - a shift/add sequence based on VAL itself
2748        - a shift/add sequence based on -VAL, followed by a negation
2749        - a shift/add sequence based on VAL - 1, followed by an addition.
2750
2751    Return true if the cheapest of these cost less than MULT_COST,
2752    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2753
2754 static bool
2755 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2756                      struct algorithm *alg, enum mult_variant *variant,
2757                      int mult_cost)
2758 {
2759   struct algorithm alg2;
2760   struct mult_cost limit;
2761   int op_cost;
2762
2763   /* Fail quickly for impossible bounds.  */
2764   if (mult_cost < 0)
2765     return false;
2766
2767   /* Ensure that mult_cost provides a reasonable upper bound.
2768      Any constant multiplication can be performed with less
2769      than 2 * bits additions.  */
2770   op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[mode];
2771   if (mult_cost > op_cost)
2772     mult_cost = op_cost;
2773
2774   *variant = basic_variant;
2775   limit.cost = mult_cost;
2776   limit.latency = mult_cost;
2777   synth_mult (alg, val, &limit, mode);
2778
2779   /* This works only if the inverted value actually fits in an
2780      `unsigned int' */
2781   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2782     {
2783       op_cost = neg_cost[mode];
2784       if (MULT_COST_LESS (&alg->cost, mult_cost))
2785         {
2786           limit.cost = alg->cost.cost - op_cost;
2787           limit.latency = alg->cost.latency - op_cost;
2788         }
2789       else
2790         {
2791           limit.cost = mult_cost - op_cost;
2792           limit.latency = mult_cost - op_cost;
2793         }
2794
2795       synth_mult (&alg2, -val, &limit, mode);
2796       alg2.cost.cost += op_cost;
2797       alg2.cost.latency += op_cost;
2798       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2799         *alg = alg2, *variant = negate_variant;
2800     }
2801
2802   /* This proves very useful for division-by-constant.  */
2803   op_cost = add_cost[mode];
2804   if (MULT_COST_LESS (&alg->cost, mult_cost))
2805     {
2806       limit.cost = alg->cost.cost - op_cost;
2807       limit.latency = alg->cost.latency - op_cost;
2808     }
2809   else
2810     {
2811       limit.cost = mult_cost - op_cost;
2812       limit.latency = mult_cost - op_cost;
2813     }
2814
2815   synth_mult (&alg2, val - 1, &limit, mode);
2816   alg2.cost.cost += op_cost;
2817   alg2.cost.latency += op_cost;
2818   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2819     *alg = alg2, *variant = add_variant;
2820
2821   return MULT_COST_LESS (&alg->cost, mult_cost);
2822 }
2823
2824 /* A subroutine of expand_mult, used for constant multiplications.
2825    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2826    convenient.  Use the shift/add sequence described by ALG and apply
2827    the final fixup specified by VARIANT.  */
2828
2829 static rtx
2830 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2831                    rtx target, const struct algorithm *alg,
2832                    enum mult_variant variant)
2833 {
2834   HOST_WIDE_INT val_so_far;
2835   rtx insn, accum, tem;
2836   int opno;
2837   enum machine_mode nmode;
2838
2839   /* Avoid referencing memory over and over and invalid sharing
2840      on SUBREGs.  */
2841   op0 = force_reg (mode, op0);
2842
2843   /* ACCUM starts out either as OP0 or as a zero, depending on
2844      the first operation.  */
2845
2846   if (alg->op[0] == alg_zero)
2847     {
2848       accum = copy_to_mode_reg (mode, const0_rtx);
2849       val_so_far = 0;
2850     }
2851   else if (alg->op[0] == alg_m)
2852     {
2853       accum = copy_to_mode_reg (mode, op0);
2854       val_so_far = 1;
2855     }
2856   else
2857     gcc_unreachable ();
2858
2859   for (opno = 1; opno < alg->ops; opno++)
2860     {
2861       int log = alg->log[opno];
2862       rtx shift_subtarget = optimize ? 0 : accum;
2863       rtx add_target
2864         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2865            && !optimize)
2866           ? target : 0;
2867       rtx accum_target = optimize ? 0 : accum;
2868
2869       switch (alg->op[opno])
2870         {
2871         case alg_shift:
2872           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2873                                 build_int_cst (NULL_TREE, log),
2874                                 NULL_RTX, 0);
2875           val_so_far <<= log;
2876           break;
2877
2878         case alg_add_t_m2:
2879           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2880                               build_int_cst (NULL_TREE, log),
2881                               NULL_RTX, 0);
2882           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2883                                  add_target ? add_target : accum_target);
2884           val_so_far += (HOST_WIDE_INT) 1 << log;
2885           break;
2886
2887         case alg_sub_t_m2:
2888           tem = expand_shift (LSHIFT_EXPR, mode, op0,
2889                               build_int_cst (NULL_TREE, log),
2890                               NULL_RTX, 0);
2891           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2892                                  add_target ? add_target : accum_target);
2893           val_so_far -= (HOST_WIDE_INT) 1 << log;
2894           break;
2895
2896         case alg_add_t2_m:
2897           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2898                                 build_int_cst (NULL_TREE, log),
2899                                 shift_subtarget,
2900                                 0);
2901           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2902                                  add_target ? add_target : accum_target);
2903           val_so_far = (val_so_far << log) + 1;
2904           break;
2905
2906         case alg_sub_t2_m:
2907           accum = expand_shift (LSHIFT_EXPR, mode, accum,
2908                                 build_int_cst (NULL_TREE, log),
2909                                 shift_subtarget, 0);
2910           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2911                                  add_target ? add_target : accum_target);
2912           val_so_far = (val_so_far << log) - 1;
2913           break;
2914
2915         case alg_add_factor:
2916           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2917                               build_int_cst (NULL_TREE, log),
2918                               NULL_RTX, 0);
2919           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2920                                  add_target ? add_target : accum_target);
2921           val_so_far += val_so_far << log;
2922           break;
2923
2924         case alg_sub_factor:
2925           tem = expand_shift (LSHIFT_EXPR, mode, accum,
2926                               build_int_cst (NULL_TREE, log),
2927                               NULL_RTX, 0);
2928           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2929                                  (add_target
2930                                   ? add_target : (optimize ? 0 : tem)));
2931           val_so_far = (val_so_far << log) - val_so_far;
2932           break;
2933
2934         default:
2935           gcc_unreachable ();
2936         }
2937
2938       /* Write a REG_EQUAL note on the last insn so that we can cse
2939          multiplication sequences.  Note that if ACCUM is a SUBREG,
2940          we've set the inner register and must properly indicate
2941          that.  */
2942
2943       tem = op0, nmode = mode;
2944       if (GET_CODE (accum) == SUBREG)
2945         {
2946           nmode = GET_MODE (SUBREG_REG (accum));
2947           tem = gen_lowpart (nmode, op0);
2948         }
2949
2950       insn = get_last_insn ();
2951       set_unique_reg_note (insn, REG_EQUAL,
2952                            gen_rtx_MULT (nmode, tem,
2953                                          GEN_INT (val_so_far)));
2954     }
2955
2956   if (variant == negate_variant)
2957     {
2958       val_so_far = -val_so_far;
2959       accum = expand_unop (mode, neg_optab, accum, target, 0);
2960     }
2961   else if (variant == add_variant)
2962     {
2963       val_so_far = val_so_far + 1;
2964       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2965     }
2966
2967   /* Compare only the bits of val and val_so_far that are significant
2968      in the result mode, to avoid sign-/zero-extension confusion.  */
2969   val &= GET_MODE_MASK (mode);
2970   val_so_far &= GET_MODE_MASK (mode);
2971   gcc_assert (val == val_so_far);
2972
2973   return accum;
2974 }
2975
2976 /* Perform a multiplication and return an rtx for the result.
2977    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2978    TARGET is a suggestion for where to store the result (an rtx).
2979
2980    We check specially for a constant integer as OP1.
2981    If you want this check for OP0 as well, then before calling
2982    you should swap the two operands if OP0 would be constant.  */
2983
2984 rtx
2985 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
2986              int unsignedp)
2987 {
2988   enum mult_variant variant;
2989   struct algorithm algorithm;
2990   int max_cost;
2991
2992   /* Handling const0_rtx here allows us to use zero as a rogue value for
2993      coeff below.  */
2994   if (op1 == const0_rtx)
2995     return const0_rtx;
2996   if (op1 == const1_rtx)
2997     return op0;
2998   if (op1 == constm1_rtx)
2999     return expand_unop (mode,
3000                         GET_MODE_CLASS (mode) == MODE_INT
3001                         && !unsignedp && flag_trapv
3002                         ? negv_optab : neg_optab,
3003                         op0, target, 0);
3004
3005   /* These are the operations that are potentially turned into a sequence
3006      of shifts and additions.  */
3007   if (SCALAR_INT_MODE_P (mode)
3008       && (unsignedp || !flag_trapv))
3009     {
3010       HOST_WIDE_INT coeff = 0;
3011       rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3012
3013       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3014          less than or equal in size to `unsigned int' this doesn't matter.
3015          If the mode is larger than `unsigned int', then synth_mult works
3016          only if the constant value exactly fits in an `unsigned int' without
3017          any truncation.  This means that multiplying by negative values does
3018          not work; results are off by 2^32 on a 32 bit machine.  */
3019
3020       if (GET_CODE (op1) == CONST_INT)
3021         {
3022           /* Attempt to handle multiplication of DImode values by negative
3023              coefficients, by performing the multiplication by a positive
3024              multiplier and then inverting the result.  */
3025           if (INTVAL (op1) < 0
3026               && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3027             {
3028               /* Its safe to use -INTVAL (op1) even for INT_MIN, as the
3029                  result is interpreted as an unsigned coefficient.
3030                  Exclude cost of op0 from max_cost to match the cost
3031                  calculation of the synth_mult.  */
3032               max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET)
3033                          - neg_cost[mode];
3034               if (max_cost > 0
3035                   && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3036                                           &variant, max_cost))
3037                 {
3038                   rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3039                                                 NULL_RTX, &algorithm,
3040                                                 variant);
3041                   return expand_unop (mode, neg_optab, temp, target, 0);
3042                 }
3043             }
3044           else coeff = INTVAL (op1);
3045         }
3046       else if (GET_CODE (op1) == CONST_DOUBLE)
3047         {
3048           /* If we are multiplying in DImode, it may still be a win
3049              to try to work with shifts and adds.  */
3050           if (CONST_DOUBLE_HIGH (op1) == 0)
3051             coeff = CONST_DOUBLE_LOW (op1);
3052           else if (CONST_DOUBLE_LOW (op1) == 0
3053                    && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3054             {
3055               int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3056                           + HOST_BITS_PER_WIDE_INT;
3057               return expand_shift (LSHIFT_EXPR, mode, op0,
3058                                    build_int_cst (NULL_TREE, shift),
3059                                    target, unsignedp);
3060             }
3061         }
3062
3063       /* We used to test optimize here, on the grounds that it's better to
3064          produce a smaller program when -O is not used.  But this causes
3065          such a terrible slowdown sometimes that it seems better to always
3066          use synth_mult.  */
3067       if (coeff != 0)
3068         {
3069           /* Special case powers of two.  */
3070           if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3071             return expand_shift (LSHIFT_EXPR, mode, op0,
3072                                  build_int_cst (NULL_TREE, floor_log2 (coeff)),
3073                                  target, unsignedp);
3074
3075           /* Exclude cost of op0 from max_cost to match the cost
3076              calculation of the synth_mult.  */
3077           max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET);
3078           if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3079                                    max_cost))
3080             return expand_mult_const (mode, op0, coeff, target,
3081                                       &algorithm, variant);
3082         }
3083     }
3084
3085   if (GET_CODE (op0) == CONST_DOUBLE)
3086     {
3087       rtx temp = op0;
3088       op0 = op1;
3089       op1 = temp;
3090     }
3091
3092   /* Expand x*2.0 as x+x.  */
3093   if (GET_CODE (op1) == CONST_DOUBLE
3094       && SCALAR_FLOAT_MODE_P (mode))
3095     {
3096       REAL_VALUE_TYPE d;
3097       REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3098
3099       if (REAL_VALUES_EQUAL (d, dconst2))
3100         {
3101           op0 = force_reg (GET_MODE (op0), op0);
3102           return expand_binop (mode, add_optab, op0, op0,
3103                                target, unsignedp, OPTAB_LIB_WIDEN);
3104         }
3105     }
3106
3107   /* This used to use umul_optab if unsigned, but for non-widening multiply
3108      there is no difference between signed and unsigned.  */
3109   op0 = expand_binop (mode,
3110                       ! unsignedp
3111                       && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3112                       ? smulv_optab : smul_optab,
3113                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3114   gcc_assert (op0);
3115   return op0;
3116 }
3117 \f
3118 /* Return the smallest n such that 2**n >= X.  */
3119
3120 int
3121 ceil_log2 (unsigned HOST_WIDE_INT x)
3122 {
3123   return floor_log2 (x - 1) + 1;
3124 }
3125
3126 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3127    replace division by D, and put the least significant N bits of the result
3128    in *MULTIPLIER_PTR and return the most significant bit.
3129
3130    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3131    needed precision is in PRECISION (should be <= N).
3132
3133    PRECISION should be as small as possible so this function can choose
3134    multiplier more freely.
3135
3136    The rounded-up logarithm of D is placed in *lgup_ptr.  A shift count that
3137    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3138
3139    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3140    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
3141
3142 static
3143 unsigned HOST_WIDE_INT
3144 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3145                    rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3146 {
3147   HOST_WIDE_INT mhigh_hi, mlow_hi;
3148   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3149   int lgup, post_shift;
3150   int pow, pow2;
3151   unsigned HOST_WIDE_INT nl, dummy1;
3152   HOST_WIDE_INT nh, dummy2;
3153
3154   /* lgup = ceil(log2(divisor)); */
3155   lgup = ceil_log2 (d);
3156
3157   gcc_assert (lgup <= n);
3158
3159   pow = n + lgup;
3160   pow2 = n + lgup - precision;
3161
3162   /* We could handle this with some effort, but this case is much
3163      better handled directly with a scc insn, so rely on caller using
3164      that.  */
3165   gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3166
3167   /* mlow = 2^(N + lgup)/d */
3168  if (pow >= HOST_BITS_PER_WIDE_INT)
3169     {
3170       nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3171       nl = 0;
3172     }
3173   else
3174     {
3175       nh = 0;
3176       nl = (unsigned HOST_WIDE_INT) 1 << pow;
3177     }
3178   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3179                         &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3180
3181   /* mhigh = (2^(N + lgup) + 2^N + lgup - precision)/d */
3182   if (pow2 >= HOST_BITS_PER_WIDE_INT)
3183     nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3184   else
3185     nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3186   div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3187                         &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3188
3189   gcc_assert (!mhigh_hi || nh - d < d);
3190   gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3191   /* Assert that mlow < mhigh.  */
3192   gcc_assert (mlow_hi < mhigh_hi
3193               || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3194
3195   /* If precision == N, then mlow, mhigh exceed 2^N
3196      (but they do not exceed 2^(N+1)).  */
3197
3198   /* Reduce to lowest terms.  */
3199   for (post_shift = lgup; post_shift > 0; post_shift--)
3200     {
3201       unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3202       unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3203       if (ml_lo >= mh_lo)
3204         break;
3205
3206       mlow_hi = 0;
3207       mlow_lo = ml_lo;
3208       mhigh_hi = 0;
3209       mhigh_lo = mh_lo;
3210     }
3211
3212   *post_shift_ptr = post_shift;
3213   *lgup_ptr = lgup;
3214   if (n < HOST_BITS_PER_WIDE_INT)
3215     {
3216       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3217       *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3218       return mhigh_lo >= mask;
3219     }
3220   else
3221     {
3222       *multiplier_ptr = GEN_INT (mhigh_lo);
3223       return mhigh_hi;
3224     }
3225 }
3226
3227 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3228    congruent to 1 (mod 2**N).  */
3229
3230 static unsigned HOST_WIDE_INT
3231 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3232 {
3233   /* Solve x*y == 1 (mod 2^n), where x is odd.  Return y.  */
3234
3235   /* The algorithm notes that the choice y = x satisfies
3236      x*y == 1 mod 2^3, since x is assumed odd.
3237      Each iteration doubles the number of bits of significance in y.  */
3238
3239   unsigned HOST_WIDE_INT mask;
3240   unsigned HOST_WIDE_INT y = x;
3241   int nbit = 3;
3242
3243   mask = (n == HOST_BITS_PER_WIDE_INT
3244           ? ~(unsigned HOST_WIDE_INT) 0
3245           : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3246
3247   while (nbit < n)
3248     {
3249       y = y * (2 - x*y) & mask;         /* Modulo 2^N */
3250       nbit *= 2;
3251     }
3252   return y;
3253 }
3254
3255 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3256    flavor of OP0 and OP1.  ADJ_OPERAND is already the high half of the
3257    product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed product
3258    to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3259    become signed.
3260
3261    The result is put in TARGET if that is convenient.
3262
3263    MODE is the mode of operation.  */
3264
3265 rtx
3266 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3267                              rtx op1, rtx target, int unsignedp)
3268 {
3269   rtx tem;
3270   enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3271
3272   tem = expand_shift (RSHIFT_EXPR, mode, op0,
3273                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3274                       NULL_RTX, 0);
3275   tem = expand_and (mode, tem, op1, NULL_RTX);
3276   adj_operand
3277     = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3278                      adj_operand);
3279
3280   tem = expand_shift (RSHIFT_EXPR, mode, op1,
3281                       build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3282                       NULL_RTX, 0);
3283   tem = expand_and (mode, tem, op0, NULL_RTX);
3284   target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3285                           target);
3286
3287   return target;
3288 }
3289
3290 /* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
3291
3292 static rtx
3293 extract_high_half (enum machine_mode mode, rtx op)
3294 {
3295   enum machine_mode wider_mode;
3296
3297   if (mode == word_mode)
3298     return gen_highpart (mode, op);
3299
3300   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3301
3302   wider_mode = GET_MODE_WIDER_MODE (mode);
3303   op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3304                      build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3305   return convert_modes (mode, wider_mode, op, 0);
3306 }
3307
3308 /* Like expand_mult_highpart, but only consider using a multiplication
3309    optab.  OP1 is an rtx for the constant operand.  */
3310
3311 static rtx
3312 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3313                             rtx target, int unsignedp, int max_cost)
3314 {
3315   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3316   enum machine_mode wider_mode;
3317   optab moptab;
3318   rtx tem;
3319   int size;
3320
3321   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3322
3323   wider_mode = GET_MODE_WIDER_MODE (mode);
3324   size = GET_MODE_BITSIZE (mode);
3325
3326   /* Firstly, try using a multiplication insn that only generates the needed
3327      high part of the product, and in the sign flavor of unsignedp.  */
3328   if (mul_highpart_cost[mode] < max_cost)
3329     {
3330       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3331       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3332                           unsignedp, OPTAB_DIRECT);
3333       if (tem)
3334         return tem;
3335     }
3336
3337   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3338      Need to adjust the result after the multiplication.  */
3339   if (size - 1 < BITS_PER_WORD
3340       && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3341           + 4 * add_cost[mode] < max_cost))
3342     {
3343       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3344       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3345                           unsignedp, OPTAB_DIRECT);
3346       if (tem)
3347         /* We used the wrong signedness.  Adjust the result.  */
3348         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3349                                             tem, unsignedp);
3350     }
3351
3352   /* Try widening multiplication.  */
3353   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3354   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3355       && mul_widen_cost[wider_mode] < max_cost)
3356     {
3357       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3358                           unsignedp, OPTAB_WIDEN);
3359       if (tem)
3360         return extract_high_half (mode, tem);
3361     }
3362
3363   /* Try widening the mode and perform a non-widening multiplication.  */
3364   if (optab_handler (smul_optab, wider_mode)->insn_code != CODE_FOR_nothing
3365       && size - 1 < BITS_PER_WORD
3366       && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3367     {
3368       rtx insns, wop0, wop1;
3369
3370       /* We need to widen the operands, for example to ensure the
3371          constant multiplier is correctly sign or zero extended.
3372          Use a sequence to clean-up any instructions emitted by
3373          the conversions if things don't work out.  */
3374       start_sequence ();
3375       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3376       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3377       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3378                           unsignedp, OPTAB_WIDEN);
3379       insns = get_insns ();
3380       end_sequence ();
3381
3382       if (tem)
3383         {
3384           emit_insn (insns);
3385           return extract_high_half (mode, tem);
3386         }
3387     }
3388
3389   /* Try widening multiplication of opposite signedness, and adjust.  */
3390   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3391   if (optab_handler (moptab, wider_mode)->insn_code != CODE_FOR_nothing
3392       && size - 1 < BITS_PER_WORD
3393       && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3394           + 4 * add_cost[mode] < max_cost))
3395     {
3396       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3397                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3398       if (tem != 0)
3399         {
3400           tem = extract_high_half (mode, tem);
3401           /* We used the wrong signedness.  Adjust the result.  */
3402           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3403                                               target, unsignedp);
3404         }
3405     }
3406
3407   return 0;
3408 }
3409
3410 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3411    putting the high half of the result in TARGET if that is convenient,
3412    and return where the result is.  If the operation can not be performed,
3413    0 is returned.
3414
3415    MODE is the mode of operation and result.
3416
3417    UNSIGNEDP nonzero means unsigned multiply.
3418
3419    MAX_COST is the total allowed cost for the expanded RTL.  */
3420
3421 static rtx
3422 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3423                       rtx target, int unsignedp, int max_cost)
3424 {
3425   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3426   unsigned HOST_WIDE_INT cnst1;
3427   int extra_cost;
3428   bool sign_adjust = false;
3429   enum mult_variant variant;
3430   struct algorithm alg;
3431   rtx tem;
3432
3433   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3434   /* We can't support modes wider than HOST_BITS_PER_INT.  */
3435   gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3436
3437   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3438
3439   /* We can't optimize modes wider than BITS_PER_WORD.
3440      ??? We might be able to perform double-word arithmetic if
3441      mode == word_mode, however all the cost calculations in
3442      synth_mult etc. assume single-word operations.  */
3443   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3444     return expand_mult_highpart_optab (mode, op0, op1, target,
3445                                        unsignedp, max_cost);
3446
3447   extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3448
3449   /* Check whether we try to multiply by a negative constant.  */
3450   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3451     {
3452       sign_adjust = true;
3453       extra_cost += add_cost[mode];
3454     }
3455
3456   /* See whether shift/add multiplication is cheap enough.  */
3457   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3458                            max_cost - extra_cost))
3459     {
3460       /* See whether the specialized multiplication optabs are
3461          cheaper than the shift/add version.  */
3462       tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3463                                         alg.cost.cost + extra_cost);
3464       if (tem)
3465         return tem;
3466
3467       tem = convert_to_mode (wider_mode, op0, unsignedp);
3468       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3469       tem = extract_high_half (mode, tem);
3470
3471       /* Adjust result for signedness.  */
3472       if (sign_adjust)
3473         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3474
3475       return tem;
3476     }
3477   return expand_mult_highpart_optab (mode, op0, op1, target,
3478                                      unsignedp, max_cost);
3479 }
3480
3481
3482 /* Expand signed modulus of OP0 by a power of two D in mode MODE.  */
3483
3484 static rtx
3485 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3486 {
3487   unsigned HOST_WIDE_INT masklow, maskhigh;
3488   rtx result, temp, shift, label;
3489   int logd;
3490
3491   logd = floor_log2 (d);
3492   result = gen_reg_rtx (mode);
3493
3494   /* Avoid conditional branches when they're expensive.  */
3495   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3496       && optimize_insn_for_speed_p ())
3497     {
3498       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3499                                       mode, 0, -1);
3500       if (signmask)
3501         {
3502           signmask = force_reg (mode, signmask);
3503           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3504           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3505
3506           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3507              which instruction sequence to use.  If logical right shifts
3508              are expensive the use 2 XORs, 2 SUBs and an AND, otherwise
3509              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3510
3511           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3512           if (optab_handler (lshr_optab, mode)->insn_code == CODE_FOR_nothing
3513               || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3514             {
3515               temp = expand_binop (mode, xor_optab, op0, signmask,
3516                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3517               temp = expand_binop (mode, sub_optab, temp, signmask,
3518                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3519               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3520                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3521               temp = expand_binop (mode, xor_optab, temp, signmask,
3522                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3523               temp = expand_binop (mode, sub_optab, temp, signmask,
3524                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3525             }
3526           else
3527             {
3528               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3529                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3530               signmask = force_reg (mode, signmask);
3531
3532               temp = expand_binop (mode, add_optab, op0, signmask,
3533                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3534               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3535                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3536               temp = expand_binop (mode, sub_optab, temp, signmask,
3537                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3538             }
3539           return temp;
3540         }
3541     }
3542
3543   /* Mask contains the mode's signbit and the significant bits of the
3544      modulus.  By including the signbit in the operation, many targets
3545      can avoid an explicit compare operation in the following comparison
3546      against zero.  */
3547
3548   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3549   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3550     {
3551       masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3552       maskhigh = -1;
3553     }
3554   else
3555     maskhigh = (HOST_WIDE_INT) -1
3556                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3557
3558   temp = expand_binop (mode, and_optab, op0,
3559                        immed_double_const (masklow, maskhigh, mode),
3560                        result, 1, OPTAB_LIB_WIDEN);
3561   if (temp != result)
3562     emit_move_insn (result, temp);
3563
3564   label = gen_label_rtx ();
3565   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3566
3567   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3568                        0, OPTAB_LIB_WIDEN);
3569   masklow = (HOST_WIDE_INT) -1 << logd;
3570   maskhigh = -1;
3571   temp = expand_binop (mode, ior_optab, temp,
3572                        immed_double_const (masklow, maskhigh, mode),
3573                        result, 1, OPTAB_LIB_WIDEN);
3574   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3575                        0, OPTAB_LIB_WIDEN);
3576   if (temp != result)
3577     emit_move_insn (result, temp);
3578   emit_label (label);
3579   return result;
3580 }
3581
3582 /* Expand signed division of OP0 by a power of two D in mode MODE.
3583    This routine is only called for positive values of D.  */
3584
3585 static rtx
3586 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3587 {
3588   rtx temp, label;
3589   tree shift;
3590   int logd;
3591
3592   logd = floor_log2 (d);
3593   shift = build_int_cst (NULL_TREE, logd);
3594
3595   if (d == 2
3596       && BRANCH_COST (optimize_insn_for_speed_p (),
3597                       false) >= 1)
3598     {
3599       temp = gen_reg_rtx (mode);
3600       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3601       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3602                            0, OPTAB_LIB_WIDEN);
3603       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3604     }
3605
3606 #ifdef HAVE_conditional_move
3607   if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3608       >= 2)
3609     {
3610       rtx temp2;
3611
3612       /* ??? emit_conditional_move forces a stack adjustment via
3613          compare_from_rtx so, if the sequence is discarded, it will
3614          be lost.  Do it now instead.  */
3615       do_pending_stack_adjust ();
3616
3617       start_sequence ();
3618       temp2 = copy_to_mode_reg (mode, op0);
3619       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3620                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3621       temp = force_reg (mode, temp);
3622
3623       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3624       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3625                                      mode, temp, temp2, mode, 0);
3626       if (temp2)
3627         {
3628           rtx seq = get_insns ();
3629           end_sequence ();
3630           emit_insn (seq);
3631           return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3632         }
3633       end_sequence ();
3634     }
3635 #endif
3636
3637   if (BRANCH_COST (optimize_insn_for_speed_p (),
3638                    false) >= 2)
3639     {
3640       int ushift = GET_MODE_BITSIZE (mode) - logd;
3641
3642       temp = gen_reg_rtx (mode);
3643       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3644       if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3645         temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3646                              NULL_RTX, 0, OPTAB_LIB_WIDEN);
3647       else
3648         temp = expand_shift (RSHIFT_EXPR, mode, temp,
3649                              build_int_cst (NULL_TREE, ushift),
3650                              NULL_RTX, 1);
3651       temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3652                            0, OPTAB_LIB_WIDEN);
3653       return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3654     }
3655
3656   label = gen_label_rtx ();
3657   temp = copy_to_mode_reg (mode, op0);
3658   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3659   expand_inc (temp, GEN_INT (d - 1));
3660   emit_label (label);
3661   return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3662 }
3663 \f
3664 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3665    if that is convenient, and returning where the result is.
3666    You may request either the quotient or the remainder as the result;
3667    specify REM_FLAG nonzero to get the remainder.
3668
3669    CODE is the expression code for which kind of division this is;
3670    it controls how rounding is done.  MODE is the machine mode to use.
3671    UNSIGNEDP nonzero means do unsigned division.  */
3672
3673 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3674    and then correct it by or'ing in missing high bits
3675    if result of ANDI is nonzero.
3676    For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3677    This could optimize to a bfexts instruction.
3678    But C doesn't use these operations, so their optimizations are
3679    left for later.  */
3680 /* ??? For modulo, we don't actually need the highpart of the first product,
3681    the low part will do nicely.  And for small divisors, the second multiply
3682    can also be a low-part only multiply or even be completely left out.
3683    E.g. to calculate the remainder of a division by 3 with a 32 bit
3684    multiply, multiply with 0x55555556 and extract the upper two bits;
3685    the result is exact for inputs up to 0x1fffffff.
3686    The input range can be reduced by using cross-sum rules.
3687    For odd divisors >= 3, the following table gives right shift counts
3688    so that if a number is shifted by an integer multiple of the given
3689    amount, the remainder stays the same:
3690    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3691    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3692    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3693    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3694    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3695
3696    Cross-sum rules for even numbers can be derived by leaving as many bits
3697    to the right alone as the divisor has zeros to the right.
3698    E.g. if x is an unsigned 32 bit number:
3699    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3700    */
3701
3702 rtx
3703 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3704                rtx op0, rtx op1, rtx target, int unsignedp)
3705 {
3706   enum machine_mode compute_mode;
3707   rtx tquotient;
3708   rtx quotient = 0, remainder = 0;
3709   rtx last;
3710   int size;
3711   rtx insn, set;
3712   optab optab1, optab2;
3713   int op1_is_constant, op1_is_pow2 = 0;
3714   int max_cost, extra_cost;
3715   static HOST_WIDE_INT last_div_const = 0;
3716   static HOST_WIDE_INT ext_op1;
3717
3718   op1_is_constant = GET_CODE (op1) == CONST_INT;
3719   if (op1_is_constant)
3720     {
3721       ext_op1 = INTVAL (op1);
3722       if (unsignedp)
3723         ext_op1 &= GET_MODE_MASK (mode);
3724       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3725                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3726     }
3727
3728   /*
3729      This is the structure of expand_divmod:
3730
3731      First comes code to fix up the operands so we can perform the operations
3732      correctly and efficiently.
3733
3734      Second comes a switch statement with code specific for each rounding mode.
3735      For some special operands this code emits all RTL for the desired
3736      operation, for other cases, it generates only a quotient and stores it in
3737      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3738      to indicate that it has not done anything.
3739
3740      Last comes code that finishes the operation.  If QUOTIENT is set and
3741      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3742      QUOTIENT is not set, it is computed using trunc rounding.
3743
3744      We try to generate special code for division and remainder when OP1 is a
3745      constant.  If |OP1| = 2**n we can use shifts and some other fast
3746      operations.  For other values of OP1, we compute a carefully selected
3747      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3748      by m.
3749
3750      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3751      half of the product.  Different strategies for generating the product are
3752      implemented in expand_mult_highpart.
3753
3754      If what we actually want is the remainder, we generate that by another
3755      by-constant multiplication and a subtraction.  */
3756
3757   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3758      code below will malfunction if we are, so check here and handle
3759      the special case if so.  */
3760   if (op1 == const1_rtx)
3761     return rem_flag ? const0_rtx : op0;
3762
3763     /* When dividing by -1, we could get an overflow.
3764      negv_optab can handle overflows.  */
3765   if (! unsignedp && op1 == constm1_rtx)
3766     {
3767       if (rem_flag)
3768         return const0_rtx;
3769       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3770                           ? negv_optab : neg_optab, op0, target, 0);
3771     }
3772
3773   if (target
3774       /* Don't use the function value register as a target
3775          since we have to read it as well as write it,
3776          and function-inlining gets confused by this.  */
3777       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3778           /* Don't clobber an operand while doing a multi-step calculation.  */
3779           || ((rem_flag || op1_is_constant)
3780               && (reg_mentioned_p (target, op0)
3781                   || (MEM_P (op0) && MEM_P (target))))
3782           || reg_mentioned_p (target, op1)
3783           || (MEM_P (op1) && MEM_P (target))))
3784     target = 0;
3785
3786   /* Get the mode in which to perform this computation.  Normally it will
3787      be MODE, but sometimes we can't do the desired operation in MODE.
3788      If so, pick a wider mode in which we can do the operation.  Convert
3789      to that mode at the start to avoid repeated conversions.
3790
3791      First see what operations we need.  These depend on the expression
3792      we are evaluating.  (We assume that divxx3 insns exist under the
3793      same conditions that modxx3 insns and that these insns don't normally
3794      fail.  If these assumptions are not correct, we may generate less
3795      efficient code in some cases.)
3796
3797      Then see if we find a mode in which we can open-code that operation
3798      (either a division, modulus, or shift).  Finally, check for the smallest
3799      mode for which we can do the operation with a library call.  */
3800
3801   /* We might want to refine this now that we have division-by-constant
3802      optimization.  Since expand_mult_highpart tries so many variants, it is
3803      not straightforward to generalize this.  Maybe we should make an array
3804      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3805
3806   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3807             ? (unsignedp ? lshr_optab : ashr_optab)
3808             : (unsignedp ? udiv_optab : sdiv_optab));
3809   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3810             ? optab1
3811             : (unsignedp ? udivmod_optab : sdivmod_optab));
3812
3813   for (compute_mode = mode; compute_mode != VOIDmode;
3814        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3815     if (optab_handler (optab1, compute_mode)->insn_code != CODE_FOR_nothing
3816         || optab_handler (optab2, compute_mode)->insn_code != CODE_FOR_nothing)
3817       break;
3818
3819   if (compute_mode == VOIDmode)
3820     for (compute_mode = mode; compute_mode != VOIDmode;
3821          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3822       if (optab_libfunc (optab1, compute_mode)
3823           || optab_libfunc (optab2, compute_mode))
3824         break;
3825
3826   /* If we still couldn't find a mode, use MODE, but expand_binop will
3827      probably die.  */
3828   if (compute_mode == VOIDmode)
3829     compute_mode = mode;
3830
3831   if (target && GET_MODE (target) == compute_mode)
3832     tquotient = target;
3833   else
3834     tquotient = gen_reg_rtx (compute_mode);
3835
3836   size = GET_MODE_BITSIZE (compute_mode);
3837 #if 0
3838   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3839      (mode), and thereby get better code when OP1 is a constant.  Do that
3840      later.  It will require going over all usages of SIZE below.  */
3841   size = GET_MODE_BITSIZE (mode);
3842 #endif
3843
3844   /* Only deduct something for a REM if the last divide done was
3845      for a different constant.   Then set the constant of the last
3846      divide.  */
3847   max_cost = unsignedp ? udiv_cost[compute_mode] : sdiv_cost[compute_mode];
3848   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3849                      && INTVAL (op1) == last_div_const))
3850     max_cost -= mul_cost[compute_mode] + add_cost[compute_mode];
3851
3852   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3853
3854   /* Now convert to the best mode to use.  */
3855   if (compute_mode != mode)
3856     {
3857       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3858       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3859
3860       /* convert_modes may have placed op1 into a register, so we
3861          must recompute the following.  */
3862       op1_is_constant = GET_CODE (op1) == CONST_INT;
3863       op1_is_pow2 = (op1_is_constant
3864                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3865                           || (! unsignedp
3866                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3867     }
3868
3869   /* If one of the operands is a volatile MEM, copy it into a register.  */
3870
3871   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3872     op0 = force_reg (compute_mode, op0);
3873   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3874     op1 = force_reg (compute_mode, op1);
3875
3876   /* If we need the remainder or if OP1 is constant, we need to
3877      put OP0 in a register in case it has any queued subexpressions.  */
3878   if (rem_flag || op1_is_constant)
3879     op0 = force_reg (compute_mode, op0);
3880
3881   last = get_last_insn ();
3882
3883   /* Promote floor rounding to trunc rounding for unsigned operations.  */
3884   if (unsignedp)
3885     {
3886       if (code == FLOOR_DIV_EXPR)
3887         code = TRUNC_DIV_EXPR;
3888       if (code == FLOOR_MOD_EXPR)
3889         code = TRUNC_MOD_EXPR;
3890       if (code == EXACT_DIV_EXPR && op1_is_pow2)
3891         code = TRUNC_DIV_EXPR;
3892     }
3893
3894   if (op1 != const0_rtx)
3895     switch (code)
3896       {
3897       case TRUNC_MOD_EXPR:
3898       case TRUNC_DIV_EXPR:
3899         if (op1_is_constant)
3900           {
3901             if (unsignedp)
3902               {
3903                 unsigned HOST_WIDE_INT mh;
3904                 int pre_shift, post_shift;
3905                 int dummy;
3906                 rtx ml;
3907                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3908                                             & GET_MODE_MASK (compute_mode));
3909
3910                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3911                   {
3912                     pre_shift = floor_log2 (d);
3913                     if (rem_flag)
3914                       {
3915                         remainder
3916                           = expand_binop (compute_mode, and_optab, op0,
3917                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3918                                           remainder, 1,
3919                                           OPTAB_LIB_WIDEN);
3920                         if (remainder)
3921                           return gen_lowpart (mode, remainder);
3922                       }
3923                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3924                                              build_int_cst (NULL_TREE,
3925                                                             pre_shift),
3926                                              tquotient, 1);
3927                   }
3928                 else if (size <= HOST_BITS_PER_WIDE_INT)
3929                   {
3930                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3931                       {
3932                         /* Most significant bit of divisor is set; emit an scc
3933                            insn.  */
3934                         quotient = emit_store_flag (tquotient, GEU, op0, op1,
3935                                                     compute_mode, 1, 1);
3936                         if (quotient == 0)
3937                           goto fail1;
3938                       }
3939                     else
3940                       {
3941                         /* Find a suitable multiplier and right shift count
3942                            instead of multiplying with D.  */
3943
3944                         mh = choose_multiplier (d, size, size,
3945                                                 &ml, &post_shift, &dummy);
3946
3947                         /* If the suggested multiplier is more than SIZE bits,
3948                            we can do better for even divisors, using an
3949                            initial right shift.  */
3950                         if (mh != 0 && (d & 1) == 0)
3951                           {
3952                             pre_shift = floor_log2 (d & -d);
3953                             mh = choose_multiplier (d >> pre_shift, size,
3954                                                     size - pre_shift,
3955                                                     &ml, &post_shift, &dummy);
3956                             gcc_assert (!mh);
3957                           }
3958                         else
3959                           pre_shift = 0;
3960
3961                         if (mh != 0)
3962                           {
3963                             rtx t1, t2, t3, t4;
3964
3965                             if (post_shift - 1 >= BITS_PER_WORD)
3966                               goto fail1;
3967
3968                             extra_cost
3969                               = (shift_cost[compute_mode][post_shift - 1]
3970                                  + shift_cost[compute_mode][1]
3971                                  + 2 * add_cost[compute_mode]);
3972                             t1 = expand_mult_highpart (compute_mode, op0, ml,
3973                                                        NULL_RTX, 1,
3974                                                        max_cost - extra_cost);
3975                             if (t1 == 0)
3976                               goto fail1;
3977                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
3978                                                                op0, t1),
3979                                                 NULL_RTX);
3980                             t3 = expand_shift
3981                               (RSHIFT_EXPR, compute_mode, t2,
3982                                build_int_cst (NULL_TREE, 1),
3983                                NULL_RTX,1);
3984                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
3985                                                               t1, t3),
3986                                                 NULL_RTX);
3987                             quotient = expand_shift
3988                               (RSHIFT_EXPR, compute_mode, t4,
3989                                build_int_cst (NULL_TREE, post_shift - 1),
3990                                tquotient, 1);
3991                           }
3992                         else
3993                           {
3994                             rtx t1, t2;
3995
3996                             if (pre_shift >= BITS_PER_WORD
3997                                 || post_shift >= BITS_PER_WORD)
3998                               goto fail1;
3999
4000                             t1 = expand_shift
4001                               (RSHIFT_EXPR, compute_mode, op0,
4002                                build_int_cst (NULL_TREE, pre_shift),
4003                                NULL_RTX, 1);
4004                             extra_cost
4005                               = (shift_cost[compute_mode][pre_shift]
4006                                  + shift_cost[compute_mode][post_shift]);
4007                             t2 = expand_mult_highpart (compute_mode, t1, ml,
4008                                                        NULL_RTX, 1,
4009                                                        max_cost - extra_cost);
4010                             if (t2 == 0)
4011                               goto fail1;
4012                             quotient = expand_shift
4013                               (RSHIFT_EXPR, compute_mode, t2,
4014                                build_int_cst (NULL_TREE, post_shift),
4015                                tquotient, 1);
4016                           }
4017                       }
4018                   }
4019                 else            /* Too wide mode to use tricky code */
4020                   break;
4021
4022                 insn = get_last_insn ();
4023                 if (insn != last
4024                     && (set = single_set (insn)) != 0
4025                     && SET_DEST (set) == quotient)
4026                   set_unique_reg_note (insn,
4027                                        REG_EQUAL,
4028                                        gen_rtx_UDIV (compute_mode, op0, op1));
4029               }
4030             else                /* TRUNC_DIV, signed */
4031               {
4032                 unsigned HOST_WIDE_INT ml;
4033                 int lgup, post_shift;
4034                 rtx mlr;
4035                 HOST_WIDE_INT d = INTVAL (op1);
4036                 unsigned HOST_WIDE_INT abs_d;
4037
4038                 /* Since d might be INT_MIN, we have to cast to
4039                    unsigned HOST_WIDE_INT before negating to avoid
4040                    undefined signed overflow.  */
4041                 abs_d = (d >= 0
4042                          ? (unsigned HOST_WIDE_INT) d
4043                          : - (unsigned HOST_WIDE_INT) d);
4044
4045                 /* n rem d = n rem -d */
4046                 if (rem_flag && d < 0)
4047                   {
4048                     d = abs_d;
4049                     op1 = gen_int_mode (abs_d, compute_mode);
4050                   }
4051
4052                 if (d == 1)
4053                   quotient = op0;
4054                 else if (d == -1)
4055                   quotient = expand_unop (compute_mode, neg_optab, op0,
4056                                           tquotient, 0);
4057                 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4058                   {
4059                     /* This case is not handled correctly below.  */
4060                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4061                                                 compute_mode, 1, 1);
4062                     if (quotient == 0)
4063                       goto fail1;
4064                   }
4065                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4066                          && (rem_flag ? smod_pow2_cheap[compute_mode]
4067                                       : sdiv_pow2_cheap[compute_mode])
4068                          /* We assume that cheap metric is true if the
4069                             optab has an expander for this mode.  */
4070                          && ((optab_handler ((rem_flag ? smod_optab
4071                                               : sdiv_optab),
4072                                               compute_mode)->insn_code
4073                               != CODE_FOR_nothing)
4074                              || (optab_handler(sdivmod_optab,
4075                                                compute_mode)
4076                                  ->insn_code != CODE_FOR_nothing)))
4077                   ;
4078                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4079                   {
4080                     if (rem_flag)
4081                       {
4082                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4083                         if (remainder)
4084                           return gen_lowpart (mode, remainder);
4085                       }
4086
4087                     if (sdiv_pow2_cheap[compute_mode]
4088                         && ((optab_handler (sdiv_optab, compute_mode)->insn_code
4089                              != CODE_FOR_nothing)
4090                             || (optab_handler (sdivmod_optab, compute_mode)->insn_code
4091                                 != CODE_FOR_nothing)))
4092                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4093                                                 compute_mode, op0,
4094                                                 gen_int_mode (abs_d,
4095                                                               compute_mode),
4096                                                 NULL_RTX, 0);
4097                     else
4098                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4099
4100                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4101                        negate the quotient.  */
4102                     if (d < 0)
4103                       {
4104                         insn = get_last_insn ();
4105                         if (insn != last
4106                             && (set = single_set (insn)) != 0
4107                             && SET_DEST (set) == quotient
4108                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4109                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4110                           set_unique_reg_note (insn,
4111                                                REG_EQUAL,
4112                                                gen_rtx_DIV (compute_mode,
4113                                                             op0,
4114                                                             GEN_INT
4115                                                             (trunc_int_for_mode
4116                                                              (abs_d,
4117                                                               compute_mode))));
4118
4119                         quotient = expand_unop (compute_mode, neg_optab,
4120                                                 quotient, quotient, 0);
4121                       }
4122                   }
4123                 else if (size <= HOST_BITS_PER_WIDE_INT)
4124                   {
4125                     choose_multiplier (abs_d, size, size - 1,
4126                                        &mlr, &post_shift, &lgup);
4127                     ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4128                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4129                       {
4130                         rtx t1, t2, t3;
4131
4132                         if (post_shift >= BITS_PER_WORD
4133                             || size - 1 >= BITS_PER_WORD)
4134                           goto fail1;
4135
4136                         extra_cost = (shift_cost[compute_mode][post_shift]
4137                                       + shift_cost[compute_mode][size - 1]
4138                                       + add_cost[compute_mode]);
4139                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4140                                                    NULL_RTX, 0,
4141                                                    max_cost - extra_cost);
4142                         if (t1 == 0)
4143                           goto fail1;
4144                         t2 = expand_shift
4145                           (RSHIFT_EXPR, compute_mode, t1,
4146                            build_int_cst (NULL_TREE, post_shift),
4147                            NULL_RTX, 0);
4148                         t3 = expand_shift
4149                           (RSHIFT_EXPR, compute_mode, op0,
4150                            build_int_cst (NULL_TREE, size - 1),
4151                            NULL_RTX, 0);
4152                         if (d < 0)
4153                           quotient
4154                             = force_operand (gen_rtx_MINUS (compute_mode,
4155                                                             t3, t2),
4156                                              tquotient);
4157                         else
4158                           quotient
4159                             = force_operand (gen_rtx_MINUS (compute_mode,
4160                                                             t2, t3),
4161                                              tquotient);
4162                       }
4163                     else
4164                       {
4165                         rtx t1, t2, t3, t4;
4166
4167                         if (post_shift >= BITS_PER_WORD
4168                             || size - 1 >= BITS_PER_WORD)
4169                           goto fail1;
4170
4171                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4172                         mlr = gen_int_mode (ml, compute_mode);
4173                         extra_cost = (shift_cost[compute_mode][post_shift]
4174                                       + shift_cost[compute_mode][size - 1]
4175                                       + 2 * add_cost[compute_mode]);
4176                         t1 = expand_mult_highpart (compute_mode, op0, mlr,
4177                                                    NULL_RTX, 0,
4178                                                    max_cost - extra_cost);
4179                         if (t1 == 0)
4180                           goto fail1;
4181                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4182                                                           t1, op0),
4183                                             NULL_RTX);
4184                         t3 = expand_shift
4185                           (RSHIFT_EXPR, compute_mode, t2,
4186                            build_int_cst (NULL_TREE, post_shift),
4187                            NULL_RTX, 0);
4188                         t4 = expand_shift
4189                           (RSHIFT_EXPR, compute_mode, op0,
4190                            build_int_cst (NULL_TREE, size - 1),
4191                            NULL_RTX, 0);
4192                         if (d < 0)
4193                           quotient
4194                             = force_operand (gen_rtx_MINUS (compute_mode,
4195                                                             t4, t3),
4196                                              tquotient);
4197                         else
4198                           quotient
4199                             = force_operand (gen_rtx_MINUS (compute_mode,
4200                                                             t3, t4),
4201                                              tquotient);
4202                       }
4203                   }
4204                 else            /* Too wide mode to use tricky code */
4205                   break;
4206
4207                 insn = get_last_insn ();
4208                 if (insn != last
4209                     && (set = single_set (insn)) != 0
4210                     && SET_DEST (set) == quotient)
4211                   set_unique_reg_note (insn,
4212                                        REG_EQUAL,
4213                                        gen_rtx_DIV (compute_mode, op0, op1));
4214               }
4215             break;
4216           }
4217       fail1:
4218         delete_insns_since (last);
4219         break;
4220
4221       case FLOOR_DIV_EXPR:
4222       case FLOOR_MOD_EXPR:
4223       /* We will come here only for signed operations.  */
4224         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4225           {
4226             unsigned HOST_WIDE_INT mh;
4227             int pre_shift, lgup, post_shift;
4228             HOST_WIDE_INT d = INTVAL (op1);
4229             rtx ml;
4230
4231             if (d > 0)
4232               {
4233                 /* We could just as easily deal with negative constants here,
4234                    but it does not seem worth the trouble for GCC 2.6.  */
4235                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4236                   {
4237                     pre_shift = floor_log2 (d);
4238                     if (rem_flag)
4239                       {
4240                         remainder = expand_binop (compute_mode, and_optab, op0,
4241                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4242                                                   remainder, 0, OPTAB_LIB_WIDEN);
4243                         if (remainder)
4244                           return gen_lowpart (mode, remainder);
4245                       }
4246                     quotient = expand_shift
4247                       (RSHIFT_EXPR, compute_mode, op0,
4248                        build_int_cst (NULL_TREE, pre_shift),
4249                        tquotient, 0);
4250                   }
4251                 else
4252                   {
4253                     rtx t1, t2, t3, t4;
4254
4255                     mh = choose_multiplier (d, size, size - 1,
4256                                             &ml, &post_shift, &lgup);
4257                     gcc_assert (!mh);
4258
4259                     if (post_shift < BITS_PER_WORD
4260                         && size - 1 < BITS_PER_WORD)
4261                       {
4262                         t1 = expand_shift
4263                           (RSHIFT_EXPR, compute_mode, op0,
4264                            build_int_cst (NULL_TREE, size - 1),
4265                            NULL_RTX, 0);
4266                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4267                                            NULL_RTX, 0, OPTAB_WIDEN);
4268                         extra_cost = (shift_cost[compute_mode][post_shift]
4269                                       + shift_cost[compute_mode][size - 1]
4270                                       + 2 * add_cost[compute_mode]);
4271                         t3 = expand_mult_highpart (compute_mode, t2, ml,
4272                                                    NULL_RTX, 1,
4273                                                    max_cost - extra_cost);
4274                         if (t3 != 0)
4275                           {
4276                             t4 = expand_shift
4277                               (RSHIFT_EXPR, compute_mode, t3,
4278                                build_int_cst (NULL_TREE, post_shift),
4279                                NULL_RTX, 1);
4280                             quotient = expand_binop (compute_mode, xor_optab,
4281                                                      t4, t1, tquotient, 0,
4282                                                      OPTAB_WIDEN);
4283                           }
4284                       }
4285                   }
4286               }
4287             else
4288               {
4289                 rtx nsign, t1, t2, t3, t4;
4290                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4291                                                   op0, constm1_rtx), NULL_RTX);
4292                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4293                                    0, OPTAB_WIDEN);
4294                 nsign = expand_shift
4295                   (RSHIFT_EXPR, compute_mode, t2,
4296                    build_int_cst (NULL_TREE, size - 1),
4297                    NULL_RTX, 0);
4298                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4299                                     NULL_RTX);
4300                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4301                                     NULL_RTX, 0);
4302                 if (t4)
4303                   {
4304                     rtx t5;
4305                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4306                                       NULL_RTX, 0);
4307                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4308                                                             t4, t5),
4309                                               tquotient);
4310                   }
4311               }
4312           }
4313
4314         if (quotient != 0)
4315           break;
4316         delete_insns_since (last);
4317
4318         /* Try using an instruction that produces both the quotient and
4319            remainder, using truncation.  We can easily compensate the quotient
4320            or remainder to get floor rounding, once we have the remainder.
4321            Notice that we compute also the final remainder value here,
4322            and return the result right away.  */
4323         if (target == 0 || GET_MODE (target) != compute_mode)
4324           target = gen_reg_rtx (compute_mode);
4325
4326         if (rem_flag)
4327           {
4328             remainder
4329               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4330             quotient = gen_reg_rtx (compute_mode);
4331           }
4332         else
4333           {
4334             quotient
4335               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4336             remainder = gen_reg_rtx (compute_mode);
4337           }
4338
4339         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4340                                  quotient, remainder, 0))
4341           {
4342             /* This could be computed with a branch-less sequence.
4343                Save that for later.  */
4344             rtx tem;
4345             rtx label = gen_label_rtx ();
4346             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4347             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4348                                 NULL_RTX, 0, OPTAB_WIDEN);
4349             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4350             expand_dec (quotient, const1_rtx);
4351             expand_inc (remainder, op1);
4352             emit_label (label);
4353             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4354           }
4355
4356         /* No luck with division elimination or divmod.  Have to do it
4357            by conditionally adjusting op0 *and* the result.  */
4358         {
4359           rtx label1, label2, label3, label4, label5;
4360           rtx adjusted_op0;
4361           rtx tem;
4362
4363           quotient = gen_reg_rtx (compute_mode);
4364           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4365           label1 = gen_label_rtx ();
4366           label2 = gen_label_rtx ();
4367           label3 = gen_label_rtx ();
4368           label4 = gen_label_rtx ();
4369           label5 = gen_label_rtx ();
4370           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4371           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4372           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4373                               quotient, 0, OPTAB_LIB_WIDEN);
4374           if (tem != quotient)
4375             emit_move_insn (quotient, tem);
4376           emit_jump_insn (gen_jump (label5));
4377           emit_barrier ();
4378           emit_label (label1);
4379           expand_inc (adjusted_op0, const1_rtx);
4380           emit_jump_insn (gen_jump (label4));
4381           emit_barrier ();
4382           emit_label (label2);
4383           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4384           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4385                               quotient, 0, OPTAB_LIB_WIDEN);
4386           if (tem != quotient)
4387             emit_move_insn (quotient, tem);
4388           emit_jump_insn (gen_jump (label5));
4389           emit_barrier ();
4390           emit_label (label3);
4391           expand_dec (adjusted_op0, const1_rtx);
4392           emit_label (label4);
4393           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4394                               quotient, 0, OPTAB_LIB_WIDEN);
4395           if (tem != quotient)
4396             emit_move_insn (quotient, tem);
4397           expand_dec (quotient, const1_rtx);
4398           emit_label (label5);
4399         }
4400         break;
4401
4402       case CEIL_DIV_EXPR:
4403       case CEIL_MOD_EXPR:
4404         if (unsignedp)
4405           {
4406             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4407               {
4408                 rtx t1, t2, t3;
4409                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4410                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4411                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4412                                    tquotient, 1);
4413                 t2 = expand_binop (compute_mode, and_optab, op0,
4414                                    GEN_INT (d - 1),
4415                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4416                 t3 = gen_reg_rtx (compute_mode);
4417                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4418                                       compute_mode, 1, 1);
4419                 if (t3 == 0)
4420                   {
4421                     rtx lab;
4422                     lab = gen_label_rtx ();
4423                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4424                     expand_inc (t1, const1_rtx);
4425                     emit_label (lab);
4426                     quotient = t1;
4427                   }
4428                 else
4429                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4430                                                           t1, t3),
4431                                             tquotient);
4432                 break;
4433               }
4434
4435             /* Try using an instruction that produces both the quotient and
4436                remainder, using truncation.  We can easily compensate the
4437                quotient or remainder to get ceiling rounding, once we have the
4438                remainder.  Notice that we compute also the final remainder
4439                value here, and return the result right away.  */
4440             if (target == 0 || GET_MODE (target) != compute_mode)
4441               target = gen_reg_rtx (compute_mode);
4442
4443             if (rem_flag)
4444               {
4445                 remainder = (REG_P (target)
4446                              ? target : gen_reg_rtx (compute_mode));
4447                 quotient = gen_reg_rtx (compute_mode);
4448               }
4449             else
4450               {
4451                 quotient = (REG_P (target)
4452                             ? target : gen_reg_rtx (compute_mode));
4453                 remainder = gen_reg_rtx (compute_mode);
4454               }
4455
4456             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4457                                      remainder, 1))
4458               {
4459                 /* This could be computed with a branch-less sequence.
4460                    Save that for later.  */
4461                 rtx label = gen_label_rtx ();
4462                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4463                                  compute_mode, label);
4464                 expand_inc (quotient, const1_rtx);
4465                 expand_dec (remainder, op1);
4466                 emit_label (label);
4467                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4468               }
4469
4470             /* No luck with division elimination or divmod.  Have to do it
4471                by conditionally adjusting op0 *and* the result.  */
4472             {
4473               rtx label1, label2;
4474               rtx adjusted_op0, tem;
4475
4476               quotient = gen_reg_rtx (compute_mode);
4477               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4478               label1 = gen_label_rtx ();
4479               label2 = gen_label_rtx ();
4480               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4481                                compute_mode, label1);
4482               emit_move_insn  (quotient, const0_rtx);
4483               emit_jump_insn (gen_jump (label2));
4484               emit_barrier ();
4485               emit_label (label1);
4486               expand_dec (adjusted_op0, const1_rtx);
4487               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4488                                   quotient, 1, OPTAB_LIB_WIDEN);
4489               if (tem != quotient)
4490                 emit_move_insn (quotient, tem);
4491               expand_inc (quotient, const1_rtx);
4492               emit_label (label2);
4493             }
4494           }
4495         else /* signed */
4496           {
4497             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4498                 && INTVAL (op1) >= 0)
4499               {
4500                 /* This is extremely similar to the code for the unsigned case
4501                    above.  For 2.7 we should merge these variants, but for
4502                    2.6.1 I don't want to touch the code for unsigned since that
4503                    get used in C.  The signed case will only be used by other
4504                    languages (Ada).  */
4505
4506                 rtx t1, t2, t3;
4507                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4508                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4509                                    build_int_cst (NULL_TREE, floor_log2 (d)),
4510                                    tquotient, 0);
4511                 t2 = expand_binop (compute_mode, and_optab, op0,
4512                                    GEN_INT (d - 1),
4513                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4514                 t3 = gen_reg_rtx (compute_mode);
4515                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4516                                       compute_mode, 1, 1);
4517                 if (t3 == 0)
4518                   {
4519                     rtx lab;
4520                     lab = gen_label_rtx ();
4521                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4522                     expand_inc (t1, const1_rtx);
4523                     emit_label (lab);
4524                     quotient = t1;
4525                   }
4526                 else
4527                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4528                                                           t1, t3),
4529                                             tquotient);
4530                 break;
4531               }
4532
4533             /* Try using an instruction that produces both the quotient and
4534                remainder, using truncation.  We can easily compensate the
4535                quotient or remainder to get ceiling rounding, once we have the
4536                remainder.  Notice that we compute also the final remainder
4537                value here, and return the result right away.  */
4538             if (target == 0 || GET_MODE (target) != compute_mode)
4539               target = gen_reg_rtx (compute_mode);
4540             if (rem_flag)
4541               {
4542                 remainder= (REG_P (target)
4543                             ? target : gen_reg_rtx (compute_mode));
4544                 quotient = gen_reg_rtx (compute_mode);
4545               }
4546             else
4547               {
4548                 quotient = (REG_P (target)
4549                             ? target : gen_reg_rtx (compute_mode));
4550                 remainder = gen_reg_rtx (compute_mode);
4551               }
4552
4553             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4554                                      remainder, 0))
4555               {
4556                 /* This could be computed with a branch-less sequence.
4557                    Save that for later.  */
4558                 rtx tem;
4559                 rtx label = gen_label_rtx ();
4560                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4561                                  compute_mode, label);
4562                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4563                                     NULL_RTX, 0, OPTAB_WIDEN);
4564                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4565                 expand_inc (quotient, const1_rtx);
4566                 expand_dec (remainder, op1);
4567                 emit_label (label);
4568                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4569               }
4570
4571             /* No luck with division elimination or divmod.  Have to do it
4572                by conditionally adjusting op0 *and* the result.  */
4573             {
4574               rtx label1, label2, label3, label4, label5;
4575               rtx adjusted_op0;
4576               rtx tem;
4577
4578               quotient = gen_reg_rtx (compute_mode);
4579               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4580               label1 = gen_label_rtx ();
4581               label2 = gen_label_rtx ();
4582               label3 = gen_label_rtx ();
4583               label4 = gen_label_rtx ();
4584               label5 = gen_label_rtx ();
4585               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4586               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4587                                compute_mode, label1);
4588               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4589                                   quotient, 0, OPTAB_LIB_WIDEN);
4590               if (tem != quotient)
4591                 emit_move_insn (quotient, tem);
4592               emit_jump_insn (gen_jump (label5));
4593               emit_barrier ();
4594               emit_label (label1);
4595               expand_dec (adjusted_op0, const1_rtx);
4596               emit_jump_insn (gen_jump (label4));
4597               emit_barrier ();
4598               emit_label (label2);
4599               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4600                                compute_mode, label3);
4601               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4602                                   quotient, 0, OPTAB_LIB_WIDEN);
4603               if (tem != quotient)
4604                 emit_move_insn (quotient, tem);
4605               emit_jump_insn (gen_jump (label5));
4606               emit_barrier ();
4607               emit_label (label3);
4608               expand_inc (adjusted_op0, const1_rtx);
4609               emit_label (label4);
4610               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4611                                   quotient, 0, OPTAB_LIB_WIDEN);
4612               if (tem != quotient)
4613                 emit_move_insn (quotient, tem);
4614               expand_inc (quotient, const1_rtx);
4615               emit_label (label5);
4616             }
4617           }
4618         break;
4619
4620       case EXACT_DIV_EXPR:
4621         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4622           {
4623             HOST_WIDE_INT d = INTVAL (op1);
4624             unsigned HOST_WIDE_INT ml;
4625             int pre_shift;
4626             rtx t1;
4627
4628             pre_shift = floor_log2 (d & -d);
4629             ml = invert_mod2n (d >> pre_shift, size);
4630             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4631                                build_int_cst (NULL_TREE, pre_shift),
4632                                NULL_RTX, unsignedp);
4633             quotient = expand_mult (compute_mode, t1,
4634                                     gen_int_mode (ml, compute_mode),
4635                                     NULL_RTX, 1);
4636
4637             insn = get_last_insn ();
4638             set_unique_reg_note (insn,
4639                                  REG_EQUAL,
4640                                  gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4641                                                  compute_mode,
4642                                                  op0, op1));
4643           }
4644         break;
4645
4646       case ROUND_DIV_EXPR:
4647       case ROUND_MOD_EXPR:
4648         if (unsignedp)
4649           {
4650             rtx tem;
4651             rtx label;
4652             label = gen_label_rtx ();
4653             quotient = gen_reg_rtx (compute_mode);
4654             remainder = gen_reg_rtx (compute_mode);
4655             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4656               {
4657                 rtx tem;
4658                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4659                                          quotient, 1, OPTAB_LIB_WIDEN);
4660                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4661                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4662                                           remainder, 1, OPTAB_LIB_WIDEN);
4663               }
4664             tem = plus_constant (op1, -1);
4665             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4666                                 build_int_cst (NULL_TREE, 1),
4667                                 NULL_RTX, 1);
4668             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4669             expand_inc (quotient, const1_rtx);
4670             expand_dec (remainder, op1);
4671             emit_label (label);
4672           }
4673         else
4674           {
4675             rtx abs_rem, abs_op1, tem, mask;
4676             rtx label;
4677             label = gen_label_rtx ();
4678             quotient = gen_reg_rtx (compute_mode);
4679             remainder = gen_reg_rtx (compute_mode);
4680             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4681               {
4682                 rtx tem;
4683                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4684                                          quotient, 0, OPTAB_LIB_WIDEN);
4685                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4686                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4687                                           remainder, 0, OPTAB_LIB_WIDEN);
4688               }
4689             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4690             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4691             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4692                                 build_int_cst (NULL_TREE, 1),
4693                                 NULL_RTX, 1);
4694             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4695             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4696                                 NULL_RTX, 0, OPTAB_WIDEN);
4697             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4698                                  build_int_cst (NULL_TREE, size - 1),
4699                                  NULL_RTX, 0);
4700             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4701                                 NULL_RTX, 0, OPTAB_WIDEN);
4702             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4703                                 NULL_RTX, 0, OPTAB_WIDEN);
4704             expand_inc (quotient, tem);
4705             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4706                                 NULL_RTX, 0, OPTAB_WIDEN);
4707             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4708                                 NULL_RTX, 0, OPTAB_WIDEN);
4709             expand_dec (remainder, tem);
4710             emit_label (label);
4711           }
4712         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4713
4714       default:
4715         gcc_unreachable ();
4716       }
4717
4718   if (quotient == 0)
4719     {
4720       if (target && GET_MODE (target) != compute_mode)
4721         target = 0;
4722
4723       if (rem_flag)
4724         {
4725           /* Try to produce the remainder without producing the quotient.
4726              If we seem to have a divmod pattern that does not require widening,
4727              don't try widening here.  We should really have a WIDEN argument
4728              to expand_twoval_binop, since what we'd really like to do here is
4729              1) try a mod insn in compute_mode
4730              2) try a divmod insn in compute_mode
4731              3) try a div insn in compute_mode and multiply-subtract to get
4732                 remainder
4733              4) try the same things with widening allowed.  */
4734           remainder
4735             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4736                                  op0, op1, target,
4737                                  unsignedp,
4738                                  ((optab_handler (optab2, compute_mode)->insn_code
4739                                    != CODE_FOR_nothing)
4740                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4741           if (remainder == 0)
4742             {
4743               /* No luck there.  Can we do remainder and divide at once
4744                  without a library call?  */
4745               remainder = gen_reg_rtx (compute_mode);
4746               if (! expand_twoval_binop ((unsignedp
4747                                           ? udivmod_optab
4748                                           : sdivmod_optab),
4749                                          op0, op1,
4750                                          NULL_RTX, remainder, unsignedp))
4751                 remainder = 0;
4752             }
4753
4754           if (remainder)
4755             return gen_lowpart (mode, remainder);
4756         }
4757
4758       /* Produce the quotient.  Try a quotient insn, but not a library call.
4759          If we have a divmod in this mode, use it in preference to widening
4760          the div (for this test we assume it will not fail). Note that optab2
4761          is set to the one of the two optabs that the call below will use.  */
4762       quotient
4763         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4764                              op0, op1, rem_flag ? NULL_RTX : target,
4765                              unsignedp,
4766                              ((optab_handler (optab2, compute_mode)->insn_code
4767                                != CODE_FOR_nothing)
4768                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4769
4770       if (quotient == 0)
4771         {
4772           /* No luck there.  Try a quotient-and-remainder insn,
4773              keeping the quotient alone.  */
4774           quotient = gen_reg_rtx (compute_mode);
4775           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4776                                      op0, op1,
4777                                      quotient, NULL_RTX, unsignedp))
4778             {
4779               quotient = 0;
4780               if (! rem_flag)
4781                 /* Still no luck.  If we are not computing the remainder,
4782                    use a library call for the quotient.  */
4783                 quotient = sign_expand_binop (compute_mode,
4784                                               udiv_optab, sdiv_optab,
4785                                               op0, op1, target,
4786                                               unsignedp, OPTAB_LIB_WIDEN);
4787             }
4788         }
4789     }
4790
4791   if (rem_flag)
4792     {
4793       if (target && GET_MODE (target) != compute_mode)
4794         target = 0;
4795
4796       if (quotient == 0)
4797         {
4798           /* No divide instruction either.  Use library for remainder.  */
4799           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4800                                          op0, op1, target,
4801                                          unsignedp, OPTAB_LIB_WIDEN);
4802           /* No remainder function.  Try a quotient-and-remainder
4803              function, keeping the remainder.  */
4804           if (!remainder)
4805             {
4806               remainder = gen_reg_rtx (compute_mode);
4807               if (!expand_twoval_binop_libfunc
4808                   (unsignedp ? udivmod_optab : sdivmod_optab,
4809                    op0, op1,
4810                    NULL_RTX, remainder,
4811                    unsignedp ? UMOD : MOD))
4812                 remainder = NULL_RTX;
4813             }
4814         }
4815       else
4816         {
4817           /* We divided.  Now finish doing X - Y * (X / Y).  */
4818           remainder = expand_mult (compute_mode, quotient, op1,
4819                                    NULL_RTX, unsignedp);
4820           remainder = expand_binop (compute_mode, sub_optab, op0,
4821                                     remainder, target, unsignedp,
4822                                     OPTAB_LIB_WIDEN);
4823         }
4824     }
4825
4826   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4827 }
4828 \f
4829 /* Return a tree node with data type TYPE, describing the value of X.
4830    Usually this is an VAR_DECL, if there is no obvious better choice.
4831    X may be an expression, however we only support those expressions
4832    generated by loop.c.  */
4833
4834 tree
4835 make_tree (tree type, rtx x)
4836 {
4837   tree t;
4838
4839   switch (GET_CODE (x))
4840     {
4841     case CONST_INT:
4842       {
4843         HOST_WIDE_INT hi = 0;
4844
4845         if (INTVAL (x) < 0
4846             && !(TYPE_UNSIGNED (type)
4847                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
4848                      < HOST_BITS_PER_WIDE_INT)))
4849           hi = -1;
4850
4851         t = build_int_cst_wide (type, INTVAL (x), hi);
4852
4853         return t;
4854       }
4855
4856     case CONST_DOUBLE:
4857       if (GET_MODE (x) == VOIDmode)
4858         t = build_int_cst_wide (type,
4859                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4860       else
4861         {
4862           REAL_VALUE_TYPE d;
4863
4864           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4865           t = build_real (type, d);
4866         }
4867
4868       return t;
4869
4870     case CONST_VECTOR:
4871       {
4872         int units = CONST_VECTOR_NUNITS (x);
4873         tree itype = TREE_TYPE (type);
4874         tree t = NULL_TREE;
4875         int i;
4876
4877
4878         /* Build a tree with vector elements.  */
4879         for (i = units - 1; i >= 0; --i)
4880           {
4881             rtx elt = CONST_VECTOR_ELT (x, i);
4882             t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4883           }
4884
4885         return build_vector (type, t);
4886       }
4887
4888     case PLUS:
4889       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4890                           make_tree (type, XEXP (x, 1)));
4891
4892     case MINUS:
4893       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4894                           make_tree (type, XEXP (x, 1)));
4895
4896     case NEG:
4897       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4898
4899     case MULT:
4900       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4901                           make_tree (type, XEXP (x, 1)));
4902
4903     case ASHIFT:
4904       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4905                           make_tree (type, XEXP (x, 1)));
4906
4907     case LSHIFTRT:
4908       t = unsigned_type_for (type);
4909       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4910                                          make_tree (t, XEXP (x, 0)),
4911                                          make_tree (type, XEXP (x, 1))));
4912
4913     case ASHIFTRT:
4914       t = signed_type_for (type);
4915       return fold_convert (type, build2 (RSHIFT_EXPR, t,
4916                                          make_tree (t, XEXP (x, 0)),
4917                                          make_tree (type, XEXP (x, 1))));
4918
4919     case DIV:
4920       if (TREE_CODE (type) != REAL_TYPE)
4921         t = signed_type_for (type);
4922       else
4923         t = type;
4924
4925       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4926                                          make_tree (t, XEXP (x, 0)),
4927                                          make_tree (t, XEXP (x, 1))));
4928     case UDIV:
4929       t = unsigned_type_for (type);
4930       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
4931                                          make_tree (t, XEXP (x, 0)),
4932                                          make_tree (t, XEXP (x, 1))));
4933
4934     case SIGN_EXTEND:
4935     case ZERO_EXTEND:
4936       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4937                                           GET_CODE (x) == ZERO_EXTEND);
4938       return fold_convert (type, make_tree (t, XEXP (x, 0)));
4939
4940     case CONST:
4941       return make_tree (type, XEXP (x, 0));
4942
4943     case SYMBOL_REF:
4944       t = SYMBOL_REF_DECL (x);
4945       if (t)
4946         return fold_convert (type, build_fold_addr_expr (t));
4947       /* else fall through.  */
4948
4949     default:
4950       t = build_decl (VAR_DECL, NULL_TREE, type);
4951
4952       /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4953          ptr_mode.  So convert.  */
4954       if (POINTER_TYPE_P (type))
4955         x = convert_memory_address (TYPE_MODE (type), x);
4956
4957       /* Note that we do *not* use SET_DECL_RTL here, because we do not
4958          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
4959       t->decl_with_rtl.rtl = x;
4960
4961       return t;
4962     }
4963 }
4964 \f
4965 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4966    and returning TARGET.
4967
4968    If TARGET is 0, a pseudo-register or constant is returned.  */
4969
4970 rtx
4971 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4972 {
4973   rtx tem = 0;
4974
4975   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4976     tem = simplify_binary_operation (AND, mode, op0, op1);
4977   if (tem == 0)
4978     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4979
4980   if (target == 0)
4981     target = tem;
4982   else if (tem != target)
4983     emit_move_insn (target, tem);
4984   return target;
4985 }
4986 \f
4987 /* Helper function for emit_store_flag.  */
4988 static rtx
4989 emit_store_flag_1 (rtx target, rtx subtarget, enum machine_mode mode,
4990                    int normalizep)
4991 {
4992   rtx op0;
4993   enum machine_mode target_mode = GET_MODE (target);
4994
4995   /* If we are converting to a wider mode, first convert to
4996      TARGET_MODE, then normalize.  This produces better combining
4997      opportunities on machines that have a SIGN_EXTRACT when we are
4998      testing a single bit.  This mostly benefits the 68k.
4999
5000      If STORE_FLAG_VALUE does not have the sign bit set when
5001      interpreted in MODE, we can do this conversion as unsigned, which
5002      is usually more efficient.  */
5003   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5004     {
5005       convert_move (target, subtarget,
5006                     (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5007                     && 0 == (STORE_FLAG_VALUE
5008                              & ((HOST_WIDE_INT) 1
5009                                 << (GET_MODE_BITSIZE (mode) -1))));
5010       op0 = target;
5011       mode = target_mode;
5012     }
5013   else
5014     op0 = subtarget;
5015
5016   /* If we want to keep subexpressions around, don't reuse our last
5017      target.  */
5018   if (optimize)
5019     subtarget = 0;
5020
5021   /* Now normalize to the proper value in MODE.  Sometimes we don't
5022      have to do anything.  */
5023   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5024     ;
5025   /* STORE_FLAG_VALUE might be the most negative number, so write
5026      the comparison this way to avoid a compiler-time warning.  */
5027   else if (- normalizep == STORE_FLAG_VALUE)
5028     op0 = expand_unop (mode, neg_optab, op0, subtarget, 0);
5029
5030   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5031      it hard to use a value of just the sign bit due to ANSI integer
5032      constant typing rules.  */
5033   else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5034            && (STORE_FLAG_VALUE
5035                & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1))))
5036     op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5037                         size_int (GET_MODE_BITSIZE (mode) - 1), subtarget,
5038                         normalizep == 1);
5039   else
5040     {
5041       gcc_assert (STORE_FLAG_VALUE & 1);
5042
5043       op0 = expand_and (mode, op0, const1_rtx, subtarget);
5044       if (normalizep == -1)
5045         op0 = expand_unop (mode, neg_optab, op0, op0, 0);
5046     }
5047
5048   /* If we were converting to a smaller mode, do the conversion now.  */
5049   if (target_mode != mode)
5050     {
5051       convert_move (target, op0, 0);
5052       return target;
5053     }
5054   else
5055     return op0;
5056 }
5057
5058 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5059    and storing in TARGET.  Normally return TARGET.
5060    Return 0 if that cannot be done.
5061
5062    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5063    it is VOIDmode, they cannot both be CONST_INT.
5064
5065    UNSIGNEDP is for the case where we have to widen the operands
5066    to perform the operation.  It says to use zero-extension.
5067
5068    NORMALIZEP is 1 if we should convert the result to be either zero
5069    or one.  Normalize is -1 if we should convert the result to be
5070    either zero or -1.  If NORMALIZEP is zero, the result will be left
5071    "raw" out of the scc insn.  */
5072
5073 rtx
5074 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5075                  enum machine_mode mode, int unsignedp, int normalizep)
5076 {
5077   rtx subtarget;
5078   enum insn_code icode;
5079   enum machine_mode compare_mode;
5080   enum machine_mode target_mode = GET_MODE (target);
5081   rtx tem;
5082   rtx last = get_last_insn ();
5083   rtx pattern, comparison;
5084
5085   if (unsignedp)
5086     code = unsigned_condition (code);
5087
5088   /* If one operand is constant, make it the second one.  Only do this
5089      if the other operand is not constant as well.  */
5090
5091   if (swap_commutative_operands_p (op0, op1))
5092     {
5093       tem = op0;
5094       op0 = op1;
5095       op1 = tem;
5096       code = swap_condition (code);
5097     }
5098
5099   if (mode == VOIDmode)
5100     mode = GET_MODE (op0);
5101
5102   /* For some comparisons with 1 and -1, we can convert this to
5103      comparisons with zero.  This will often produce more opportunities for
5104      store-flag insns.  */
5105
5106   switch (code)
5107     {
5108     case LT:
5109       if (op1 == const1_rtx)
5110         op1 = const0_rtx, code = LE;
5111       break;
5112     case LE:
5113       if (op1 == constm1_rtx)
5114         op1 = const0_rtx, code = LT;
5115       break;
5116     case GE:
5117       if (op1 == const1_rtx)
5118         op1 = const0_rtx, code = GT;
5119       break;
5120     case GT:
5121       if (op1 == constm1_rtx)
5122         op1 = const0_rtx, code = GE;
5123       break;
5124     case GEU:
5125       if (op1 == const1_rtx)
5126         op1 = const0_rtx, code = NE;
5127       break;
5128     case LTU:
5129       if (op1 == const1_rtx)
5130         op1 = const0_rtx, code = EQ;
5131       break;
5132     default:
5133       break;
5134     }
5135
5136   /* If we are comparing a double-word integer with zero or -1, we can
5137      convert the comparison into one involving a single word.  */
5138   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5139       && GET_MODE_CLASS (mode) == MODE_INT
5140       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5141     {
5142       if ((code == EQ || code == NE)
5143           && (op1 == const0_rtx || op1 == constm1_rtx))
5144         {
5145           rtx op00, op01, op0both;
5146
5147           /* Do a logical OR or AND of the two words and compare the
5148              result.  */
5149           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5150           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5151           op0both = expand_binop (word_mode,
5152                                   op1 == const0_rtx ? ior_optab : and_optab,
5153                                   op00, op01, NULL_RTX, unsignedp,
5154                                   OPTAB_DIRECT);
5155
5156           if (op0both != 0)
5157             return emit_store_flag (target, code, op0both, op1, word_mode,
5158                                     unsignedp, normalizep);
5159         }
5160       else if ((code == LT || code == GE) && op1 == const0_rtx)
5161         {
5162           rtx op0h;
5163
5164           /* If testing the sign bit, can just test on high word.  */
5165           op0h = simplify_gen_subreg (word_mode, op0, mode,
5166                                       subreg_highpart_offset (word_mode,
5167                                                               mode));
5168           return emit_store_flag (target, code, op0h, op1, word_mode,
5169                                   unsignedp, normalizep);
5170         }
5171     }
5172
5173   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5174      complement of A (for GE) and shifting the sign bit to the low bit.  */
5175   if (op1 == const0_rtx && (code == LT || code == GE)
5176       && GET_MODE_CLASS (mode) == MODE_INT
5177       && (normalizep || STORE_FLAG_VALUE == 1
5178           || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5179               && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5180                   == ((unsigned HOST_WIDE_INT) 1
5181                       << (GET_MODE_BITSIZE (mode) - 1))))))
5182     {
5183       subtarget = target;
5184
5185       /* If the result is to be wider than OP0, it is best to convert it
5186          first.  If it is to be narrower, it is *incorrect* to convert it
5187          first.  */
5188       if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5189         {
5190           op0 = convert_modes (target_mode, mode, op0, 0);
5191           mode = target_mode;
5192         }
5193
5194       if (target_mode != mode)
5195         subtarget = 0;
5196
5197       if (code == GE)
5198         op0 = expand_unop (mode, one_cmpl_optab, op0,
5199                            ((STORE_FLAG_VALUE == 1 || normalizep)
5200                             ? 0 : subtarget), 0);
5201
5202       if (STORE_FLAG_VALUE == 1 || normalizep)
5203         /* If we are supposed to produce a 0/1 value, we want to do
5204            a logical shift from the sign bit to the low-order bit; for
5205            a -1/0 value, we do an arithmetic shift.  */
5206         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5207                             size_int (GET_MODE_BITSIZE (mode) - 1),
5208                             subtarget, normalizep != -1);
5209
5210       if (mode != target_mode)
5211         op0 = convert_modes (target_mode, mode, op0, 0);
5212
5213       return op0;
5214     }
5215
5216   icode = setcc_gen_code[(int) code];
5217
5218   if (icode != CODE_FOR_nothing)
5219     {
5220       insn_operand_predicate_fn pred;
5221
5222       /* We think we may be able to do this with a scc insn.  Emit the
5223          comparison and then the scc insn.  */
5224
5225       do_pending_stack_adjust ();
5226       last = get_last_insn ();
5227
5228       comparison
5229         = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5230       if (CONSTANT_P (comparison))
5231         {
5232           switch (GET_CODE (comparison))
5233             {
5234             case CONST_INT:
5235               if (comparison == const0_rtx)
5236                 return const0_rtx;
5237               break;
5238
5239 #ifdef FLOAT_STORE_FLAG_VALUE
5240             case CONST_DOUBLE:
5241               if (comparison == CONST0_RTX (GET_MODE (comparison)))
5242                 return const0_rtx;
5243               break;
5244 #endif
5245             default:
5246               gcc_unreachable ();
5247             }
5248
5249           if (normalizep == 1)
5250             return const1_rtx;
5251           if (normalizep == -1)
5252             return constm1_rtx;
5253           return const_true_rtx;
5254         }
5255
5256       /* The code of COMPARISON may not match CODE if compare_from_rtx
5257          decided to swap its operands and reverse the original code.
5258
5259          We know that compare_from_rtx returns either a CONST_INT or
5260          a new comparison code, so it is safe to just extract the
5261          code from COMPARISON.  */
5262       code = GET_CODE (comparison);
5263
5264       /* Get a reference to the target in the proper mode for this insn.  */
5265       compare_mode = insn_data[(int) icode].operand[0].mode;
5266       subtarget = target;
5267       pred = insn_data[(int) icode].operand[0].predicate;
5268       if (optimize || ! (*pred) (subtarget, compare_mode))
5269         subtarget = gen_reg_rtx (compare_mode);
5270
5271       pattern = GEN_FCN (icode) (subtarget);
5272       if (pattern)
5273         {
5274           emit_insn (pattern);
5275           return emit_store_flag_1 (target, subtarget, compare_mode,
5276                                     normalizep);
5277         }
5278     }
5279   else
5280     {
5281       /* We don't have an scc insn, so try a cstore insn.  */
5282
5283       for (compare_mode = mode; compare_mode != VOIDmode;
5284            compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5285         {
5286           icode = optab_handler (cstore_optab, compare_mode)->insn_code;
5287           if (icode != CODE_FOR_nothing)
5288             break;
5289         }
5290
5291       if (icode != CODE_FOR_nothing)
5292         {
5293           enum machine_mode result_mode
5294             = insn_data[(int) icode].operand[0].mode;
5295           rtx cstore_op0 = op0;
5296           rtx cstore_op1 = op1;
5297
5298           do_pending_stack_adjust ();
5299           last = get_last_insn ();
5300
5301           if (compare_mode != mode)
5302             {
5303               cstore_op0 = convert_modes (compare_mode, mode, cstore_op0,
5304                                           unsignedp);
5305               cstore_op1 = convert_modes (compare_mode, mode, cstore_op1,
5306                                           unsignedp);
5307             }
5308
5309           if (!insn_data[(int) icode].operand[2].predicate (cstore_op0,
5310                                                             compare_mode))
5311             cstore_op0 = copy_to_mode_reg (compare_mode, cstore_op0);
5312
5313           if (!insn_data[(int) icode].operand[3].predicate (cstore_op1,
5314                                                             compare_mode))
5315             cstore_op1 = copy_to_mode_reg (compare_mode, cstore_op1);
5316
5317           comparison = gen_rtx_fmt_ee (code, result_mode, cstore_op0,
5318                                        cstore_op1);
5319           subtarget = target;
5320
5321           if (optimize || !(insn_data[(int) icode].operand[0].predicate
5322                             (subtarget, result_mode)))
5323             subtarget = gen_reg_rtx (result_mode);
5324
5325           pattern = GEN_FCN (icode) (subtarget, comparison, cstore_op0,
5326                                      cstore_op1);
5327
5328           if (pattern)
5329             {
5330               emit_insn (pattern);
5331               return emit_store_flag_1 (target, subtarget, result_mode,
5332                                         normalizep);
5333             }
5334         }
5335     }
5336
5337   delete_insns_since (last);
5338
5339   /* If optimizing, use different pseudo registers for each insn, instead
5340      of reusing the same pseudo.  This leads to better CSE, but slows
5341      down the compiler, since there are more pseudos */
5342   subtarget = (!optimize
5343                && (target_mode == mode)) ? target : NULL_RTX;
5344
5345   /* If we reached here, we can't do this with a scc insn.  However, there
5346      are some comparisons that can be done directly.  For example, if
5347      this is an equality comparison of integers, we can try to exclusive-or
5348      (or subtract) the two operands and use a recursive call to try the
5349      comparison with zero.  Don't do any of these cases if branches are
5350      very cheap.  */
5351
5352   if (BRANCH_COST (optimize_insn_for_speed_p (),
5353                    false) > 0
5354       && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5355       && op1 != const0_rtx)
5356     {
5357       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5358                           OPTAB_WIDEN);
5359
5360       if (tem == 0)
5361         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5362                             OPTAB_WIDEN);
5363       if (tem != 0)
5364         tem = emit_store_flag (target, code, tem, const0_rtx,
5365                                mode, unsignedp, normalizep);
5366       if (tem == 0)
5367         delete_insns_since (last);
5368       return tem;
5369     }
5370
5371   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5372      the constant zero.  Reject all other comparisons at this point.  Only
5373      do LE and GT if branches are expensive since they are expensive on
5374      2-operand machines.  */
5375
5376   if (BRANCH_COST (optimize_insn_for_speed_p (),
5377                    false) == 0
5378       || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5379       || (code != EQ && code != NE
5380           && (BRANCH_COST (optimize_insn_for_speed_p (),
5381                            false) <= 1 || (code != LE && code != GT))))
5382     return 0;
5383
5384   /* See what we need to return.  We can only return a 1, -1, or the
5385      sign bit.  */
5386
5387   if (normalizep == 0)
5388     {
5389       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5390         normalizep = STORE_FLAG_VALUE;
5391
5392       else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5393                && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5394                    == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5395         ;
5396       else
5397         return 0;
5398     }
5399
5400   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5401      do the necessary operation below.  */
5402
5403   tem = 0;
5404
5405   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5406      the sign bit set.  */
5407
5408   if (code == LE)
5409     {
5410       /* This is destructive, so SUBTARGET can't be OP0.  */
5411       if (rtx_equal_p (subtarget, op0))
5412         subtarget = 0;
5413
5414       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5415                           OPTAB_WIDEN);
5416       if (tem)
5417         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5418                             OPTAB_WIDEN);
5419     }
5420
5421   /* To see if A > 0, compute (((signed) A) << BITS) - A, where BITS is the
5422      number of bits in the mode of OP0, minus one.  */
5423
5424   if (code == GT)
5425     {
5426       if (rtx_equal_p (subtarget, op0))
5427         subtarget = 0;
5428
5429       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5430                           size_int (GET_MODE_BITSIZE (mode) - 1),
5431                           subtarget, 0);
5432       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5433                           OPTAB_WIDEN);
5434     }
5435
5436   if (code == EQ || code == NE)
5437     {
5438       /* For EQ or NE, one way to do the comparison is to apply an operation
5439          that converts the operand into a positive number if it is nonzero
5440          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5441          for NE we negate.  This puts the result in the sign bit.  Then we
5442          normalize with a shift, if needed.
5443
5444          Two operations that can do the above actions are ABS and FFS, so try
5445          them.  If that doesn't work, and MODE is smaller than a full word,
5446          we can use zero-extension to the wider mode (an unsigned conversion)
5447          as the operation.  */
5448
5449       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5450          that is compensated by the subsequent overflow when subtracting
5451          one / negating.  */
5452
5453       if (optab_handler (abs_optab, mode)->insn_code != CODE_FOR_nothing)
5454         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5455       else if (optab_handler (ffs_optab, mode)->insn_code != CODE_FOR_nothing)
5456         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5457       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5458         {
5459           tem = convert_modes (word_mode, mode, op0, 1);
5460           mode = word_mode;
5461         }
5462
5463       if (tem != 0)
5464         {
5465           if (code == EQ)
5466             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5467                                 0, OPTAB_WIDEN);
5468           else
5469             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5470         }
5471
5472       /* If we couldn't do it that way, for NE we can "or" the two's complement
5473          of the value with itself.  For EQ, we take the one's complement of
5474          that "or", which is an extra insn, so we only handle EQ if branches
5475          are expensive.  */
5476
5477       if (tem == 0
5478           && (code == NE
5479               || BRANCH_COST (optimize_insn_for_speed_p (),
5480                               false) > 1))
5481         {
5482           if (rtx_equal_p (subtarget, op0))
5483             subtarget = 0;
5484
5485           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5486           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5487                               OPTAB_WIDEN);
5488
5489           if (tem && code == EQ)
5490             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5491         }
5492     }
5493
5494   if (tem && normalizep)
5495     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5496                         size_int (GET_MODE_BITSIZE (mode) - 1),
5497                         subtarget, normalizep == 1);
5498
5499   if (tem)
5500     {
5501       if (GET_MODE (tem) != target_mode)
5502         {
5503           convert_move (target, tem, 0);
5504           tem = target;
5505         }
5506       else if (!subtarget)
5507         {
5508           emit_move_insn (target, tem);
5509           tem = target;
5510         }
5511     }
5512   else
5513     delete_insns_since (last);
5514
5515   return tem;
5516 }
5517
5518 /* Like emit_store_flag, but always succeeds.  */
5519
5520 rtx
5521 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5522                        enum machine_mode mode, int unsignedp, int normalizep)
5523 {
5524   rtx tem, label;
5525
5526   /* First see if emit_store_flag can do the job.  */
5527   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5528   if (tem != 0)
5529     return tem;
5530
5531   if (normalizep == 0)
5532     normalizep = 1;
5533
5534   /* If this failed, we have to do this with set/compare/jump/set code.  */
5535
5536   if (!REG_P (target)
5537       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5538     target = gen_reg_rtx (GET_MODE (target));
5539
5540   emit_move_insn (target, const1_rtx);
5541   label = gen_label_rtx ();
5542   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5543                            NULL_RTX, label);
5544
5545   emit_move_insn (target, const0_rtx);
5546   emit_label (label);
5547
5548   return target;
5549 }
5550 \f
5551 /* Perform possibly multi-word comparison and conditional jump to LABEL
5552    if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE.  This is
5553    now a thin wrapper around do_compare_rtx_and_jump.  */
5554
5555 static void
5556 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5557                  rtx label)
5558 {
5559   int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5560   do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5561                            NULL_RTX, NULL_RTX, label);
5562 }