1 ;; Machine description for NVPTX.
2 ;; Copyright (C) 2014-2017 Free Software Foundation, Inc.
3 ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 (define_c_enum "unspec" [
33 UNSPEC_FPINT_NEARBYINT
53 (define_c_enum "unspecv" [
;; Boolean insn attribute "subregs_ok"; insns that do not set it get "false".
;; In this file the move, extension and truncation patterns set it to "true".
;; NOTE(review): presumably tells the back end which insns may keep subreg
;; operands -- the consumer of this attribute is not in this file; confirm.
(define_attr "subregs_ok" "false,true"
  (const_string "false"))
;; Boolean insn attribute "atomic"; insns that do not set it get "false".
;; NOTE(review): presumably set to "true" by the atomic patterns, which are
;; outside the part of the file visible here -- confirm.
(define_attr "atomic" "false,true"
  (const_string "false"))
;; The nvptx operand predicates, in general, don't permit subregs and
;; accept only literal constants.  This differs from the generic
;; predicates, which permit subregs and symbolic constants (as appropriate).
80 (define_predicate "nvptx_register_operand"
83 return register_operand (op, mode);
86 (define_predicate "nvptx_nonimmediate_operand"
87 (match_code "mem,reg")
89 return (REG_P (op) ? register_operand (op, mode)
90 : memory_operand (op, mode));
93 (define_predicate "nvptx_nonmemory_operand"
94 (match_code "reg,const_int,const_double")
96 return (REG_P (op) ? register_operand (op, mode)
97 : immediate_operand (op, mode));
;; Accept only the integer constant zero, i.e. the shared const0_rtx object.
;; Used as the right-hand operand of the BImode predicate comparisons
;; (cbranchbi4, cstorebi4) and as the trap-code operand of ctrap<mode>4.
(define_predicate "const0_operand"
  (and (match_code "const_int")
       (match_test "op == const0_rtx")))
;; Operators that may be used for predication: an equality or inequality
;; test.  The patterns in this file apply it to a BImode register compared
;; against const0_operand.
(define_predicate "predicate_operator"
  (match_code "eq,ne"))
108 (define_predicate "ne_operator"
;; Comparison codes accepted by the integer compare, set and branch
;; patterns: equality plus the signed and unsigned orderings.
(define_predicate "nvptx_comparison_operator"
  (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))
;; Comparison codes accepted by the floating-point compare, set and branch
;; patterns: the ordered comparisons, their unordered-or-... variants, and
;; the plain ordered / unordered tests.
(define_predicate "nvptx_float_comparison_operator"
  (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))
117 ;; Test for a valid operand for a call instruction.
118 (define_predicate "call_insn_operand"
119 (match_code "symbol_ref,reg")
121 return REG_P (op) || SYMBOL_REF_FUNCTION_P (op);
124 ;; Return true if OP is a call with parallel USEs of the argument
126 (define_predicate "call_operation"
127 (match_code "parallel")
129 int arg_end = XVECLEN (op, 0);
131 for (int i = 1; i < arg_end; i++)
133 rtx elt = XVECEXP (op, 0, i);
135 if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0)))
;; Boolean insn attribute "predicable"; insns are predicable unless they
;; say otherwise.  Branches, return, the conditional traps and the
;; fork/join markers in this file opt out by setting it to "false".
(define_attr "predicable" "false,true"
  (const_string "true"))
145 [(match_operator 0 "predicate_operator"
146 [(match_operand:BI 1 "nvptx_register_operand" "")
147 (match_operand:BI 2 "const0_operand" "")])]
;; Constraint P0: the integer constant 0.  Used by the BImode move pattern
;; to select the "set predicate to false" alternative.
(define_constraint "P0"
  "An integer with the value 0."
  (and (match_code "const_int")
       (match_test "ival == 0")))
;; Constraint P1: the integer constant 1.
(define_constraint "P1"
  "An integer with the value 1."
  (and (match_code "const_int")
       (match_test "ival == 1")))
;; Constraint Pn: the integer constant -1.  Used by the BImode move pattern
;; to select the "set predicate to true" alternative.
(define_constraint "Pn"
  "An integer with the value -1."
  (and (match_code "const_int")
       (match_test "ival == -1")))
167 (define_constraint "R"
;; Constraint Ia: any const_int, whatever its value.
;; NOTE(review): the always-true match_test looks redundant; presumably the
;; (and (match_code "const_int") ...) shape is what lets the constraint
;; generator classify this as a const_int constraint -- confirm before
;; simplifying.
(define_constraint "Ia"
  "Any integer constant."
  (and (match_code "const_int")
       (match_test "true")))
;; Mode iterators.  Each name abbreviates the list of modes it ranges over.

;; Every integer and floating-point scalar mode handled by mov<mode>.
(define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])

;; Integer modes, from the widest set to the narrowest.
(define_mode_iterator QHSDIM [QI HI SI DI])
(define_mode_iterator HSDIM [HI SI DI])
;; As HSDIM, plus the BImode predicate mode (used by the logical patterns).
(define_mode_iterator BHSDIM [BI HI SI DI])
(define_mode_iterator SDIM [SI DI])

;; SI and DI together with both floating-point modes.
(define_mode_iterator SDISDFM [SI DI SF DF])

;; Integer modes narrower than SI, and narrower than DI.
(define_mode_iterator QHIM [QI HI])
(define_mode_iterator QHSIM [QI HI SI])

;; Floating-point modes and complex floating-point modes.
(define_mode_iterator SDFM [SF DF])
(define_mode_iterator SDCM [SC DC])

;; Integer/float mode pairs: BITS is used by nvptx_shuffle<mode>; BITD is
;; the DI/DF counterpart.
(define_mode_iterator BITS [SI SF])
(define_mode_iterator BITD [DI DF])
;; Pointer-mode iterator: lets patterns be written with :P when they
;; operate on pointer-sized values.  Each alternative is guarded by a test
;; on Pmode, so exactly one of SI and DI is enabled.
(define_mode_iterator P [(SI "Pmode == SImode")
                         (DI "Pmode == DImode")])
192 ;; We should get away with not defining memory alternatives, since we don't
193 ;; get variables in this mode and pseudos are never spilled.
195 [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
196 (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))]
199 %.\\tmov%t0\\t%0, %1;
200 %.\\tsetp.eq.u32\\t%0, 1, 0;
201 %.\\tsetp.eq.u32\\t%0, 1, 1;")
203 (define_insn "*mov<mode>_insn"
204 [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
205 (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
206 "!MEM_P (operands[0]) || REG_P (operands[1])"
208 if (which_alternative == 1)
209 return "%.\\tld%A1%u1\\t%0, %1;";
210 if (which_alternative == 2)
211 return "%.\\tst%A0%u0\\t%0, %1;";
213 return nvptx_output_mov_insn (operands[0], operands[1]);
215 [(set_attr "subregs_ok" "true")])
217 (define_insn "*mov<mode>_insn"
218 [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
219 (match_operand:SDFM 1 "general_operand" "RF,m,R"))]
220 "!MEM_P (operands[0]) || REG_P (operands[1])"
222 if (which_alternative == 1)
223 return "%.\\tld%A1%u0\\t%0, %1;";
224 if (which_alternative == 2)
225 return "%.\\tst%A0%u1\\t%0, %1;";
227 return nvptx_output_mov_insn (operands[0], operands[1]);
229 [(set_attr "subregs_ok" "true")])
231 (define_insn "load_arg_reg<mode>"
232 [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
233 (unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
236 "%.\\tcvt%t0.u32\\t%0, %%ar%1;")
238 (define_insn "load_arg_reg<mode>"
239 [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
240 (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
243 "%.\\tmov%t0\\t%0, %%ar%1;")
245 (define_expand "mov<mode>"
246 [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
247 (match_operand:QHSDISDFM 1 "general_operand" ""))]
250 if (MEM_P (operands[0]) && !REG_P (operands[1]))
252 rtx tmp = gen_reg_rtx (<MODE>mode);
253 emit_move_insn (tmp, operands[1]);
254 emit_move_insn (operands[0], tmp);
259 (define_insn "zero_extendqihi2"
260 [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
261 (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
264 %.\\tcvt.u16.u%T1\\t%0, %1;
265 %.\\tld%A1.u8\\t%0, %1;"
266 [(set_attr "subregs_ok" "true")])
268 (define_insn "zero_extend<mode>si2"
269 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
270 (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
273 %.\\tcvt.u32.u%T1\\t%0, %1;
274 %.\\tld%A1.u%T1\\t%0, %1;"
275 [(set_attr "subregs_ok" "true")])
277 (define_insn "zero_extend<mode>di2"
278 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
279 (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
282 %.\\tcvt.u64.u%T1\\t%0, %1;
283 %.\\tld%A1%u1\\t%0, %1;"
284 [(set_attr "subregs_ok" "true")])
286 (define_insn "extend<mode>si2"
287 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
288 (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
291 %.\\tcvt.s32.s%T1\\t%0, %1;
292 %.\\tld%A1.s%T1\\t%0, %1;"
293 [(set_attr "subregs_ok" "true")])
295 (define_insn "extend<mode>di2"
296 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
297 (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
300 %.\\tcvt.s64.s%T1\\t%0, %1;
301 %.\\tld%A1.s%T1\\t%0, %1;"
302 [(set_attr "subregs_ok" "true")])
304 (define_insn "trunchiqi2"
305 [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
306 (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
309 %.\\tcvt%t0.u16\\t%0, %1;
310 %.\\tst%A0.u8\\t%0, %1;"
311 [(set_attr "subregs_ok" "true")])
313 (define_insn "truncsi<mode>2"
314 [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
315 (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
318 %.\\tcvt%t0.u32\\t%0, %1;
319 %.\\tst%A0.u%T0\\t%0, %1;"
320 [(set_attr "subregs_ok" "true")])
322 (define_insn "truncdi<mode>2"
323 [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
324 (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
327 %.\\tcvt%t0.u64\\t%0, %1;
328 %.\\tst%A0.u%T0\\t%0, %1;"
329 [(set_attr "subregs_ok" "true")])
331 ;; Integer arithmetic
333 (define_insn "add<mode>3"
334 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
335 (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
336 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
338 "%.\\tadd%t0\\t%0, %1, %2;")
340 (define_insn "sub<mode>3"
341 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
342 (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
343 (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
345 "%.\\tsub%t0\\t%0, %1, %2;")
347 (define_insn "mul<mode>3"
348 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
349 (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
350 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
352 "%.\\tmul.lo%t0\\t%0, %1, %2;")
354 (define_insn "*mad<mode>3"
355 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
356 (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
357 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
358 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
360 "%.\\tmad.lo%t0\\t%0, %1, %2, %3;")
362 (define_insn "div<mode>3"
363 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
364 (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
365 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
367 "%.\\tdiv.s%T0\\t%0, %1, %2;")
369 (define_insn "udiv<mode>3"
370 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
371 (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
372 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
374 "%.\\tdiv.u%T0\\t%0, %1, %2;")
376 (define_insn "mod<mode>3"
377 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
378 (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri")
379 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
381 "%.\\trem.s%T0\\t%0, %1, %2;")
383 (define_insn "umod<mode>3"
384 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
385 (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "Ri")
386 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
388 "%.\\trem.u%T0\\t%0, %1, %2;")
390 (define_insn "smin<mode>3"
391 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
392 (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
393 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
395 "%.\\tmin.s%T0\\t%0, %1, %2;")
397 (define_insn "umin<mode>3"
398 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
399 (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
400 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
402 "%.\\tmin.u%T0\\t%0, %1, %2;")
404 (define_insn "smax<mode>3"
405 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
406 (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
407 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
409 "%.\\tmax.s%T0\\t%0, %1, %2;")
411 (define_insn "umax<mode>3"
412 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
413 (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
414 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
416 "%.\\tmax.u%T0\\t%0, %1, %2;")
418 (define_insn "abs<mode>2"
419 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
420 (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
422 "%.\\tabs.s%T0\\t%0, %1;")
424 (define_insn "neg<mode>2"
425 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
426 (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
428 "%.\\tneg.s%T0\\t%0, %1;")
430 (define_insn "one_cmpl<mode>2"
431 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
432 (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
434 "%.\\tnot.b%T0\\t%0, %1;")
436 (define_insn "bitrev<mode>2"
437 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
438 (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")]
441 "%.\\tbrev.b%T0\\t%0, %1;")
443 (define_insn "clz<mode>2"
444 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
445 (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
447 "%.\\tclz.b%T1\\t%0, %1;")
449 (define_expand "ctz<mode>2"
450 [(set (match_operand:SI 0 "nvptx_register_operand" "")
451 (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
454 rtx tmpreg = gen_reg_rtx (<MODE>mode);
455 emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1]));
456 emit_insn (gen_clz<mode>2 (operands[0], tmpreg));
462 (define_insn "ashl<mode>3"
463 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
464 (ashift:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
465 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
467 "%.\\tshl.b%T0\\t%0, %1, %2;")
469 (define_insn "ashr<mode>3"
470 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
471 (ashiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
472 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
474 "%.\\tshr.s%T0\\t%0, %1, %2;")
476 (define_insn "lshr<mode>3"
477 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
478 (lshiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
479 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
481 "%.\\tshr.u%T0\\t%0, %1, %2;")
483 ;; Logical operations
485 (define_insn "and<mode>3"
486 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
487 (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
488 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
490 "%.\\tand.b%T0\\t%0, %1, %2;")
492 (define_insn "ior<mode>3"
493 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
494 (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
495 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
497 "%.\\tor.b%T0\\t%0, %1, %2;")
499 (define_insn "xor<mode>3"
500 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
501 (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
502 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
504 "%.\\txor.b%T0\\t%0, %1, %2;")
506 ;; Comparisons and branches
508 (define_insn "*cmp<mode>"
509 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
510 (match_operator:BI 1 "nvptx_comparison_operator"
511 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
512 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
514 "%.\\tsetp%c1\\t%0, %2, %3;")
516 (define_insn "*cmp<mode>"
517 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
518 (match_operator:BI 1 "nvptx_float_comparison_operator"
519 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
520 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
522 "%.\\tsetp%c1\\t%0, %2, %3;")
526 (label_ref (match_operand 0 "" "")))]
530 (define_insn "br_true"
532 (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
534 (label_ref (match_operand 1 "" ""))
538 [(set_attr "predicable" "false")])
540 (define_insn "br_false"
542 (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
544 (label_ref (match_operand 1 "" ""))
548 [(set_attr "predicable" "false")])
550 ;; unified conditional branch
551 (define_insn "br_true_uni"
552 [(set (pc) (if_then_else
553 (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
554 UNSPEC_BR_UNIFIED) (const_int 0))
555 (label_ref (match_operand 1 "" "")) (pc)))]
557 "%j0\\tbra.uni\\t%l1;"
558 [(set_attr "predicable" "false")])
560 (define_insn "br_false_uni"
561 [(set (pc) (if_then_else
562 (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
563 UNSPEC_BR_UNIFIED) (const_int 0))
564 (label_ref (match_operand 1 "" "")) (pc)))]
566 "%J0\\tbra.uni\\t%l1;"
567 [(set_attr "predicable" "false")])
569 (define_expand "cbranch<mode>4"
571 (if_then_else (match_operator 0 "nvptx_comparison_operator"
572 [(match_operand:HSDIM 1 "nvptx_register_operand" "")
573 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
574 (label_ref (match_operand 3 "" ""))
578 rtx t = nvptx_expand_compare (operands[0]);
580 operands[1] = XEXP (t, 0);
581 operands[2] = XEXP (t, 1);
584 (define_expand "cbranch<mode>4"
586 (if_then_else (match_operator 0 "nvptx_float_comparison_operator"
587 [(match_operand:SDFM 1 "nvptx_register_operand" "")
588 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
589 (label_ref (match_operand 3 "" ""))
593 rtx t = nvptx_expand_compare (operands[0]);
595 operands[1] = XEXP (t, 0);
596 operands[2] = XEXP (t, 1);
599 (define_expand "cbranchbi4"
601 (if_then_else (match_operator 0 "predicate_operator"
602 [(match_operand:BI 1 "nvptx_register_operand" "")
603 (match_operand:BI 2 "const0_operand" "")])
604 (label_ref (match_operand 3 "" ""))
609 ;; Conditional stores
611 (define_insn "setcc_from_bi"
612 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
613 (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R")
616 "%.\\tselp%t0 %0,-1,0,%1;")
618 (define_insn "sel_true<mode>"
619 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
621 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
622 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
623 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
625 "%.\\tselp%t0\\t%0, %2, %3, %1;")
627 (define_insn "sel_true<mode>"
628 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
630 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
631 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
632 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
634 "%.\\tselp%t0\\t%0, %2, %3, %1;")
636 (define_insn "sel_false<mode>"
637 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
639 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
640 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
641 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
643 "%.\\tselp%t0\\t%0, %3, %2, %1;")
645 (define_insn "sel_false<mode>"
646 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
648 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
649 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
650 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
652 "%.\\tselp%t0\\t%0, %3, %2, %1;")
654 (define_insn "setcc_int<mode>"
655 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
656 (match_operator:SI 1 "nvptx_comparison_operator"
657 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
658 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
660 "%.\\tset%t0%c1\\t%0, %2, %3;")
662 (define_insn "setcc_int<mode>"
663 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
664 (match_operator:SI 1 "nvptx_float_comparison_operator"
665 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
666 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
668 "%.\\tset%t0%c1\\t%0, %2, %3;")
670 (define_insn "setcc_float<mode>"
671 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
672 (match_operator:SF 1 "nvptx_comparison_operator"
673 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
674 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
676 "%.\\tset%t0%c1\\t%0, %2, %3;")
678 (define_insn "setcc_float<mode>"
679 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
680 (match_operator:SF 1 "nvptx_float_comparison_operator"
681 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
682 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
684 "%.\\tset%t0%c1\\t%0, %2, %3;")
686 (define_expand "cstorebi4"
687 [(set (match_operand:SI 0 "nvptx_register_operand")
688 (match_operator:SI 1 "ne_operator"
689 [(match_operand:BI 2 "nvptx_register_operand")
690 (match_operand:BI 3 "const0_operand")]))]
694 (define_expand "cstore<mode>4"
695 [(set (match_operand:SI 0 "nvptx_register_operand")
696 (match_operator:SI 1 "nvptx_comparison_operator"
697 [(match_operand:HSDIM 2 "nvptx_register_operand")
698 (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
702 (define_expand "cstore<mode>4"
703 [(set (match_operand:SI 0 "nvptx_register_operand")
704 (match_operator:SI 1 "nvptx_float_comparison_operator"
705 [(match_operand:SDFM 2 "nvptx_register_operand")
706 (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
712 (define_insn "call_insn"
713 [(match_parallel 2 "call_operation"
714 [(call (mem:QI (match_operand 0 "call_insn_operand" "Rs"))
715 (match_operand 1))])]
718 return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
721 (define_insn "call_value_insn"
722 [(match_parallel 3 "call_operation"
723 [(set (match_operand 0 "nvptx_register_operand" "=R")
724 (call (mem:QI (match_operand 1 "call_insn_operand" "Rs"))
725 (match_operand 2)))])]
728 return nvptx_output_call_insn (insn, operands[0], operands[1]);
731 (define_expand "call"
732 [(match_operand 0 "" "")]
735 nvptx_expand_call (NULL_RTX, operands[0]);
739 (define_expand "call_value"
740 [(match_operand 0 "" "")
741 (match_operand 1 "" "")]
744 nvptx_expand_call (operands[0], operands[1]);
748 ;; Floating point arithmetic.
750 (define_insn "add<mode>3"
751 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
752 (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
753 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
755 "%.\\tadd%t0\\t%0, %1, %2;")
757 (define_insn "sub<mode>3"
758 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
759 (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
760 (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
762 "%.\\tsub%t0\\t%0, %1, %2;")
764 (define_insn "mul<mode>3"
765 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
766 (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
767 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
769 "%.\\tmul%t0\\t%0, %1, %2;")
771 (define_insn "fma<mode>4"
772 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
773 (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
774 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
775 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
777 "%.\\tfma%#%t0\\t%0, %1, %2, %3;")
779 (define_insn "div<mode>3"
780 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
781 (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
782 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
784 "%.\\tdiv%#%t0\\t%0, %1, %2;")
786 (define_insn "copysign<mode>3"
787 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
788 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")
789 (match_operand:SDFM 2 "nvptx_register_operand" "R")]
792 "%.\\tcopysign%t0\\t%0, %2, %1;")
794 (define_insn "smin<mode>3"
795 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
796 (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
797 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
799 "%.\\tmin%t0\\t%0, %1, %2;")
801 (define_insn "smax<mode>3"
802 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
803 (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
804 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
806 "%.\\tmax%t0\\t%0, %1, %2;")
808 (define_insn "abs<mode>2"
809 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
810 (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
812 "%.\\tabs%t0\\t%0, %1;")
814 (define_insn "neg<mode>2"
815 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
816 (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
818 "%.\\tneg%t0\\t%0, %1;")
820 (define_insn "sqrt<mode>2"
821 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
822 (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
824 "%.\\tsqrt%#%t0\\t%0, %1;")
826 (define_expand "sincossf3"
827 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
828 (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")]
830 (set (match_operand:SF 1 "nvptx_register_operand" "=R")
831 (unspec:SF [(match_dup 2)] UNSPEC_SIN))]
832 "flag_unsafe_math_optimizations"
834 operands[2] = make_safe_from (operands[2], operands[0]);
837 (define_insn "sinsf2"
838 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
839 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
841 "flag_unsafe_math_optimizations"
842 "%.\\tsin.approx%t0\\t%0, %1;")
844 (define_insn "cossf2"
845 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
846 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
848 "flag_unsafe_math_optimizations"
849 "%.\\tcos.approx%t0\\t%0, %1;")
851 (define_insn "log2sf2"
852 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
853 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
855 "flag_unsafe_math_optimizations"
856 "%.\\tlg2.approx%t0\\t%0, %1;")
858 (define_insn "exp2sf2"
859 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
860 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
862 "flag_unsafe_math_optimizations"
863 "%.\\tex2.approx%t0\\t%0, %1;")
865 ;; Conversions involving floating point
867 (define_insn "extendsfdf2"
868 [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
869 (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))]
871 "%.\\tcvt%t0%t1\\t%0, %1;")
873 (define_insn "truncdfsf2"
874 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
875 (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))]
877 "%.\\tcvt%#%t0%t1\\t%0, %1;")
879 (define_insn "floatunssi<mode>2"
880 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
881 (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
883 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
885 (define_insn "floatsi<mode>2"
886 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
887 (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
889 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
891 (define_insn "floatunsdi<mode>2"
892 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
893 (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
895 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
897 (define_insn "floatdi<mode>2"
898 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
899 (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
901 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
903 (define_insn "fixuns_trunc<mode>si2"
904 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
905 (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
907 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
909 (define_insn "fix_trunc<mode>si2"
910 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
911 (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
913 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
915 (define_insn "fixuns_trunc<mode>di2"
916 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
917 (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
919 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
921 (define_insn "fix_trunc<mode>di2"
922 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
923 (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
925 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
;; The float-to-integral-float rounding operations, one unspec per
;; operation.  Drives the <FPINT:fpint_name><SDFM:mode>2 pattern.
(define_int_iterator FPINT [UNSPEC_FPINT_FLOOR
                            UNSPEC_FPINT_BTRUNC
                            UNSPEC_FPINT_CEIL
                            UNSPEC_FPINT_NEARBYINT])
;; Pattern-name stem for each FPINT unspec; it is combined with the mode to
;; form the insn name, e.g. "floor" + "sf" + "2".
(define_int_attr fpint_name
  [(UNSPEC_FPINT_FLOOR "floor")
   (UNSPEC_FPINT_BTRUNC "btrunc")
   (UNSPEC_FPINT_CEIL "ceil")
   (UNSPEC_FPINT_NEARBYINT "nearbyint")])
;; Text spliced after "cvt" in the output template for each FPINT unspec.
;; Floor, btrunc and ceil use fixed modifiers; nearbyint uses the "%#"
;; output escape followed by "i".
;; NOTE(review): "%#" is presumably expanded to the current rounding mode by
;; the operand printer, which is outside this file -- confirm.
(define_int_attr fpint_roundingmode
  [(UNSPEC_FPINT_FLOOR ".rmi")
   (UNSPEC_FPINT_BTRUNC ".rzi")
   (UNSPEC_FPINT_CEIL ".rpi")
   (UNSPEC_FPINT_NEARBYINT "%#i")])
938 (define_insn "<FPINT:fpint_name><SDFM:mode>2"
939 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
940 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
943 "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;")
;; The rounding operations that also convert to an integer mode (floor and
;; ceil only).  Drives the <FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2
;; pattern.
(define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR
                             UNSPEC_FPINT_CEIL])
;; Pattern-name stem for each FPINT2 unspec ("lfloor" / "lceil").
(define_int_attr fpint2_name
  [(UNSPEC_FPINT_FLOOR "lfloor")
   (UNSPEC_FPINT_CEIL "lceil")])
;; Rounding modifier spliced after "cvt" for each FPINT2 unspec; the values
;; are the same as the fpint_roundingmode entries for floor and ceil.
(define_int_attr fpint2_roundingmode
  [(UNSPEC_FPINT_FLOOR ".rmi")
   (UNSPEC_FPINT_CEIL ".rpi")])
951 (define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"
952 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
953 (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
956 "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")
965 (define_insn "return"
969 return nvptx_output_return ();
971 [(set_attr "predicable" "false")])
973 (define_expand "epilogue"
974 [(clobber (const_int 0))]
977 if (TARGET_SOFT_STACK)
978 emit_insn (gen_set_softstack_insn (gen_rtx_REG (Pmode,
979 SOFTSTACK_PREV_REGNUM)));
980 emit_jump_insn (gen_return ());
984 (define_expand "nonlocal_goto"
985 [(match_operand 0 "" "")
986 (match_operand 1 "" "")
987 (match_operand 2 "" "")
988 (match_operand 3 "" "")]
991 sorry ("target cannot support nonlocal goto.");
992 emit_insn (gen_nop ());
996 (define_expand "nonlocal_goto_receiver"
1000 sorry ("target cannot support nonlocal goto.");
1003 (define_expand "allocate_stack"
1004 [(match_operand 0 "nvptx_register_operand")
1005 (match_operand 1 "nvptx_register_operand")]
1008 if (TARGET_SOFT_STACK)
1010 emit_move_insn (stack_pointer_rtx,
1011 gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
1012 emit_insn (gen_set_softstack_insn (stack_pointer_rtx));
1013 emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
1016 /* The ptx documentation specifies an alloca intrinsic (for 32 bit
1017 only) but notes it is not implemented. The assembler emits a
1018 confused error message. Issue a blunt one now instead. */
1019 sorry ("target cannot support alloca.");
1020 emit_insn (gen_nop ());
1024 (define_insn "set_softstack_insn"
1025 [(unspec [(match_operand 0 "nvptx_register_operand" "R")]
1026 UNSPEC_SET_SOFTSTACK)]
1029 return nvptx_output_set_softstack (REGNO (operands[0]));
1032 (define_expand "restore_stack_block"
1033 [(match_operand 0 "register_operand" "")
1034 (match_operand 1 "register_operand" "")]
1037 if (TARGET_SOFT_STACK)
1039 emit_move_insn (operands[0], operands[1]);
1040 emit_insn (gen_set_softstack_insn (operands[0]));
1045 (define_expand "restore_stack_function"
1046 [(match_operand 0 "register_operand" "")
1047 (match_operand 1 "register_operand" "")]
1054 [(trap_if (const_int 1) (const_int 0))]
1058 (define_insn "trap_if_true"
1059 [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R")
1064 [(set_attr "predicable" "false")])
1066 (define_insn "trap_if_false"
1067 [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
1072 [(set_attr "predicable" "false")])
1074 (define_expand "ctrap<mode>4"
1075 [(trap_if (match_operator 0 "nvptx_comparison_operator"
1076 [(match_operand:SDIM 1 "nvptx_register_operand")
1077 (match_operand:SDIM 2 "nvptx_nonmemory_operand")])
1078 (match_operand 3 "const0_operand"))]
1081 rtx t = nvptx_expand_compare (operands[0]);
1082 emit_insn (gen_trap_if_true (t));
1086 (define_insn "oacc_dim_size"
1087 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1088 (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
1092 static const char *const asms[] =
1093 { /* Must match oacc_loop_levels ordering. */
1094 "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
1095 "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
1096 "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
1098 return asms[INTVAL (operands[1])];
;; Set SI register operand 0 to the position within the OpenACC dimension
;; chosen by constant operand 1.  Operand 1 indexes the asms[] table
;; directly (0 = gang -> %ctaid.x, 1 = worker -> %tid.y,
;; 2 = vector -> %tid.x); no range check is performed on it here.
;; Unlike oacc_dim_size this uses unspec_volatile rather than unspec.
1101 (define_insn "oacc_dim_pos"
1102 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1103 (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
1107 static const char *const asms[] =
1108 { /* Must match oacc_loop_levels ordering. */
1109 "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
1110 "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
1111 "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
1113 return asms[INTVAL (operands[1])];
;; Volatile unspec insn carrying constant SI operand 0; not predicable.
;; NOTE(review): presumably marks the point just before an OpenACC fork,
;; with operand 0 identifying the partitioning axis -- confirm in nvptx.c.
1116 (define_insn "nvptx_fork"
1117 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1121 [(set_attr "predicable" "false")])
;; Volatile unspec insn carrying constant SI operand 0; not predicable.
;; NOTE(review): presumably marks the point just after an OpenACC fork
;; -- confirm in nvptx.c.
1123 (define_insn "nvptx_forked"
1124 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1128 [(set_attr "predicable" "false")])
;; Volatile unspec insn carrying constant SI operand 0; not predicable.
;; NOTE(review): presumably marks the point just before an OpenACC join
;; -- confirm in nvptx.c.
1130 (define_insn "nvptx_joining"
1131 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1135 [(set_attr "predicable" "false")])
;; Volatile unspec insn carrying constant SI operand 0; not predicable.
;; NOTE(review): presumably marks the point just after an OpenACC join
;; -- confirm in nvptx.c.
1137 (define_insn "nvptx_join"
1138 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1142 [(set_attr "predicable" "false")])
;; OpenACC fork expander.  Unless operand 0 is const0_rtx, operand 1 is
;; first copied into operand 0; nvptx_expand_oacc_fork is then called with
;; the integer value of constant operand 2.
1144 (define_expand "oacc_fork"
1145 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1146 (match_operand:SI 1 "general_operand" ""))
1147 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1151 if (operands[0] != const0_rtx)
1152 emit_move_insn (operands[0], operands[1]);
1153 nvptx_expand_oacc_fork (INTVAL (operands[2]));
;; OpenACC join expander, mirroring oacc_fork.  Unless operand 0 is
;; const0_rtx, operand 1 is first copied into operand 0;
;; nvptx_expand_oacc_join is then called with the integer value of
;; constant operand 2.
1157 (define_expand "oacc_join"
1158 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1159 (match_operand:SI 1 "general_operand" ""))
1160 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1164 if (operands[0] != const0_rtx)
1165 emit_move_insn (operands[0], operands[1]);
1166 nvptx_expand_oacc_join (INTVAL (operands[2]));
;; Shuffle insn for the BITS modes: operand 0 is the destination register,
;; operand 1 the source register, operand 2 a register or immediate and
;; operand 3 a constant consumed by the %S3 template modifier.
;; The emitted instruction always uses the .b32 type and a constant 31 as
;; its last argument.
;; NOTE(review): presumably %S3 prints the shuffle kind (up/down/bfly/idx)
;; and operand 2 is the lane offset or index -- confirm in nvptx.c.
1170 ;; only 32-bit shuffles exist.
1171 (define_insn "nvptx_shuffle<mode>"
1172 [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
1174 [(match_operand:BITS 1 "nvptx_register_operand" "R")
1175 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
1176 (match_operand:SI 3 "const_int_operand" "n")]
1179 "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;")
;; Emit vote.ballot.b32: SI register operand 0 receives the ballot result
;; computed from BImode predicate register operand 1.
;; Used by omp_simt_vote_any and omp_simt_last_lane.
1181 (define_insn "nvptx_vote_ballot"
1182 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1183 (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
1184 UNSPEC_VOTE_BALLOT))]
1186 "%.\\tvote.ballot.b32\\t%0, %1;")
1188 ;; Patterns for OpenMP SIMD-via-SIMT lowering
;; Insn behind the omp_simt_enter expander.  Register operand 0 is set
;; from a volatile unspec (UNSPECV_SIMT_ENTER) of operands 1 and 2, each a
;; register or immediate.  The assembly text is produced by
;; nvptx_output_simt_enter, which receives all three operands.
1190 (define_insn "omp_simt_enter_insn"
1191 [(set (match_operand 0 "nvptx_register_operand" "=R")
1192 (unspec_volatile [(match_operand 1 "nvptx_nonmemory_operand" "Ri")
1193 (match_operand 2 "nvptx_nonmemory_operand" "Ri")]
1194 UNSPECV_SIMT_ENTER))]
1197 return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
;; Expander that records SIMT stack requirements in cfun->machine and then
;; emits omp_simt_enter_insn on the same three operands.
;;  - operand 1 (size): if it is not a CONST_INT, simt_stack_size is set
;;    to HOST_WIDE_INT_M1U; the running maximum of the constant sizes is
;;    kept via MAX.
;;  - operand 2 (alignment, always a CONST_INT): simt_stack_align keeps
;;    the running maximum.
;;  - has_simtreg is set to true.
;; NOTE(review): the line between the two simt_stack_size assignments is
;; not visible here; presumably it is an `else' -- confirm.
1200 (define_expand "omp_simt_enter"
1201 [(match_operand 0 "nvptx_register_operand" "=R")
1202 (match_operand 1 "nvptx_nonmemory_operand" "Ri")
1203 (match_operand 2 "const_int_operand" "n")]
1206 if (!CONST_INT_P (operands[1]))
1207 cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
1209 cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
1210 cfun->machine->simt_stack_size);
1211 cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
1212 cfun->machine->simt_stack_align);
1213 cfun->machine->has_simtreg = true;
1214 emit_insn (gen_omp_simt_enter_insn (operands[0], operands[1], operands[2]));
;; Volatile unspec insn on register operand 0; the assembly text is
;; produced by nvptx_output_simt_exit (operands[0]).
;; NOTE(review): presumably operand 0 is the value produced by
;; omp_simt_enter -- confirm against the callers.
1218 (define_insn "omp_simt_exit"
1219 [(unspec_volatile [(match_operand 0 "nvptx_register_operand" "R")]
1223 return nvptx_output_simt_exit (operands[0]);
1226 ;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
;; The index is read from the %laneid special register with a mov.u32
;; into SI register operand 0; the pattern has no input operands.
1227 (define_insn "omp_simt_lane"
1228 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1229 (unspec:SI [(const_int 0)] UNSPEC_LANEID))]
1231 "%.\\tmov.u32\\t%0, %%laneid;")
1233 ;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and
1234 ;; place a compiler barrier to disallow unrolling/peeling the containing loop
;; The copy is an ordinary move between SI registers; the barrier is the
;; nvptx_nounroll insn, emitted immediately after the move.
1235 (define_expand "omp_simt_ordered"
1236 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1237 (match_operand:SI 1 "nvptx_register_operand" "R")]
1240 emit_move_insn (operands[0], operands[1]);
1241 emit_insn (gen_nvptx_nounroll ());
1245 ;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
;; Operand 0 is the destination register, operand 1 the source register
;; and operand 2 an SI register or immediate.  The expansion emits the
;; insn(s) returned by nvptx_gen_shuffle for these operands.
;; NOTE(review): the final nvptx_gen_shuffle argument is not visible here;
;; presumably it selects the butterfly shuffle kind, with operand 2 as the
;; lane mask -- confirm.
1247 (define_expand "omp_simt_xchg_bfly"
1248 [(match_operand 0 "nvptx_register_operand" "=R")
1249 (match_operand 1 "nvptx_register_operand" "R")
1250 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1253 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1258 ;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1
1259 ;; from lane given by index in operand 2 to operand 0 in all lanes
;; Operands 0 and 1 are registers; operand 2 is an SI register or
;; immediate.  The expansion emits the insn(s) returned by
;; nvptx_gen_shuffle for these operands.
;; NOTE(review): the final nvptx_gen_shuffle argument is not visible here;
;; presumably it selects the indexed shuffle kind -- confirm.
1260 (define_expand "omp_simt_xchg_idx"
1261 [(match_operand 0 "nvptx_register_operand" "=R")
1262 (match_operand 1 "nvptx_register_operand" "R")
1263 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1266 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1271 ;; Implement IFN_GOMP_SIMT_VOTE_ANY:
1272 ;; set operand 0 to zero iff all lanes supply zero in operand 1
;; Done in two steps: a fresh BImode predicate is set to (operand 1 != 0),
;; then nvptx_vote_ballot writes the ballot of that predicate to operand 0.
;; A non-zero result is therefore the ballot value, not necessarily 1.
1273 (define_expand "omp_simt_vote_any"
1274 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1275 (match_operand:SI 1 "nvptx_register_operand" "R")]
1278 rtx pred = gen_reg_rtx (BImode);
1279 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1280 emit_insn (gen_nvptx_vote_ballot (operands[0], pred));
1284 ;; Implement IFN_GOMP_SIMT_LAST_LANE:
1285 ;; set operand 0 to the lowest lane index that passed non-zero in operand 1
;; Done in three steps: a fresh BImode predicate is set to
;; (operand 1 != 0), nvptx_vote_ballot collects it into an SImode
;; temporary, and ctzsi2 (count trailing zeros) of that temporary gives
;; the index of the lowest set ballot bit.
;; NOTE(review): the result when no lane passes non-zero (ballot == 0)
;; depends on ctzsi2's behaviour for a zero input -- confirm that callers
;; never rely on it.
1286 (define_expand "omp_simt_last_lane"
1287 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1288 (match_operand:SI 1 "nvptx_register_operand" "R")]
1291 rtx pred = gen_reg_rtx (BImode);
1292 rtx tmp = gen_reg_rtx (SImode);
1293 emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
1294 emit_insn (gen_nvptx_vote_ballot (tmp, pred));
1295 emit_insn (gen_ctzsi2 (operands[0], tmp));
1299 ;; extract parts of a 64 bit object into 2 32-bit ints
;; A single mov.b64 {%0,%1}, %2 does both extractions.  Operand 2 is the
;; BITD source register; SI operand 0 is the part tagged (const_int 0) and
;; SI operand 1 the part tagged (const_int 1) in the RTL.
;; NOTE(review): per PTX vector-unpack semantics the first element should
;; be the low 32 bits -- confirm against the PTX ISA `mov' description.
1300 (define_insn "unpack<mode>si2"
1301 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1302 (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
1303 (const_int 0)] UNSPEC_BIT_CONV))
1304 (set (match_operand:SI 1 "nvptx_register_operand" "=R")
1305 (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
1307 "%.\\tmov.b64\\t%{%0,%1}, %2;")
1309 ;; pack 2 32-bit ints into a 64 bit object
;; Inverse of unpack<mode>si2: a single mov.b64 %0, {%1,%2} combines SI
;; register operands 1 and 2 into BITD register operand 0, with operand 1
;; in the first vector position and operand 2 in the second.
1310 (define_insn "packsi<mode>2"
1311 [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
1312 (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
1313 (match_operand:SI 2 "nvptx_register_operand" "R")]
1316 "%.\\tmov.b64\\t%0, {%1,%2};")
;; Compare-and-swap expander for SDIM modes.
;; First emits atomic_compare_and_swap<mode>_1 with (oldval output,
;; memory, expected, newval, success model), i.e. operands 1, 2, 3, 4, 6.
;; The is_weak flag (operand 5) and the failure model (operand 7) are not
;; used by the visible expansion.
;; The boolean result is then derived by comparing the returned old value
;; with the expected value in a BImode temporary and selecting 1 or 0 into
;; operand 0 through sel_truesi.
1320 (define_expand "atomic_compare_and_swap<mode>"
1321 [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output
1322 (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output
1323 (match_operand:SDIM 2 "memory_operand") ;; memory
1324 (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input
1325 (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input
1326 (match_operand:SI 5 "const_int_operand") ;; is_weak
1327 (match_operand:SI 6 "const_int_operand") ;; success model
1328 (match_operand:SI 7 "const_int_operand")] ;; failure model
1331 emit_insn (gen_atomic_compare_and_swap<mode>_1
1332 (operands[1], operands[2], operands[3], operands[4], operands[6]));
1334 rtx cond = gen_reg_rtx (BImode);
1335 emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3]));
1336 emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0)));
;; The atom.cas instruction behind atomic_compare_and_swap<mode>.
;; Operand 0 is the result register, operand 1 the read-write memory
;; location, operands 2 and 3 the two value inputs (register or
;; immediate), passed in that order to atom.cas.  Operand 4 (memory model)
;; does not appear in the output template.  Tagged with the "atomic"
;; attribute.
;; NOTE(review): %A1 and %T0 presumably print the address-space suffix of
;; the memory operand and the bit width of operand 0 -- confirm in nvptx.c.
1340 (define_insn "atomic_compare_and_swap<mode>_1"
1341 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1342 (unspec_volatile:SDIM
1343 [(match_operand:SDIM 1 "memory_operand" "+m")
1344 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
1345 (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
1346 (match_operand:SI 4 "const_int_operand")]
1349 (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
1351 "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"
1352 [(set_attr "atomic" "true")])
;; Atomic exchange for SDIM modes, emitted as atom.exch.  Operand 0 is the
;; result register, operand 1 the read-write memory location and operand 2
;; the value input (register or immediate).  Operand 3 (memory model) does
;; not appear in the output template.  Tagged with the "atomic" attribute.
1354 (define_insn "atomic_exchange<mode>"
1355 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
1356 (unspec_volatile:SDIM
1357 [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory
1358 (match_operand:SI 3 "const_int_operand")] ;; model
1361 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
1363 "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;"
1364 [(set_attr "atomic" "true")])
;; Atomic fetch-and-add for SDIM modes, emitted as atom.add.  Memory
;; operand 1 is updated with the sum of itself and operand 2 (register or
;; immediate); register operand 0 is the instruction's result.  Operand 3
;; (memory model) does not appear in the output template.  Tagged with the
;; "atomic" attribute.
;; NOTE(review): the source of the set of operand 0 is not visible here;
;; presumably it is the previous memory contents -- confirm.
1366 (define_insn "atomic_fetch_add<mode>"
1367 [(set (match_operand:SDIM 1 "memory_operand" "+m")
1368 (unspec_volatile:SDIM
1369 [(plus:SDIM (match_dup 1)
1370 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
1371 (match_operand:SI 3 "const_int_operand")] ;; model
1373 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1376 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1377 [(set_attr "atomic" "true")])
;; Single-precision float variant of atomic_fetch_add<mode>, using the
;; same atom.add template.  Operand 2 uses constraint "RF" rather than
;; "Ri".  Operand 3 (memory model) does not appear in the output template.
;; Tagged with the "atomic" attribute.
1379 (define_insn "atomic_fetch_addsf"
1380 [(set (match_operand:SF 1 "memory_operand" "+m")
1382 [(plus:SF (match_dup 1)
1383 (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))
1384 (match_operand:SI 3 "const_int_operand")] ;; model
1386 (set (match_operand:SF 0 "nvptx_register_operand" "=R")
1389 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1390 [(set_attr "atomic" "true")])
;; Iterator over the three bitwise RTL codes, and the matching code
;; attribute giving the PTX operation name for each (note ior -> "or").
;; Both are used by atomic_fetch_<logic><mode>.
1392 (define_code_iterator any_logic [and ior xor])
1393 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
;; Atomic fetch-and-{and,or,xor} for SDIM modes.  Memory operand 1 is
;; updated with the bitwise operation of itself and operand 2 (register or
;; immediate); register operand 0 is the instruction's result.  Operand 3
;; (memory model) does not appear in the output template.  Tagged with the
;; "atomic" attribute.
;; The PTX spelling is atom{.space}.<op>.<type>: the operation name comes
;; before the type suffix, as in the cas, exch and add templates.
1395 ;; Currently disabled until we add better subtarget support - requires sm_32.
1396 (define_insn "atomic_fetch_<logic><mode>"
1397 [(set (match_operand:SDIM 1 "memory_operand" "+m")
1398 (unspec_volatile:SDIM
1399 [(any_logic:SDIM (match_dup 1)
1400 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
1401 (match_operand:SI 3 "const_int_operand")] ;; model
1403 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1406 "%.\\tatom%A1.<logic>.b%T0\\t%0, %1, %2;"
1407 [(set_attr "atomic" "true")])
;; Volatile unspec insn carrying constant SI operand 0; not predicable.
;; NOTE(review): presumably emits a bar.sync with operand 0 as the barrier
;; number -- the output template is not visible here; confirm.
1409 (define_insn "nvptx_barsync"
1410 [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")]
1414 [(set_attr "predicable" "false")])
;; Emit the PTX directive `.pragma "nounroll";'.  Takes no operands and is
;; modelled as a volatile unspec (UNSPECV_NOUNROLL); not predicable.
;; Emitted by the omp_simt_ordered expander.
1416 (define_insn "nvptx_nounroll"
1417 [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
1419 "\\t.pragma \\\"nounroll\\\";"
1420 [(set_attr "predicable" "false")])