gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
break;
+ /* Lower sqrt builtins to a call to the internal function IFN_SQRT. */
+ BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
+ new_stmt = gimple_build_call_internal (IFN_SQRT,
+ 1, args[0]);
+ gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
+ break;
+
/* Lower store and load neon builtins to gimple. */
BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
--- /dev/null
+/* PR target/64821 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* Check that we constant fold sqrt(4.0) into 2.0. */
+/* { dg-final { scan-tree-dump-not " \\\.SQRT" "optimized" } } */
+/* { dg-final { scan-tree-dump " 2\\\.0e\\\+0" "optimized" } } */
+/* { dg-final { scan-assembler-not "fsqrt" } } */
+/* We should produce a fmov to d0 with 2.0 but currently don't, see PR 103959. */
+/* { dg-final { scan-assembler-times "\n\tfmov\td0, 2.0e.0" 1 { xfail *-*-* } } } */
+
+#include <arm_neon.h>
+
+float64x1_t f64(void) /* Returns vsqrt_f64 of a compile-time constant vector. */
+{
+ float64x1_t a = (float64x1_t){4.0}; /* Literal input: sqrt (4.0) should fold to 2.0. */
+ return vsqrt_f64 (a); /* Expected to leave no .SQRT call and no fsqrt behind. */
+}
--- /dev/null
+/* PR target/64821 */
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-optimized" } */
+#include <arm_neon.h>
+
+/* Check that we lower __builtin_aarch64_sqrt* into the internal function SQRT. */
+/* { dg-final { scan-tree-dump-times " __builtin_aarch64_sqrt" 0 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\\.SQRT " 4 "optimized" } } */
+
+float64x1_t f64(float64x1_t a) /* vsqrt_f64 applied to the incoming argument. */
+{
+ return vsqrt_f64 (a); /* One of the four calls counted by the .SQRT scan. */
+}
+
+float64x2_t f64q(float64x2_t a) /* vsqrtq_f64 applied to the incoming argument. */
+{
+ return vsqrtq_f64 (a); /* One of the four calls counted by the .SQRT scan. */
+}
+
+float32x2_t f32(float32x2_t a) /* vsqrt_f32 applied to the incoming argument. */
+{
+ return vsqrt_f32 (a); /* One of the four calls counted by the .SQRT scan. */
+}
+
+float32x4_t f32q(float32x4_t a) /* vsqrtq_f32 applied to the incoming argument. */
+{
+ return vsqrtq_f32 (a); /* One of the four calls counted by the .SQRT scan. */
+}