From e37df8b0e3049c3ef122d1b1002005810f90a4a5 Mon Sep 17 00:00:00 2001
From: Ganbarukamo41 <ganbarukamo@gmail.com>
Date: Tue, 17 Mar 2020 07:10:27 +0900
Subject: [PATCH] Implement Vector.Ceiling / Vector.Floor (#31993)

* Add Ceil/Floor to S.P.CoreLib and expose publicly

* Make Ceil/Floor JIT intrinsic

* Add missing identifier for closing endif

* Add JIT test for Ceil/Floor

* Add test for Ceil/Floor in S.Numerics.Vectors
* Also fix the old path for the new structure

* Apply jit-format

* Do not attempt to treat Ceil/Floor if SSE4.1 isn't available

* Mark Vector<T>.Ceiling/Floor as Intrinsic

* Add missing underscore

* Revert "Add missing identifier for closing endif"
* Oops

* Add missing breaks
* Oops, again

* Simplify switch case

* Per review suggestion

Co-Authored-By: Egor Chesakov <egor.chesakov@microsoft.com>

Co-authored-by: Egor Chesakov <egor.chesakov@microsoft.com>
---
 src/coreclr/src/jit/codegen.h                 |   1 +
 src/coreclr/src/jit/codegenarm64.cpp          |  13 +-
 src/coreclr/src/jit/lsraarm64.cpp             |   2 +
 src/coreclr/src/jit/lsraxarch.cpp             |   5 +
 src/coreclr/src/jit/simd.cpp                  |  13 ++
 src/coreclr/src/jit/simdcodegenxarch.cpp      |  68 +++++++++-
 src/coreclr/src/jit/simdintrinsiclist.h       |   3 +
 .../tests/src/JIT/SIMD/VectorCeilFloor.cs     |  30 +++++
 .../src/JIT/SIMD/VectorCeilFloor_r.csproj     |  14 ++
 .../src/JIT/SIMD/VectorCeilFloor_ro.csproj    |  13 ++
 .../ref/System.Numerics.Vectors.cs            |   4 +
 .../tests/GenericVectorTests.cs               |  64 ++++++++-
 .../tests/GenericVectorTests.tt               |  72 ++++++++++-
 .../src/System/Numerics/Vector.cs             | 102 +++++++++++++++
 .../src/System/Numerics/Vector.tt             | 122 ++++++++++++++++++
 .../src/System/Numerics/Vector_Operations.cs  |  64 +++++++++
 16 files changed, 578 insertions(+), 12 deletions(-)
 create mode 100644 src/coreclr/tests/src/JIT/SIMD/VectorCeilFloor.cs
 create mode 100644 src/coreclr/tests/src/JIT/SIMD/VectorCeilFloor_r.csproj
 create mode 100644 src/coreclr/tests/src/JIT/SIMD/VectorCeilFloor_ro.csproj
diff --git a/src/coreclr/src/jit/codegen.h b/src/coreclr/src/jit/codegen.h
index d145be73284..7ebedb57fef 100644
--- a/src/coreclr/src/jit/codegen.h
+++ b/src/coreclr/src/jit/codegen.h
@@ -978,6 +978,7 @@ protected:
     void genSIMDIntrinsicInit(GenTreeSIMD* simdNode);
     void genSIMDIntrinsicInitN(GenTreeSIMD* simdNode);
     void genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode);
+    void genSIMDIntrinsicUnOpWithImm(GenTreeSIMD* simdNode);
     void genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode);
     void genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode);
     void genSIMDIntrinsicDotProduct(GenTreeSIMD* simdNode);
diff --git a/src/coreclr/src/jit/codegenarm64.cpp b/src/coreclr/src/jit/codegenarm64.cpp
index f227fc36410..4970dbd6261 100644
--- a/src/coreclr/src/jit/codegenarm64.cpp
+++ b/src/coreclr/src/jit/codegenarm64.cpp
@@ -3785,6 +3785,8 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
         case SIMDIntrinsicConvertToInt32:
         case SIMDIntrinsicConvertToDouble:
         case SIMDIntrinsicConvertToInt64:
+        case SIMDIntrinsicCeil:
+        case SIMDIntrinsicFloor:
             genSIMDIntrinsicUnOp(simdNode);
             break;
 
@@ -3891,7 +3893,7 @@ insOpts CodeGen::genGetSimdInsOpt(emitAttr size, var_types elementType)
 // Arguments:
 //   intrinsicId    -   SIMD intrinsic Id
 //   baseType       -   Base type of the SIMD vector
-//   immed          -   Out param. Any immediate byte operand that needs to be passed to SSE2 opcode
+//   ival           -   Out param. Any immediate byte operand that needs to be passed to SSE2 opcode
 //
 //
 // Return Value:
@@ -3976,6 +3978,12 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
             case SIMDIntrinsicWidenHi:
                 result = INS_fcvtl2;
                 break;
+            case SIMDIntrinsicCeil:
+                result = INS_frintp;
+                break;
+            case SIMDIntrinsicFloor:
+                result = INS_frintm;
+                break;
             default:
                 assert(!"Unsupported SIMD intrinsic");
                 unreached();
@@ -4210,7 +4218,8 @@ void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
            simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToSingle ||
            simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt32 ||
            simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToDouble ||
-           simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt64);
+           simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt64 ||
+           simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCeil || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicFloor);
 
     GenTree*  op1       = simdNode->gtGetOp1();
     var_types baseType  = simdNode->gtSIMDBaseType;
diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp
index 0bcb661a8e4..6a695a1185a 100644
--- a/src/coreclr/src/jit/lsraarm64.cpp
+++ b/src/coreclr/src/jit/lsraarm64.cpp
@@ -810,6 +810,8 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
         case SIMDIntrinsicConvertToInt64:
         case SIMDIntrinsicWidenLo:
         case SIMDIntrinsicWidenHi:
+        case SIMDIntrinsicCeil:
+        case SIMDIntrinsicFloor:
             // No special handling required.
             break;
 
diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp
index e23d1c80da6..19c914e614d 100644
--- a/src/coreclr/src/jit/lsraxarch.cpp
+++ b/src/coreclr/src/jit/lsraxarch.cpp
@@ -1975,6 +1975,11 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree)
             noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
             break;
 
+        case SIMDIntrinsicCeil:
+        case SIMDIntrinsicFloor:
+            assert(compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported);
+            break;
+
         case SIMDIntrinsicAdd:
         case SIMDIntrinsicSub:
         case SIMDIntrinsicMul:
diff --git a/src/coreclr/src/jit/simd.cpp b/src/coreclr/src/jit/simd.cpp
index d970ca24986..eaaa424399f 100644
--- a/src/coreclr/src/jit/simd.cpp
+++ b/src/coreclr/src/jit/simd.cpp
@@ -3123,6 +3123,19 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE                opcode,
         }
         break;
 
+        case SIMDIntrinsicCeil:
+        case SIMDIntrinsicFloor:
+#if defined(TARGET_XARCH)
+            // Rounding instructions are only available from SSE4.1.
+            if (getSIMDSupportLevel() < SIMD_SSE4_Supported)
+            {
+                return nullptr;
+            }
+#endif // defined(TARGET_XARCH)
+            op1    = impSIMDPopStack(simdType);
+            retVal = gtNewSIMDNode(genActualType(callType), op1, simdIntrinsicID, baseType, size);
+            break;
+
         case SIMDIntrinsicAbs:
             op1    = impSIMDPopStack(simdType);
             retVal = impSIMDAbs(clsHnd, baseType, size, op1);
diff --git a/src/coreclr/src/jit/simdcodegenxarch.cpp b/src/coreclr/src/jit/simdcodegenxarch.cpp
index ecb5ba856da..d49fa600845 100644
--- a/src/coreclr/src/jit/simdcodegenxarch.cpp
+++ b/src/coreclr/src/jit/simdcodegenxarch.cpp
@@ -36,16 +36,29 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #define INSERTPS_TARGET_SELECT(i) ((i) << 4)
 #define INSERTPS_ZERO(i) (1 << (i))
 
+// ROUNDPS/PD:
+// - Bit 0 through 1 - Rounding mode
+//   * 0b00 - Round to nearest (even)
+//   * 0b01 - Round toward Neg. Infinity
+//   * 0b10 - Round toward Pos. Infinity
+//   * 0b11 - Round toward zero (Truncate)
+// - Bit 2 - Source of rounding control, 0b0 for immediate.
+// - Bit 3 - Precision exception, 0b1 to ignore. (We don't raise FP exceptions)
+#define ROUNDPS_TO_NEAREST_IMM 0b1000
+#define ROUNDPS_TOWARD_NEGATIVE_INFINITY_IMM 0b1001
+#define ROUNDPS_TOWARD_POSITIVE_INFINITY_IMM 0b1010
+#define ROUNDPS_TOWARD_ZERO_IMM 0b1011
+
 // getOpForSIMDIntrinsic: return the opcode for the given SIMD Intrinsic
 //
 // Arguments:
 //   intrinsicId    -   SIMD intrinsic Id
 //   baseType       -   Base type of the SIMD vector
-//   immed          -   Out param. Any immediate byte operand that needs to be passed to SSE2 opcode
+//   ival           -   Out param. Any immediate byte operand that needs to be passed to SSE2 opcode
 //
 //
 // Return Value:
-//   Instruction (op) to be used, and immed is set if instruction requires an immediate operand.
+//   Instruction (op) to be used, and ival is set if instruction requires an immediate operand.
 //
 instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_types baseType, unsigned* ival /*=nullptr*/)
 {
@@ -637,6 +650,26 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
             result = INS_insertps;
             break;
 
+        case SIMDIntrinsicCeil:
+        case SIMDIntrinsicFloor:
+            if (compiler->getSIMDSupportLevel() >= SIMD_SSE4_Supported)
+            {
+                if (baseType == TYP_FLOAT)
+                {
+                    result = INS_roundps;
+                }
+                else
+                {
+                    assert(baseType == TYP_DOUBLE);
+                    result = INS_roundpd;
+                }
+
+                assert(ival != nullptr);
+                *ival = (intrinsicId == SIMDIntrinsicCeil) ? ROUNDPS_TOWARD_POSITIVE_INFINITY_IMM
+                                                           : ROUNDPS_TOWARD_NEGATIVE_INFINITY_IMM;
+            }
+            break;
+
         default:
             assert(!"Unsupported SIMD intrinsic");
             unreached();
@@ -1052,6 +1085,32 @@ void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
     genProduceReg(simdNode);
 }
 
+//----------------------------------------------------------------------------------
+// genSIMDIntrinsicUnOpWithImm: Generate code for SIMD intrinsic unary operations that take an imm8 (Ceil, Floor).
+//
+// Arguments:
+//    simdNode - The GT_SIMD node; its intrinsic must be SIMDIntrinsicCeil or SIMDIntrinsicFloor
+//
+// Return Value:
+//    None.
+//
+void CodeGen::genSIMDIntrinsicUnOpWithImm(GenTreeSIMD* simdNode)
+{
+    assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCeil || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicFloor);
+
+    GenTree*  op1       = simdNode->gtGetOp1();
+    var_types baseType  = simdNode->gtSIMDBaseType;
+    regNumber targetReg = simdNode->GetRegNum();
+    assert(targetReg != REG_NA);
+
+    regNumber   op1Reg = genConsumeReg(op1);
+    unsigned    ival   = 0; // getOpForSIMDIntrinsic only writes ival when it selects a rounding instruction
+    instruction ins    = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType, &ival);
+    assert(ival <= 255); // ival is unsigned, so only the upper bound of the imm8 range needs checking
+    GetEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(simdNode->TypeGet()), targetReg, op1Reg, (int8_t)ival);
+    genProduceReg(simdNode); // finish the node the same way genSIMDIntrinsicUnOp does
+}
+
 //----------------------------------------------------------------------------------
 // genSIMDIntrinsic32BitConvert: Generate code for 32-bit SIMD Convert (int/uint <-> float)
 //
@@ -3231,6 +3290,11 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
             genSIMDIntrinsicUpperRestore(simdNode);
             break;
 
+        case SIMDIntrinsicCeil:
+        case SIMDIntrinsicFloor:
+            genSIMDIntrinsicUnOpWithImm(simdNode);
+            break;
+
         default:
             noway_assert(!"Unimplemented SIMD intrinsic.");
             unreached();
diff --git a/src/coreclr/src/jit/simdintrinsiclist.h b/src/coreclr/src/jit/simdintrinsiclist.h
index 9015abbedef..7b535c0112d 100644
--- a/src/coreclr/src/jit/simdintrinsiclist.h
+++ b/src/coreclr/src/jit/simdintrinsiclist.h
@@ -99,6 +99,9 @@ SIMD_INTRINSIC("op_Division",               false,       Div,
 // SquareRoot is recognized as an intrinsic only for float or double vectors
 SIMD_INTRINSIC("SquareRoot",                false,       Sqrt,                     "sqrt",                   TYP_STRUCT,     1,      {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF},    {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
 
+SIMD_INTRINSIC("Ceiling",                   false,       Ceil,                     "ceil",                   TYP_STRUCT,     1,      {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF},    {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+SIMD_INTRINSIC("Floor",                     false,       Floor,                    "floor",                  TYP_STRUCT,     1,      {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF},    {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+
 SIMD_INTRINSIC("Min",                       false,       Min,                      "min",                    TYP_STRUCT,     2,      {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF},   {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
 SIMD_INTRINSIC("Max",                       false,       Max,                      "max",                    TYP_STRUCT,     2,      {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF},   {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
 SIMD_INTRINSIC("Abs",                       false,       Abs,                      "abs",                    TYP_STRUCT,     1,      {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF },   {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_USHORT, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
diff --git a/src/coreclr/tests/src/JIT/SIMD/VectorCeilFloor.cs b/src/coreclr/tests/src/JIT/SIMD/VectorCeilFloor.cs
new file mode 100644
index 00000000000..1f0a307844b
--- /dev/null
+++ b/src/coreclr/tests/src/JIT/SIMD/VectorCeilFloor.cs
@@ -0,0 +1,30 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Numerics;
+
+internal partial class VectorTest // JIT test: Vector.Ceiling / Vector.Floor on float and double vectors
+{
+    private const int Pass = 100; // value returned by Main when every check succeeds
+    private const int Fail = -1;  // value returned by Main when any check fails
+
+    private static int Main() // runs all checks (no early exit) and returns Pass or Fail
+    {
+        int returnVal = Pass;
+        
+        if (!CheckVector(Vector.Ceiling(new Vector<float>(4.6f)), 5)) returnVal = Fail; // CheckVector is not defined in this file; presumably VectorUtil.cs (listed in the csproj) - confirm
+        if (!CheckVector(Vector.Ceiling(new Vector<float>(-4.6f)), -4)) returnVal = Fail; // negative input: ceiling moves toward zero
+        if (!CheckVector(Vector.Floor(new Vector<float>(4.6f)), 4)) returnVal = Fail;
+        if (!CheckVector(Vector.Floor(new Vector<float>(-4.6f)), -5)) returnVal = Fail; // negative input: floor moves away from zero
+
+        if (!CheckVector(Vector.Ceiling(new Vector<double>(4.6)), 5)) returnVal = Fail; // same four cases for double
+        if (!CheckVector(Vector.Ceiling(new Vector<double>(-4.6)), -4)) returnVal = Fail;
+        if (!CheckVector(Vector.Floor(new Vector<double>(4.6)), 4)) returnVal = Fail;
+        if (!CheckVector(Vector.Floor(new Vector<double>(-4.6)), -5)) returnVal = Fail;
+
+        return returnVal;
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/SIMD/VectorCeilFloor_r.csproj b/src/coreclr/tests/src/JIT/SIMD/VectorCeilFloor_r.csproj
new file mode 100644
index 00000000000..9f98e36c4a3
--- /dev/null
+++ b/src/coreclr/tests/src/JIT/SIMD/VectorCeilFloor_r.csproj
@@ -0,0 +1,14 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <CLRTestPriority>1</CLRTestPriority>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <Optimize />
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="VectorCeilFloor.cs" />
+    <Compile Include="VectorUtil.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/coreclr/tests/src/JIT/SIMD/VectorCeilFloor_ro.csproj b/src/coreclr/tests/src/JIT/SIMD/VectorCeilFloor_ro.csproj
new file mode 100644
index 00000000000..95c2407f138
--- /dev/null
+++ b/src/coreclr/tests/src/JIT/SIMD/VectorCeilFloor_ro.csproj
@@ -0,0 +1,13 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>None</DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="VectorCeilFloor.cs" />
+    <Compile Include="VectorUtil.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/libraries/System.Numerics.Vectors/ref/System.Numerics.Vectors.cs b/src/libraries/System.Numerics.Vectors/ref/System.Numerics.Vectors.cs
index 9b4a40677b3..3c017e72c5a 100644
--- a/src/libraries/System.Numerics.Vectors/ref/System.Numerics.Vectors.cs
+++ b/src/libraries/System.Numerics.Vectors/ref/System.Numerics.Vectors.cs
@@ -293,6 +293,10 @@ namespace System.Numerics
         [System.CLSCompliantAttribute(false)]
         public static void Widen(System.Numerics.Vector<System.UInt32> source, out System.Numerics.Vector<System.UInt64> dest1, out System.Numerics.Vector<System.UInt64> dest2) { throw null; }
         public static System.Numerics.Vector<T> Xor<T>(System.Numerics.Vector<T> left, System.Numerics.Vector<T> right) where T : struct { throw null; }
+        public static System.Numerics.Vector<System.Single> Ceiling(System.Numerics.Vector<System.Single> value) { throw null; }
+        public static System.Numerics.Vector<System.Double> Ceiling(System.Numerics.Vector<System.Double> value) { throw null; }
+        public static System.Numerics.Vector<System.Single> Floor(System.Numerics.Vector<System.Single> value) { throw null; }
+        public static System.Numerics.Vector<System.Double> Floor(System.Numerics.Vector<System.Double> value) { throw null; }
     }
     public partial struct Vector2 : System.IEquatable<System.Numerics.Vector2>, System.IFormattable
     {
diff --git a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs
index d87f4e79e51..f3499278bc8 100644
--- a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs
+++ b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.cs
@@ -2286,8 +2286,8 @@ namespace System.Numerics.Tests
             T[] values = GenerateRandomValuesForVector<T>();
             Vector<T> vector = new Vector<T>(values);
 
-            Vector<T> SquareRootVector = Vector.SquareRoot(vector);
-            ValidateVector(SquareRootVector,
+            Vector<T> squareRootVector = Vector.SquareRoot(vector);
+            ValidateVector(squareRootVector,
                 (index, val) =>
                 {
                     T expected = Util.Sqrt(values[index]);
@@ -2295,6 +2295,66 @@ namespace System.Numerics.Tests
                 });
         }
 
+        [Fact]
+        public void CeilingSingle() // checks Vector.Ceiling element-wise against MathF.Ceiling
+        {
+            float[] values = GenerateRandomValuesForVector<float>();
+            Vector<float> vector = new Vector<float>(values);
+
+            Vector<float> ceilVector = Vector.Ceiling(vector);
+            ValidateVector(ceilVector,
+                (index, val) =>
+                {
+                    float expected = MathF.Ceiling(values[index]); // scalar reference result for this element
+                    AssertEqual(expected, val, $"Ceiling( {FullString(values[index])} )", -1);
+                });
+        }
+
+        [Fact]
+        public void CeilingDouble() // checks Vector.Ceiling element-wise against Math.Ceiling
+        {
+            double[] values = GenerateRandomValuesForVector<double>();
+            Vector<double> vector = new Vector<double>(values);
+
+            Vector<double> ceilVector = Vector.Ceiling(vector);
+            ValidateVector(ceilVector,
+                (index, val) =>
+                {
+                    double expected = Math.Ceiling(values[index]); // scalar reference result for this element
+                    AssertEqual(expected, val, $"Ceiling( {FullString(values[index])} )", -1);
+                });
+        }
+
+        [Fact]
+        public void FloorSingle() // checks Vector.Floor element-wise against MathF.Floor
+        {
+            float[] values = GenerateRandomValuesForVector<float>();
+            Vector<float> vector = new Vector<float>(values);
+
+            Vector<float> floorVector = Vector.Floor(vector);
+            ValidateVector(floorVector,
+                (index, val) =>
+                {
+                    float expected = MathF.Floor(values[index]); // scalar reference result for this element
+                    AssertEqual(expected, val, $"Floor( {FullString(values[index])} )", -1);
+                });
+        }
+
+        [Fact]
+        public void FloorDouble() // checks Vector.Floor element-wise against Math.Floor
+        {
+            double[] values = GenerateRandomValuesForVector<double>();
+            Vector<double> vector = new Vector<double>(values);
+
+            Vector<double> floorVector = Vector.Floor(vector);
+            ValidateVector(floorVector,
+                (index, val) =>
+                {
+                    double expected = Math.Floor(values[index]); // scalar reference result for this element
+                    AssertEqual(expected, val, $"Floor( {FullString(values[index])} )", -1);
+                });
+        }
+
         [Fact]
         public void AbsByte() { TestAbs<byte>(); }
         [Fact]
diff --git a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.tt b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.tt
index be0546beec2..1a4476cf2bc 100644
--- a/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.tt
+++ b/src/libraries/System.Numerics.Vectors/tests/GenericVectorTests.tt
@@ -5,7 +5,7 @@
 <#@ import namespace="System" #>
 <#@ import namespace="System.Linq" #>
 <#@ import namespace="System.Runtime.InteropServices" #>
-<#@ include file="..\..\common\src\corelib\System\Numerics\GenerationConfig.ttinclude" #><# GenerateCopyrightHeader(); #>
+<#@ include file="..\..\System.Private.CoreLib\src\System\Numerics\GenerationConfig.ttinclude" #><# GenerateCopyrightHeader(); #>
 
 using System;
 using System.Globalization;
@@ -1458,7 +1458,7 @@ namespace System.Numerics.Tests
             }
             while (Util.AnyEqual(values1, values2));
 
-            Array.Copy(values1, 0, values2, 0, Vector<T>.Count / 2);
+            Array.Copy(values1, values2, Vector<T>.Count / 2);
             Vector<T> vec1 = new Vector<T>(values1);
             Vector<T> vec2 = new Vector<T>(values2);
 
@@ -1492,7 +1492,7 @@ namespace System.Numerics.Tests
             }
             while (Util.AnyEqual(values1, values2));
 
-            Array.Copy(values1, 0, values2, 0, Vector<T>.Count / 2);
+            Array.Copy(values1, values2, Vector<T>.Count / 2);
             Vector<T> vec1 = new Vector<T>(values1);
             Vector<T> vec2 = new Vector<T>(values2);
 
@@ -1529,7 +1529,7 @@ namespace System.Numerics.Tests
             }
             while (Util.AnyEqual(values1, values2));
 
-            Array.Copy(values1, 0, values2, 0, Vector<T>.Count / 2);
+            Array.Copy(values1, values2, Vector<T>.Count / 2);
             Vector<T> vec1 = new Vector<T>(values1);
             Vector<T> vec2 = new Vector<T>(values2);
 
@@ -1673,8 +1673,8 @@ namespace System.Numerics.Tests
             T[] values = GenerateRandomValuesForVector<T>();
             Vector<T> vector = new Vector<T>(values);
 
-            Vector<T> SquareRootVector = Vector.SquareRoot(vector);
-            ValidateVector(SquareRootVector,
+            Vector<T> squareRootVector = Vector.SquareRoot(vector);
+            ValidateVector(squareRootVector,
                 (index, val) =>
                 {
                     T expected = Util.Sqrt(values[index]);
@@ -1682,6 +1682,66 @@ namespace System.Numerics.Tests
                 });
         }
 
+        [Fact]
+        public void CeilingSingle() // checks Vector.Ceiling element-wise against MathF.Ceiling
+        {
+            float[] values = GenerateRandomValuesForVector<float>();
+            Vector<float> vector = new Vector<float>(values);
+
+            Vector<float> ceilVector = Vector.Ceiling(vector);
+            ValidateVector(ceilVector,
+                (index, val) =>
+                {
+                    float expected = MathF.Ceiling(values[index]); // scalar reference result for this element
+                    AssertEqual(expected, val, $"Ceiling( {FullString(values[index])} )", -1);
+                });
+        }
+
+        [Fact]
+        public void CeilingDouble() // checks Vector.Ceiling element-wise against Math.Ceiling
+        {
+            double[] values = GenerateRandomValuesForVector<double>();
+            Vector<double> vector = new Vector<double>(values);
+
+            Vector<double> ceilVector = Vector.Ceiling(vector);
+            ValidateVector(ceilVector,
+                (index, val) =>
+                {
+                    double expected = Math.Ceiling(values[index]); // scalar reference result for this element
+                    AssertEqual(expected, val, $"Ceiling( {FullString(values[index])} )", -1);
+                });
+        }
+
+        [Fact]
+        public void FloorSingle() // checks Vector.Floor element-wise against MathF.Floor
+        {
+            float[] values = GenerateRandomValuesForVector<float>();
+            Vector<float> vector = new Vector<float>(values);
+
+            Vector<float> floorVector = Vector.Floor(vector);
+            ValidateVector(floorVector,
+                (index, val) =>
+                {
+                    float expected = MathF.Floor(values[index]); // scalar reference result for this element
+                    AssertEqual(expected, val, $"Floor( {FullString(values[index])} )", -1);
+                });
+        }
+
+        [Fact]
+        public void FloorDouble() // checks Vector.Floor element-wise against Math.Floor
+        {
+            double[] values = GenerateRandomValuesForVector<double>();
+            Vector<double> vector = new Vector<double>(values);
+
+            Vector<double> floorVector = Vector.Floor(vector);
+            ValidateVector(floorVector,
+                (index, val) =>
+                {
+                    double expected = Math.Floor(values[index]); // scalar reference result for this element
+                    AssertEqual(expected, val, $"Floor( {FullString(values[index])} )", -1);
+                });
+        }
+
 <# 
     foreach (var type in supportedTypes)
     {
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs
index 0407fd49281..f20b762afbf 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs
@@ -3456,6 +3456,108 @@ namespace System.Numerics
                 }
             }
         }
+
+        [Intrinsic] // NOTE(review): presumably lets the JIT replace this managed body where it supports the intrinsic - confirm
+        internal static unsafe Vector<T> Ceiling(Vector<T> value) // element-wise ceiling; T must be float or double, otherwise NotSupportedException
+        {
+            if (Vector.IsHardwareAccelerated)
+            {
+                // This path sizes its work by Count: each element is rounded into a stack buffer, then a new vector is built from it.
+                if (typeof(T) == typeof(float))
+                {
+                    float* dataPtr = stackalloc float[Count];
+                    for (int g = 0; g < Count; g++)
+                    {
+                        dataPtr[g] = MathF.Ceiling((float)(object)value[g]); // T -> object -> float; T was just checked to be float
+                    }
+                    return new Vector<T>(dataPtr);
+                }
+                else if (typeof(T) == typeof(double))
+                {
+                    double* dataPtr = stackalloc double[Count];
+                    for (int g = 0; g < Count; g++)
+                    {
+                        dataPtr[g] = Math.Ceiling((double)(object)value[g]); // T -> object -> double; T was just checked to be double
+                    }
+                    return new Vector<T>(dataPtr);
+                }
+                else
+                {
+                    throw new NotSupportedException(SR.Arg_TypeNotSupported);
+                }
+            }
+            else
+            {
+                // This path rounds the individual register fields of the parameter (four floats or two doubles) and returns it.
+                if (typeof(T) == typeof(float))
+                {
+                    value.register.single_0 = MathF.Ceiling(value.register.single_0);
+                    value.register.single_1 = MathF.Ceiling(value.register.single_1);
+                    value.register.single_2 = MathF.Ceiling(value.register.single_2);
+                    value.register.single_3 = MathF.Ceiling(value.register.single_3);
+                    return value;
+                }
+                else if (typeof(T) == typeof(double))
+                {
+                    value.register.double_0 = Math.Ceiling(value.register.double_0);
+                    value.register.double_1 = Math.Ceiling(value.register.double_1);
+                    return value;
+                }
+                else
+                {
+                    throw new NotSupportedException(SR.Arg_TypeNotSupported);
+                }
+            }
+        }
+
+        [Intrinsic] // NOTE(review): presumably lets the JIT replace this managed body where it supports the intrinsic - confirm
+        internal static unsafe Vector<T> Floor(Vector<T> value) // element-wise floor; T must be float or double, otherwise NotSupportedException
+        {
+            if (Vector.IsHardwareAccelerated)
+            {
+                // This path sizes its work by Count: each element is rounded into a stack buffer, then a new vector is built from it.
+                if (typeof(T) == typeof(float))
+                {
+                    float* dataPtr = stackalloc float[Count];
+                    for (int g = 0; g < Count; g++)
+                    {
+                        dataPtr[g] = MathF.Floor((float)(object)value[g]); // T -> object -> float; T was just checked to be float
+                    }
+                    return new Vector<T>(dataPtr);
+                }
+                else if (typeof(T) == typeof(double))
+                {
+                    double* dataPtr = stackalloc double[Count];
+                    for (int g = 0; g < Count; g++)
+                    {
+                        dataPtr[g] = Math.Floor((double)(object)value[g]); // T -> object -> double; T was just checked to be double
+                    }
+                    return new Vector<T>(dataPtr);
+                }
+                else
+                {
+                    throw new NotSupportedException(SR.Arg_TypeNotSupported);
+                }
+            }
+            else
+            {
+                // This path rounds the individual register fields of the parameter (four floats or two doubles) and returns it.
+                if (typeof(T) == typeof(float))
+                {
+                    value.register.single_0 = MathF.Floor(value.register.single_0);
+                    value.register.single_1 = MathF.Floor(value.register.single_1);
+                    value.register.single_2 = MathF.Floor(value.register.single_2);
+                    value.register.single_3 = MathF.Floor(value.register.single_3);
+                    return value;
+                }
+                else if (typeof(T) == typeof(double))
+                {
+                    value.register.double_0 = Math.Floor(value.register.double_0);
+                    value.register.double_1 = Math.Floor(value.register.double_1);
+                    return value;
+                }
+                else
+                {
+                    throw new NotSupportedException(SR.Arg_TypeNotSupported);
+                }
+            }
+        }
         #endregion Internal Math Methods
 
         #region Helper Methods
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt
index 93dd6d37969..3c02ba2beba 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.tt
@@ -1405,6 +1405,128 @@ namespace System.Numerics
                 }
             }
         }
+
+        [Intrinsic] // NOTE(review): presumably lets the JIT swap this body for a SIMD expansion - confirm against the JIT intrinsic list
+        internal static unsafe Vector<T> Ceiling(Vector<T> value) // element-wise ceiling; only T == float and T == double are handled
+        {
+            if (Vector.IsHardwareAccelerated)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    float* dataPtr = stackalloc float[Count]; // scratch buffer: one rounded result per vector element
+                    for (int g = 0; g < Count; g++)
+                    {
+                        dataPtr[g] = MathF.Ceiling((float)(object)value[g]); // generic T cannot be cast to float directly, hence the hop through object
+                    }
+                    return new Vector<T>(dataPtr); // build the result vector from the scratch buffer
+                }
+                else if (typeof(T) == typeof(double))
+                {
+                    double* dataPtr = stackalloc double[Count]; // scratch buffer: one rounded result per vector element
+                    for (int g = 0; g < Count; g++)
+                    {
+                        dataPtr[g] = Math.Ceiling((double)(object)value[g]); // same object hop as the float case, using the double overload
+                    }
+                    return new Vector<T>(dataPtr); // build the result vector from the scratch buffer
+                }
+                else
+                {
+                    throw new NotSupportedException(SR.Arg_TypeNotSupported); // T is neither float nor double
+                }
+            }
+            else // not hardware accelerated: round the fields of the value parameter one by one and return it
+            {
+                if (typeof(T) == typeof(float))
+                {
+<#
+    for (int g = 0; g < GetNumFields(typeof(float), totalSize); g++) // template-time loop: emits one assignment per float field
+    {
+#>
+                    value.<#=GetRegisterFieldName(typeof(float), g)#> = MathF.Ceiling(value.<#=GetRegisterFieldName(typeof(float), g)#>);
+<#
+    }
+#>
+                    return value;
+                }
+                else if (typeof(T) == typeof(double))
+                {
+<#
+    for (int g = 0; g < GetNumFields(typeof(double), totalSize); g++) // template-time loop: emits one assignment per double field
+    {
+#>
+                    value.<#=GetRegisterFieldName(typeof(double), g)#> = Math.Ceiling(value.<#=GetRegisterFieldName(typeof(double), g)#>);
+<#
+    }
+#>
+                    return value;
+                }
+                else
+                {
+                    throw new NotSupportedException(SR.Arg_TypeNotSupported); // T is neither float nor double
+                }
+            }
+        }
+
+        [Intrinsic] // NOTE(review): presumably lets the JIT swap this body for a SIMD expansion - confirm against the JIT intrinsic list
+        internal static unsafe Vector<T> Floor(Vector<T> value) // element-wise floor; only T == float and T == double are handled
+        {
+            if (Vector.IsHardwareAccelerated)
+            {
+                if (typeof(T) == typeof(float))
+                {
+                    float* dataPtr = stackalloc float[Count]; // scratch buffer: one rounded result per vector element
+                    for (int g = 0; g < Count; g++)
+                    {
+                        dataPtr[g] = MathF.Floor((float)(object)value[g]); // generic T cannot be cast to float directly, hence the hop through object
+                    }
+                    return new Vector<T>(dataPtr); // build the result vector from the scratch buffer
+                }
+                else if (typeof(T) == typeof(double))
+                {
+                    double* dataPtr = stackalloc double[Count]; // scratch buffer: one rounded result per vector element
+                    for (int g = 0; g < Count; g++)
+                    {
+                        dataPtr[g] = Math.Floor((double)(object)value[g]); // same object hop as the float case, using the double overload
+                    }
+                    return new Vector<T>(dataPtr); // build the result vector from the scratch buffer
+                }
+                else
+                {
+                    throw new NotSupportedException(SR.Arg_TypeNotSupported); // T is neither float nor double
+                }
+            }
+            else // not hardware accelerated: round the fields of the value parameter one by one and return it
+            {
+                if (typeof(T) == typeof(float))
+                {
+<#
+    for (int g = 0; g < GetNumFields(typeof(float), totalSize); g++) // template-time loop: emits one assignment per float field
+    {
+#>
+                    value.<#=GetRegisterFieldName(typeof(float), g)#> = MathF.Floor(value.<#=GetRegisterFieldName(typeof(float), g)#>);
+<#
+    }
+#>
+                    return value;
+                }
+                else if (typeof(T) == typeof(double))
+                {
+<#
+    for (int g = 0; g < GetNumFields(typeof(double), totalSize); g++) // template-time loop: emits one assignment per double field
+    {
+#>
+                    value.<#=GetRegisterFieldName(typeof(double), g)#> = Math.Floor(value.<#=GetRegisterFieldName(typeof(double), g)#>);
+<#
+    }
+#>
+                    return value;
+                }
+                else
+                {
+                    throw new NotSupportedException(SR.Arg_TypeNotSupported); // T is neither float nor double
+                }
+            }
+        }
         #endregion Internal Math Methods
 
         #region Helper Methods
diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_Operations.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_Operations.cs
index 8d41edaf123..f16d1c4fc57 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_Operations.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/Vector_Operations.cs
@@ -626,6 +626,70 @@ namespace System.Numerics
         {
             return Vector<T>.SquareRoot(value);
         }
+
+        /// <summary>
+        /// Returns a new vector whose elements are the smallest integral values that are greater than or equal to the given vector's elements.
+        /// </summary>
+        /// <param name="value">The source vector.</param>
+        /// <returns>
+        /// The vector whose elements are the smallest integral values that are greater than or equal to the given vector's elements.
+        /// If an element is equal to <see cref="float.NaN"/>, <see cref="float.NegativeInfinity"/> or <see cref="float.PositiveInfinity"/>, that element is returned.
+        /// Note that the elements of the returned vector are of type <see cref="float"/>, not an integral type.
+        /// </returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<float> Ceiling(Vector<float> value)
+        {
+            return Vector<float>.Ceiling(value); // forwards to the internal Vector<T> implementation
+        }
+
+        /// <summary>
+        /// Returns a new vector whose elements are the smallest integral values that are greater than or equal to the given vector's elements.
+        /// </summary>
+        /// <param name="value">The source vector.</param>
+        /// <returns>
+        /// The vector whose elements are the smallest integral values that are greater than or equal to the given vector's elements.
+        /// If an element is equal to <see cref="double.NaN"/>, <see cref="double.NegativeInfinity"/> or <see cref="double.PositiveInfinity"/>, that element is returned.
+        /// Note that the elements of the returned vector are of type <see cref="double"/>, not an integral type.
+        /// </returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<double> Ceiling(Vector<double> value)
+        {
+            return Vector<double>.Ceiling(value); // forwards to the internal Vector<T> implementation
+        }
+
+        /// <summary>
+        /// Returns a new vector whose elements are the largest integral values that are less than or equal to the given vector's elements.
+        /// </summary>
+        /// <param name="value">The source vector.</param>
+        /// <returns>
+        /// The vector whose elements are the largest integral values that are less than or equal to the given vector's elements.
+        /// If an element is equal to <see cref="float.NaN"/>, <see cref="float.NegativeInfinity"/> or <see cref="float.PositiveInfinity"/>, that element is returned.
+        /// Note that the elements of the returned vector are of type <see cref="float"/>, not an integral type.
+        /// </returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<float> Floor(Vector<float> value)
+        {
+            return Vector<float>.Floor(value); // forwards to the internal Vector<T> implementation
+        }
+
+        /// <summary>
+        /// Returns a new vector whose elements are the largest integral values that are less than or equal to the given vector's elements.
+        /// </summary>
+        /// <param name="value">The source vector.</param>
+        /// <returns>
+        /// The vector whose elements are the largest integral values that are less than or equal to the given vector's elements.
+        /// If an element is equal to <see cref="double.NaN"/>, <see cref="double.NegativeInfinity"/> or <see cref="double.PositiveInfinity"/>, that element is returned.
+        /// Note that the elements of the returned vector are of type <see cref="double"/>, not an integral type.
+        /// </returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<double> Floor(Vector<double> value)
+        {
+            return Vector<double>.Floor(value); // forwards to the internal Vector<T> implementation
+        }
         #endregion Vector Math Methods
 
         #region Named Arithmetic Operators
-- 
2.34.1