Fix use of uninitialized variables in managed decimal implementation (#25674)
[platform/upstream/coreclr.git] / src / System.Private.CoreLib / shared / System / Decimal.DecCalc.cs
1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4
5 using System.Diagnostics;
6 using System.Numerics;
7 using System.Runtime.CompilerServices;
8 using System.Runtime.InteropServices;
9 using Internal.Runtime.CompilerServices;
10 using X86 = System.Runtime.Intrinsics.X86;
11
12 namespace System
13 {
14     public partial struct Decimal
15     {
16         // Low level accessors used by a DecCalc and formatting
17         internal uint High => (uint)hi;
18         internal uint Low => (uint)lo;
19         internal uint Mid => (uint)mid;
20
21         internal bool IsNegative => flags < 0;
22
23         internal int Scale => (byte)(flags >> ScaleShift);
24
25 #if BIGENDIAN
26         private ulong Low64 => ((ulong)Mid << 32) | Low;
27 #else
28         private ulong Low64 => Unsafe.As<int, ulong>(ref Unsafe.AsRef(in lo));
29 #endif
30
31         private static ref DecCalc AsMutable(ref decimal d) => ref Unsafe.As<decimal, DecCalc>(ref d);
32
        #region APIs needed by number formatting.
34
35         internal static uint DecDivMod1E9(ref decimal value)
36         {
37             return DecCalc.DecDivMod1E9(ref AsMutable(ref value));
38         }
39
40         #endregion
41
42         /// <summary>
43         /// Class that contains all the mathematical calculations for decimal. Most of which have been ported from oleaut32.
44         /// </summary>
45         [StructLayout(LayoutKind.Explicit)]
46         private struct DecCalc
47         {
48             // NOTE: Do not change the offsets of these fields. This structure must have the same layout as Decimal.
49             [FieldOffset(0)]
50             private uint uflags;
51             [FieldOffset(4)]
52             private uint uhi;
53             [FieldOffset(8)]
54             private uint ulo;
55             [FieldOffset(12)]
56             private uint umid;
57
58             /// <summary>
59             /// The low and mid fields combined in little-endian order
60             /// </summary>
61             [FieldOffset(8)]
62             private ulong ulomidLE;
63
64             private uint High
65             {
66                 get => uhi;
67                 set => uhi = value;
68             }
69
70             private uint Low
71             {
72                 get => ulo;
73                 set => ulo = value;
74             }
75
76             private uint Mid
77             {
78                 get => umid;
79                 set => umid = value;
80             }
81
82             private bool IsNegative => (int)uflags < 0;
83
84             private int Scale => (byte)(uflags >> ScaleShift);
85
86             private ulong Low64
87             {
88 #if BIGENDIAN
89                 get { return ((ulong)umid << 32) | ulo; }
90                 set { umid = (uint)(value >> 32); ulo = (uint)value; }
91 #else
92                 get => ulomidLE;
93                 set => ulomidLE = value;
94 #endif
95             }
96
97             private const uint SignMask = 0x80000000;
98             private const uint ScaleMask = 0x00FF0000;
99
100             private const int DEC_SCALE_MAX = 28;
101
102             private const uint TenToPowerNine = 1000000000;
103             private const ulong TenToPowerEighteen = 1000000000000000000;
104
105             // The maximum power of 10 that a 32 bit integer can store
106             private const int MaxInt32Scale = 9;
107             // The maximum power of 10 that a 64 bit integer can store
108             private const int MaxInt64Scale = 19;
109
110             // Fast access for 10^n where n is 0-9
111             private static readonly uint[] s_powers10 = new uint[] {
112                 1,
113                 10,
114                 100,
115                 1000,
116                 10000,
117                 100000,
118                 1000000,
119                 10000000,
120                 100000000,
121                 1000000000
122             };
123
124             // Fast access for 10^n where n is 1-19
125             private static readonly ulong[] s_ulongPowers10 = new ulong[] {
126                 10,
127                 100,
128                 1000,
129                 10000,
130                 100000,
131                 1000000,
132                 10000000,
133                 100000000,
134                 1000000000,
135                 10000000000,
136                 100000000000,
137                 1000000000000,
138                 10000000000000,
139                 100000000000000,
140                 1000000000000000,
141                 10000000000000000,
142                 100000000000000000,
143                 1000000000000000000,
144                 10000000000000000000,
145             };
146
147             private static readonly double[] s_doublePowers10 = new double[] {
148                 1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
149                 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
150                 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29,
151                 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
152                 1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49,
153                 1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59,
154                 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69,
155                 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79,
156                 1e80
157             };
158
159             // Used to fill uninitialized stack variables with non-zero pattern in debug builds
160             [Conditional("DEBUG")]
161             private static unsafe void DebugPoison<T>(ref T s) where T: unmanaged
162             {
163                 MemoryMarshal.AsBytes(MemoryMarshal.CreateSpan(ref s, 1)).Fill(0xCD);
164             }
165
166             #region Decimal Math Helpers
167
168             private static unsafe uint GetExponent(float f)
169             {
170                 // Based on pulling out the exp from this single struct layout
171                 //typedef struct {
172                 //    ULONG mant:23;
173                 //    ULONG exp:8;
174                 //    ULONG sign:1;
175                 //} SNGSTRUCT;
176
177                 return (byte)(*(uint*)&f >> 23);
178             }
179
180             private static unsafe uint GetExponent(double d)
181             {
182                 // Based on pulling out the exp from this double struct layout
183                 //typedef struct {
184                 //   DWORDLONG mant:52;
185                 //   DWORDLONG signexp:12;
186                 // } DBLSTRUCT;
187
188                 return (uint)(*(ulong*)&d >> 52) & 0x7FFu;
189             }
190
191             private static ulong UInt32x32To64(uint a, uint b)
192             {
193                 return (ulong)a * (ulong)b;
194             }
195
196             private static void UInt64x64To128(ulong a, ulong b, ref DecCalc result)
197             {
198                 ulong low = UInt32x32To64((uint)a, (uint)b); // lo partial prod
199                 ulong mid = UInt32x32To64((uint)a, (uint)(b >> 32)); // mid 1 partial prod
200                 ulong high = UInt32x32To64((uint)(a >> 32), (uint)(b >> 32));
201                 high += mid >> 32;
202                 low += mid <<= 32;
203                 if (low < mid)  // test for carry
204                     high++;
205
206                 mid = UInt32x32To64((uint)(a >> 32), (uint)b);
207                 high += mid >> 32;
208                 low += mid <<= 32;
209                 if (low < mid)  // test for carry
210                     high++;
211
212                 if (high > uint.MaxValue)
213                     Number.ThrowOverflowException(TypeCode.Decimal);
214                 result.Low64 = low;
215                 result.High = (uint)high;
216             }
217
218             /// <summary>
219             /// Do full divide, yielding 96-bit result and 32-bit remainder.
220             /// </summary>
221             /// <param name="bufNum">96-bit dividend as array of uints, least-sig first</param>
222             /// <param name="den">32-bit divisor</param>
223             /// <returns>Returns remainder. Quotient overwrites dividend.</returns>
224             private static uint Div96By32(ref Buf12 bufNum, uint den)
225             {
226                 // TODO: https://github.com/dotnet/coreclr/issues/3439
227                 ulong tmp, div;
228                 if (bufNum.U2 != 0)
229                 {
230                     tmp = bufNum.High64;
231                     div = tmp / den;
232                     bufNum.High64 = div;
233                     tmp = ((tmp - (uint)div * den) << 32) | bufNum.U0;
234                     if (tmp == 0)
235                         return 0;
236                     uint div32 = (uint)(tmp / den);
237                     bufNum.U0 = div32;
238                     return (uint)tmp - div32 * den;
239                 }
240
241                 tmp = bufNum.Low64;
242                 if (tmp == 0)
243                     return 0;
244                 div = tmp / den;
245                 bufNum.Low64 = div;
246                 return (uint)(tmp - div * den);
247             }
248
            /// <summary>
            /// Tries to divide the 96-bit value formed by <paramref name="high64"/> (upper 64 bits) and
            /// <paramref name="low"/> (lower 32 bits) by <paramref name="pow"/> without leaving a remainder.
            /// </summary>
            /// <param name="high64">Upper 64 bits of the dividend; replaced by the upper 64 bits of the quotient on success.</param>
            /// <param name="low">Lower 32 bits of the dividend; replaced by the lower 32 bits of the quotient on success.</param>
            /// <param name="pow">Divisor. The non-BIT64 path asserts that it fits in 16 bits.</param>
            /// <returns>
            /// true if the division was exact and the quotient was stored;
            /// false otherwise, in which case <paramref name="high64"/> and <paramref name="low"/> are left unchanged.
            /// </returns>
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            private static bool Div96ByConst(ref ulong high64, ref uint low, uint pow)
            {
#if BIT64
                // Divide the upper 64 bits, then carry the remainder into the low word.
                ulong div64 = high64 / pow;
                uint div = (uint)((((high64 - div64 * pow) << 32) + low) / pow);
                // The final remainder is below pow, so comparing the low 32 bits is enough to detect an exact division.
                if (low == div * pow)
                {
                    high64 = div64;
                    low = div;
                    return true;
                }
#else
                // 32-bit RyuJIT doesn't convert 64-bit division by constant into multiplication by reciprocal. Do half-width divisions instead.
                Debug.Assert(pow <= ushort.MaxValue);
                // Long division in 16-bit steps: each step shifts the running remainder up by 16 bits
                // and brings in the next 16 bits of the dividend.
                uint num, mid32, low16, div;
                if (high64 <= uint.MaxValue)
                {
                    // Upper 32 bits of high64 are zero, so the dividend has only 64 significant bits.
                    num = (uint)high64;
                    mid32 = num / pow;
                    num = (num - mid32 * pow) << 16;

                    num += low >> 16;
                    low16 = num / pow;
                    num = (num - low16 * pow) << 16;

                    num += (ushort)low;
                    div = num / pow;
                    // Exact only if the last step leaves no remainder.
                    if (num == div * pow)
                    {
                        high64 = mid32;
                        low = (low16 << 16) + div;
                        return true;
                    }
                }
                else
                {
                    // Full 96-bit dividend: top 32 bits first, then four 16-bit steps.
                    num = (uint)(high64 >> 32);
                    uint high32 = num / pow;
                    num = (num - high32 * pow) << 16;

                    num += (uint)high64 >> 16;
                    mid32 = num / pow;
                    num = (num - mid32 * pow) << 16;

                    num += (ushort)high64;
                    div = num / pow;
                    num = (num - div * pow) << 16;
                    // Combine the two 16-bit quotient pieces of the middle word.
                    mid32 = div + (mid32 << 16);

                    num += low >> 16;
                    low16 = num / pow;
                    num = (num - low16 * pow) << 16;

                    num += (ushort)low;
                    div = num / pow;
                    // Exact only if the last step leaves no remainder.
                    if (num == div * pow)
                    {
                        high64 = ((ulong)high32 << 32) | mid32;
                        low = (low16 << 16) + div;
                        return true;
                    }
                }
#endif
                return false;
            }
315
316             /// <summary>
317             /// Normalize (unscale) the number by trying to divide out 10^8, 10^4, 10^2, and 10^1.
318             /// If a division by one of these powers returns a zero remainder, then we keep the quotient.
319             /// </summary>
320             [MethodImpl(MethodImplOptions.AggressiveInlining)]
321             private static void Unscale(ref uint low, ref ulong high64, ref int scale)
322             {
323                 // Since 10 = 2 * 5, there must be a factor of 2 for every power of 10 we can extract.
324                 // We use this as a quick test on whether to try a given power.
325
326 #if BIT64
327                 while ((byte)low == 0 && scale >= 8 && Div96ByConst(ref high64, ref low, 100000000))
328                     scale -= 8;
329
330                 if ((low & 0xF) == 0 && scale >= 4 && Div96ByConst(ref high64, ref low, 10000))
331                     scale -= 4;
332 #else
333                 while ((low & 0xF) == 0 && scale >= 4 && Div96ByConst(ref high64, ref low, 10000))
334                     scale -= 4;
335 #endif
336
337                 if ((low & 3) == 0 && scale >= 2 && Div96ByConst(ref high64, ref low, 100))
338                     scale -= 2;
339
340                 if ((low & 1) == 0 && scale >= 1 && Div96ByConst(ref high64, ref low, 10))
341                     scale--;
342             }
343
344             /// <summary>
345             /// Do partial divide, yielding 32-bit result and 64-bit remainder.
346             /// Divisor must be larger than upper 64 bits of dividend.
347             /// </summary>
348             /// <param name="bufNum">96-bit dividend as array of uints, least-sig first</param>
349             /// <param name="den">64-bit divisor</param>
350             /// <returns>Returns quotient. Remainder overwrites lower 64-bits of dividend.</returns>
351             private static uint Div96By64(ref Buf12 bufNum, ulong den)
352             {
353                 Debug.Assert(den > bufNum.High64);
354                 uint quo;
355                 ulong num;
356                 uint num2 = bufNum.U2;
357                 if (num2 == 0)
358                 {
359                     num = bufNum.Low64;
360                     if (num < den)
361                         // Result is zero.  Entire dividend is remainder.
362                         return 0;
363
364                     // TODO: https://github.com/dotnet/coreclr/issues/3439
365                     quo = (uint)(num / den);
366                     num -= quo * den; // remainder
367                     bufNum.Low64 = num;
368                     return quo;
369                 }
370
371                 uint denHigh32 = (uint)(den >> 32);
372                 if (num2 >= denHigh32)
373                 {
374                     // Divide would overflow.  Assume a quotient of 2^32, and set
375                     // up remainder accordingly.
376                     //
377                     num = bufNum.Low64;
378                     num -= den << 32;
379                     quo = 0;
380
381                     // Remainder went negative.  Add divisor back in until it's positive,
382                     // a max of 2 times.
383                     //
384                     do
385                     {
386                         quo--;
387                         num += den;
388                     } while (num >= den);
389
390                     bufNum.Low64 = num;
391                     return quo;
392                 }
393
394                 // Hardware divide won't overflow
395                 //
396                 ulong num64 = bufNum.High64;
397                 if (num64 < denHigh32)
398                     // Result is zero.  Entire dividend is remainder.
399                     //
400                     return 0;
401
402                 // TODO: https://github.com/dotnet/coreclr/issues/3439
403                 quo = (uint)(num64 / denHigh32);
404                 num = bufNum.U0 | ((num64 - quo * denHigh32) << 32); // remainder
405
406                 // Compute full remainder, rem = dividend - (quo * divisor).
407                 //
408                 ulong prod = UInt32x32To64(quo, (uint)den); // quo * lo divisor
409                 num -= prod;
410
411                 if (num > ~prod)
412                 {
413                     // Remainder went negative.  Add divisor back in until it's positive,
414                     // a max of 2 times.
415                     //
416                     do
417                     {
418                         quo--;
419                         num += den;
420                     } while (num >= den);
421                 }
422
423                 bufNum.Low64 = num;
424                 return quo;
425             }
426
427             /// <summary>
428             /// Do partial divide, yielding 32-bit result and 96-bit remainder.
429             /// Top divisor uint must be larger than top dividend uint. This is
430             /// assured in the initial call because the divisor is normalized
431             /// and the dividend can't be. In subsequent calls, the remainder
432             /// is multiplied by 10^9 (max), so it can be no more than 1/4 of
433             /// the divisor which is effectively multiplied by 2^32 (4 * 10^9).
434             /// </summary>
435             /// <param name="bufNum">128-bit dividend as array of uints, least-sig first</param>
436             /// <param name="bufDen">96-bit divisor</param>
437             /// <returns>Returns quotient. Remainder overwrites lower 96-bits of dividend.</returns>
438             private static uint Div128By96(ref Buf16 bufNum, ref Buf12 bufDen)
439             {
440                 Debug.Assert(bufDen.U2 > bufNum.U3);
441                 ulong dividend = bufNum.High64;
442                 uint den = bufDen.U2;
443                 if (dividend < den)
444                     // Result is zero.  Entire dividend is remainder.
445                     //
446                     return 0;
447
448                 // TODO: https://github.com/dotnet/coreclr/issues/3439
449                 uint quo = (uint)(dividend / den);
450                 uint remainder = (uint)dividend - quo * den;
451
452                 // Compute full remainder, rem = dividend - (quo * divisor).
453                 //
454                 ulong prod1 = UInt32x32To64(quo, bufDen.U0); // quo * lo divisor
455                 ulong prod2 = UInt32x32To64(quo, bufDen.U1); // quo * mid divisor
456                 prod2 += prod1 >> 32;
457                 prod1 = (uint)prod1 | (prod2 << 32);
458                 prod2 >>= 32;
459
460                 ulong num = bufNum.Low64;
461                 num -= prod1;
462                 remainder -= (uint)prod2;
463
464                 // Propagate carries
465                 //
466                 if (num > ~prod1)
467                 {
468                     remainder--;
469                     if (remainder < ~(uint)prod2)
470                         goto PosRem;
471                 }
472                 else if (remainder <= ~(uint)prod2)
473                     goto PosRem;
474                 {
475                     // Remainder went negative.  Add divisor back in until it's positive,
476                     // a max of 2 times.
477                     //
478                     prod1 = bufDen.Low64;
479
480                     for (;;)
481                     {
482                         quo--;
483                         num += prod1;
484                         remainder += den;
485
486                         if (num < prod1)
487                         {
488                             // Detected carry. Check for carry out of top
489                             // before adding it in.
490                             //
491                             if (remainder++ < den)
492                                 break;
493                         }
494                         if (remainder < den)
495                             break; // detected carry
496                     }
497                 }
498 PosRem:
499
500                 bufNum.Low64 = num;
501                 bufNum.U2 = remainder;
502                 return quo;
503             }
504
505             /// <summary>
506             /// Multiply the two numbers. The low 96 bits of the result overwrite
507             /// the input. The last 32 bits of the product are the return value.
508             /// </summary>
509             /// <param name="bufNum">96-bit number as array of uints, least-sig first</param>
510             /// <param name="power">Scale factor to multiply by</param>
511             /// <returns>Returns highest 32 bits of product</returns>
512             private static uint IncreaseScale(ref Buf12 bufNum, uint power)
513             {
514                 ulong tmp = UInt32x32To64(bufNum.U0, power);
515                 bufNum.U0 = (uint)tmp;
516                 tmp >>= 32;
517                 tmp += UInt32x32To64(bufNum.U1, power);
518                 bufNum.U1 = (uint)tmp;
519                 tmp >>= 32;
520                 tmp += UInt32x32To64(bufNum.U2, power);
521                 bufNum.U2 = (uint)tmp;
522                 return (uint)(tmp >> 32);
523             }
524
525             private static void IncreaseScale64(ref Buf12 bufNum, uint power)
526             {
527                 ulong tmp = UInt32x32To64(bufNum.U0, power);
528                 bufNum.U0 = (uint)tmp;
529                 tmp >>= 32;
530                 tmp += UInt32x32To64(bufNum.U1, power);
531                 bufNum.High64 = tmp;
532             }
533
            /// <summary>
            /// See if we need to scale the result to fit it in 96 bits.
            /// Perform needed scaling. Adjust scale factor accordingly.
            /// </summary>
            /// <remarks>
            /// The value is divided by powers of 10 until it fits in three uints and the scale is at most
            /// DEC_SCALE_MAX. The final quotient is rounded half-to-even, with the remainders of earlier
            /// divisions acting as a sticky bit. When the scale cannot be reduced far enough,
            /// Number.ThrowOverflowException(TypeCode.Decimal) is called.
            /// </remarks>
            /// <param name="bufRes">Array of uints with value, least-significant first</param>
            /// <param name="hiRes">Index of last non-zero value in bufRes</param>
            /// <param name="scale">Scale factor for this value, range 0 - 2 * DEC_SCALE_MAX</param>
            /// <returns>Returns new scale factor. bufRes updated in place, always 3 uints.</returns>
            private static unsafe int ScaleResult(Buf24* bufRes, uint hiRes, int scale)
            {
                Debug.Assert(hiRes < bufRes->Length);
                // View the buffer as a plain array of 32-bit words.
                uint* result = (uint*)bufRes;

                // See if we need to scale the result.  The combined scale must
                // be <= DEC_SCALE_MAX and the upper 96 bits must be zero.
                //
                // Start by figuring a lower bound on the scaling needed to make
                // the upper 96 bits zero.  hiRes is the index into result[]
                // of the highest non-zero uint.
                //
                int newScale = 0;
                if (hiRes > 2)
                {
                    newScale = (int)hiRes * 32 - 64 - 1;
                    newScale -= BitOperations.LeadingZeroCount(result[hiRes]);

                    // Multiply bit position by log10(2) to figure its power of 10.
                    // We scale the log by 256.  log(2) = .30103, * 256 = 77.  Doing this
                    // with a multiply saves a 96-byte lookup table.  The power returned
                    // is <= the power of the number, so we must add one power of 10
                    // to make its integer part zero after dividing by 256.
                    //
                    // Note: the result of this multiplication by an approximation of
                    // log10(2) has been exhaustively checked to verify it gives the
                    // correct result.  (There were only 95 to check...)
                    //
                    newScale = ((newScale * 77) >> 8) + 1;

                    // newScale = min scale factor to make high 96 bits zero, 0 - 29.
                    // This reduces the scale factor of the result.  If it exceeds the
                    // current scale of the result, we'll overflow.
                    //
                    if (newScale > scale)
                        goto ThrowOverflow;
                }

                // Make sure we scale by enough to bring the current scale factor
                // into valid range.
                //
                if (newScale < scale - DEC_SCALE_MAX)
                    newScale = scale - DEC_SCALE_MAX;

                if (newScale != 0)
                {
                    // Scale by the power of 10 given by newScale.  Note that this is
                    // NOT guaranteed to bring the number within 96 bits -- it could
                    // be 1 power of 10 short.
                    //
                    scale -= newScale;
                    uint sticky = 0;
                    uint quotient, remainder = 0;

                    for (;;)
                    {
                        sticky |= remainder; // record remainder as sticky bit

                        uint power;
                        // Scaling loop specialized for each power of 10 because division by constant is an order of magnitude faster (especially for 64-bit division that's actually done by 128bit DIV on x64)
                        switch (newScale)
                        {
                            case 1:
                                power = DivByConst(result, hiRes, out quotient, out remainder, 10);
                                break;
                            case 2:
                                power = DivByConst(result, hiRes, out quotient, out remainder, 100);
                                break;
                            case 3:
                                power = DivByConst(result, hiRes, out quotient, out remainder, 1000);
                                break;
                            case 4:
                                power = DivByConst(result, hiRes, out quotient, out remainder, 10000);
                                break;
#if BIT64
                            case 5:
                                power = DivByConst(result, hiRes, out quotient, out remainder, 100000);
                                break;
                            case 6:
                                power = DivByConst(result, hiRes, out quotient, out remainder, 1000000);
                                break;
                            case 7:
                                power = DivByConst(result, hiRes, out quotient, out remainder, 10000000);
                                break;
                            case 8:
                                power = DivByConst(result, hiRes, out quotient, out remainder, 100000000);
                                break;
                            default:
                                power = DivByConst(result, hiRes, out quotient, out remainder, TenToPowerNine);
                                break;
#else
                            default:
                                goto case 4;
#endif
                        }
                        // DivByConst returns the quotient of the top word separately; store it here.
                        result[hiRes] = quotient;
                        // If first quotient was 0, update hiRes.
                        //
                        if (quotient == 0 && hiRes != 0)
                            hiRes--;

                        // Account for the largest power of 10 a single pass can divide out (10^9 on BIT64, 10^4 otherwise).
#if BIT64
                        newScale -= MaxInt32Scale;
#else
                        newScale -= 4;
#endif
                        if (newScale > 0)
                            continue; // scale some more

                        // If we scaled enough, hiRes would be 2 or less.  If not,
                        // divide by 10 more.
                        //
                        if (hiRes > 2)
                        {
                            if (scale == 0)
                                goto ThrowOverflow;
                            newScale = 1;
                            scale--;
                            continue; // scale by 10
                        }

                        // Round final result.  See if remainder >= 1/2 of divisor.
                        // If remainder == 1/2 divisor, round up if odd or sticky bit set.
                        //
                        power >>= 1;  // power of 10 always even
                        if (power <= remainder && (power < remainder || ((result[0] & 1) | sticky) != 0) && ++result[0] == 0)
                        {
                            // Incrementing the lowest word wrapped to zero: propagate the carry upwards.
                            uint cur = 0;
                            do
                            {
                                Debug.Assert(cur + 1 < bufRes->Length);
                            }
                            while (++result[++cur] == 0);

                            if (cur > 2)
                            {
                                // The rounding caused us to carry beyond 96 bits.
                                // Scale by 10 more.
                                //
                                if (scale == 0)
                                    goto ThrowOverflow;
                                hiRes = cur;
                                sticky = 0;    // no sticky bit
                                remainder = 0; // or remainder
                                newScale = 1;
                                scale--;
                                continue; // scale by 10
                            }
                        }

                        break;
                    } // for(;;)
                }
                return scale;

ThrowOverflow:
                Number.ThrowOverflowException(TypeCode.Decimal);
                return 0;
            }
701
            /// <summary>
            /// Divides the multi-word number <c>result[0..hiRes]</c> (least-significant word first) by <paramref name="power"/>.
            /// Words below <paramref name="hiRes"/> are overwritten with their quotient; the quotient of the top word
            /// is returned through <paramref name="quotient"/> and is not written back here.
            /// </summary>
            /// <param name="result">Pointer to the words of the number.</param>
            /// <param name="hiRes">Index of the most significant word to divide.</param>
            /// <param name="quotient">Receives the quotient of <c>result[hiRes]</c>.</param>
            /// <param name="remainder">Receives the remainder of the whole division.</param>
            /// <param name="power">Divisor. The non-BIT64 path asserts that it fits in 16 bits.</param>
            /// <returns><paramref name="power"/>, unchanged.</returns>
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            private static unsafe uint DivByConst(uint* result, uint hiRes, out uint quotient, out uint remainder, uint power)
            {
                uint high = result[hiRes];
                remainder = high - (quotient = high / power) * power;
                // When hiRes is 0, i starts at uint.MaxValue, which is negative as an int, so the loop does not run.
                for (uint i = hiRes - 1; (int)i >= 0; i--)
                {
#if BIT64
                    // Prepend the running remainder to the next lower word and divide the 64-bit value.
                    ulong num = result[i] + ((ulong)remainder << 32);
                    remainder = (uint)num - (result[i] = (uint)(num / power)) * power;
#else
                    // 32-bit RyuJIT doesn't convert 64-bit division by constant into multiplication by reciprocal. Do half-width divisions instead.
                    Debug.Assert(power <= ushort.MaxValue);
                    // Byte offsets of the low and high 16-bit halves within each 32-bit word.
#if BIGENDIAN
                    const int low16 = 2, high16 = 0;
#else
                    const int low16 = 0, high16 = 2;
#endif
                    // byte* is used here because Roslyn doesn't do constant propagation for pointer arithmetic
                    // High half of the word first ...
                    uint num = *(ushort*)((byte*)result + i * 4 + high16) + (remainder << 16);
                    uint div = num / power;
                    remainder = num - div * power;
                    *(ushort*)((byte*)result + i * 4 + high16) = (ushort)div;

                    // ... then the low half, carrying the remainder along.
                    num = *(ushort*)((byte*)result + i * 4 + low16) + (remainder << 16);
                    div = num / power;
                    remainder = num - div * power;
                    *(ushort*)((byte*)result + i * 4 + low16) = (ushort)div;
#endif
                }
                return power;
            }
734
/// <summary>
/// Repairs a quotient that has carried out of its low 64 bits into an (implicit) extra bit:
/// the scale is lowered by one, the value with that extra bit restored is divided by 10,
/// and the digit that falls off is used to round the result.
/// </summary>
/// <param name="bufQuo">Quotient buffer; its top word is asserted to be zero on entry and the buffer is rewritten in place.</param>
/// <param name="scale">Current scale. An overflow is raised if it cannot be decremented without going negative.</param>
/// <param name="sticky">When true, a dropped digit of exactly 5 rounds up regardless of the parity of the result.</param>
/// <returns>The decremented scale.</returns>
private static int OverflowUnscale(ref Buf12 bufQuo, int scale, bool sticky)
{
    scale--;
    if (scale < 0)
        Number.ThrowOverflowException(TypeCode.Decimal);

    Debug.Assert(bufQuo.U2 == 0);

    // The overflowed bit is worth 2^32 in the top-word position. Long-divide [2^32 : U1 : U0] by 10,
    // one 32-bit word at a time, carrying each remainder into the next word.
    const ulong carry = 1UL << 32;
    bufQuo.U2 = (uint)(carry / 10);

    ulong partial = ((carry % 10) << 32) + bufQuo.U1;
    uint word = (uint)(partial / 10);
    bufQuo.U1 = word;

    // word * 10 is evaluated in 32 bits and may wrap; the excess is discarded by the shift below.
    partial = ((partial - word * 10) << 32) + bufQuo.U0;
    word = (uint)(partial / 10);
    bufQuo.U0 = word;

    // The digit that no longer fits decides the rounding direction.
    uint dropped = (uint)(partial - word * 10);

    bool roundUp;
    if (dropped == 5)
        roundUp = sticky || (bufQuo.U0 & 1) != 0;   // exact half: sticky forces up, otherwise round to even
    else
        roundUp = dropped > 5;

    if (roundUp)
        Add32To96(ref bufQuo, 1);

    return scale;
}
761
/// <summary>
/// Finds the largest power of 10 (at most 9) by which the 96-bit quotient can be
/// multiplied without overflowing 96 bits or pushing the scale past DEC_SCALE_MAX.
/// </summary>
/// <param name="bufQuo">96-bit quotient</param>
/// <param name="scale">Scale factor of quotient, range -DEC_SCALE_MAX to DEC_SCALE_MAX-1</param>
/// <returns>power of 10 to scale by</returns>
private static int SearchScale(ref Buf12 bufQuo, int scale)
{
    // Largest high-word values for which scaling by 10^n still fits in 96 bits.
    const uint OVFL_MAX_9_HI = 4;
    const uint OVFL_MAX_8_HI = 42;
    const uint OVFL_MAX_7_HI = 429;
    const uint OVFL_MAX_6_HI = 4294;
    const uint OVFL_MAX_5_HI = 42949;
    const uint OVFL_MAX_4_HI = 429496;
    const uint OVFL_MAX_3_HI = 4294967;
    const uint OVFL_MAX_2_HI = 42949672;
    const uint OVFL_MAX_1_HI = 429496729;
    const ulong OVFL_MAX_9_MIDLO = 5441186219426131129;

    uint top = bufQuo.U2;
    ulong low64 = bufQuo.Low64;
    int best = 0;

    // If the high word is already above the 10^1 limit, no scaling is possible and best stays 0.
    if (top <= OVFL_MAX_1_HI)
    {
        var powerOvfl = PowerOvflValues;
        bool settled = false;

        if (scale > DEC_SCALE_MAX - 9)
        {
            // Scaling by 10^9 would exceed the maximum scale. Try scaling exactly to the
            // maximum; if that is not clearly safe, fall back to the general search below.
            best = DEC_SCALE_MAX - scale;
            settled = top < powerOvfl[best - 1].Hi;
        }
        else if (top < OVFL_MAX_9_HI || top == OVFL_MAX_9_HI && low64 <= OVFL_MAX_9_MIDLO)
        {
            return 9;
        }

        if (!settled)
        {
            // Binary search over the high-word limits for a power below 9.
            if (top > OVFL_MAX_5_HI)
            {
                if (top > OVFL_MAX_3_HI)
                    best = top > OVFL_MAX_2_HI ? 1 : 2;
                else
                    best = top > OVFL_MAX_4_HI ? 3 : 4;
            }
            else
            {
                if (top > OVFL_MAX_7_HI)
                    best = top > OVFL_MAX_6_HI ? 5 : 6;
                else
                    best = top > OVFL_MAX_8_HI ? 7 : 8;
            }

            // The next larger power is already known not to fit. If the high word sits exactly on
            // this power's limit, the low 64 bits decide; when they are too large, step down one
            // more power (possibly all the way to 10^0, i.e. no scaling).
            int slot = best - 1;
            if (top == powerOvfl[slot].Hi && low64 > powerOvfl[slot].MidLo)
                best--;
        }
    }

    // best is the largest usable power (< 9 here). It must be enough to bring a negative
    // scale factor up to at least zero.
    if (best + scale < 0)
        Number.ThrowOverflowException(TypeCode.Decimal);

    return best;
}
858
/// <summary>
/// Adds a 32-bit value to the 96-bit integer stored in <paramref name="bufNum"/>, in place.
/// </summary>
/// <param name="bufNum">96-bit accumulator (Low64 plus the U2 high word).</param>
/// <param name="value">Amount to add.</param>
/// <returns>Returns false if there is an overflow</returns>
private static bool Add32To96(ref Buf12 bufNum, uint value)
{
    ulong low = bufNum.Low64 + value;
    bufNum.Low64 = low;

    // An unsigned sum smaller than one of its addends means the low 64 bits wrapped around.
    if (low >= value)
        return true;

    // Carry into the high word; if that wraps to zero the whole 96-bit value overflowed.
    uint top = bufNum.U2 + 1;
    bufNum.U2 = top;
    return top != 0;
}
872
873             /// <summary>
874             /// Adds or subtracts two decimal values.
875             /// On return, d1 contains the result of the operation and d2 is trashed.
876             /// </summary>
                /// <param name="d1">Left operand. Receives the result (flags, High and Low64 are written at ReturnResult).</param>
                /// <param name="d2">Right operand. It is overwritten with a copy of d1 when the operands have to be swapped.</param>
877             /// <param name="sign">True means subtract and false means add.</param>
                /// <remarks>
                /// When an aligned add carries out of 96 bits and the scale is already zero, a decimal overflow is
                /// raised through Number.ThrowOverflowException. ScaleResult, called on the wide-buffer path,
                /// presumably can raise overflow as well (its body is not part of this method).
                /// </remarks>
878             internal static unsafe void DecAddSub(ref DecCalc d1, ref DecCalc d2, bool sign)
879             {
                    // low64/high/flags form the working copy of the left operand and, later, of the result.
880                 ulong low64 = d1.Low64;
881                 uint high = d1.High, flags = d1.uflags, d2flags = d2.uflags;
882
883                 uint xorflags = d2flags ^ flags;
                    // Fold the operands' sign bits into the requested operation: from here on, sign == true
                    // means the two magnitudes are subtracted, sign == false means they are added.
884                 sign ^= (xorflags & SignMask) != 0;
885
886                 if ((xorflags & ScaleMask) == 0)
887                 {
888                     // Scale factors are equal, no alignment necessary.
889                     //
890                     goto AlignedAdd;
891                 }
892                 else
893                 {
894                     // Scale factors are not equal.  Assume that a larger scale
895                     // factor (more decimal places) is likely to mean that number
896                     // is smaller.  Start by guessing that the right operand has
897                     // the larger scale factor.  The result will have the larger
898                     // scale factor.
899                     //
900                     uint d1flags = flags;
901                     flags = d2flags & ScaleMask | flags & SignMask; // scale factor of "smaller",  but sign of "larger"
                        // flags and d1flags carry the same sign bit here, so the difference is the scale delta
                        // (d2 scale minus d1 scale) -- assuming no bits other than sign and scale are set.
902                     int scale = (int)(flags - d1flags) >> ScaleShift;
903
904                     if (scale < 0)
905                     {
906                         // Guessed scale factor wrong. Swap operands.
907                         //
908                         scale = -scale;
909                         flags = d1flags;
                            // Swapping the operands of a subtraction negates the result, so flip the sign up front.
910                         if (sign)
911                             flags ^= SignMask;
                            // After the swap, low64/high hold the magnitude that must be scaled up (old d2) and
                            // d2 holds the operand that already has the larger scale (old d1).
912                         low64 = d2.Low64;
913                         high = d2.High;
914                         d2 = d1;
915                     }
916
917                     uint power;
918                     ulong tmp64, tmpLow;
919
920                     // d1 will need to be multiplied by 10^scale so
921                     // it will have the same scale as d2.  We could be
922                     // extending it to up to 192 bits of precision.
923
924                     // Scan for zeros in the upper words.
925                     //
926                     if (high == 0)
927                     {
928                         if (low64 <= uint.MaxValue)
929                         {
930                             if ((uint)low64 == 0)
931                             {
932                                 // Left arg is zero, return right.
933                                 //
934                                 uint signFlags = flags & SignMask;
935                                 if (sign)
936                                     signFlags ^= SignMask;
937                                 d1 = d2;
938                                 d1.uflags = d2.uflags & ScaleMask | signFlags;
939                                 return;
940                             }
941
                                // Value fits in 32 bits: scale with single 32x32->64 multiplies while it stays that small.
942                             do
943                             {
944                                 if (scale <= MaxInt32Scale)
945                                 {
946                                     low64 = UInt32x32To64((uint)low64, s_powers10[scale]);
947                                     goto AlignedAdd;
948                                 }
949                                 scale -= MaxInt32Scale;
950                                 low64 = UInt32x32To64((uint)low64, TenToPowerNine);
951                             } while (low64 <= uint.MaxValue);
952                         }
953
                            // Value fits in 64 bits: two partial products per step, until the high word becomes non-zero.
954                         do
955                         {
956                             power = TenToPowerNine;
957                             if (scale < MaxInt32Scale)
958                                 power = s_powers10[scale];
959                             tmpLow = UInt32x32To64((uint)low64, power);
960                             tmp64 = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32);
961                             low64 = (uint)tmpLow + (tmp64 << 32);
962                             high = (uint)(tmp64 >> 32);
963                             if ((scale -= MaxInt32Scale) <= 0)
964                                 goto AlignedAdd;
965                         } while (high == 0);
966                     }
967
968                     while (true)
969                     {
970                         // Scaling won't make it larger than 4 uints
971                         //
972                         power = TenToPowerNine;
973                         if (scale < MaxInt32Scale)
974                             power = s_powers10[scale];
975                         tmpLow = UInt32x32To64((uint)low64, power);
976                         tmp64 = UInt32x32To64((uint)(low64 >> 32), power) + (tmpLow >> 32);
977                         low64 = (uint)tmpLow + (tmp64 << 32);
978                         tmp64 >>= 32;
979                         tmp64 += UInt32x32To64(high, power);
980
981                         scale -= MaxInt32Scale;
                            // tmp64 now holds words 2 and 3 of the product; leaving the loop means word 3 is non-zero.
982                         if (tmp64 > uint.MaxValue)
983                             break;
984
985                         high = (uint)tmp64;
986                         // Result fits in 96 bits.  Use standard aligned add.
987                         if (scale <= 0)
988                             goto AlignedAdd;
989                     }
990
991                     // Have to scale by a bunch. Move the number to a buffer where it has room to grow as it's scaled.
992                     //
993                     Buf24 bufNum;
994                     _ = &bufNum; // workaround for CS0165
995                     DebugPoison(ref bufNum);
996
997                     bufNum.Low64 = low64;
998                     bufNum.Mid64 = tmp64;
999                     uint hiProd = 3;
1000
1001                     // Scaling loop, up to 10^9 at a time. hiProd stays updated with index of highest non-zero uint.
1002                     //
1003                     for (; scale > 0; scale -= MaxInt32Scale)
1004                     {
1005                         power = TenToPowerNine;
1006                         if (scale < MaxInt32Scale)
1007                             power = s_powers10[scale];
1008                         tmp64 = 0;
1009                         uint* rgulNum = (uint*)&bufNum;
                             // Multiply words 0..hiProd by power; tmp64 carries the upper half into the next word.
1010                         for (uint cur = 0; ;)
1011                         {
1012                             Debug.Assert(cur < bufNum.Length);
1013                             tmp64 += UInt32x32To64(rgulNum[cur], power);
1014                             rgulNum[cur] = (uint)tmp64;
1015                             cur++;
1016                             tmp64 >>= 32;
1017                             if (cur > hiProd)
1018                                 break;
1019                         }
1020
1021                         if ((uint)tmp64 != 0)
1022                         {
1023                             // We're extending the result by another uint.
1024                             Debug.Assert(hiProd + 1 < bufNum.Length);
1025                             rgulNum[++hiProd] = (uint)tmp64;
1026                         }
1027                     }
1028
1029                     // Scaling complete, do the add.  Could be subtract if signs differ.
1030                     //
                         // Only the low 96 bits are combined with d2 here; words 3 and above of bufNum are
                         // touched solely by the carry/borrow propagation below.
1031                     tmp64 = bufNum.Low64;
1032                     low64 = d2.Low64;
1033                     uint tmpHigh = bufNum.U2;
1034                     high = d2.High;
1035
1036                     if (sign)
1037                     {
1038                         // Signs differ, subtract.
1039                         //
1040                         low64 = tmp64 - low64;
1041                         high = tmpHigh - high;
1042
1043                         // Propagate carry
1044                         //
                             // Unsigned wraparound detects the borrow: a difference larger than the minuend means
                             // the subtraction wrapped.
1045                         if (low64 > tmp64)
1046                         {
1047                             high--;
1048                             if (high < tmpHigh)
1049                                 goto NoCarry;
1050                         }
1051                         else if (high <= tmpHigh)
1052                             goto NoCarry;
1053
1054                         // Carry the subtraction into the higher bits.
1055                         //
1056                         uint* number = (uint*)&bufNum;
1057                         uint cur = 3;
                             // Decrement word 3 and keep going upward for as long as the decremented word was zero.
1058                         do
1059                         {
1060                             Debug.Assert(cur < bufNum.Length);
1061                         } while (number[cur++]-- == 0);
1062                         Debug.Assert(hiProd < bufNum.Length);
                             // If the borrow cleared the top word and the value now fits in three words, low64/high
                             // already hold the complete result, so ScaleResult is skipped.
1063                         if (number[hiProd] == 0 && --hiProd <= 2)
1064                             goto ReturnResult;
1065                     }
1066                     else
1067                     {
1068                         // Signs the same, add.
1069                         //
1070                         low64 += tmp64;
1071                         high += tmpHigh;
1072
1073                         // Propagate carry
1074                         //
1075                         if (low64 < tmp64)
1076                         {
1077                             high++;
1078                             if (high > tmpHigh)
1079                                 goto NoCarry;
1080                         }
1081                         else if (high >= tmpHigh)
1082                             goto NoCarry;
1083
1084                         uint* number = (uint*)&bufNum;
                             // Ripple the carry upward from word 3. If it runs past the current top word,
                             // a new top word holding 1 is appended.
1085                         for (uint cur = 3; ++number[cur++] == 0;)
1086                         {
1087                             Debug.Assert(cur < bufNum.Length);
1088                             if (hiProd < cur)
1089                             {
1090                                 number[cur] = 1;
1091                                 hiProd = cur;
1092                                 break;
1093                             }
1094                         }
1095                     }
1096 NoCarry:
1097
                         // Write the low 96 bits back into the buffer and hand the wide value to ScaleResult;
                         // its return value is stored as the new scale in flags.
1098                     bufNum.Low64 = low64;
1099                     bufNum.U2 = high;
1100                     scale = ScaleResult(&bufNum, hiProd, (byte)(flags >> ScaleShift));
1101                     flags = (flags & ~ScaleMask) | ((uint)scale << ScaleShift);
1102                     low64 = bufNum.Low64;
1103                     high = bufNum.U2;
1104                     goto ReturnResult;
1105                 }
1106
1107 SignFlip:
1108                 {
1109                     // Got negative result.  Flip its sign.
                         // 96-bit two's-complement negate: invert high, negate low64, and carry into high
                         // when low64 is zero.
1110                     flags ^= SignMask;
1111                     high = ~high;
1112                     low64 = (ulong)-(long)low64;
1113                     if (low64 == 0)
1114                         high++;
1115                     goto ReturnResult;
1116                 }
1117
1118 AlignedScale:
1119                 {
1120                     // The addition carried above 96 bits.
1121                     // Divide the value by 10, dropping the scale factor.
1122                     //
1123                     if ((flags & ScaleMask) == 0)
1124                         Number.ThrowOverflowException(TypeCode.Decimal);
1125                     flags -= 1 << ScaleShift;
1126
                         // Put the carried-out bit back above the high word, then long-divide by 10 one
                         // 32-bit word at a time, feeding each remainder into the next lower word.
1127                     const uint den = 10;
1128                     ulong num = high + (1UL << 32);
1129                     high = (uint)(num / den);
1130                     num = ((num - high * den) << 32) + (low64 >> 32);
1131                     uint div = (uint)(num / den);
1132                     num = ((num - div * den) << 32) + (uint)low64;
1133                     low64 = div;
1134                     low64 <<= 32;
1135                     div = (uint)(num / den);
1136                     low64 += div;
                         // div now becomes the discarded decimal digit (final remainder).
1137                     div = (uint)num - div * den;
1138
1139                     // See if we need to round up.
1140                     //
                         // Round half to even: above 5 always rounds up, exactly 5 only when the result is odd.
1141                     if (div >= 5 && (div > 5 || (low64 & 1) != 0))
1142                     {
1143                         if (++low64 == 0)
1144                             high++;
1145                     }
1146                     goto ReturnResult;
1147                 }
1148
1149 AlignedAdd:
1150                 {
                         // Both operands share a scale here: low64/high is the left magnitude, d2 the right one.
1151                     ulong d1Low64 = low64;
1152                     uint d1High = high;
1153                     if (sign)
1154                     {
1155                         // Signs differ - subtract
1156                         //
1157                         low64 = d1Low64 - d2.Low64;
1158                         high = d1High - d2.High;
1159
1160                         // Propagate carry
1161                         //
                             // A borrow out of the high word means the right magnitude was larger, so the
                             // result is negated in SignFlip.
1162                         if (low64 > d1Low64)
1163                         {
1164                             high--;
1165                             if (high >= d1High)
1166                                 goto SignFlip;
1167                         }
1168                         else if (high > d1High)
1169                             goto SignFlip;
1170                     }
1171                     else
1172                     {
1173                         // Signs are the same - add
1174                         //
1175                         low64 = d1Low64 + d2.Low64;
1176                         high = d1High + d2.High;
1177
1178                         // Propagate carry
1179                         //
                             // A carry out of the high word means the sum needs more than 96 bits; AlignedScale
                             // trades one digit of scale for the extra bit.
1180                         if (low64 < d1Low64)
1181                         {
1182                             high++;
1183                             if (high <= d1High)
1184                                 goto AlignedScale;
1185                         }
1186                         else if (high < d1High)
1187                             goto AlignedScale;
1188                     }
1189                     goto ReturnResult;
1190                 }
1191
1192 ReturnResult:
                     // Single exit for every path that produced a result in flags/high/low64.
1193                 d1.uflags = flags;
1194                 d1.High = high;
1195                 d1.Low64 = low64;
1196                 return;
1197             }
1198
1199 #endregion
1200
/// <summary>
/// Converts a decimal to an OLE Automation style currency value: a signed 64-bit integer
/// holding the number with exactly four decimal places (similar to the OleAut32 API).
/// </summary>
/// <param name="pdecIn">Value to convert. It is rounded in place when it has more than four decimal places.</param>
/// <returns>The value multiplied by 10^4, as a long.</returns>
/// <exception cref="OverflowException">The scaled value does not fit in a signed 64-bit integer.</exception>
internal static long VarCyFromDec(ref DecCalc pdecIn)
{
    long value;

    // Number of decimal places beyond the four that currency keeps; -4 <= excess <= 24.
    int excess = pdecIn.Scale - 4;

    if (excess >= 0)
    {
        // Too many (or exactly four) decimal places: round the surplus away, banker's style.
        if (excess > 0)
            InternalRound(ref pdecIn, (uint)excess, MidpointRounding.ToEven);

        if (pdecIn.High != 0)
            goto ThrowOverflow;

        value = (long)pdecIn.Low64;
    }
    else
    {
        // Fewer than four decimal places: multiply the 64-bit magnitude by 10^(-excess).
        if (pdecIn.High != 0)
            goto ThrowOverflow;

        uint multiplier = s_powers10[-excess];

        ulong upper = UInt32x32To64(multiplier, pdecIn.Mid);
        if (upper > uint.MaxValue)
            goto ThrowOverflow;
        upper <<= 32;

        ulong total = UInt32x32To64(multiplier, pdecIn.Low) + upper;
        if (total < upper)      // wrapped past 64 bits
            goto ThrowOverflow;

        value = (long)total;
    }

    bool negative = pdecIn.IsNegative;

    // A magnitude with the top bit set only fits when it is exactly 2^63 and the value is negative.
    if (value < 0)
    {
        if (value != long.MinValue || !negative)
            goto ThrowOverflow;
    }

    return negative ? -value : value;

ThrowOverflow:
    throw new OverflowException(SR.Overflow_Currency);
}
1245
/// <summary>
/// Compares two decimals and returns a value in the style of IComparable.CompareTo:
/// negative when d1 is smaller, zero when equal, positive when d1 is larger.
/// </summary>
/// <param name="d1">Left operand.</param>
/// <param name="d2">Right operand.</param>
internal static int VarDecCmp(in decimal d1, in decimal d2)
{
    bool leftIsZero = (d1.Low | d1.Mid | d1.High) == 0;
    bool rightIsZero = (d2.Low | d2.Mid | d2.High) == 0;

    // Arithmetic shift of the flags word: 0 for a non-negative value, -1 for a negative one.
    int leftSign = d1.flags >> 31;
    int rightSign = d2.flags >> 31;

    // Zero compares by the sign of the other operand alone; the sign bit of a zero is ignored.
    if (rightIsZero)
        return leftIsZero ? 0 : leftSign | 1;
    if (leftIsZero)
        return -(rightSign | 1);

    // Different signs settle the comparison; equal signs need a magnitude comparison.
    int signDelta = leftSign - rightSign;
    return signDelta != 0 ? signDelta : VarDecCmpSub(in d1, in d2);
}
1265
/// <summary>
/// Compares the magnitudes of two decimals after bringing them to a common scale.
/// VarDecCmp calls this only once it has established that both operands are non-zero
/// and carry the same sign.
/// </summary>
/// <param name="d1">Left operand.</param>
/// <param name="d2">Right operand.</param>
/// <returns>-1, 0 or 1, already adjusted for the sign taken from d2's flags.</returns>
private static int VarDecCmpSub(in decimal d1, in decimal d2)
{
    int rightFlags = d2.flags;

    // Result to report when the left magnitude turns out larger: +1 for non-negative, -1 for negative.
    int outcome = (rightFlags >> 31) | 1;
    int scaleGap = rightFlags - d1.flags;

    ulong leftLow = d1.Low64;
    uint leftHigh = d1.High;

    ulong rightLow = d2.Low64;
    uint rightHigh = d2.High;

    if (scaleGap != 0)
    {
        scaleGap >>= ScaleShift;

        // Scale factors differ. The initial guess is that the right operand has the larger scale
        // (more decimal places), so the left one is the one to scale up.
        if (scaleGap < 0)
        {
            // Wrong guess: exchange the operands and invert the answer.
            scaleGap = -scaleGap;
            outcome = -outcome;

            ulong swapLow = leftLow;
            leftLow = rightLow;
            rightLow = swapLow;

            uint swapHigh = leftHigh;
            leftHigh = rightHigh;
            rightHigh = swapHigh;
        }

        // Multiply the left magnitude by 10^scaleGap, at most 10^9 per round.
        while (true)
        {
            uint power = scaleGap < MaxInt32Scale ? s_powers10[scaleGap] : TenToPowerNine;

            ulong lowProduct = UInt32x32To64((uint)leftLow, power);
            ulong carry = UInt32x32To64((uint)(leftLow >> 32), power) + (lowProduct >> 32);
            leftLow = (uint)lowProduct + (carry << 32);
            carry = (carry >> 32) + UInt32x32To64(leftHigh, power);

            // More than 96 significant bits after scaling: necessarily larger than the other operand.
            if (carry > uint.MaxValue)
                return outcome;
            leftHigh = (uint)carry;

            scaleGap -= MaxInt32Scale;
            if (scaleGap <= 0)
                break;
        }
    }

    // Same scale now: compare as 96-bit unsigned integers, high word first.
    if (leftHigh != rightHigh)
        return leftHigh < rightHigh ? -outcome : outcome;

    if (leftLow == rightLow)
        return 0;

    return leftLow < rightLow ? -outcome : outcome;
}
1333
1334             /// <summary>
1335             /// Decimal Multiply
1336             /// </summary>
1337             internal static unsafe void VarDecMul(ref DecCalc d1, ref DecCalc d2)
1338             {
1339                 int scale = (byte)(d1.uflags + d2.uflags >> ScaleShift);
1340
1341                 ulong tmp;
1342                 uint hiProd;
1343                 Buf24 bufProd;
1344                 _ = &bufProd; // workaround for CS0165
1345                 DebugPoison(ref bufProd);
1346
1347                 if ((d1.High | d1.Mid) == 0)
1348                 {
1349                     if ((d2.High | d2.Mid) == 0)
1350                     {
1351                         // Upper 64 bits are zero.
1352                         //
1353                         ulong low64 = UInt32x32To64(d1.Low, d2.Low);
1354                         if (scale > DEC_SCALE_MAX)
1355                         {
1356                             // Result scale is too big.  Divide result by power of 10 to reduce it.
1357                             // If the amount to divide by is > 19 the result is guaranteed
1358                             // less than 1/2.  [max value in 64 bits = 1.84E19]
1359                             //
1360                             if (scale > DEC_SCALE_MAX + MaxInt64Scale)
1361                                 goto ReturnZero;
1362
1363                             scale -= DEC_SCALE_MAX + 1;
1364                             ulong power = s_ulongPowers10[scale];
1365
1366                             // TODO: https://github.com/dotnet/coreclr/issues/3439
1367                             tmp = low64 / power;
1368                             ulong remainder = low64 - tmp * power;
1369                             low64 = tmp;
1370
1371                             // Round result.  See if remainder >= 1/2 of divisor.
1372                             // Divisor is a power of 10, so it is always even.
1373                             //
1374                             power >>= 1;
1375                             if (remainder >= power && (remainder > power || ((uint)low64 & 1) > 0))
1376                                 low64++;
1377
1378                             scale = DEC_SCALE_MAX;
1379                         }
1380                         d1.Low64 = low64;
1381                         d1.uflags = ((d2.uflags ^ d1.uflags) & SignMask) | ((uint)scale << ScaleShift);
1382                         return;
1383                     }
1384                     else
1385                     {
1386                         // Left value is 32-bit, result fits in 4 uints
1387                         tmp = UInt32x32To64(d1.Low, d2.Low);
1388                         bufProd.U0 = (uint)tmp;
1389
1390                         tmp = UInt32x32To64(d1.Low, d2.Mid) + (tmp >> 32);
1391                         bufProd.U1 = (uint)tmp;
1392                         tmp >>= 32;
1393
1394                         if (d2.High != 0)
1395                         {
1396                             tmp += UInt32x32To64(d1.Low, d2.High);
1397                             if (tmp > uint.MaxValue)
1398                             {
1399                                 bufProd.Mid64 = tmp;
1400                                 hiProd = 3;
1401                                 goto SkipScan;
1402                             }
1403                         }
1404                         bufProd.U2 = (uint)tmp;
1405                         hiProd = 2;
1406                     }
1407                 }
1408                 else if ((d2.High | d2.Mid) == 0)
1409                 {
1410                     // Right value is 32-bit, result fits in 4 uints
1411                     tmp = UInt32x32To64(d2.Low, d1.Low);
1412                     bufProd.U0 = (uint)tmp;
1413
1414                     tmp = UInt32x32To64(d2.Low, d1.Mid) + (tmp >> 32);
1415                     bufProd.U1 = (uint)tmp;
1416                     tmp >>= 32;
1417
1418                     if (d1.High != 0)
1419                     {
1420                         tmp += UInt32x32To64(d2.Low, d1.High);
1421                         if (tmp > uint.MaxValue)
1422                         {
1423                             bufProd.Mid64 = tmp;
1424                             hiProd = 3;
1425                             goto SkipScan;
1426                         }
1427                     }
1428                     bufProd.U2 = (uint)tmp;
1429                     hiProd = 2;
1430                 }
1431                 else
1432                 {
1433                     // Both operands have bits set in the upper 64 bits.
1434                     //
1435                     // Compute and accumulate the 9 partial products into a
1436                     // 192-bit (24-byte) result.
1437                     //
1438                     //        [l-h][l-m][l-l]      left high, middle, low
1439                     //         x    [r-h][r-m][r-l]      right high, middle, low
1440                     // ------------------------------
1441                     //
1442                     //             [0-h][0-l]      l-l * r-l
1443                     //        [1ah][1al]      l-l * r-m
1444                     //        [1bh][1bl]      l-m * r-l
1445                     //       [2ah][2al]          l-m * r-m
1446                     //       [2bh][2bl]          l-l * r-h
1447                     //       [2ch][2cl]          l-h * r-l
1448                     //      [3ah][3al]          l-m * r-h
1449                     //      [3bh][3bl]          l-h * r-m
1450                     // [4-h][4-l]              l-h * r-h
1451                     // ------------------------------
1452                     // [p-5][p-4][p-3][p-2][p-1][p-0]      prod[] array
1453                     //
1454
1455                     tmp = UInt32x32To64(d1.Low, d2.Low);
1456                     bufProd.U0 = (uint)tmp;
1457
1458                     ulong tmp2 = UInt32x32To64(d1.Low, d2.Mid) + (tmp >> 32);
1459
1460                     tmp = UInt32x32To64(d1.Mid, d2.Low);
1461                     tmp += tmp2; // this could generate carry
1462                     bufProd.U1 = (uint)tmp;
1463                     if (tmp < tmp2) // detect carry
1464                         tmp2 = (tmp >> 32) | (1UL << 32);
1465                     else
1466                         tmp2 = tmp >> 32;
1467
1468                     tmp = UInt32x32To64(d1.Mid, d2.Mid) + tmp2;
1469
1470                     if ((d1.High | d2.High) > 0)
1471                     {
1472                         // Highest 32 bits is non-zero.     Calculate 5 more partial products.
1473                         //
1474                         tmp2 = UInt32x32To64(d1.Low, d2.High);
1475                         tmp += tmp2; // this could generate carry
1476                         uint tmp3 = 0;
1477                         if (tmp < tmp2) // detect carry
1478                             tmp3 = 1;
1479
1480                         tmp2 = UInt32x32To64(d1.High, d2.Low);
1481                         tmp += tmp2; // this could generate carry
1482                         bufProd.U2 = (uint)tmp;
1483                         if (tmp < tmp2) // detect carry
1484                             tmp3++;
1485                         tmp2 = ((ulong)tmp3 << 32) | (tmp >> 32);
1486
1487                         tmp = UInt32x32To64(d1.Mid, d2.High);
1488                         tmp += tmp2; // this could generate carry
1489                         tmp3 = 0;
1490                         if (tmp < tmp2) // detect carry
1491                             tmp3 = 1;
1492
1493                         tmp2 = UInt32x32To64(d1.High, d2.Mid);
1494                         tmp += tmp2; // this could generate carry
1495                         bufProd.U3 = (uint)tmp;
1496                         if (tmp < tmp2) // detect carry
1497                             tmp3++;
1498                         tmp = ((ulong)tmp3 << 32) | (tmp >> 32);
1499
1500                         bufProd.High64 = UInt32x32To64(d1.High, d2.High) + tmp;
1501
1502                         hiProd = 5;
1503                     }
1504                     else
1505                     {
1506                         bufProd.Mid64 = tmp;
1507                         hiProd = 3;
1508                     }
1509                 }
1510
1511                 // Check for leading zero uints on the product
1512                 //
1513                 uint* product = (uint*)&bufProd;
1514                 while (product[(int)hiProd] == 0)
1515                 {
1516                     if (hiProd == 0)
1517                         goto ReturnZero;
1518                     hiProd--;
1519                 }
1520
1521 SkipScan:
1522                 if (hiProd > 2 || scale > DEC_SCALE_MAX)
1523                 {
1524                     scale = ScaleResult(&bufProd, hiProd, scale);
1525                 }
1526
1527                 d1.Low64 = bufProd.Low64;
1528                 d1.High = bufProd.U2;
1529                 d1.uflags = ((d2.uflags ^ d1.uflags) & SignMask) | ((uint)scale << ScaleShift);
1530                 return;
1531
1532 ReturnZero:
1533                 d1 = default;
1534             }
1535
1536             /// <summary>
1537             /// Convert float to Decimal
                 /// The input is rounded to a 7-digit integer mantissa which is then rescaled into <paramref name="result"/>.
1538             /// </summary>
                 /// <param name="input">Single-precision value to convert; a negative sign is moved into the result flags.</param>
                 /// <param name="result">Receives the converted value. It is zeroed first and stays zero when the input rounds to zero.</param>
                 /// <remarks>
                 /// When the exponent (relative to SNGBIAS) is above 96 the method calls
                 /// Number.ThrowOverflowException(TypeCode.Decimal).
                 /// </remarks>
1539             internal static void VarDecFromR4(float input, out DecCalc result)
1540             {
1541                 result = default;
1542
1543                 // The most we can scale by is 10^28, which is just slightly more
1544                 // than 2^93.  So a float with an exponent of -94 could just
1545                 // barely reach 0.5, but smaller exponents will always round to zero.
1546                 //
1547                 const uint SNGBIAS = 126;
1548                 int exp = (int)(GetExponent(input) - SNGBIAS);
1549                 if (exp < -94)
1550                     return; // result should be zeroed out
1551
                     // Exponent too large to convert: hand over to the shared overflow throw helper.
1552                 if (exp > 96)
1553                     Number.ThrowOverflowException(TypeCode.Decimal);
1554
                     // Convert the magnitude only; the sign is kept in flags and stored at the very end.
1555                 uint flags = 0;
1556                 if (input < 0)
1557                 {
1558                     input = -input;
1559                     flags = SignMask;
1560                 }
1561
1562                 // Round the input to a 7-digit integer.  The R4 format has
1563                 // only 7 digits of precision, and we want to keep garbage digits
1564                 // out of the Decimal we're making.
1565                 //
1566                 // Calculate max power of 10 input value could have by multiplying
1567                 // the exponent by log10(2).  Using scaled integer multiplication,
1568                 // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1569                 //
1570                 double dbl = input;
1571                 int power = 6 - ((exp * 19728) >> 16);
1572                 // power is between -22 and 35
1573
1574                 if (power >= 0)
1575                 {
1576                     // We have less than 7 digits, scale input up.
1577                     //
1578                     if (power > DEC_SCALE_MAX)
1579                         power = DEC_SCALE_MAX;
1580
1581                     dbl *= s_doublePowers10[power];
1582                 }
1583                 else
1584                 {
                         // power is only an estimate: with power == -1 and dbl already below 1E7
                         // the value needs no scaling, so it is left alone and power is reset to 0.
1585                     if (power != -1 || dbl >= 1E7)
1586                         dbl /= s_doublePowers10[-power];
1587                     else
1588                         power = 0; // didn't scale it
1589                 }
1590
1591                 Debug.Assert(dbl < 1E7);
                     // The estimate can leave the value one digit short of 7 digits; scale up once
                     // more as long as the scale limit allows it.
1592                 if (dbl < 1E6 && power < DEC_SCALE_MAX)
1593                 {
1594                     dbl *= 10;
1595                     power++;
1596                     Debug.Assert(dbl >= 1E6);
1597                 }
1598
1599                 // Round to integer
1600                 //
1601                 uint mant;
1602                 // with SSE4.1 support ROUNDSD can be used
1603                 if (X86.Sse41.IsSupported)
1604                     mant = (uint)(int)Math.Round(dbl);
1605                 else
1606                 {
                         // Manual round-half-to-even: truncate, then bump when the fraction is above 0.5,
                         // or exactly 0.5 while the truncated value is odd.
1607                     mant = (uint)(int)dbl;
1608                     dbl -= (int)mant;  // difference between input & integer
1609                     if (dbl > 0.5 || dbl == 0.5 && (mant & 1) != 0)
1610                         mant++;
1611                 }
1612
1613                 if (mant == 0)
1614                     return;  // result should be zeroed out
1615
1616                 if (power < 0)
1617                 {
1618                     // Add -power factors of 10, -power <= (29 - 7) = 22.
1619                     //
                         // This branch only multiplies, so the scale bits of flags stay zero.
1620                     power = -power;
1621                     if (power < 10)
1622                     {
                             // Single multiply of mant by s_powers10[power], stored in the low 64 bits.
1623                         result.Low64 = UInt32x32To64(mant, s_powers10[power]);
1624                     }
1625                     else
1626                     {
1627                         // Have a big power of 10.
1628                         //
1629                         if (power > 18)
1630                         {
                                 // Split the multiplier: s_powers10[power - 18] first, then TenToPowerEighteen.
                                 // UInt64x64To128 (defined elsewhere) is handed result by ref for the product.
1631                             ulong low64 = UInt32x32To64(mant, s_powers10[power - 18]);
1632                             UInt64x64To128(low64, TenToPowerEighteen, ref result);
1633                         }
1634                         else
1635                         {
                                 // Multiply by s_powers10[power - 9], then by TenToPowerNine one 32-bit half
                                 // at a time, carrying from Low into Mid and from Mid into High.
1636                             ulong low64 = UInt32x32To64(mant, s_powers10[power - 9]);
1637                             ulong hi64 = UInt32x32To64(TenToPowerNine, (uint)(low64 >> 32));
1638                             low64 = UInt32x32To64(TenToPowerNine, (uint)low64);
1639                             result.Low = (uint)low64;
1640                             hi64 += low64 >> 32;
1641                             result.Mid = (uint)hi64;
1642                             hi64 >>= 32;
1643                             result.High = (uint)hi64;
1644                         }
1645                     }
1646                 }
1647                 else
1648                 {
1649                     // Factor out powers of 10 to reduce the scale, if possible.
1650                     // The maximum number we could factor out would be 6.  This
1651                     // comes from the fact we have a 7-digit number, and the
1652                     // MSD must be non-zero -- but the lower 6 digits could be
1653                     // zero.  Note also the scale factor is never negative, so
1654                     // we can't scale by any more than the power we used to
1655                     // get the integer.
1656                     //
1657                     int lmax = power;
1658                     if (lmax > 6)
1659                         lmax = 6;
1660
                         // Cheap pre-check before dividing: a multiple of 10^4 is also a multiple of 2^4,
                         // so its low four bits must be zero.  The 100 and 10 steps below use the same
                         // idea with the low two bits and the low bit.
1661                     if ((mant & 0xF) == 0 && lmax >= 4)
1662                     {
1663                         const uint den = 10000;
1664                         uint div = mant / den;
1665                         if (mant == div * den)
1666                         {
1667                             mant = div;
1668                             power -= 4;
1669                             lmax -= 4;
1670                         }
1671                     }
1672
1673                     if ((mant & 3) == 0 && lmax >= 2)
1674                     {
1675                         const uint den = 100;
1676                         uint div = mant / den;
1677                         if (mant == div * den)
1678                         {
1679                             mant = div;
1680                             power -= 2;
1681                             lmax -= 2;
1682                         }
1683                     }
1684
1685                     if ((mant & 1) == 0 && lmax >= 1)
1686                     {
1687                         const uint den = 10;
1688                         uint div = mant / den;
1689                         if (mant == div * den)
1690                         {
1691                             mant = div;
1692                             power--;
1693                         }
1694                     }
1695
                         // Whatever power is left becomes the decimal scale; the mantissa fits in Low.
1696                     flags |= (uint)power << ScaleShift;
1697                     result.Low = mant;
1698                 }
1699
1700                 result.uflags = flags;
1701             }
1702
1703             /// <summary>
1704             /// Convert double to Decimal
                 /// The input is rounded to a 15-digit integer mantissa which is then rescaled into <paramref name="result"/>.
1705             /// </summary>
                 /// <param name="input">Double-precision value to convert; a negative sign is moved into the result flags.</param>
                 /// <param name="result">Receives the converted value. It is zeroed first and stays zero when the input rounds to zero.</param>
                 /// <remarks>
                 /// When the exponent (relative to DBLBIAS) is above 96 the method calls
                 /// Number.ThrowOverflowException(TypeCode.Decimal).
                 /// </remarks>
1706             internal static void VarDecFromR8(double input, out DecCalc result)
1707             {
1708                 result = default;
1709
1710                 // The most we can scale by is 10^28, which is just slightly more
1711                 // than 2^93.  So a double with an exponent of -94 could just
1712                 // barely reach 0.5, but smaller exponents will always round to zero.
1713                 //
1714                 const uint DBLBIAS = 1022;
1715                 int exp = (int)(GetExponent(input) - DBLBIAS);
1716                 if (exp < -94)
1717                     return; // result should be zeroed out
1718
                     // Exponent too large to convert: hand over to the shared overflow throw helper.
1719                 if (exp > 96)
1720                     Number.ThrowOverflowException(TypeCode.Decimal);
1721
                     // Convert the magnitude only; the sign is kept in flags and stored at the very end.
1722                 uint flags = 0;
1723                 if (input < 0)
1724                 {
1725                     input = -input;
1726                     flags = SignMask;
1727                 }
1728
1729                 // Round the input to a 15-digit integer.  The R8 format has
1730                 // only 15 digits of precision, and we want to keep garbage digits
1731                 // out of the Decimal we're making.
1732                 //
1733                 // Calculate max power of 10 input value could have by multiplying
1734                 // the exponent by log10(2).  Using scaled integer multiplication,
1735                 // log10(2) * 2 ^ 16 = .30103 * 65536 = 19728.3.
1736                 //
1737                 double dbl = input;
1738                 int power = 14 - ((exp * 19728) >> 16);
1739                 // power is between -14 and 43
1740
1741                 if (power >= 0)
1742                 {
1743                     // We have less than 15 digits, scale input up.
1744                     //
1745                     if (power > DEC_SCALE_MAX)
1746                         power = DEC_SCALE_MAX;
1747
1748                     dbl *= s_doublePowers10[power];
1749                 }
1750                 else
1751                 {
                         // power is only an estimate: with power == -1 and dbl already below 1E15
                         // the value needs no scaling, so it is left alone and power is reset to 0.
1752                     if (power != -1 || dbl >= 1E15)
1753                         dbl /= s_doublePowers10[-power];
1754                     else
1755                         power = 0; // didn't scale it
1756                 }
1757
1758                 Debug.Assert(dbl < 1E15);
                     // The estimate can leave the value one digit short of 15 digits; scale up once
                     // more as long as the scale limit allows it.
1759                 if (dbl < 1E14 && power < DEC_SCALE_MAX)
1760                 {
1761                     dbl *= 10;
1762                     power++;
1763                     Debug.Assert(dbl >= 1E14);
1764                 }
1765
1766                 // Round to int64
1767                 //
1768                 ulong mant;
1769                 // with SSE4.1 support ROUNDSD can be used
1770                 if (X86.Sse41.IsSupported)
1771                     mant = (ulong)(long)Math.Round(dbl);
1772                 else
1773                 {
                         // Manual round-half-to-even: truncate, then bump when the fraction is above 0.5,
                         // or exactly 0.5 while the truncated value is odd.
1774                     mant = (ulong)(long)dbl;
1775                     dbl -= (long)mant;  // difference between input & integer
1776                     if (dbl > 0.5 || dbl == 0.5 && (mant & 1) != 0)
1777                         mant++;
1778                 }
1779
1780                 if (mant == 0)
1781                     return;  // result should be zeroed out
1782
1783                 if (power < 0)
1784                 {
1785                     // Add -power factors of 10, -power <= (29 - 15) = 14.
1786                     //
                         // This branch only multiplies, so the scale bits of flags stay zero.
1787                     power = -power;
1788                     if (power < 10)
1789                     {
                             // mant can be wider than 32 bits, so each 32-bit half is multiplied by the
                             // power of ten separately and the carry is propagated Low -> Mid -> High.
1790                         var pow10 = s_powers10[power];
1791                         ulong low64 = UInt32x32To64((uint)mant, pow10);
1792                         ulong hi64 = UInt32x32To64((uint)(mant >> 32), pow10);
1793                         result.Low = (uint)low64;
1794                         hi64 += low64 >> 32;
1795                         result.Mid = (uint)hi64;
1796                         hi64 >>= 32;
1797                         result.High = (uint)hi64;
1798                     }
1799                     else
1800                     {
1801                         // Have a big power of 10.
1802                         //
                             // NOTE(review): the "power - 1" index suggests s_ulongPowers10 starts at 10^1 --
                             // confirm against the table definition.  UInt64x64To128 (defined elsewhere) is
                             // handed result by ref for the product.
1803                         Debug.Assert(power <= 14);
1804                         UInt64x64To128(mant, s_ulongPowers10[power - 1], ref result);
1805                     }
1806                 }
1807                 else
1808                 {
1809                     // Factor out powers of 10 to reduce the scale, if possible.
1810                     // The maximum number we could factor out would be 14.  This
1811                     // comes from the fact we have a 15-digit number, and the
1812                     // MSD must be non-zero -- but the lower 14 digits could be
1813                     // zero.  Note also the scale factor is never negative, so
1814                     // we can't scale by any more than the power we used to
1815                     // get the integer.
1816                     //
1817                     int lmax = power;
1818                     if (lmax > 14)
1819                         lmax = 14;
1820
                         // Cheap pre-check before dividing: a multiple of 10^8 is also a multiple of 2^8,
                         // so its low byte must be zero.  The later steps use the same idea with the low
                         // four bits, two bits and one bit.
                         //
                         // The exactness test compares only the low 32 bits.  That is enough because
                         // mant - div * den is a remainder in [0, den) and every den used here is below 2^32.
1821                     if ((byte)mant == 0 && lmax >= 8)
1822                     {
1823                         const uint den = 100000000;
1824                         ulong div = mant / den;
1825                         if ((uint)mant == (uint)(div * den))
1826                         {
1827                             mant = div;
1828                             power -= 8;
1829                             lmax -= 8;
1830                         }
1831                     }
1832
1833                     if (((uint)mant & 0xF) == 0 && lmax >= 4)
1834                     {
1835                         const uint den = 10000;
1836                         ulong div = mant / den;
1837                         if ((uint)mant == (uint)(div * den))
1838                         {
1839                             mant = div;
1840                             power -= 4;
1841                             lmax -= 4;
1842                         }
1843                     }
1844
1845                     if (((uint)mant & 3) == 0 && lmax >= 2)
1846                     {
1847                         const uint den = 100;
1848                         ulong div = mant / den;
1849                         if ((uint)mant == (uint)(div * den))
1850                         {
1851                             mant = div;
1852                             power -= 2;
1853                             lmax -= 2;
1854                         }
1855                     }
1856
1857                     if (((uint)mant & 1) == 0 && lmax >= 1)
1858                     {
1859                         const uint den = 10;
1860                         ulong div = mant / den;
1861                         if ((uint)mant == (uint)(div * den))
1862                         {
1863                             mant = div;
1864                             power--;
1865                         }
1866                     }
1867
                         // Whatever power is left becomes the decimal scale; the mantissa fits in Low64.
1868                     flags |= (uint)power << ScaleShift;
1869                     result.Low64 = mant;
1870                 }
1871
1872                 result.uflags = flags;
1873             }
1874
1875             /// <summary>
1876             /// Converts a Decimal to single precision.
1877             /// The value is first converted to double by VarR8FromDec and then narrowed to float.
1878             /// </summary>
1879             internal static float VarR4FromDec(in decimal value) =>
1880                 // Reuse the double conversion; the explicit cast performs the final narrowing.
1881                 (float)VarR8FromDec(in value);
1882
1883             /// <summary>
1884             /// Converts a Decimal to double: the magnitude (Low64 plus High * 2^64) is formed as a
1885             /// double, divided by s_doublePowers10[Scale], and negated when the value is negative.
1886             /// </summary>
1887             internal static double VarR8FromDec(in decimal value)
1888             {
1889                 // 2^64 as a double (oleaut32 defines it as ds2to64 = DEFDS(0, 0, DBLBIAS + 65, 0)).
1890                 const double twoTo64 = 1.8446744073709552e+019;
1891
1892                 // Combine the low 64 bits with the high 32 bits weighted by 2^64.
1893                 double magnitude = (double)value.Low64 + (double)value.High * twoTo64;
1894
1895                 // Apply the scale, then the sign.
1896                 magnitude /= s_doublePowers10[value.Scale];
1897                 return value.IsNegative ? -magnitude : magnitude;
1898             }
1899
1900             /// <summary>Hash of a decimal built from its flags and 96-bit magnitude (0 for any zero value).</summary>
1901             internal static int GetHashCode(in decimal d)
1902             {
1903                 uint low = d.Low, middle = d.Mid, high = d.High;
1904                 if ((low | middle | high) == 0)
1905                     return 0; // zero magnitude hashes to 0 whatever its sign or scale
1906                 uint bits = (uint)d.flags;
1907                 if ((bits & ScaleMask) != 0 && (low & 1) == 0)
1908                 {
1909                     // Even magnitude with a non-zero scale: hash what Unscale leaves in low/upper/scale.
1910                     int scale = (byte)(bits >> ScaleShift);
1911                     ulong upper = ((ulong)high << 32) | middle;
1912                     Unscale(ref low, ref upper, ref scale);
1913                     bits = (bits & ~ScaleMask) | (uint)scale << ScaleShift;
1914                     return (int)(bits ^ (uint)(upper >> 32) ^ (uint)upper ^ low);
1915                 }
1916                 return (int)(bits ^ high ^ middle ^ low);
1917             }
1918
1919             /// <summary>
1920             /// Divides two decimal values.
1921             /// On return, d1 contains the result of the operation.
1922             /// </summary>
                 /// <param name="d1">Dividend on entry; overwritten with the quotient (value, sign and scale).</param>
                 /// <param name="d2">Divisor; only read by this method.</param>
                 /// <exception cref="DivideByZeroException">The divisor's Low, Mid and High words are all zero.</exception>
                 /// <remarks>
                 /// When the quotient cannot be scaled without overflowing 96 bits the method calls
                 /// Number.ThrowOverflowException(TypeCode.Decimal).
                 /// </remarks>
1923             internal static unsafe void VarDecDiv(ref DecCalc d1, ref DecCalc d2)
1924             {
1925                 Buf12 bufQuo;
1926                 _ = &bufQuo; // workaround for CS0165
1927                 DebugPoison(ref bufQuo);
1928
1929                 uint power;
1930                 int curScale;
1931
                     // Natural scale of the quotient: dividend scale minus divisor scale.  The subtraction
                     // is applied to the whole flags words before the shift; the sbyte cast then keeps the
                     // low 8 bits of the shifted difference as a signed value, so scale can be negative here.
1932                 int scale = (sbyte)(d1.uflags - d2.uflags >> ScaleShift);
1933                 bool unscale = false;
1934                 uint tmp;
1935
1936                 if ((d2.High | d2.Mid) == 0)
1937                 {
1938                     // Divisor is only 32 bits.  Easy divide.
1939                     //
1940                     uint den = d2.Low;
1941                     if (den == 0)
1942                         throw new DivideByZeroException();
1943
                         // Load the 96-bit dividend into the quotient buffer; Div96By32 is handed the
                         // buffer by ref and its return value is used as the remainder.
1944                     bufQuo.Low64 = d1.Low64;
1945                     bufQuo.U2 = d1.High;
1946                     uint remainder = Div96By32(ref bufQuo, den);
1947
1948                     for (;;)
1949                     {
1950                         if (remainder == 0)
1951                         {
                                 // Exact quotient.  If the scale is still negative, keep scaling up by at
                                 // most 10^9 per pass until it is no longer negative.
1952                             if (scale < 0)
1953                             {
1954                                 curScale = Math.Min(9, -scale);
1955                                 goto HaveScale;
1956                             }
1957                             break;
1958                         }
1959
1960                         // We need to unscale if and only if we have a non-zero remainder
1961                         unscale = true;
1962
1963                         // We have computed a quotient based on the natural scale
1964                         // ( <dividend scale> - <divisor scale> ).  We have a non-zero
1965                         // remainder, so now we should increase the scale if possible to
1966                         // include more quotient bits.
1967                         //
1968                         // If it doesn't cause overflow, we'll loop scaling by 10^9 and
1969                         // computing more quotient bits as long as the remainder stays
1970                         // non-zero.  If scaling by that much would cause overflow, we'll
1971                         // drop out of the loop and scale by as much as we can.
1972                         //
1973                         // Scaling by 10^9 will overflow if bufQuo[2].bufQuo[1] >= 2^32 / 10^9
1974                         // = 4.294 967 296.  So the upper limit is bufQuo[2] == 4 and
1975                         // bufQuo[1] == 0.294 967 296 * 2^32 = 1,266,874,889.7+.  Since
1976                         // quotient bits in bufQuo[0] could be all 1's, then 1,266,874,888
1977                         // is the largest value in bufQuo[1] (when bufQuo[2] == 4) that is
1978                         // assured not to overflow.
1979                         //
1980                         if (scale == DEC_SCALE_MAX || (curScale = SearchScale(ref bufQuo, scale)) == 0)
1981                         {
1982                             // No more scaling to be done, but remainder is non-zero.
1983                             // Round quotient.
1984                             //
                                 // Compare 2 * remainder with the divisor.  "tmp < remainder" means the doubling
                                 // wrapped around 32 bits, so 2 * remainder is certainly larger than den.
                                 // Otherwise round up when it is larger than den, or equal to den with an odd
                                 // quotient (ties go to the even quotient).
1985                             tmp = remainder << 1;
1986                             if (tmp < remainder || tmp >= den && (tmp > den || (bufQuo.U0 & 1) != 0))
1987                                 goto RoundUp;
1988                             break;
1989                         }
1990
1991                         HaveScale:
1992                         power = s_powers10[curScale];
1993                         scale += curScale;
1994
1995                         if (IncreaseScale(ref bufQuo, power) != 0)
1996                             goto ThrowOverflow;
1997
                             // Scale the remainder by the same power and divide again; the new partial
                             // quotient is added to the scaled quotient below.
1998                         ulong num = UInt32x32To64(remainder, power);
1999                         // TODO: https://github.com/dotnet/coreclr/issues/3439
2000                         uint div = (uint)(num / den);
2001                         remainder = (uint)num - div * den;
2002
                             // A false return from Add32To96 is treated as overflow of the 96-bit quotient:
                             // OverflowUnscale produces the final scale and the loop stops.
2003                         if (!Add32To96(ref bufQuo, div))
2004                         {
2005                             scale = OverflowUnscale(ref bufQuo, scale, remainder != 0);
2006                             break;
2007                         }
2008                     } // for (;;)
2009                 }
2010                 else
2011                 {
2012                     // Divisor has bits set in the upper 64 bits.
2013                     //
2014                     // Divisor must be fully normalized (shifted so bit 31 of the most
2015                     // significant uint is 1).  Locate the MSB so we know how much to
2016                     // normalize by.  The dividend will be shifted by the same amount so
2017                     // the quotient is not changed.
2018                     //
2019                     tmp = d2.High;
2020                     if (tmp == 0)
2021                         tmp = d2.Mid;
2022
                         // Note: curScale is reused here as a bit-shift count, not a power of ten.
2023                     curScale = BitOperations.LeadingZeroCount(tmp);
2024
2025                     // Shift both dividend and divisor left by curScale.
2026                     //
2027                     Buf16 bufRem;
2028                     _ = &bufRem; // workaround for CS0165
2029                     DebugPoison(ref bufRem);
2030
                         // Low64 takes (Mid:Low) << curScale.  High64 takes (High:Mid) >> (32 - curScale),
                         // i.e. the bits of the shifted 96-bit dividend that land at bit 64 and above.
2031                     bufRem.Low64 = d1.Low64 << curScale;
2032                     bufRem.High64 = (d1.Mid + ((ulong)d1.High << 32)) >> (32 - curScale);
2033
2034                     ulong divisor = d2.Low64 << curScale;
2035
2036                     if (d2.High == 0)
2037                     {
2038                         // Have a 64-bit divisor in divisor.  The remainder
2039                         // (currently 96 bits spread over 4 uints) will be < divisor.
2040                         //
                             // Two steps, each storing one 32-bit quotient word: first over the words
                             // starting at bufRem.U1, then over the words starting at bufRem.U0.
2041                         bufQuo.U2 = 0;
2042                         bufQuo.U1 = Div96By64(ref *(Buf12*)&bufRem.U1, divisor);
2043                         bufQuo.U0 = Div96By64(ref *(Buf12*)&bufRem, divisor);
2044
2045                         for (;;)
2046                         {
2047                             if (bufRem.Low64 == 0)
2048                             {
                                     // Exact quotient.  If the scale is still negative, keep scaling up by
                                     // at most 10^9 per pass until it is no longer negative.
2049                                 if (scale < 0)
2050                                 {
2051                                     curScale = Math.Min(9, -scale);
2052                                     goto HaveScale64;
2053                                 }
2054                                 break;
2055                             }
2056
2057                             // We need to unscale if and only if we have a non-zero remainder
2058                             unscale = true;
2059
2060                             // Remainder is non-zero.  Scale up quotient and remainder by
2061                             // powers of 10 so we can compute more significant bits.
2062                             //
2063                             if (scale == DEC_SCALE_MAX || (curScale = SearchScale(ref bufQuo, scale)) == 0)
2064                             {
2065                                 // No more scaling to be done, but remainder is non-zero.
2066                                 // Round quotient.
2067                                 //
                                     // Top bit set means doubling the remainder would not fit in 64 bits, so
                                     // it exceeds the divisor.  Otherwise double it and round up when it is
                                     // larger than the divisor, or equal with an odd quotient.
2068                                 ulong tmp64 = bufRem.Low64;
2069                                 if ((long)tmp64 < 0 || (tmp64 <<= 1) > divisor ||
2070                                   (tmp64 == divisor && (bufQuo.U0 & 1) != 0))
2071                                     goto RoundUp;
2072                                 break;
2073                             }
2074
2075                             HaveScale64:
2076                             power = s_powers10[curScale];
2077                             scale += curScale;
2078
2079                             if (IncreaseScale(ref bufQuo, power) != 0)
2080                                 goto ThrowOverflow;
2081
                                 // Scale the remainder by the same power, divide again, and add the new
                                 // partial quotient; a false return from Add32To96 is treated as overflow.
2082                             IncreaseScale64(ref *(Buf12*)&bufRem, power);
2083                             tmp = Div96By64(ref *(Buf12*)&bufRem, divisor);
2084                             if (!Add32To96(ref bufQuo, tmp))
2085                             {
2086                                 scale = OverflowUnscale(ref bufQuo, scale, bufRem.Low64 != 0);
2087                                 break;
2088                             }
2089                         } // for (;;)
2090                     }
2091                     else
2092                     {
2093                         // Have a 96-bit divisor in bufDivisor.
2094                         //
2095                         // Start by finishing the shift left by curScale.
2096                         //
2097                         Buf12 bufDivisor;
2098                         _ = &bufDivisor; // workaround for CS0165
2099                         DebugPoison(ref bufDivisor);
2100
2101                         bufDivisor.Low64 = divisor;
2102                         bufDivisor.U2 = (uint)((d2.Mid + ((ulong)d2.High << 32)) >> (32 - curScale));
2103
2104                         // The remainder (currently 96 bits spread over 4 uints) will be < divisor.
2105                         //
2106                         bufQuo.Low64 = Div128By96(ref bufRem, ref bufDivisor);
2107                         bufQuo.U2 = 0;
2108
2109                         for (;;)
2110                         {
2111                             if ((bufRem.Low64 | bufRem.U2) == 0)
2112                             {
                                     // Exact quotient.  If the scale is still negative, keep scaling up by
                                     // at most 10^9 per pass until it is no longer negative.
2113                                 if (scale < 0)
2114                                 {
2115                                     curScale = Math.Min(9, -scale);
2116                                     goto HaveScale96;
2117                                 }
2118                                 break;
2119                             }
2120
2121                             // We need to unscale if and only if we have a non-zero remainder
2122                             unscale = true;
2123
2124                             // Remainder is non-zero.  Scale up quotient and remainder by
2125                             // powers of 10 so we can compute more significant bits.
2126                             //
2127                             if (scale == DEC_SCALE_MAX || (curScale = SearchScale(ref bufQuo, scale)) == 0)
2128                             {
2129                                 // No more scaling to be done, but remainder is non-zero.
2130                                 // Round quotient.
2131                                 //
                                     // Top bit of the 96-bit remainder set: doubling it would not fit in
                                     // 96 bits, so it exceeds the divisor.
2132                                 if ((int)bufRem.U2 < 0)
2133                                 {
2134                                     goto RoundUp;
2135                                 }
2136
                                     // Double the 96-bit remainder, carrying the top bit of U1 into U2.
2137                                 tmp = bufRem.U1 >> 31;
2138                                 bufRem.Low64 <<= 1;
2139                                 bufRem.U2 = (bufRem.U2 << 1) + tmp;
2140
                                     // Round up when the doubled remainder is larger than the divisor, or
                                     // equal to it with an odd quotient.
2141                                 if (bufRem.U2 > bufDivisor.U2 || bufRem.U2 == bufDivisor.U2 &&
2142                                   (bufRem.Low64 > bufDivisor.Low64 || bufRem.Low64 == bufDivisor.Low64 &&
2143                                   (bufQuo.U0 & 1) != 0))
2144                                     goto RoundUp;
2145                                 break;
2146                             }
2147
2148                             HaveScale96:
2149                             power = s_powers10[curScale];
2150                             scale += curScale;
2151
2152                             if (IncreaseScale(ref bufQuo, power) != 0)
2153                                 goto ThrowOverflow;
2154
                                 // Scale the low 96 bits of the remainder; the value IncreaseScale returns
                                 // is stored as the top word (U3) of the 128-bit dividend for the next step.
2155                             bufRem.U3 = IncreaseScale(ref *(Buf12*)&bufRem, power);
2156                             tmp = Div128By96(ref bufRem, ref bufDivisor);
2157                             if (!Add32To96(ref bufQuo, tmp))
2158                             {
2159                                 scale = OverflowUnscale(ref bufQuo, scale, (bufRem.Low64 | bufRem.High64) != 0);
2160                                 break;
2161                             }
2162                         } // for (;;)
2163                     }
2164                 }
2165
2166 Unscale:
                     // unscale is set only after a non-zero remainder was seen.  In that case the
                     // quotient goes through Unscale (defined elsewhere), which receives low, high64
                     // and scale by ref; otherwise it is stored unchanged.
2167                 if (unscale)
2168                 {
2169                     uint low = bufQuo.U0;
2170                     ulong high64 = bufQuo.High64;
2171                     Unscale(ref low, ref high64, ref scale);
2172                     d1.Low = low;
2173                     d1.Mid = (uint)high64;
2174                     d1.High = (uint)(high64 >> 32);
2175                 }
2176                 else
2177                 {
2178                     d1.Low64 = bufQuo.Low64;
2179                     d1.High = bufQuo.U2;
2180                 }
2181
                     // Result sign is the XOR of the operand sign bits; the final scale goes into the scale field.
2182                 d1.uflags = ((d1.uflags ^ d2.uflags) & SignMask) | ((uint)scale << ScaleShift);
2183                 return;
2184
2185 RoundUp:
2186                 {
                         // Add one to the 96-bit quotient (Low64 first, U2 on carry).  If all 96 bits
                         // wrap to zero, OverflowUnscale is asked for the corrected scale.
2187                     if (++bufQuo.Low64 == 0 && ++bufQuo.U2 == 0)
2188                     {
2189                         scale = OverflowUnscale(ref bufQuo, scale, true);
2190                     }
2191                     goto Unscale;
2192                 }
2193
2194 ThrowOverflow:
                     // Reached when IncreaseScale reports a non-zero overflow word for the quotient.
2195                 Number.ThrowOverflowException(TypeCode.Decimal);
2196             }
2197
2198             /// <summary>
2199             /// Computes the remainder between two decimals.
2200             /// On return, d1 contains the result of the operation and d2 is trashed.
2201             /// </summary>
2202             internal static void VarDecMod(ref DecCalc d1, ref DecCalc d2)
2203             {
2204                 if ((d2.ulo | d2.umid | d2.uhi) == 0)
2205                     throw new DivideByZeroException();
2206
2207                 if ((d1.ulo | d1.umid | d1.uhi) == 0)
2208                     return;
2209
2210                 // In the operation x % y the sign of y does not matter. Result will have the sign of x.
2211                 d2.uflags = (d2.uflags & ~SignMask) | (d1.uflags & SignMask);
2212
2213                 int cmp = VarDecCmpSub(in Unsafe.As<DecCalc, decimal>(ref d1), in Unsafe.As<DecCalc, decimal>(ref d2));
2214                 if (cmp == 0)
2215                 {
2216                     d1.ulo = 0;
2217                     d1.umid = 0;
2218                     d1.uhi = 0;
2219                     if (d2.uflags > d1.uflags)
2220                         d1.uflags = d2.uflags;
2221                     return;
2222                 }
2223                 if ((cmp ^ (int)(d1.uflags & SignMask)) < 0)
2224                     return;
2225
2226                 // The divisor is smaller than the dividend and both are non-zero. Calculate the integer remainder using the larger scaling factor. 
2227
2228                 int scale = (sbyte)(d1.uflags - d2.uflags >> ScaleShift);
2229                 if (scale > 0)
2230                 {
2231                     // Divisor scale can always be increased to dividend scale for remainder calculation.
2232                     do
2233                     {
2234                         uint power = scale >= MaxInt32Scale ? TenToPowerNine : s_powers10[scale];
2235                         ulong tmp = UInt32x32To64(d2.Low, power);
2236                         d2.Low = (uint)tmp;
2237                         tmp >>= 32;
2238                         tmp += (d2.Mid + ((ulong)d2.High << 32)) * power;
2239                         d2.Mid = (uint)tmp;
2240                         d2.High = (uint)(tmp >> 32);
2241                     } while ((scale -= MaxInt32Scale) > 0);
2242                     scale = 0;
2243                 }
2244
2245                 do
2246                 {
2247                     if (scale < 0)
2248                     {
2249                         d1.uflags = d2.uflags;
2250                         // Try to scale up dividend to match divisor.
2251                         Buf12 bufQuo;
2252                         unsafe { _ = &bufQuo; } // workaround for CS0165
2253                         DebugPoison(ref bufQuo);
2254
2255                         bufQuo.Low64 = d1.Low64;
2256                         bufQuo.U2 = d1.High;
2257                         do
2258                         {
2259                             int iCurScale = SearchScale(ref bufQuo, DEC_SCALE_MAX + scale);
2260                             if (iCurScale == 0)
2261                                 break;
2262                             uint power = iCurScale >= MaxInt32Scale ? TenToPowerNine : s_powers10[iCurScale];
2263                             scale += iCurScale;
2264                             ulong tmp = UInt32x32To64(bufQuo.U0, power);
2265                             bufQuo.U0 = (uint)tmp;
2266                             tmp >>= 32;
2267                             bufQuo.High64 = tmp + bufQuo.High64 * power;
2268                             if (power != TenToPowerNine)
2269                                 break;
2270                         }
2271                         while (scale < 0);
2272                         d1.Low64 = bufQuo.Low64;
2273                         d1.High = bufQuo.U2;
2274                     }
2275
2276                     if (d1.High == 0)
2277                     {
2278                         Debug.Assert(d2.High == 0);
2279                         Debug.Assert(scale == 0);
2280                         d1.Low64 %= d2.Low64;
2281                         return;
2282                     }
2283                     else if ((d2.High | d2.Mid) == 0)
2284                     {
2285                         uint den = d2.Low;
2286                         ulong tmp = ((ulong)d1.High << 32) | d1.Mid;
2287                         tmp = ((tmp % den) << 32) | d1.Low;
2288                         d1.Low64 = tmp % den;
2289                         d1.High = 0;
2290                     }
2291                     else
2292                     {
2293                         VarDecModFull(ref d1, ref d2, scale);
2294                         return;
2295                     }
2296                 } while (scale < 0);
2297             }
2298
2299             private static unsafe void VarDecModFull(ref DecCalc d1, ref DecCalc d2, int scale)
2300             {
2301                 // Divisor has bits set in the upper 64 bits.
2302                 //
2303                 // Divisor must be fully normalized (shifted so bit 31 of the most significant uint is 1). 
2304                 // Locate the MSB so we know how much to normalize by. 
2305                 // The dividend will be shifted by the same amount so the quotient is not changed.
2306                 //
2307                 uint tmp = d2.High;
2308                 if (tmp == 0)
2309                     tmp = d2.Mid;
2310                 int shift = BitOperations.LeadingZeroCount(tmp);
2311
2312                 Buf28 b;
2313                 _ = &b; // workaround for CS0165
2314                 DebugPoison(ref b);
2315
2316                 b.Buf24.Low64 = d1.Low64 << shift;
2317                 b.Buf24.Mid64 = (d1.Mid + ((ulong)d1.High << 32)) >> (32 - shift);
2318
2319                 // The dividend might need to be scaled up to 221 significant bits.
2320                 // Maximum scaling is required when the divisor is 2^64 with scale 28 and is left shifted 31 bits
2321                 // and the dividend is decimal.MaxValue: (2^96 - 1) * 10^28 << 31 = 221 bits.
2322                 uint high = 3;
2323                 while (scale < 0)
2324                 {
2325                     uint power = scale <= -MaxInt32Scale ? TenToPowerNine : s_powers10[-scale];
2326                     uint* buf = (uint*)&b;
2327                     ulong tmp64 = UInt32x32To64(b.Buf24.U0, power);
2328                     b.Buf24.U0 = (uint)tmp64;
2329                     for (int i = 1; i <= high; i++)
2330                     {
2331                         tmp64 >>= 32;
2332                         tmp64 += UInt32x32To64(buf[i], power);
2333                         buf[i] = (uint)tmp64;
2334                     }
2335                     // The high bit of the dividend must not be set.
2336                     if (tmp64 > int.MaxValue)
2337                     {
2338                         Debug.Assert(high + 1 < b.Length);
2339                         buf[++high] = (uint)(tmp64 >> 32);
2340                     }
2341
2342                     scale += MaxInt32Scale;
2343                 }
2344
2345                 if (d2.High == 0)
2346                 {
2347                     ulong divisor = d2.Low64 << shift;
2348                     switch (high)
2349                     {
2350                         case 6:
2351                             Div96By64(ref *(Buf12*)&b.Buf24.U4, divisor);
2352                             goto case 5;
2353                         case 5:
2354                             Div96By64(ref *(Buf12*)&b.Buf24.U3, divisor);
2355                             goto case 4;
2356                         case 4:
2357                             Div96By64(ref *(Buf12*)&b.Buf24.U2, divisor);
2358                             break;
2359                     }
2360                     Div96By64(ref *(Buf12*)&b.Buf24.U1, divisor);
2361                     Div96By64(ref *(Buf12*)&b, divisor);
2362
2363                     d1.Low64 = b.Buf24.Low64 >> shift;
2364                     d1.High = 0;
2365                 }
2366                 else
2367                 {
2368                     Buf12 bufDivisor;
2369                     _ = &bufDivisor; // workaround for CS0165
2370                     DebugPoison(ref bufDivisor);
2371
2372                     bufDivisor.Low64 = d2.Low64 << shift;
2373                     bufDivisor.U2 = (uint)((d2.Mid + ((ulong)d2.High << 32)) >> (32 - shift));
2374
2375                     switch (high)
2376                     {
2377                         case 6:
2378                             Div128By96(ref *(Buf16*)&b.Buf24.U3, ref bufDivisor);
2379                             goto case 5;
2380                         case 5:
2381                             Div128By96(ref *(Buf16*)&b.Buf24.U2, ref bufDivisor);
2382                             goto case 4;
2383                         case 4:
2384                             Div128By96(ref *(Buf16*)&b.Buf24.U1, ref bufDivisor);
2385                             break;
2386                     }
2387                     Div128By96(ref *(Buf16*)&b, ref bufDivisor);
2388
2389                     d1.Low64 = (b.Buf24.Low64 >> shift) + ((ulong)b.Buf24.U2 << (32 - shift) << 32);
2390                     d1.High = b.Buf24.U2 >> shift;
2391                 }
2392             }
2393
2394             // Does an in-place round by the specified scale
2395             internal static void InternalRound(ref DecCalc d, uint scale, MidpointRounding mode)
2396             {
2397                 // the scale becomes the desired decimal count
2398                 d.uflags -= scale << ScaleShift;
2399
2400                 uint remainder, sticky = 0, power;
2401                 // First divide the value by constant 10^9 up to three times
2402                 while (scale >= MaxInt32Scale)
2403                 {
2404                     scale -= MaxInt32Scale;
2405
2406                     const uint divisor = TenToPowerNine;
2407                     uint n = d.uhi;
2408                     if (n == 0)
2409                     {
2410                         ulong tmp = d.Low64;
2411                         ulong div = tmp / divisor;
2412                         d.Low64 = div;
2413                         remainder = (uint)(tmp - div * divisor);
2414                     }
2415                     else
2416                     {
2417                         uint q;
2418                         d.uhi = q = n / divisor;
2419                         remainder = n - q * divisor;
2420                         n = d.umid;
2421                         if ((n | remainder) != 0)
2422                         {
2423                             d.umid = q = (uint)((((ulong)remainder << 32) | n) / divisor);
2424                             remainder = n - q * divisor;
2425                         }
2426                         n = d.ulo;
2427                         if ((n | remainder) != 0)
2428                         {
2429                             d.ulo = q = (uint)((((ulong)remainder << 32) | n) / divisor);
2430                             remainder = n - q * divisor;
2431                         }
2432                     }
2433                     power = divisor;
2434                     if (scale == 0)
2435                         goto checkRemainder;
2436                     sticky |= remainder;
2437                 }
2438
2439                 {
2440                     power = s_powers10[scale];
2441                     // TODO: https://github.com/dotnet/coreclr/issues/3439
2442                     uint n = d.uhi;
2443                     if (n == 0)
2444                     {
2445                         ulong tmp = d.Low64;
2446                         if (tmp == 0)
2447                         {
2448                             if (mode <= MidpointRounding.ToZero)
2449                                 goto done;
2450                             remainder = 0;
2451                             goto checkRemainder;
2452                         }
2453                         ulong div = tmp / power;
2454                         d.Low64 = div;
2455                         remainder = (uint)(tmp - div * power);
2456                     }
2457                     else
2458                     {
2459                         uint q;
2460                         d.uhi = q = n / power;
2461                         remainder = n - q * power;
2462                         n = d.umid;
2463                         if ((n | remainder) != 0)
2464                         {
2465                             d.umid = q = (uint)((((ulong)remainder << 32) | n) / power);
2466                             remainder = n - q * power;
2467                         }
2468                         n = d.ulo;
2469                         if ((n | remainder) != 0)
2470                         {
2471                             d.ulo = q = (uint)((((ulong)remainder << 32) | n) / power);
2472                             remainder = n - q * power;
2473                         }
2474                     }
2475                 }
2476
2477 checkRemainder:
2478                 if (mode == MidpointRounding.ToZero)
2479                     goto done;
2480                 else if (mode == MidpointRounding.ToEven)
2481                 {
2482                     // To do IEEE rounding, we add LSB of result to sticky bits so either causes round up if remainder * 2 == last divisor.
2483                     remainder <<= 1;
2484                     if ((sticky | d.ulo & 1) != 0)
2485                         remainder++;
2486                     if (power >= remainder)
2487                         goto done;
2488                 }
2489                 else if (mode == MidpointRounding.AwayFromZero)
2490                 {
2491                     // Round away from zero at the mid point.
2492                     remainder <<= 1;
2493                     if (power > remainder)
2494                         goto done;
2495                 }
2496                 else if (mode == MidpointRounding.ToNegativeInfinity)
2497                 {
2498                     // Round toward -infinity if we have chopped off a non-zero amount from a negative value.
2499                     if ((remainder | sticky) == 0 || !d.IsNegative)
2500                         goto done;
2501                 }
2502                 else
2503                 {
2504                     Debug.Assert(mode == MidpointRounding.ToPositiveInfinity);
2505                     // Round toward infinity if we have chopped off a non-zero amount from a positive value.
2506                     if ((remainder | sticky) == 0 || d.IsNegative)
2507                         goto done;
2508                 }
2509                 if (++d.Low64 == 0)
2510                     d.uhi++;
2511 done:
2512                 return;
2513             }
2514
2515             internal static uint DecDivMod1E9(ref DecCalc value)
2516             {
2517                 ulong high64 = ((ulong)value.uhi << 32) + value.umid;
2518                 ulong div64 = high64 / TenToPowerNine;
2519                 value.uhi = (uint)(div64 >> 32);
2520                 value.umid = (uint)div64;
2521
2522                 ulong num = ((high64 - (uint)div64 * TenToPowerNine) << 32) + value.ulo;
2523                 uint div = (uint)(num / TenToPowerNine);
2524                 value.ulo = div;
2525                 return (uint)num - div * TenToPowerNine;
2526             }
2527
2528             struct PowerOvfl
2529             {
2530                 public readonly uint Hi;
2531                 public readonly ulong MidLo;
2532
2533                 public PowerOvfl(uint hi, uint mid, uint lo)
2534                 {
2535                     Hi = hi;
2536                     MidLo = ((ulong)mid << 32) + lo;
2537                 }
2538             }
2539
2540             static readonly PowerOvfl[] PowerOvflValues = new[]
2541             {
2542                 // This is a table of the largest values that can be in the upper two
2543                 // uints of a 96-bit number that will not overflow when multiplied
2544                 // by a given power.  For the upper word, this is a table of
2545                 // 2^32 / 10^n for 1 <= n <= 8.  For the lower word, this is the
2546                 // remaining fraction part * 2^32.  2^32 = 4294967296.
2547                 //
2548                 new PowerOvfl(429496729, 2576980377, 2576980377),  // 10^1 remainder 0.6
2549                 new PowerOvfl(42949672,  4123168604, 687194767),   // 10^2 remainder 0.16
2550                 new PowerOvfl(4294967,   1271310319, 2645699854),  // 10^3 remainder 0.616
2551                 new PowerOvfl(429496,    3133608139, 694066715),   // 10^4 remainder 0.1616
2552                 new PowerOvfl(42949,     2890341191, 2216890319),  // 10^5 remainder 0.51616
2553                 new PowerOvfl(4294,      4154504685, 2369172679),  // 10^6 remainder 0.551616
2554                 new PowerOvfl(429,       2133437386, 4102387834),  // 10^7 remainder 0.9551616
2555                 new PowerOvfl(42,        4078814305, 410238783),   // 10^8 remainder 0.09991616
2556             };
2557
2558             [StructLayout(LayoutKind.Explicit)]
2559             private struct Buf12
2560             {
2561                 [FieldOffset(0 * 4)]
2562                 public uint U0;
2563                 [FieldOffset(1 * 4)]
2564                 public uint U1;
2565                 [FieldOffset(2 * 4)]
2566                 public uint U2;
2567
2568                 [FieldOffset(0)]
2569                 private ulong ulo64LE;
2570                 [FieldOffset(4)]
2571                 private ulong uhigh64LE;
2572
2573                 public ulong Low64
2574                 {
2575 #if BIGENDIAN
2576                     get => ((ulong)U1 << 32) | U0;
2577                     set { U1 = (uint)(value >> 32); U0 = (uint)value; }
2578 #else
2579                     get => ulo64LE;
2580                     set => ulo64LE = value;
2581 #endif
2582                 }
2583
2584                 /// <summary>
2585                 /// U1-U2 combined (overlaps with Low64)
2586                 /// </summary>
2587                 public ulong High64
2588                 {
2589 #if BIGENDIAN
2590                     get => ((ulong)U2 << 32) | U1;
2591                     set { U2 = (uint)(value >> 32); U1 = (uint)value; }
2592 #else
2593                     get => uhigh64LE;
2594                     set => uhigh64LE = value;
2595 #endif
2596                 }
2597             }
2598
2599             [StructLayout(LayoutKind.Explicit)]
2600             private struct Buf16
2601             {
2602                 [FieldOffset(0 * 4)]
2603                 public uint U0;
2604                 [FieldOffset(1 * 4)]
2605                 public uint U1;
2606                 [FieldOffset(2 * 4)]
2607                 public uint U2;
2608                 [FieldOffset(3 * 4)]
2609                 public uint U3;
2610
2611                 [FieldOffset(0 * 8)]
2612                 private ulong ulo64LE;
2613                 [FieldOffset(1 * 8)]
2614                 private ulong uhigh64LE;
2615
2616                 public ulong Low64
2617                 {
2618 #if BIGENDIAN
2619                     get => ((ulong)U1 << 32) | U0;
2620                     set { U1 = (uint)(value >> 32); U0 = (uint)value; }
2621 #else
2622                     get => ulo64LE;
2623                     set => ulo64LE = value;
2624 #endif
2625                 }
2626
2627                 public ulong High64
2628                 {
2629 #if BIGENDIAN
2630                     get => ((ulong)U3 << 32) | U2;
2631                     set { U3 = (uint)(value >> 32); U2 = (uint)value; }
2632 #else
2633                     get => uhigh64LE;
2634                     set => uhigh64LE = value;
2635 #endif
2636                 }
2637             }
2638
2639             [StructLayout(LayoutKind.Explicit)]
2640             private struct Buf24
2641             {
2642                 [FieldOffset(0 * 4)]
2643                 public uint U0;
2644                 [FieldOffset(1 * 4)]
2645                 public uint U1;
2646                 [FieldOffset(2 * 4)]
2647                 public uint U2;
2648                 [FieldOffset(3 * 4)]
2649                 public uint U3;
2650                 [FieldOffset(4 * 4)]
2651                 public uint U4;
2652                 [FieldOffset(5 * 4)]
2653                 public uint U5;
2654
2655                 [FieldOffset(0 * 8)]
2656                 private ulong ulo64LE;
2657                 [FieldOffset(1 * 8)]
2658                 private ulong umid64LE;
2659                 [FieldOffset(2 * 8)]
2660                 private ulong uhigh64LE;
2661
2662                 public ulong Low64
2663                 {
2664 #if BIGENDIAN
2665                     get => ((ulong)U1 << 32) | U0;
2666                     set { U1 = (uint)(value >> 32); U0 = (uint)value; }
2667 #else
2668                     get => ulo64LE;
2669                     set => ulo64LE = value;
2670 #endif
2671                 }
2672
2673                 public ulong Mid64
2674                 {
2675 #if BIGENDIAN
2676                     get => ((ulong)U3 << 32) | U2;
2677                     set { U3 = (uint)(value >> 32); U2 = (uint)value; }
2678 #else
2679                     get => umid64LE;
2680                     set => umid64LE = value;
2681 #endif
2682                 }
2683
2684                 public ulong High64
2685                 {
2686 #if BIGENDIAN
2687                     get => ((ulong)U5 << 32) | U4;
2688                     set { U5 = (uint)(value >> 32); U4 = (uint)value; }
2689 #else
2690                     get => uhigh64LE;
2691                     set => uhigh64LE = value;
2692 #endif
2693                 }
2694
2695                 public int Length => 6;
2696             }
2697
2698             private struct Buf28
2699             {
2700                 public Buf24 Buf24;
2701                 public uint U6;
2702
2703                 public int Length => 7;
2704             }
2705         }
2706     }
2707 }