1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #if V8_TARGET_ARCH_ARM64
9 #include "src/arm64/simulator-arm64.h"
10 #include "src/codegen.h"
11 #include "src/macro-assembler.h"
16 #define __ ACCESS_MASM(masm)
18 #if defined(USE_SIMULATOR)
19 byte* fast_exp_arm64_machine_code = NULL;
20 double fast_exp_simulator(double x) {
21 Simulator * simulator = Simulator::current(Isolate::Current());
22 Simulator::CallArgument args[] = {
23 Simulator::CallArgument(x),
24 Simulator::CallArgument::End()
26 return simulator->CallDouble(fast_exp_arm64_machine_code, args);
31 UnaryMathFunction CreateExpFunction() {
32 if (!FLAG_fast_math) return &std::exp;
34 // Use the Math.exp implementation in MathExpGenerator::EmitMathExp() to create
35 // an AAPCS64-compliant exp() function. This will be faster than the C
36 // library's exp() function, but probably less accurate.
39 static_cast<byte*>(base::OS::Allocate(1 * KB, &actual_size, true));
40 if (buffer == NULL) return &std::exp;
42 ExternalReference::InitializeMathExpData();
43 MacroAssembler masm(NULL, buffer, static_cast<int>(actual_size));
44 masm.SetStackPointer(csp);
46 // The argument will be in d0 on entry.
47 DoubleRegister input = d0;
48 // Use other caller-saved registers for all other values.
49 DoubleRegister result = d1;
50 DoubleRegister double_temp1 = d2;
51 DoubleRegister double_temp2 = d3;
56 MathExpGenerator::EmitMathExp(&masm, input, result,
57 double_temp1, double_temp2,
59 // Move the result to the return register.
60 masm.Fmov(d0, result);
65 DCHECK(!RelocInfo::RequiresRelocation(desc));
67 CpuFeatures::FlushICache(buffer, actual_size);
68 base::OS::ProtectCode(buffer, actual_size);
70 #if !defined(USE_SIMULATOR)
71 return FUNCTION_CAST<UnaryMathFunction>(buffer);
73 fast_exp_arm64_machine_code = buffer;
74 return &fast_exp_simulator;
79 UnaryMathFunction CreateSqrtFunction() {
84 // -------------------------------------------------------------------------
85 // Platform-specific RuntimeCallHelper functions.
87 void StubRuntimeCallHelper::BeforeCall(MacroAssembler* masm) const {
88 masm->EnterFrame(StackFrame::INTERNAL);
89 DCHECK(!masm->has_frame());
90 masm->set_has_frame(true);
94 void StubRuntimeCallHelper::AfterCall(MacroAssembler* masm) const {
95 masm->LeaveFrame(StackFrame::INTERNAL);
96 DCHECK(masm->has_frame());
97 masm->set_has_frame(false);
101 // -------------------------------------------------------------------------
104 void ElementsTransitionGenerator::GenerateMapChangeElementsTransition(
105 MacroAssembler* masm,
110 AllocationSiteMode mode,
111 Label* allocation_memento_found) {
113 "ElementsTransitionGenerator::GenerateMapChangeElementsTransition");
114 DCHECK(!AreAliased(receiver, key, value, target_map));
116 if (mode == TRACK_ALLOCATION_SITE) {
117 DCHECK(allocation_memento_found != NULL);
118 __ JumpIfJSArrayHasAllocationMemento(receiver, x10, x11,
119 allocation_memento_found);
122 // Set transitioned map.
123 __ Str(target_map, FieldMemOperand(receiver, HeapObject::kMapOffset));
124 __ RecordWriteField(receiver,
125 HeapObject::kMapOffset,
135 void ElementsTransitionGenerator::GenerateSmiToDouble(
136 MacroAssembler* masm,
141 AllocationSiteMode mode,
143 ASM_LOCATION("ElementsTransitionGenerator::GenerateSmiToDouble");
144 Label gc_required, only_change_map;
145 Register elements = x4;
146 Register length = x5;
147 Register array_size = x6;
150 Register scratch = x6;
152 // Verify input registers don't conflict with locals.
153 DCHECK(!AreAliased(receiver, key, value, target_map,
154 elements, length, array_size, array));
156 if (mode == TRACK_ALLOCATION_SITE) {
157 __ JumpIfJSArrayHasAllocationMemento(receiver, x10, x11, fail);
160 // Check for empty arrays, which only require a map transition and no changes
161 // to the backing store.
162 __ Ldr(elements, FieldMemOperand(receiver, JSObject::kElementsOffset));
163 __ JumpIfRoot(elements, Heap::kEmptyFixedArrayRootIndex, &only_change_map);
166 __ Ldrsw(length, UntagSmiFieldMemOperand(elements,
167 FixedArray::kLengthOffset));
169 // Allocate new FixedDoubleArray.
170 __ Lsl(array_size, length, kDoubleSizeLog2);
171 __ Add(array_size, array_size, FixedDoubleArray::kHeaderSize);
172 __ Allocate(array_size, array, x10, x11, &gc_required, DOUBLE_ALIGNMENT);
173 // Register array is a non-tagged heap object.
175 // Set the destination FixedDoubleArray's length and map.
176 Register map_root = array_size;
177 __ LoadRoot(map_root, Heap::kFixedDoubleArrayMapRootIndex);
178 __ SmiTag(x11, length);
179 __ Str(x11, MemOperand(array, FixedDoubleArray::kLengthOffset));
180 __ Str(map_root, MemOperand(array, HeapObject::kMapOffset));
182 __ Str(target_map, FieldMemOperand(receiver, HeapObject::kMapOffset));
183 __ RecordWriteField(receiver, HeapObject::kMapOffset, target_map, scratch,
184 kLRHasBeenSaved, kDontSaveFPRegs, OMIT_REMEMBERED_SET,
187 // Replace receiver's backing store with newly created FixedDoubleArray.
188 __ Add(x10, array, kHeapObjectTag);
189 __ Str(x10, FieldMemOperand(receiver, JSObject::kElementsOffset));
190 __ RecordWriteField(receiver, JSObject::kElementsOffset, x10,
191 scratch, kLRHasBeenSaved, kDontSaveFPRegs,
192 EMIT_REMEMBERED_SET, OMIT_SMI_CHECK);
194 // Prepare for conversion loop.
195 Register src_elements = x10;
196 Register dst_elements = x11;
197 Register dst_end = x12;
198 __ Add(src_elements, elements, FixedArray::kHeaderSize - kHeapObjectTag);
199 __ Add(dst_elements, array, FixedDoubleArray::kHeaderSize);
200 __ Add(dst_end, dst_elements, Operand(length, LSL, kDoubleSizeLog2));
202 FPRegister nan_d = d1;
203 __ Fmov(nan_d, rawbits_to_double(kHoleNanInt64));
208 __ Bind(&only_change_map);
209 __ Str(target_map, FieldMemOperand(receiver, HeapObject::kMapOffset));
210 __ RecordWriteField(receiver, HeapObject::kMapOffset, target_map, scratch,
211 kLRHasNotBeenSaved, kDontSaveFPRegs, OMIT_REMEMBERED_SET,
215 // Call into runtime if GC is required.
216 __ Bind(&gc_required);
220 // Iterate over the array, copying and converting smis to doubles. If an
221 // element is non-smi, write a hole to the destination.
225 __ Ldr(x13, MemOperand(src_elements, kPointerSize, PostIndex));
226 __ SmiUntagToDouble(d0, x13, kSpeculativeUntag);
227 __ Tst(x13, kSmiTagMask);
228 __ Fcsel(d0, d0, nan_d, eq);
229 __ Str(d0, MemOperand(dst_elements, kDoubleSize, PostIndex));
232 __ Cmp(dst_elements, dst_end);
241 void ElementsTransitionGenerator::GenerateDoubleToObject(
242 MacroAssembler* masm,
247 AllocationSiteMode mode,
249 ASM_LOCATION("ElementsTransitionGenerator::GenerateDoubleToObject");
250 Register elements = x4;
251 Register array_size = x6;
253 Register length = x5;
255 // Verify input registers don't conflict with locals.
256 DCHECK(!AreAliased(receiver, key, value, target_map,
257 elements, array_size, array, length));
259 if (mode == TRACK_ALLOCATION_SITE) {
260 __ JumpIfJSArrayHasAllocationMemento(receiver, x10, x11, fail);
263 // Check for empty arrays, which only require a map transition and no changes
264 // to the backing store.
265 Label only_change_map;
267 __ Ldr(elements, FieldMemOperand(receiver, JSObject::kElementsOffset));
268 __ JumpIfRoot(elements, Heap::kEmptyFixedArrayRootIndex, &only_change_map);
271 // TODO(all): These registers may not need to be pushed. Examine
272 // RecordWriteStub and check whether it's needed.
273 __ Push(target_map, receiver, key, value);
274 __ Ldrsw(length, UntagSmiFieldMemOperand(elements,
275 FixedArray::kLengthOffset));
276 // Allocate new FixedArray.
278 __ Mov(array_size, FixedDoubleArray::kHeaderSize);
279 __ Add(array_size, array_size, Operand(length, LSL, kPointerSizeLog2));
280 __ Allocate(array_size, array, x10, x11, &gc_required, NO_ALLOCATION_FLAGS);
282 // Set destination FixedArray's length and map.
283 Register map_root = array_size;
284 __ LoadRoot(map_root, Heap::kFixedArrayMapRootIndex);
285 __ SmiTag(x11, length);
286 __ Str(x11, MemOperand(array, FixedDoubleArray::kLengthOffset));
287 __ Str(map_root, MemOperand(array, HeapObject::kMapOffset));
289 // Prepare for conversion loop.
290 Register src_elements = x10;
291 Register dst_elements = x11;
292 Register dst_end = x12;
293 __ Add(src_elements, elements,
294 FixedDoubleArray::kHeaderSize - kHeapObjectTag);
295 __ Add(dst_elements, array, FixedArray::kHeaderSize);
296 __ Add(array, array, kHeapObjectTag);
297 __ Add(dst_end, dst_elements, Operand(length, LSL, kPointerSizeLog2));
299 Register the_hole = x14;
300 Register heap_num_map = x15;
301 __ LoadRoot(the_hole, Heap::kTheHoleValueRootIndex);
302 __ LoadRoot(heap_num_map, Heap::kHeapNumberMapRootIndex);
307 // Call into runtime if GC is required.
308 __ Bind(&gc_required);
309 __ Pop(value, key, receiver, target_map);
314 Label loop, convert_hole;
316 __ Ldr(x13, MemOperand(src_elements, kPointerSize, PostIndex));
317 __ Cmp(x13, kHoleNanInt64);
318 __ B(eq, &convert_hole);
320 // Non-hole double, copy value into a heap number.
321 Register heap_num = length;
322 Register scratch = array_size;
323 Register scratch2 = elements;
324 __ AllocateHeapNumber(heap_num, &gc_required, scratch, scratch2,
326 __ Mov(x13, dst_elements);
327 __ Str(heap_num, MemOperand(dst_elements, kPointerSize, PostIndex));
328 __ RecordWrite(array, x13, heap_num, kLRHasBeenSaved, kDontSaveFPRegs,
329 EMIT_REMEMBERED_SET, OMIT_SMI_CHECK);
333 // Replace the-hole NaN with the-hole pointer.
334 __ Bind(&convert_hole);
335 __ Str(the_hole, MemOperand(dst_elements, kPointerSize, PostIndex));
338 __ Cmp(dst_elements, dst_end);
342 __ Pop(value, key, receiver, target_map);
343 // Replace receiver's backing store with newly created and filled FixedArray.
344 __ Str(array, FieldMemOperand(receiver, JSObject::kElementsOffset));
345 __ RecordWriteField(receiver, JSObject::kElementsOffset, array, x13,
346 kLRHasBeenSaved, kDontSaveFPRegs, EMIT_REMEMBERED_SET,
350 __ Bind(&only_change_map);
351 __ Str(target_map, FieldMemOperand(receiver, HeapObject::kMapOffset));
352 __ RecordWriteField(receiver, HeapObject::kMapOffset, target_map, x13,
353 kLRHasNotBeenSaved, kDontSaveFPRegs, OMIT_REMEMBERED_SET,
358 CodeAgingHelper::CodeAgingHelper() {
359 DCHECK(young_sequence_.length() == kNoCodeAgeSequenceLength);
360 // The sequence of instructions that is patched out for aging code is the
361 // following boilerplate stack-building prologue that is found both in
362 // FUNCTION and OPTIMIZED_FUNCTION code:
363 PatchingAssembler patcher(young_sequence_.start(),
364 young_sequence_.length() / kInstructionSize);
365 // The young sequence is the frame setup code for FUNCTION code types. It is
366 // generated by FullCodeGenerator::Generate.
367 MacroAssembler::EmitFrameSetupForCodeAgePatching(&patcher);
370 const int length = kCodeAgeStubEntryOffset / kInstructionSize;
371 DCHECK(old_sequence_.length() >= kCodeAgeStubEntryOffset);
372 PatchingAssembler patcher_old(old_sequence_.start(), length);
373 MacroAssembler::EmitCodeAgeSequence(&patcher_old, NULL);
379 bool CodeAgingHelper::IsOld(byte* candidate) const {
380 return memcmp(candidate, old_sequence_.start(), kCodeAgeStubEntryOffset) == 0;
385 bool Code::IsYoungSequence(Isolate* isolate, byte* sequence) {
386 return MacroAssembler::IsYoungSequence(isolate, sequence);
390 void Code::GetCodeAgeAndParity(Isolate* isolate, byte* sequence, Age* age,
391 MarkingParity* parity) {
392 if (IsYoungSequence(isolate, sequence)) {
393 *age = kNoAgeCodeAge;
394 *parity = NO_MARKING_PARITY;
396 byte* target = sequence + kCodeAgeStubEntryOffset;
397 Code* stub = GetCodeFromTargetAddress(Memory::Address_at(target));
398 GetCodeAgeAndParity(stub, age, parity);
403 void Code::PatchPlatformCodeAge(Isolate* isolate,
406 MarkingParity parity) {
407 PatchingAssembler patcher(sequence,
408 kNoCodeAgeSequenceLength / kInstructionSize);
409 if (age == kNoAgeCodeAge) {
410 MacroAssembler::EmitFrameSetupForCodeAgePatching(&patcher);
412 Code * stub = GetCodeAgeStub(isolate, age, parity);
413 MacroAssembler::EmitCodeAgeSequence(&patcher, stub);
418 void StringCharLoadGenerator::Generate(MacroAssembler* masm,
422 Label* call_runtime) {
423 DCHECK(string.Is64Bits() && index.Is32Bits() && result.Is64Bits());
424 // Fetch the instance type of the receiver into result register.
425 __ Ldr(result, FieldMemOperand(string, HeapObject::kMapOffset));
426 __ Ldrb(result, FieldMemOperand(result, Map::kInstanceTypeOffset));
428 // We need special handling for indirect strings.
429 Label check_sequential;
430 __ TestAndBranchIfAllClear(result, kIsIndirectStringMask, &check_sequential);
432 // Dispatch on the indirect string shape: slice or cons.
434 __ TestAndBranchIfAllClear(result, kSlicedNotConsMask, &cons_string);
437 Label indirect_string_loaded;
439 UntagSmiFieldMemOperand(string, SlicedString::kOffsetOffset));
440 __ Ldr(string, FieldMemOperand(string, SlicedString::kParentOffset));
441 __ Add(index, index, result.W());
442 __ B(&indirect_string_loaded);
444 // Handle cons strings.
445 // Check whether the right hand side is the empty string (i.e. if
446 // this is really a flat string in a cons string). If that is not
447 // the case we would rather go to the runtime system now to flatten the string.
449 __ Bind(&cons_string);
450 __ Ldr(result, FieldMemOperand(string, ConsString::kSecondOffset));
451 __ JumpIfNotRoot(result, Heap::kempty_stringRootIndex, call_runtime);
452 // Get the first of the two strings and load its instance type.
453 __ Ldr(string, FieldMemOperand(string, ConsString::kFirstOffset));
455 __ Bind(&indirect_string_loaded);
456 __ Ldr(result, FieldMemOperand(string, HeapObject::kMapOffset));
457 __ Ldrb(result, FieldMemOperand(result, Map::kInstanceTypeOffset));
459 // Distinguish sequential and external strings. Only these two string
460 // representations can reach here (slices and flat cons strings have been
461 // reduced to the underlying sequential or external string).
462 Label external_string, check_encoding;
463 __ Bind(&check_sequential);
464 STATIC_ASSERT(kSeqStringTag == 0);
465 __ TestAndBranchIfAnySet(result, kStringRepresentationMask, &external_string);
467 // Prepare sequential strings
468 STATIC_ASSERT(SeqTwoByteString::kHeaderSize == SeqOneByteString::kHeaderSize);
469 __ Add(string, string, SeqTwoByteString::kHeaderSize - kHeapObjectTag);
470 __ B(&check_encoding);
472 // Handle external strings.
473 __ Bind(&external_string);
474 if (FLAG_debug_code) {
475 // Assert that we do not have a cons or slice (indirect strings) here.
476 // Sequential strings have already been ruled out.
477 __ Tst(result, kIsIndirectStringMask);
478 __ Assert(eq, kExternalStringExpectedButNotFound);
480 // Rule out short external strings.
481 STATIC_ASSERT(kShortExternalStringTag != 0);
482 // TestAndBranchIfAnySet can emit Tbnz. Do not use it because call_runtime
483 // can be bound far away in deferred code.
484 __ Tst(result, kShortExternalStringMask);
485 __ B(ne, call_runtime);
486 __ Ldr(string, FieldMemOperand(string, ExternalString::kResourceDataOffset));
489 __ Bind(&check_encoding);
490 STATIC_ASSERT(kTwoByteStringTag == 0);
491 __ TestAndBranchIfAnySet(result, kStringEncodingMask, &ascii);
493 __ Ldrh(result, MemOperand(string, index, SXTW, 1));
497 __ Ldrb(result, MemOperand(string, index, SXTW));
502 static MemOperand ExpConstant(Register base, int index) {
503 return MemOperand(base, index * kDoubleSize);
507 void MathExpGenerator::EmitMathExp(MacroAssembler* masm,
508 DoubleRegister input,
509 DoubleRegister result,
510 DoubleRegister double_temp1,
511 DoubleRegister double_temp2,
515 // TODO(jbramley): There are several instances where fnmsub could be used
516 // instead of fmul and fsub. Doing this changes the result, but since this is
517 // an estimation anyway, does it matter?
519 DCHECK(!AreAliased(input, result,
520 double_temp1, double_temp2,
521 temp1, temp2, temp3));
522 DCHECK(ExternalReference::math_exp_constants(0).address() != NULL);
523 DCHECK(!masm->serializer_enabled()); // External references not serializable.
526 DoubleRegister double_temp3 = result;
527 Register constants = temp3;
529 // The algorithm used relies on some magic constants which are initialized in
530 // ExternalReference::InitializeMathExpData().
532 // Load the address of the start of the array.
533 __ Mov(constants, ExternalReference::math_exp_constants(0));
535 // We have to do a four-way split here:
536 // - If input <= about -708.4, the output always rounds to zero.
537 // - If input >= about 709.8, the output always rounds to +infinity.
538 // - If the input is NaN, the output is NaN.
539 // - Otherwise, the result needs to be calculated.
540 Label result_is_finite_non_zero;
541 // Assert that we can load offset 0 (the small input threshold) and offset 1
542 // (the large input threshold) with a single ldp.
543 DCHECK(kDRegSize == (ExpConstant(constants, 1).offset() -
544 ExpConstant(constants, 0).offset()));
545 __ Ldp(double_temp1, double_temp2, ExpConstant(constants, 0));
547 __ Fcmp(input, double_temp1);
548 __ Fccmp(input, double_temp2, NoFlag, hi);
549 // At this point, the condition flags can be in one of five states:
551 // 1000 -708.4 < input < 709.8 result = exp(input)
552 // 0110 input == 709.8 result = +infinity
553 // 0010 input > 709.8 result = +infinity
554 // 0011 input is NaN result = input
555 // 0000 input <= -708.4 result = +0.0
557 // Continue the common case first. 'mi' tests N == 1.
558 __ B(&result_is_finite_non_zero, mi);
560 // TODO(jbramley): Consider adding a +infinity register for ARM64.
561 __ Ldr(double_temp2, ExpConstant(constants, 2)); // Synthesize +infinity.
563 // Select between +0.0 and +infinity. 'lo' tests C == 0.
564 __ Fcsel(result, fp_zero, double_temp2, lo);
565 // Select between {+0.0 or +infinity} and input. 'vc' tests V == 0.
566 __ Fcsel(result, result, input, vc);
569 // The rest is magic, as described in InitializeMathExpData().
570 __ Bind(&result_is_finite_non_zero);
572 // Assert that we can load offset 3 and offset 4 with a single ldp.
573 DCHECK(kDRegSize == (ExpConstant(constants, 4).offset() -
574 ExpConstant(constants, 3).offset()));
575 __ Ldp(double_temp1, double_temp3, ExpConstant(constants, 3));
576 __ Fmadd(double_temp1, double_temp1, input, double_temp3);
577 __ Fmov(temp2.W(), double_temp1.S());
578 __ Fsub(double_temp1, double_temp1, double_temp3);
580 // Assert that we can load offset 5 and offset 6 with a single ldp.
581 DCHECK(kDRegSize == (ExpConstant(constants, 6).offset() -
582 ExpConstant(constants, 5).offset()));
583 __ Ldp(double_temp2, double_temp3, ExpConstant(constants, 5));
584 // TODO(jbramley): Consider using Fnmsub here.
585 __ Fmul(double_temp1, double_temp1, double_temp2);
586 __ Fsub(double_temp1, double_temp1, input);
588 __ Fmul(double_temp2, double_temp1, double_temp1);
589 __ Fsub(double_temp3, double_temp3, double_temp1);
590 __ Fmul(double_temp3, double_temp3, double_temp2);
592 __ Mov(temp1.W(), Operand(temp2.W(), LSR, 11));
594 __ Ldr(double_temp2, ExpConstant(constants, 7));
595 // TODO(jbramley): Consider using Fnmsub here.
596 __ Fmul(double_temp3, double_temp3, double_temp2);
597 __ Fsub(double_temp3, double_temp3, double_temp1);
599 // The 8th constant is 1.0, so use an immediate move rather than a load.
600 // We can't generate a runtime assertion here as we would need to call Abort
601 // in the runtime and we don't have an Isolate when we generate this code.
602 __ Fmov(double_temp2, 1.0);
603 __ Fadd(double_temp3, double_temp3, double_temp2);
605 __ And(temp2, temp2, 0x7ff);
606 __ Add(temp1, temp1, 0x3ff);
608 // Do the final table lookup.
609 __ Mov(temp3, ExternalReference::math_exp_log_table());
611 __ Add(temp3, temp3, Operand(temp2, LSL, kDRegSizeLog2));
612 __ Ldp(temp2.W(), temp3.W(), MemOperand(temp3));
613 __ Orr(temp1.W(), temp3.W(), Operand(temp1.W(), LSL, 20));
614 __ Bfi(temp2, temp1, 32, 32);
615 __ Fmov(double_temp1, temp2);
617 __ Fmul(result, double_temp3, double_temp1);
624 } } // namespace v8::internal
626 #endif // V8_TARGET_ARCH_ARM64