// kernarg_segment_alignment is specified as log of the alignment.
// The minimum alignment is 16.
+ // FIXME: The metadata clamps the same alignment to a minimum of 4 rather
+ // than 16; should the two agree?
Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
}
Align MaxKernArgAlign;
HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
MaxKernArgAlign);
+ HSACodeProps.mKernargSegmentAlign =
+ std::max(MaxKernArgAlign, Align(4)).value();
+
HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
- HSACodeProps.mKernargSegmentAlign =
- std::max(MaxKernArgAlign, Align(4)).value();
HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
Kern.getDocument()->getNode(ProgramInfo.LDSSize);
Kern[".private_segment_fixed_size"] =
Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
+
+ // FIXME: kernarg_segment_alignment is clamped to a minimum of 16, but the
+ // metadata here uses a minimum of 4; should the two agree?
Kern[".kernarg_segment_align"] =
Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
Kern[".wavefront_size"] =
if (ImplicitBytes != 0) {
const Align Alignment = getAlignmentForImplicitArgPtr();
TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+ MaxAlign = std::max(MaxAlign, Alignment);
}
// Being able to dereference past the end is useful for emitting scalar loads.
; HSA-LABEL: - Name: opencl_kernel_implicitarg_ptr_empty
; HSA: KernargSegmentSize: 48
-; HSA: KernargSegmentAlign: 4
+; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_implicitarg_ptr
; HSA: KernargSegmentSize: 112
; HSA-LABEL: - Name: opencl_kernel_implicitarg_ptr
; HSA: KernargSegmentSize: 160
-; HSA: KernargSegmentAlign: 4
+; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func_empty
; HSA: KernargSegmentSize: 0
; HSA-LABEL: - Name: opencl_kernel_call_implicitarg_ptr_func_empty
; HSA: KernargSegmentSize: 48
-; HSA: KernargSegmentAlign: 4
+; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_call_implicitarg_ptr_func
; HSA: KernargSegmentSize: 112
; HSA-LABEL: - Name: opencl_kernel_call_implicitarg_ptr_func
; HSA: KernargSegmentSize: 160
-; HSA: KernargSegmentAlign: 4
+; HSA: KernargSegmentAlign: 8
; HSA-LABEL: - Name: kernel_call_kernarg_implicitarg_ptr_func
; HSA: KernargSegmentSize: 112