case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
- // Zero extend the argument.
- Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+ // Zero extend the argument unless it's cttz, then use any_extend.
+ if (Node->getOpcode() == ISD::CTTZ ||
+ Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
+ Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0));
+ else
+ Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
+
if (Node->getOpcode() == ISD::CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
; FUNC-LABEL: {{^}}v_cttz_zero_undef_i8_with_select:
; SI-NOSDWA: v_ffbl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
-; SI-SDWA: v_ffbl_b32_sdwa
+; SI-SDWA: v_ffbl_b32_e32
; EG: MEM_RAT MSKOR
define amdgpu_kernel void @v_cttz_zero_undef_i8_with_select(i8 addrspace(1)* noalias %out, i8 addrspace(1)* nocapture readonly %arrayidx) nounwind {
%val = load i8, i8 addrspace(1)* %arrayidx, align 1
; FUNC-LABEL: {{^}}v_cttz_zero_undef_i16_with_select:
; SI-NOSDWA: v_ffbl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
-; SI-SDWA: v_ffbl_b32_sdwa
+; SI-SDWA: v_ffbl_b32_e32
; EG: MEM_RAT MSKOR
define amdgpu_kernel void @v_cttz_zero_undef_i16_with_select(i16 addrspace(1)* noalias %out, i16 addrspace(1)* nocapture readonly %arrayidx) nounwind {
%val = load i16, i16 addrspace(1)* %arrayidx, align 1
; FUNC-LABEL: {{^}}v_cttz_i8_sel_eq_neg1:
; SI: {{buffer|flat}}_load_ubyte
; SI-NOSDWA: v_ffbl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
-; SI-SDWA: v_ffbl_b32_sdwa
+; SI-SDWA: v_ffbl_b32_e32
; EG: MEM_RAT MSKOR
; EG: FFBL_INT
define amdgpu_kernel void @v_cttz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* nocapture readonly %arrayidx) nounwind {
define i8 @cttz_i8(i8 %x) {
; X32-LABEL: cttz_i8:
; X32: # %bb.0:
-; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: bsfl %eax, %eax
+; X32-NEXT: bsfl {{[0-9]+}}(%esp), %eax
; X32-NEXT: # kill: def $al killed $al killed $eax
; X32-NEXT: retl
;
; X64-LABEL: cttz_i8:
; X64: # %bb.0:
-; X64-NEXT: movzbl %dil, %eax
-; X64-NEXT: bsfl %eax, %eax
+; X64-NEXT: bsfl %edi, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X32-CLZ-LABEL: cttz_i8:
; X32-CLZ: # %bb.0:
-; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X32-CLZ-NEXT: tzcntl %eax, %eax
+; X32-CLZ-NEXT: tzcntl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i8:
; X64-CLZ: # %bb.0:
-; X64-CLZ-NEXT: movzbl %dil, %eax
-; X64-CLZ-NEXT: tzcntl %eax, %eax
+; X64-CLZ-NEXT: tzcntl %edi, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
%tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
;
; X32-CLZ-LABEL: cttz_i8_zero_test:
; X32-CLZ: # %bb.0:
-; X32-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X32-CLZ-NEXT: orl $256, %eax # imm = 0x100
+; X32-CLZ-NEXT: movl $256, %eax # imm = 0x100
+; X32-CLZ-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-CLZ-NEXT: tzcntl %eax, %eax
; X32-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X32-CLZ-NEXT: retl
;
; X64-CLZ-LABEL: cttz_i8_zero_test:
; X64-CLZ: # %bb.0:
-; X64-CLZ-NEXT: movzbl %dil, %eax
-; X64-CLZ-NEXT: orl $256, %eax # imm = 0x100
-; X64-CLZ-NEXT: tzcntl %eax, %eax
+; X64-CLZ-NEXT: orl $256, %edi # imm = 0x100
+; X64-CLZ-NEXT: tzcntl %edi, %eax
; X64-CLZ-NEXT: # kill: def $al killed $al killed $eax
; X64-CLZ-NEXT: retq
%tmp1 = call i8 @llvm.cttz.i8(i8 %n, i1 false)