--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx | FileCheck %s
+; test_offset: performs three volatile zero stores through %base (an i8 at
+; offset 7, an i16 at offset 5, an i32 at offset 1), then copies 16 bytes
+; starting at %base-4 into a local 128-byte buffer with llvm.memcpy and returns
+; byte 4 of that buffer. The copied source range [%base-4, %base+12) overlaps
+; all three stored ranges; buffer byte 4 corresponds to source offset 0, which
+; none of the three stores write.
+;
+; NOTE(review): the asserted loads below cover source offsets -4..-2 and 1..11
+; (14 bytes in total); no load of offsets -1 or 0 appears, although the memcpy
+; length is 16 and the returned byte maps to source offset 0. The stack offsets
+; are regex-matched, so this cannot be resolved from the assertions alone --
+; confirm against real llc output before relying on them.
+define i8 @test_offset(i8* %base) {
+; CHECK-LABEL: test_offset:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movb $0, 7(%rdi)
+; CHECK-NEXT: movw $0, 5(%rdi)
+; CHECK-NEXT: movl $0, 1(%rdi)
+; CHECK-NEXT: movzwl -4(%rdi), %eax
+; CHECK-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb -2(%rdi), %al
+; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movl 1(%rdi), %eax
+; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movzwl 5(%rdi), %eax
+; CHECK-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb 7(%rdi), %al
+; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movl 8(%rdi), %eax
+; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+; Local 128-byte, 16-byte-aligned buffer; it is the destination of the memcpy.
+ %z = alloca [128 x i8], align 16
+; Volatile zero stores through %base, issued in descending address order:
+; i8 at offset 7, i16 at offset 5, i32 at offset 1 (bytes 1..7 in total).
+ %gep0 = getelementptr inbounds i8, i8* %base, i64 7
+ store volatile i8 0, i8* %gep0
+ %gep1 = getelementptr inbounds i8, i8* %base, i64 5
+ %bc1 = bitcast i8* %gep1 to i16*
+ store volatile i16 0, i16* %bc1
+ %gep2 = getelementptr inbounds i8, i8* %base, i64 1
+ %bc2 = bitcast i8* %gep2 to i32*
+ store volatile i32 0, i32* %bc2
+
+; Copy 16 bytes from %base-4 (a negative offset from the store base) to the
+; start of %z. The trailing i1 false operand is the intrinsic's volatile flag.
+ %y1 = getelementptr inbounds i8, i8* %base, i64 -4
+ %y2 = bitcast [128 x i8]* %z to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %y2, i8* %y1, i64 16, i1 false)
+
+; Return %z[4], i.e. the byte that the memcpy copied from source offset 0.
+ %gep4 = getelementptr inbounds [128 x i8], [128 x i8]* %z, i64 0, i64 4
+ %ret = load i8, i8* %gep4
+ ret i8 %ret
+}
+
+; Declaration of the llvm.memcpy intrinsic called by test_offset. Operands, in
+; order: destination pointer, source pointer, i64 byte count, i1 volatile flag.
+; The attribute note below was carried over from printed IR; no attribute group
+; is actually attached to this declaration in this file.
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)