From bcd82afad64a22b15000de350d075b10f2de273a Mon Sep 17 00:00:00 2001 From: Fariborz Jahanian Date: Tue, 5 Aug 2014 18:37:48 +0000 Subject: [PATCH] Introduce f[no-]max-unknown-pointer-align=[number] option to instruct the code generator to not enforce a higher alignment than the given number (of bytes) when accessing memory via an opaque pointer or reference. Patch reviewed by John McCall (with post-commit review pending). rdar://16254558 llvm-svn: 214911 --- clang/docs/UsersManual.rst | 31 ++++++++++++ clang/include/clang/Basic/LangOptions.def | 2 + clang/include/clang/Driver/Options.td | 3 ++ clang/lib/CodeGen/CodeGenFunction.h | 7 ++- clang/lib/Driver/Tools.cpp | 15 ++++++ clang/lib/Frontend/CompilerInvocation.cpp | 1 + .../test/CodeGenCXX/align-avx-complete-objects.cpp | 57 ++++++++++++++++++++++ clang/test/Driver/darwin-max-type-align.c | 15 ++++++ clang/test/Driver/rewrite-legacy-objc.m | 6 +-- clang/test/Driver/rewrite-objc.m | 2 +- 10 files changed, 134 insertions(+), 5 deletions(-) create mode 100644 clang/test/CodeGenCXX/align-avx-complete-objects.cpp create mode 100644 clang/test/Driver/darwin-max-type-align.c diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 9c64b04..6f66c4d 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1111,6 +1111,37 @@ are listed below. This option restricts the generated code to use general registers only. This only applies to the AArch64 architecture. +**-f[no-]max-unknown-pointer-align=[number]** + Instruct the code generator to not enforce a higher alignment than the given + number (of bytes) when accessing memory via an opaque pointer or reference. + This cap is ignored when directly accessing a variable or when the pointee + type has an explicit “aligned” attribute. + + The value should usually be determined by the properties of the system allocator. + Some builtin types, especially vector types, have very high natural alignments; + when working with values of those types, Clang usually wants to use instructions + that take advantage of that alignment. However, many system allocators do + not promise to return memory that is more than 8-byte or 16-byte-aligned. Use + this option to limit the alignment that the compiler can assume for an arbitrary + pointer, which may point onto the heap. + + This option does not affect the ABI alignment of types; the layout of structs and + unions and the value returned by the alignof operator remain the same. + + This option can be overridden on a case-by-case basis by putting an explicit + “aligned” alignment on a struct, union, or typedef. For example: + + .. code-block:: console + + #include + // Make an aligned typedef of the AVX-512 16-int vector type. + typedef __v16si __aligned_v16si __attribute__((aligned(64))); + + void initialize_vector(__aligned_v16si *v) { + // The compiler may assume that ‘v’ is 64-byte aligned, regardless of the + // value of -fmax-unknown-pointer-align. + } + Profile Guided Optimization --------------------------- diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index a297a4c..c29686a 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -106,6 +106,8 @@ LANGOPT(OptimizeSize , 1, 0, "__OPTIMIZE_SIZE__ predefined macro") LANGOPT(Static , 1, 0, "__STATIC__ predefined macro (as opposed to __DYNAMIC__)") VALUE_LANGOPT(PackStruct , 32, 0, "default struct packing maximum alignment") +VALUE_LANGOPT(MaxTypeAlign , 32, 0, + "default maximum alignment for types") VALUE_LANGOPT(PICLevel , 2, 0, "__PIC__ level") VALUE_LANGOPT(PIELevel , 2, 0, "__PIE__ level") LANGOPT(GNUInline , 1, 0, "GNU inline semantics") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 759c5b5..7759b44 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -804,6 +804,9 @@ def fpack_struct : Flag<["-"], "fpack-struct">, Group; def fno_pack_struct : Flag<["-"], "fno-pack-struct">, Group; def fpack_struct_EQ : Joined<["-"], "fpack-struct=">, Group, Flags<[CC1Option]>, HelpText<"Specify the default maximum struct packing alignment">; +def fmax_type_align_EQ : Joined<["-"], "fmax-type-align=">, Group, Flags<[CC1Option]>, + HelpText<"Specify the maximum alignment to enforce on pointers lacking an explicit alignment">; +def fno_max_type_align : Flag<["-"], "fno-max-type-align">, Group; def fpascal_strings : Flag<["-"], "fpascal-strings">, Group, Flags<[CC1Option]>, HelpText<"Recognize and construct Pascal-style string literals">; def fpcc_struct_return : Flag<["-"], "fpcc-struct-return">, Group, Flags<[CC1Option]>, diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 8496292..a200548 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1399,8 +1399,13 @@ public: LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) { CharUnits Alignment; - if (!T->isIncompleteType()) + if (!T->isIncompleteType()) { Alignment = getContext().getTypeAlignInChars(T); + unsigned MaxAlign = getContext().getLangOpts().MaxTypeAlign; + if (MaxAlign && Alignment.getQuantity() > MaxAlign && + !getContext().isAlignmentRequired(T)) + Alignment = CharUnits::fromQuantity(MaxAlign); + } return LValue::MakeAddr(V, T, Alignment, getContext(), CGM.getTBAAInfo(T)); } diff --git a/clang/lib/Driver/Tools.cpp b/clang/lib/Driver/Tools.cpp index 9a205b3..93a1d91 100644 --- a/clang/lib/Driver/Tools.cpp +++ b/clang/lib/Driver/Tools.cpp @@ -4134,6 +4134,21 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fpack-struct=1"); } + // Handle -fmax-type-align=N and -fno-type-align + bool SkipMaxTypeAlign = Args.hasArg(options::OPT_fno_max_type_align); + if (Arg *A = Args.getLastArg(options::OPT_fmax_type_align_EQ)) { + if (!SkipMaxTypeAlign) { + std::string MaxTypeAlignStr = "-fmax-type-align="; + MaxTypeAlignStr += A->getValue(); + CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr)); + } + } else if (getToolChain().getTriple().isOSDarwin()) { + if (!SkipMaxTypeAlign) { + std::string MaxTypeAlignStr = "-fmax-type-align=16"; + CmdArgs.push_back(Args.MakeArgString(MaxTypeAlignStr)); + } + } + if (KernelOrKext || isNoCommonDefault(getToolChain().getTriple())) { if (!Args.hasArg(options::OPT_fcommon)) CmdArgs.push_back("-fno-common"); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 0fbbf1a..e4be487 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1476,6 +1476,7 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, Args.hasArg(OPT_fencode_extended_block_signature); Opts.EmitAllDecls = Args.hasArg(OPT_femit_all_decls); Opts.PackStruct = getLastArgIntValue(Args, OPT_fpack_struct_EQ, 0, Diags); + Opts.MaxTypeAlign = getLastArgIntValue(Args, OPT_fmax_type_align_EQ, 0, Diags); Opts.PICLevel = getLastArgIntValue(Args, OPT_pic_level, 0, Diags); Opts.PIELevel = getLastArgIntValue(Args, OPT_pie_level, 0, Diags); Opts.Static = Args.hasArg(OPT_static_define); diff --git a/clang/test/CodeGenCXX/align-avx-complete-objects.cpp b/clang/test/CodeGenCXX/align-avx-complete-objects.cpp new file mode 100644 index 0000000..25f4ef1 --- /dev/null +++ b/clang/test/CodeGenCXX/align-avx-complete-objects.cpp @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -x c++ %s -O0 -triple=x86_64-apple-darwin -target-feature +avx2 -fmax-type-align=16 -emit-llvm -o - -Werror | FileCheck %s +// rdar://16254558 + +typedef float AVX2Float __attribute__((__vector_size__(32))); + + +volatile float TestAlign(void) +{ + volatile AVX2Float *p = new AVX2Float; + *p = *p; + AVX2Float r = *p; + return r[0]; +} + +// CHECK: [[R:%.*]] = alloca <8 x float>, align 32 +// CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @_Znwm(i64 32) +// CHECK-NEXT: [[ZERO:%.*]] = bitcast i8* [[CALL]] to <8 x float>* +// CHECK-NEXT: store <8 x float>* [[ZERO]], <8 x float>** [[P:%.*]], align 8 +// CHECK-NEXT: [[ONE:%.*]] = load <8 x float>** [[P]], align 8 +// CHECK-NEXT: [[TWO:%.*]] = load volatile <8 x float>* [[ONE]], align 16 +// CHECK-NEXT: [[THREE:%.*]] = load <8 x float>** [[P]], align 8 +// CHECK-NEXT: store volatile <8 x float> [[TWO]], <8 x float>* [[THREE]], align 16 +// CHECK-NEXT: [[FOUR:%.*]] = load <8 x float>** [[P]], align 8 +// CHECK-NEXT: [[FIVE:%.*]] = load volatile <8 x float>* [[FOUR]], align 16 +// CHECK-NEXT: store <8 x float> [[FIVE]], <8 x float>* [[R]], align 32 +// CHECK-NEXT: [[SIX:%.*]] = load <8 x float>* [[R]], align 32 +// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[SIX]], i32 0 +// CHECK-NEXT: ret float [[VECEXT]] + +typedef float AVX2Float_Explicitly_aligned __attribute__((__vector_size__(32))) __attribute__((aligned (32))); + +typedef AVX2Float_Explicitly_aligned AVX2Float_indirect; + +typedef AVX2Float_indirect AVX2Float_use_existing_align; + +volatile float TestAlign2(void) +{ + volatile AVX2Float_use_existing_align *p = new AVX2Float_use_existing_align; + *p = *p; + AVX2Float_use_existing_align r = *p; + return r[0]; +} + +// CHECK: [[R:%.*]] = alloca <8 x float>, align 32 +// CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @_Znwm(i64 32) +// CHECK-NEXT: [[ZERO:%.*]] = bitcast i8* [[CALL]] to <8 x float>* +// CHECK-NEXT: store <8 x float>* [[ZERO]], <8 x float>** [[P:%.*]], align 8 +// CHECK-NEXT: [[ONE:%.*]] = load <8 x float>** [[P]], align 8 +// CHECK-NEXT: [[TWO:%.*]] = load volatile <8 x float>* [[ONE]], align 32 +// CHECK-NEXT: [[THREE:%.*]] = load <8 x float>** [[P]], align 8 +// CHECK-NEXT: store volatile <8 x float> [[TWO]], <8 x float>* [[THREE]], align 32 +// CHECK-NEXT: [[FOUR:%.*]] = load <8 x float>** [[P]], align 8 +// CHECK-NEXT: [[FIVE:%.*]] = load volatile <8 x float>* [[FOUR]], align 32 +// CHECK-NEXT: store <8 x float> [[FIVE]], <8 x float>* [[R]], align 32 +// CHECK-NEXT: [[SIX:%.*]] = load <8 x float>* [[R]], align 32 +// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[SIX]], i32 0 +// CHECK-NEXT: ret float [[VECEXT]] diff --git a/clang/test/Driver/darwin-max-type-align.c b/clang/test/Driver/darwin-max-type-align.c new file mode 100644 index 0000000..6532f4a --- /dev/null +++ b/clang/test/Driver/darwin-max-type-align.c @@ -0,0 +1,15 @@ +// Check the -fmax-type-align=N flag +// rdar://16254558 +// +// RUN: %clang -no-canonical-prefixes -target x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \ +// RUN: FileCheck -check-prefix=TEST0 %s +// TEST0: -fmax-type-align=16 +// RUN: %clang -no-canonical-prefixes -fmax-type-align=32 -target x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \ +// RUN: FileCheck -check-prefix=TEST1 %s +// TEST1: -fmax-type-align=32 +// RUN: %clang -no-canonical-prefixes -fmax-type-align=32 -fno-max-type-align -target x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \ +// RUN: FileCheck -check-prefix=TEST2 %s +// TEST2-NOT: -fmax-type-align +// RUN: %clang -no-canonical-prefixes -fno-max-type-align -target x86_64-apple-macosx10.7.0 %s -o - -### 2>&1 | \ +// RUN: FileCheck -check-prefix=TEST3 %s +// TEST3-NOT: -fmax-type-align diff --git a/clang/test/Driver/rewrite-legacy-objc.m b/clang/test/Driver/rewrite-legacy-objc.m index 0c6404e..b0b78d0 100644 --- a/clang/test/Driver/rewrite-legacy-objc.m +++ b/clang/test/Driver/rewrite-legacy-objc.m @@ -3,11 +3,11 @@ // TEST0: clang{{.*}}" "-cc1" // TEST0: "-rewrite-objc" // FIXME: CHECK-NOT is broken somehow, it doesn't work here. Check adjacency instead. -// TEST0: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx-fragile" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fdiagnostics-show-option" +// TEST0: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx-fragile" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" // TEST0: rewrite-legacy-objc.m" // RUN: %clang -no-canonical-prefixes -target i386-apple-macosx10.9.0 -rewrite-legacy-objc %s -o - -### 2>&1 | \ // RUN: FileCheck -check-prefix=TEST1 %s // RUN: %clang -no-canonical-prefixes -target i386-apple-macosx10.6.0 -rewrite-legacy-objc %s -o - -### 2>&1 | \ // RUN: FileCheck -check-prefix=TEST2 %s -// TEST1: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx-fragile" "-fobjc-subscripting-legacy-runtime" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fdiagnostics-show-option" -// TEST2: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx-fragile" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fdiagnostics-show-option" +// TEST1: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx-fragile" "-fobjc-subscripting-legacy-runtime" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" +// TEST2: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx-fragile" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" diff --git a/clang/test/Driver/rewrite-objc.m b/clang/test/Driver/rewrite-objc.m index 95db582..ba5f835 100644 --- a/clang/test/Driver/rewrite-objc.m +++ b/clang/test/Driver/rewrite-objc.m @@ -3,4 +3,4 @@ // TEST0: clang{{.*}}" "-cc1" // TEST0: "-rewrite-objc" // FIXME: CHECK-NOT is broken somehow, it doesn't work here. Check adjacency instead. -// TEST0: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fdiagnostics-show-option" +// TEST0: "-fmessage-length" "0" "-stack-protector" "1" "-mstackrealign" "-fblocks" "-fobjc-runtime=macosx" "-fencode-extended-block-signature" "-fno-objc-infer-related-result-type" "-fobjc-exceptions" "-fexceptions" "-fmax-type-align=16" "-fdiagnostics-show-option" -- 2.7.4