The difference in most compilers between `-O3` and `-O2` is mostly down
to whether loops with statically determinable trip counts are fully
unrolled vs unrolled to a multiple of SIMD width.
This patch is effectively a revert of
commit
15f5db60a137 ("kbuild,arc: add
CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3 for ARC") without re-adding
ARCH_CFLAGS
Ever since
commit
cfdbc2e16e65 ("ARC: Build system: Makefiles, Kconfig, Linker
script")
ARC has been built with -O3, though the reason for doing so was not
specified in inline comments or the commit message. This commit does not
re-add -O3 to arch/arc/Makefile.
Folks looking to experiment with `-O3` (or any compiler flag for that
matter) may pass them along to the command line invocation of make:
$ make KCFLAGS=-O3
Code that looks to re-add an explicit Kconfig option for `-O3` should
provide:
1. A rigorous and reproducible performance profile of a reasonable
userspace workload that demonstrates a hot loop in the kernel that
would benefit from `-O3` over `-O2`.
2. Disassembly of said loop body before and after.
3. Provides stats on terms of increase in file size.
Link: https://lore.kernel.org/linux-kbuild/CA+55aFz2sNBbZyg-_i8_Ldr2e8o9dfvdSfHHuRzVtP2VMAUWPg@mail.gmail.com/
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
KBUILD_CFLAGS += -O2
-else ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3
-KBUILD_CFLAGS += -O3
else ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
KBUILD_CFLAGS += -Os
endif
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_EXPERT=y
CONFIG_PERF_EVENTS=y
# CONFIG_COMPAT_BRK is not set
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
CONFIG_BLK_DEV_RAM=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_KALLSYMS_ALL=y
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_KALLSYMS_ALL=y
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_KALLSYMS_ALL=y
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y
# CONFIG_UTS_NS is not set
# CONFIG_PID_NS is not set
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_PERF_EVENTS=y
# CONFIG_COMPAT_BRK is not set
CONFIG_KPROBES=y
CONFIG_INITRAMFS_ROOT_UID=2100
CONFIG_INITRAMFS_ROOT_GID=501
# CONFIG_RD_GZIP is not set
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_AIO is not set
CONFIG_EMBEDDED=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_BLK_DEV_INITRD=y
-CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE_O3=y
CONFIG_EMBEDDED=y
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
with the "-O2" compiler flag for best performance and most
helpful compile-time warnings.
-config CC_OPTIMIZE_FOR_PERFORMANCE_O3
- bool "Optimize more for performance (-O3)"
- depends on ARC
- help
- Choosing this option will pass "-O3" to your compiler to optimize
- the kernel yet more for performance.
-
config CC_OPTIMIZE_FOR_SIZE
bool "Optimize for size (-Os)"
help