From 5fd03c81767f27ef190ca08ca940cf32a62417dd Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Mon, 31 Oct 2022 16:55:53 +0000 Subject: [PATCH] [libc] Implement getopt Differential Revision: https://reviews.llvm.org/D133487 --- libc/config/linux/aarch64/entrypoints.txt | 5 + libc/config/linux/api.td | 2 +- libc/config/linux/x86_64/entrypoints.txt | 5 + libc/include/CMakeLists.txt | 1 + libc/include/llvm-libc-types/CMakeLists.txt | 1 + libc/include/llvm-libc-types/__getoptargv_t.h | 14 ++ libc/spec/posix.td | 24 ++++ libc/src/unistd/CMakeLists.txt | 34 +++++ libc/src/unistd/getopt.cpp | 200 ++++++++++++++++++++++++++ libc/src/unistd/getopt.h | 25 ++++ libc/test/src/unistd/CMakeLists.txt | 13 ++ libc/test/src/unistd/getopt_test.cpp | 169 ++++++++++++++++++++++ 12 files changed, 492 insertions(+), 1 deletion(-) create mode 100644 libc/include/llvm-libc-types/__getoptargv_t.h create mode 100644 libc/src/unistd/getopt.cpp create mode 100644 libc/src/unistd/getopt.h create mode 100644 libc/test/src/unistd/getopt_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 51da5f3..5b2e609 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -432,6 +432,11 @@ if(LLVM_LIBC_FULL_BUILD) # unistd.h entrypoints libc.src.unistd.environ libc.src.unistd.execv + libc.src.unistd.getopt + libc.src.unistd.optarg + libc.src.unistd.optind + libc.src.unistd.optopt + libc.src.unistd.opterr # sys/select.h entrypoints libc.src.sys.select.select diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index 6b69e3f..25e19e8 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -268,7 +268,7 @@ def DirentAPI : PublicAPI<"dirent.h"> { } def UniStdAPI : PublicAPI<"unistd.h"> { - let Types = ["__exec_argv_t", "__exec_envp_t", "off_t", "pid_t", "size_t", "ssize_t", "uid_t"]; + let Types = ["__exec_argv_t", "__exec_envp_t", "off_t", "pid_t", "size_t", "ssize_t", 
"uid_t", "__getoptargv_t"]; } def SysRandomAPI : PublicAPI<"sys/random.h"> { diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 40c6c33..56e38f0 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -457,6 +457,11 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.unistd.execv libc.src.unistd.fork libc.src.unistd.__llvm_libc_syscall + libc.src.unistd.getopt + libc.src.unistd.optarg + libc.src.unistd.optind + libc.src.unistd.optopt + libc.src.unistd.opterr # sys/select.h entrypoints libc.src.sys.select.select diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index cb0e885..3e63695 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -185,6 +185,7 @@ add_gen_header( .llvm-libc-types.size_t .llvm-libc-types.ssize_t .llvm-libc-types.uid_t + .llvm-libc-types.__getoptargv_t ) add_gen_header( diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt index 1c667d5..e4e9567 100644 --- a/libc/include/llvm-libc-types/CMakeLists.txt +++ b/libc/include/llvm-libc-types/CMakeLists.txt @@ -81,3 +81,4 @@ add_header(__atexithandler_t HDR __atexithandler_t.h) add_header(speed_t HDR speed_t.h) add_header(tcflag_t HDR tcflag_t.h) add_header(struct_termios HDR struct_termios.h DEPENDS .cc_t .speed_t .tcflag_t) +add_header(__getoptargv_t HDR __getoptargv_t.h) diff --git a/libc/include/llvm-libc-types/__getoptargv_t.h b/libc/include/llvm-libc-types/__getoptargv_t.h new file mode 100644 index 0000000..81c6728 --- /dev/null +++ b/libc/include/llvm-libc-types/__getoptargv_t.h @@ -0,0 +1,14 @@ +//===-- Definition of type __getoptargv_t ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __LLVM_LIBC_TYPES_GETOPTARGV_T_H__ +#define __LLVM_LIBC_TYPES_GETOPTARGV_T_H__ + +typedef char *const __getoptargv_t[]; + +#endif // __LLVM_LIBC_TYPES_GETOPTARGV_T_H__ diff --git a/libc/spec/posix.td b/libc/spec/posix.td index e54b488..43b3319 100644 --- a/libc/spec/posix.td +++ b/libc/spec/posix.td @@ -74,6 +74,8 @@ def FdSet : NamedType<"fd_set">; def FdSetPtr : PtrType; def RestrictedFdSetPtr : RestrictedPtrType; +def GetoptArgvT : NamedType<"__getoptargv_t">; + def POSIX : StandardSpec<"POSIX"> { PtrType CharPtr = PtrType; RestrictedPtrType RestrictedCharPtr = RestrictedPtrType; @@ -348,6 +350,7 @@ def POSIX : StandardSpec<"POSIX"> { SizeTType, PidT, UidT, + GetoptArgvT, ], [], // Enumerations [ @@ -581,9 +584,30 @@ def POSIX : StandardSpec<"POSIX"> { RetValSpec, [ArgSpec, ArgSpec, ArgSpec] >, + FunctionSpec< + "getopt", + RetValSpec, + [ArgSpec, ArgSpec, ArgSpec] + >, ], [ ObjectSpec<"environ", "char **">, + ObjectSpec< + "optarg", + "char *" + >, + ObjectSpec< + "optind", + "int" + >, + ObjectSpec< + "opterr", + "int" + >, + ObjectSpec< + "optopt", + "int" + >, ] >; diff --git a/libc/src/unistd/CMakeLists.txt b/libc/src/unistd/CMakeLists.txt index cd4002e..ab05175 100644 --- a/libc/src/unistd/CMakeLists.txt +++ b/libc/src/unistd/CMakeLists.txt @@ -254,3 +254,37 @@ add_entrypoint_object( HDRS environ.h ) + +add_entrypoint_object( + getopt + SRCS + getopt.cpp + HDRS + getopt.h + DEPENDS + libc.include.unistd + libc.src.__support.CPP.optional + libc.src.__support.CPP.string_view + libc.src.__support.File.file + libc.src.stdio.fprintf +) + +# These aren't actually external per se, but this is just an easy way to create +# targets that don't do anything. They exist to be referenced in entrypoints.txt +# so that the header will properly expose their declarations. Their definitions +# are in getopt.cpp. 
+add_entrypoint_external( + optarg +) + +add_entrypoint_external( + optind +) + +add_entrypoint_external( + optopt +) + +add_entrypoint_external( + opterr +) diff --git a/libc/src/unistd/getopt.cpp b/libc/src/unistd/getopt.cpp new file mode 100644 index 0000000..9d3b534 --- /dev/null +++ b/libc/src/unistd/getopt.cpp @@ -0,0 +1,200 @@ +//===-- Implementation of getopt ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/unistd/getopt.h" +#include "src/__support/CPP/optional.h" +#include "src/__support/CPP/string_view.h" +#include "src/__support/File/file.h" +#include "src/__support/common.h" +#include "src/stdio/fprintf.h" + +#include + +// This is POSIX compliant and does not support GNU extensions, mainly this is +// just the re-ordering of argv elements such that unknown arguments can be +// easily iterated over. + +namespace __llvm_libc { + +template struct RefWrapper { + RefWrapper(T *ptr) : ptr(ptr) {} + RefWrapper &operator=(const RefWrapper &) = default; + operator T &() { return *ptr; } + T &get() { return *ptr; } + T *ptr; +}; + +struct GetoptContext { + RefWrapper optarg; + RefWrapper optind; + RefWrapper optopt; + RefWrapper optpos; + + int opterr; + + FILE *errstream; + + GetoptContext &operator=(const GetoptContext &) = default; + + template void report_error(const char *fmt, Ts... 
ts) { + if (opterr) + __llvm_libc::fprintf(errstream, fmt, ts...); + } +}; + +struct OptstringParser { + using value_type = struct { + char c; + bool arg; + }; + + cpp::string_view optstring; + + struct iterator { + cpp::string_view curr; + + iterator operator++() { + curr = curr.substr(1); + return *this; + } + + bool operator!=(iterator other) { return curr.data() != other.curr.data(); } + + value_type operator*() { + value_type r{curr.front(), false}; + if (!curr.substr(1).empty() && curr.substr(1).front() == ':') { + this->operator++(); + r.arg = true; + } + return r; + } + }; + + iterator begin() { + bool skip = optstring.front() == '-' || optstring.front() == '+' || + optstring.front() == ':'; + return {optstring.substr(!!skip)}; + } + + iterator end() { return {optstring.substr(optstring.size())}; } +}; + +int getopt_r(int argc, char *const argv[], const char *optstring, + GetoptContext &ctx) { + auto failure = [&ctx](int ret = -1) { + ctx.optpos.get() = 0; + return ret; + }; + + if (ctx.optind >= argc || !argv[ctx.optind]) + return failure(); + + cpp::string_view current = + cpp::string_view{argv[ctx.optind]}.substr(ctx.optpos); + + auto move_forward = [¤t, &ctx] { + current = current.substr(1); + ctx.optpos.get()++; + }; + + // If optpos is nonzero, then we are already parsing a valid flag and these + // need not be checked. + if (ctx.optpos == 0) { + if (current[0] != '-') + return failure(); + + if (current == "--") { + ctx.optind.get()++; + return failure(); + } + + // Eat the '-' char. + move_forward(); + if (current.empty()) + return failure(); + } + + auto find_match = + [current, optstring]() -> cpp::optional { + for (auto i : OptstringParser{optstring}) + if (i.c == current[0]) + return i; + return {}; + }; + + auto match = find_match(); + if (!match) { + ctx.report_error("%s: illegal option -- %c\n", argv[0], current[0]); + ctx.optopt.get() = current[0]; + return failure('?'); + } + + // We've matched so eat that character. 
+ move_forward(); + if (match->arg) { + // If we found an option that takes an argument and our current is not over, + // the rest of current is that argument. I.e., "-cabc" with optstring "c:", + // then optarg should point to "abc". Otherwise the argument to c will be in + // the next arg like "-c abc". + if (!current.empty()) { + // This const cast is fine because current was already holding a mutable + // string, it just doesn't have the semantics to note that, we could use + // span but it doesn't have string_view string niceties. + ctx.optarg.get() = const_cast(current.data()); + } else { + // One char lookahead to see if we ran out of arguments. If so, return ':' + // if the first character of optstring is ':'. optind must stay at the + // current value so only increase it after we know there is another arg. + if (ctx.optind + 1 >= argc || !argv[ctx.optind + 1]) { + ctx.report_error("%s: option requires an argument -- %c\n", argv[0], + match->c); + return failure(optstring[0] == ':' ? ':' : '?'); + } + ctx.optarg.get() = argv[++ctx.optind]; + } + ctx.optind++; + ctx.optpos.get() = 0; + } else if (current.empty()) { + // If this argument is now empty we are safe to move onto the next one. + ctx.optind++; + ctx.optpos.get() = 0; + } + + return match->c; +} + +namespace impl { + +extern "C" char *optarg = nullptr; +extern "C" int optind = 1; +extern "C" int optopt = 0; +extern "C" int opterr = 0; + +static unsigned optpos; + +static GetoptContext ctx{ + &impl::optarg, &impl::optind, + &impl::optopt, &optpos, + impl::opterr, reinterpret_cast(__llvm_libc::stderr)}; + +#ifndef LLVM_LIBC_PUBLIC_PACKAGING +// This is used exclusively in tests. 
+void set_getopt_state(char **optarg, int *optind, int *optopt, unsigned *optpos, + int opterr, FILE *errstream) { + ctx = {optarg, optind, optopt, optpos, opterr, errstream}; +} +#endif + +} // namespace impl + +LLVM_LIBC_FUNCTION(int, getopt, + (int argc, char *const argv[], const char *optstring)) { + return getopt_r(argc, argv, optstring, impl::ctx); +} + +} // namespace __llvm_libc diff --git a/libc/src/unistd/getopt.h b/libc/src/unistd/getopt.h new file mode 100644 index 0000000..bf5f8d0 --- /dev/null +++ b/libc/src/unistd/getopt.h @@ -0,0 +1,25 @@ +//===-- Implementation header for getopt ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_UNISTD_GETOPT_H +#define LLVM_LIBC_SRC_UNISTD_GETOPT_H + +#include +#include + +namespace __llvm_libc { + +namespace impl { +void set_getopt_state(char **, int *, int *, unsigned *, int, FILE *); +} + +int getopt(int argc, char *const argv[], const char *optstring); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_UNISTD_GETOPT_H diff --git a/libc/test/src/unistd/CMakeLists.txt b/libc/test/src/unistd/CMakeLists.txt index 85b6f5c..b278c35 100644 --- a/libc/test/src/unistd/CMakeLists.txt +++ b/libc/test/src/unistd/CMakeLists.txt @@ -402,3 +402,16 @@ add_libc_unittest( libc.include.unistd libc.src.unistd.sysconf ) + +add_libc_unittest( + getopt_test + SUITE + libc_unistd_unittests + SRCS + getopt_test.cpp + DEPENDS + libc.src.unistd.getopt + libc.src.__support.CPP.array + libc.src.stdio.fopencookie + libc.src.stdio.fflush +) diff --git a/libc/test/src/unistd/getopt_test.cpp b/libc/test/src/unistd/getopt_test.cpp new file mode 100644 index 0000000..d043e58 --- /dev/null +++ b/libc/test/src/unistd/getopt_test.cpp @@ 
-0,0 +1,169 @@ +//===-- Unittests for getopt ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/unistd/getopt.h" +#include "utils/UnitTest/Test.h" + +#include "src/__support/CPP/array.h" +#include "src/stdio/fflush.h" +#include "src/stdio/fopencookie.h" + +#include + +using __llvm_libc::cpp::array; + +namespace test_globals { +char *optarg; +int optind = 1; +int optopt; +int opterr = 1; + +unsigned optpos; +} // namespace test_globals + +// This can't be a constructor because it will get run before the constructor +// which sets the default state in getopt. +void set_state(FILE *errstream) { + __llvm_libc::impl::set_getopt_state( + &test_globals::optarg, &test_globals::optind, &test_globals::optopt, + &test_globals::optpos, test_globals::opterr, errstream); +} + +// TODO: <stdio.h> could be either llvm-libc's or the system libc's. The former +// doesn't currently support fmemopen but does have fopencookie. In the future +// just use that instead. This memopen does no error checking for the size +// of the buffer, etc. 
+FILE *memopen(char **pos) { + static auto memcpy = [](char *dest, const char *src, size_t size) { + for (size_t i = 0; i < size; i++) + dest[i] = src[i]; + }; + + static auto *write = + +[](void *cookie, const char *buf, size_t size) -> ssize_t { + char **pos = static_cast(cookie); + memcpy(*pos, buf, size); + *pos += size; + return size; + }; + + static cookie_io_functions_t cookie{nullptr, write, nullptr, nullptr}; + return __llvm_libc::fopencookie(pos, "w", cookie); +} + +struct LlvmLibcGetoptTest : public __llvm_libc::testing::Test { + FILE *errstream; + char buf[256]; + char *pos = buf; + + void reset_errstream() { pos = buf; } + const char *get_error_msg() { + __llvm_libc::fflush(errstream); + return buf; + } + + void SetUp() override { + ASSERT_TRUE(!!(errstream = memopen(&pos))); + set_state(errstream); + ASSERT_EQ(test_globals::optind, 1); + } + + void TearDown() override { + test_globals::optind = 1; + test_globals::opterr = 1; + } +}; + +// This is safe because getopt doesn't currently permute argv like GNU's getopt +// does so this just helps silence warnings. 
+char *operator"" _c(const char *c, size_t) { return const_cast(c); } + +TEST_F(LlvmLibcGetoptTest, NoMatch) { + array argv{"prog"_c, "arg1"_c, nullptr}; + + // optind >= argc + EXPECT_EQ(__llvm_libc::getopt(1, argv.data(), "..."), -1); + + // argv[optind] == nullptr + test_globals::optind = 2; + EXPECT_EQ(__llvm_libc::getopt(100, argv.data(), "..."), -1); + + // argv[optind][0] != '-' + test_globals::optind = 1; + EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "a"), -1); + ASSERT_EQ(test_globals::optind, 1); + + // argv[optind] == "-" + argv[1] = "-"_c; + EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "a"), -1); + ASSERT_EQ(test_globals::optind, 1); + + // argv[optind] == "--", then return -1 and increment optind + argv[1] = "--"_c; + EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "a"), -1); + EXPECT_EQ(test_globals::optind, 2); +} + +TEST_F(LlvmLibcGetoptTest, WrongMatch) { + array argv{"prog"_c, "-b"_c, nullptr}; + + EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "a"), int('?')); + EXPECT_EQ(test_globals::optopt, (int)'b'); + EXPECT_EQ(test_globals::optind, 1); + EXPECT_STREQ(get_error_msg(), "prog: illegal option -- b\n"); +} + +TEST_F(LlvmLibcGetoptTest, OpterrFalse) { + array argv{"prog"_c, "-b"_c, nullptr}; + + test_globals::opterr = 0; + set_state(errstream); + EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "a"), int('?')); + EXPECT_EQ(test_globals::optopt, (int)'b'); + EXPECT_EQ(test_globals::optind, 1); + EXPECT_STREQ(get_error_msg(), ""); +} + +TEST_F(LlvmLibcGetoptTest, MissingArg) { + array argv{"prog"_c, "-b"_c, nullptr}; + + EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), ":b:"), (int)':'); + ASSERT_EQ(test_globals::optind, 1); + EXPECT_STREQ(get_error_msg(), "prog: option requires an argument -- b\n"); + reset_errstream(); + EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "b:"), int('?')); + EXPECT_EQ(test_globals::optind, 1); + EXPECT_STREQ(get_error_msg(), "prog: option requires an argument -- b\n"); +} + +TEST_F(LlvmLibcGetoptTest, ParseArgInCurrent) { 
+ array argv{"prog"_c, "-barg"_c, nullptr}; + + EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "b:"), (int)'b'); + EXPECT_STREQ(test_globals::optarg, "arg"); + EXPECT_EQ(test_globals::optind, 2); +} + +TEST_F(LlvmLibcGetoptTest, ParseArgInNext) { + array argv{"prog"_c, "-b"_c, "arg"_c, nullptr}; + + EXPECT_EQ(__llvm_libc::getopt(3, argv.data(), "b:"), (int)'b'); + EXPECT_STREQ(test_globals::optarg, "arg"); + EXPECT_EQ(test_globals::optind, 3); +} + +TEST_F(LlvmLibcGetoptTest, ParseMutliInOne) { + array argv{"prog"_c, "-abc"_c, nullptr}; + + EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "abc"), (int)'a'); + ASSERT_EQ(test_globals::optind, 1); + EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "abc"), (int)'b'); + ASSERT_EQ(test_globals::optind, 1); + EXPECT_EQ(__llvm_libc::getopt(2, argv.data(), "abc"), (int)'c'); + EXPECT_EQ(test_globals::optind, 2); +} -- 2.7.4