From d080635bfca25e66edd5c5dd333bbd1f8eb70a77 Mon Sep 17 00:00:00 2001 From: cgyurgyik Date: Tue, 21 Jul 2020 11:37:36 -0400 Subject: [PATCH] [libc] Add strstr implementation. Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D83956 --- libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/src/string/CMakeLists.txt | 8 +++ libc/src/string/strstr.cpp | 29 ++++++++ libc/src/string/strstr.h | 18 +++++ libc/test/src/string/CMakeLists.txt | 10 +++ libc/test/src/string/strstr_test.cpp | 114 ++++++++++++++++++++++++++++++ 7 files changed, 181 insertions(+) create mode 100644 libc/src/string/strstr.cpp create mode 100644 libc/src/string/strstr.h create mode 100644 libc/test/src/string/strstr_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 5d607b8..74834d1 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -11,6 +11,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.strlen libc.src.string.memchr libc.src.string.strchr + libc.str.string.strstr ) set(TARGET_LIBM_ENTRYPOINTS diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index fc62fbb..f08f775 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -29,6 +29,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.string.strcmp libc.src.string.memchr libc.src.string.strchr + libc.src.string.strstr # sys/mman.h entrypoints libc.src.sys.mman.mmap diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt index c8e1e4d..98693cc 100644 --- a/libc/src/string/CMakeLists.txt +++ b/libc/src/string/CMakeLists.txt @@ -60,6 +60,14 @@ add_entrypoint_object( strchr.h ) +add_entrypoint_object( + strstr + SRCS + strstr.cpp + HDRS + strstr.h +) + # Helper to define a function with multiple implementations # - Computes flags to satisfy required/rejected features and arch, # - Declares an entry point, diff --git a/libc/src/string/strstr.cpp b/libc/src/string/strstr.cpp new file mode 100644 index 0000000..a598f66 --- /dev/null +++ b/libc/src/string/strstr.cpp @@ -0,0 +1,29 @@ +//===-- Implementation of strstr ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/strstr.h" + +#include "src/__support/common.h" +#include + +namespace __llvm_libc { + +// TODO: This is a simple brute force implementation. This can be +// improved upon using well known string matching algorithms. +char *LLVM_LIBC_ENTRYPOINT(strstr)(const char *haystack, const char *needle) { + for (size_t i = 0; haystack[i]; ++i) { + size_t j; + for (j = 0; haystack[i + j] && haystack[i + j] == needle[j]; ++j) + ; + if (!needle[j]) + return const_cast(haystack + i); + } + return nullptr; +} + +} // namespace __llvm_libc diff --git a/libc/src/string/strstr.h b/libc/src/string/strstr.h new file mode 100644 index 0000000..463ffb7 --- /dev/null +++ b/libc/src/string/strstr.h @@ -0,0 +1,18 @@ +//===-- Implementation header for strstr ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STRING_STRSTR_H +#define LLVM_LIBC_SRC_STRING_STRSTR_H + +namespace __llvm_libc { + +char *strstr(const char *haystack, const char *needle); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STRING_STRSTR_H diff --git a/libc/test/src/string/CMakeLists.txt b/libc/test/src/string/CMakeLists.txt index be48368..81ae08d 100644 --- a/libc/test/src/string/CMakeLists.txt +++ b/libc/test/src/string/CMakeLists.txt @@ -62,6 +62,16 @@ add_libc_unittest( libc.src.string.strchr ) +add_libc_unittest( + strstr_test + SUITE + libc_string_unittests + SRCS + strstr_test.cpp + DEPENDS + libc.src.string.strstr +) + # Tests all implementations that can run on the host. function(add_libc_multi_impl_test name) get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations) diff --git a/libc/test/src/string/strstr_test.cpp b/libc/test/src/string/strstr_test.cpp new file mode 100644 index 0000000..f4edbb2 --- /dev/null +++ b/libc/test/src/string/strstr_test.cpp @@ -0,0 +1,114 @@ +//===-- Unittests for strstr ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/string/strstr.h" +#include "utils/UnitTest/Test.h" + +TEST(StrStrTest, NeedleNotInHaystack) { + const char *haystack = "12345"; + const char *needle = "a"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), nullptr); +} + +TEST(StrStrTest, NeedleIsEmptyString) { + const char *haystack = "12345"; + const char *needle = ""; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), haystack); +} + +TEST(StrStrTest, HaystackIsEmptyString) { + const char *haystack = ""; + const char *needle = "12345"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), nullptr); +} + +TEST(StrStrTest, HaystackAndNeedleAreEmptyStrings) { + const char *haystack = ""; + const char *needle = ""; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), ""); +} + +TEST(StrStrTest, HaystackAndNeedleAreSingleCharacters) { + const char *haystack = "a"; + // Same characer returns that character. + ASSERT_STREQ(__llvm_libc::strstr(haystack, /*needle=*/"a"), "a"); + // Different character returns nullptr. + ASSERT_STREQ(__llvm_libc::strstr(haystack, /*needle=*/"b"), nullptr); +} + +TEST(StrStrTest, NeedleEqualToHaystack) { + const char *haystack = "12345"; + const char *needle = "12345"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "12345"); +} + +TEST(StrStrTest, NeedleSmallerThanHaystack) { + const char *haystack = "12345"; + const char *needle = "345"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "345"); +} + +TEST(StrStrTest, NeedleLargerThanHaystack) { + const char *haystack = "123"; + const char *needle = "12345"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), nullptr); +} + +TEST(StrStrTest, NeedleAtBeginning) { + const char *haystack = "12345"; + const char *needle = "12"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "12345"); +} + +TEST(StrStrTest, NeedleInMiddle) { + const char *haystack = "abcdefghi"; + const char *needle = "def"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "defghi"); +} + +TEST(StrStrTest, NeedleDirectlyBeforeNullTerminator) { + const char *haystack = "abcdefghi"; + const char *needle = "ghi"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "ghi"); +} + +TEST(StrStrTest, NeedlePastNullTerminator) { + const char haystack[5] = {'1', '2', '\0', '3', '4'}; + // Shouldn't find anything after the null terminator. + ASSERT_STREQ(__llvm_libc::strstr(haystack, /*needle=*/"3"), nullptr); + ASSERT_STREQ(__llvm_libc::strstr(haystack, /*needle=*/"4"), nullptr); +} + +TEST(StrStrTest, PartialNeedle) { + const char *haystack = "la_ap_lap"; + const char *needle = "lap"; + // Shouldn't find la or ap. + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "lap"); +} + +TEST(StrStrTest, MisspelledNeedle) { + const char *haystack = "atalloftwocities...wait, tale"; + const char *needle = "tale"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "tale"); +} + +TEST(StrStrTest, AnagramNeedle) { + const char *haystack = "dgo_ogd_god_odg_gdo_dog"; + const char *needle = "dog"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, needle), "dog"); +} + +TEST(StrStrTest, MorphedNeedle) { + // Changes a single letter in the needle to mismatch with the haystack. + const char *haystack = "once upon a time"; + ASSERT_STREQ(__llvm_libc::strstr(haystack, "time"), "time"); + ASSERT_STREQ(__llvm_libc::strstr(haystack, "lime"), nullptr); + ASSERT_STREQ(__llvm_libc::strstr(haystack, "tome"), nullptr); + ASSERT_STREQ(__llvm_libc::strstr(haystack, "tire"), nullptr); + ASSERT_STREQ(__llvm_libc::strstr(haystack, "timo"), nullptr); +} -- 2.7.4