From b8f564124ed51964b704452e3ea05b2423446e1c Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Thu, 17 Nov 2016 15:55:26 +0000 Subject: [PATCH] Fix locations within raw strings Whilst investigating PR preprocessor/78324 I noticed that the substring location code currently doesn't handle raw strings correctly, by not skipping the 'R', opening quote, delimiter and opening parenthesis. For example, an attempt to underline chars 4-7 with caret at 6 of this raw string yields this erroneous output: __emit_string_literal_range (R"foo(0123456789)foo", ~~^~ With the patch, the correct range/caret is printed: __emit_string_literal_range (R"foo(0123456789)foo", ~~^~ gcc/ChangeLog: * input.c (selftest::test_lexer_string_locations_raw_string_one_line): New function. (selftest::test_lexer_string_locations_raw_string_multiline): New function. (selftest::input_c_tests): Call the new functions, via for_each_line_table_case. gcc/testsuite/ChangeLog: * gcc.dg/plugin/diagnostic-test-string-literals-1.c (test_raw_string_one_liner): New function. (test_raw_string_multiline): New function. libcpp/ChangeLog: * charset.c (cpp_interpret_string_1): Skip locations from loc_reader when advancing 'p' when handling raw strings. From-SVN: r242552 --- gcc/ChangeLog | 9 +++ gcc/input.c | 74 ++++++++++++++++++++++ gcc/testsuite/ChangeLog | 6 ++ .../plugin/diagnostic-test-string-literals-1.c | 33 ++++++++++ libcpp/ChangeLog | 5 ++ libcpp/charset.c | 13 +++- 6 files changed, 139 insertions(+), 1 deletion(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f69f911..1bb29c0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,12 @@ +2016-11-17 David Malcolm + + * input.c (selftest::test_lexer_string_locations_raw_string_one_line): New + function. + (selftest::test_lexer_string_locations_raw_string_multiline): New + function. + (selftest::input_c_tests): Call the new functions, via + for_each_line_table_case. 
+ 2016-11-17 Kyrylo Tkachov * config/aarch64/aarch64.md (mov): Call diff --git a/gcc/input.c b/gcc/input.c index c2042e8..728f4dd 100644 --- a/gcc/input.c +++ b/gcc/input.c @@ -3156,6 +3156,78 @@ test_lexer_string_locations_long_line (const line_table_case &case_) i, 2, 7 + i, 7 + i); } +/* Test of locations within a raw string that doesn't contain a newline. */ + +static void +test_lexer_string_locations_raw_string_one_line (const line_table_case &case_) +{ + /* .....................00.0000000111111111122. + .....................12.3456789012345678901. */ + const char *content = ("R\"foo(0123456789)foo\"\n"); + lexer_test test (case_, content, NULL); + + /* Verify that we get the expected token back. */ + const cpp_token *tok = test.get_token (); + ASSERT_EQ (tok->type, CPP_STRING); + + /* Verify that cpp_interpret_string works. */ + cpp_string dst_string; + const enum cpp_ttype type = CPP_STRING; + bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, + &dst_string, type); + ASSERT_TRUE (result); + ASSERT_STREQ ("0123456789", (const char *)dst_string.text); + free (const_cast <unsigned char *> (dst_string.text)); + + if (!should_have_column_data_p (line_table->highest_location)) + return; + + /* 0-9, plus the nil terminator. */ + ASSERT_NUM_SUBSTRING_RANGES (test, tok->src_loc, CPP_STRING, 11); + for (int i = 0; i < 11; i++) + ASSERT_CHAR_AT_RANGE (test, tok->src_loc, CPP_STRING, + i, 1, 7 + i, 7 + i); +} + +/* Test of locations within a raw string that contains a newline. */ + +static void +test_lexer_string_locations_raw_string_multiline (const line_table_case &case_) +{ + /* .....................00.0000. + .....................12.3456. */ + const char *content = ("R\"foo(\n" + /* .....................00000. + .....................12345. */ + "hello\n" + "world\n" + /* .....................00000. + .....................12345. */ + ")foo\"\n"); + lexer_test test (case_, content, NULL); + + /* Verify that we get the expected token back. 
*/ + const cpp_token *tok = test.get_token (); + ASSERT_EQ (tok->type, CPP_STRING); + + /* Verify that cpp_interpret_string works. */ + cpp_string dst_string; + const enum cpp_ttype type = CPP_STRING; + bool result = cpp_interpret_string (test.m_parser, &tok->val.str, 1, + &dst_string, type); + ASSERT_TRUE (result); + ASSERT_STREQ ("\nhello\nworld\n", (const char *)dst_string.text); + free (const_cast <unsigned char *> (dst_string.text)); + + if (!should_have_column_data_p (line_table->highest_location)) + return; + + /* Currently we don't support locations within raw strings that + contain newlines. */ + ASSERT_HAS_NO_SUBSTRING_RANGES (test, tok->src_loc, tok->type, + "range endpoints are on different lines"); +} + /* Test of lexing char constants. */ static void @@ -3297,6 +3369,8 @@ input_c_tests () for_each_line_table_case (test_lexer_string_locations_stringified_macro_argument); for_each_line_table_case (test_lexer_string_locations_non_string); for_each_line_table_case (test_lexer_string_locations_long_line); + for_each_line_table_case (test_lexer_string_locations_raw_string_one_line); + for_each_line_table_case (test_lexer_string_locations_raw_string_multiline); for_each_line_table_case (test_lexer_char_constants); test_reading_source_line (); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 77fa771..fb0bd45 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2016-11-17 David Malcolm + + * gcc.dg/plugin/diagnostic-test-string-literals-1.c + (test_raw_string_one_liner): New function. + (test_raw_string_multiline): New function. + 2016-11-17 Kyrylo Tkachov * gcc.target/aarch64/store_repeating_constant_1.c: New test. 
diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c index 3d5ff6d..3e44936 100644 --- a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c +++ b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c @@ -194,6 +194,39 @@ test_L (void) } void +test_raw_string_one_liner (void) +{ + /* Digits 0-9. */ + __emit_string_literal_range (R"foo(0123456789)foo", /* { dg-warning "range" } */ + 6, 4, 7); +/* { dg-begin-multiline-output "" } + __emit_string_literal_range (R"foo(0123456789)foo", + ~~^~ + { dg-end-multiline-output "" } */ +} + +void +test_raw_string_multiline (void) +{ + __emit_string_literal_range (R"foo( +hello +world +)foo", + 6, 4, 7); + /* { dg-error "unable to read substring location: range endpoints are on different lines" "" { target *-*-* } .-5 } */ + /* { dg-begin-multiline-output "" } + __emit_string_literal_range (R"foo( + ^~~~~~ + hello + ~~~~~ + world + ~~~~~ + )foo", + ~~~~~ + { dg-end-multiline-output "" } */ +} + +void test_macro (void) { #define START "01234" /* { dg-warning "range" } */ diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 7690b5d..6b92807 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,8 @@ +2016-11-17 David Malcolm + + * charset.c (cpp_interpret_string_1): Skip locations from + loc_reader when advancing 'p' when handling raw strings. + 2016-11-16 Jakub Jelinek PR bootstrap/72823 diff --git a/libcpp/charset.c b/libcpp/charset.c index e77270a..9cd1e10 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -1564,10 +1564,21 @@ cpp_interpret_string_1 (cpp_reader *pfile, const cpp_string *from, size_t count, /* Skip over 'R"'. 
*/ p += 2; + if (loc_reader) + { + loc_reader->get_next (); + loc_reader->get_next (); + } prefix = p; while (*p != '(') - p++; + { + p++; + if (loc_reader) + loc_reader->get_next (); + } p++; + if (loc_reader) + loc_reader->get_next (); limit = from[i].text + from[i].len; if (limit >= p + (p - prefix) + 1) limit -= (p - prefix) + 1; -- 2.7.4