From 470a60b2c4fa561455faece02379c6b34872c58a Mon Sep 17 00:00:00 2001 From: David Malcolm Date: Mon, 12 Dec 2016 17:37:48 +0000 Subject: [PATCH] re PR preprocessor/78680 (ICE in get_substring_ranges_for_loc, at input.c:1398) Fix for PR preprocessor/78680 PR preprocessor/78680 identifies a crash when attempting to issue a -Wformat warning, where the format string includes a string token split across multiple physical source lines via backslash-continued lines. The issue is that libcpp is generating bogus range information for such tokens. For example, in: void fn1() { __builtin_printf("\ %ld.\n\ 2\n"); }; the range of the string token is printed as: __builtin_printf("\ ^~ whereas the range ought to be: __builtin_printf("\ ^~ %ld.\n\ ~~~~~~~ 2\n"); }; ~~~~ The root cause is that the line notes expressing the update of the buffer in lex.c aren't yet processed when the end-point of the token is computed: 3095 tok_range.m_finish 3096 = linemap_position_for_column (pfile->line_table, 3097 CPP_BUF_COLUMN (buffer, buffer->cur)); so that the physical line is still regarded as that of the start of the token, and, where CPP_BUF_COLUMN uses (BUF)->line_base, line_base is still the location of the first physical line in the token, and hence the column information is too large (as if it were the offset in the *logical* line). (the printed range is somewhat misleading; the actual buggy range extends beyond the "\ in the line, but within diagnostic-show-locus.c layout::print_annotation_line only prints up to the xbound set by layout::print_source_line and so truncates most of the buggy range). The fix is to ensure that line notes are handled before calculating the end-point of the token range. This leads to the range for the string token being correctly computed, as: __builtin_printf("\ ^~ %ld.\n\ ~~~~~~~ 2\n"); }; ~~~~ and this leads to get_substring_ranges_for_loc failing gracefully, rather than crashing. 
gcc/testsuite/ChangeLog: PR preprocessor/78680 * gcc.dg/format/pr78680.c: New test case. * gcc.dg/plugin/diagnostic-test-expressions-1.c (test_multiline_token): New function. * gcc.dg/plugin/diagnostic-test-string-literals-1.c (test_backslash_continued_logical_lines): New function. libcpp/ChangeLog: PR preprocessor/78680 * lex.c (_cpp_lex_direct): Ensure line notes are processed before computing the end-point of the token. From-SVN: r243567 --- gcc/testsuite/ChangeLog | 9 +++++++++ gcc/testsuite/gcc.dg/format/pr78680.c | 16 ++++++++++++++++ .../gcc.dg/plugin/diagnostic-test-expressions-1.c | 19 +++++++++++++++++++ .../plugin/diagnostic-test-string-literals-1.c | 20 ++++++++++++++++++++ libcpp/ChangeLog | 6 ++++++ libcpp/lex.c | 7 +++++++ 6 files changed, 77 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/format/pr78680.c diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 982a203..f1f5184 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2016-12-12 David Malcolm + + PR preprocessor/78680 + * gcc.dg/format/pr78680.c: New test case. + * gcc.dg/plugin/diagnostic-test-expressions-1.c + (test_multiline_token): New function. + * gcc.dg/plugin/diagnostic-test-string-literals-1.c + (test_backslash_continued_logical_lines): New function. 
+ 2016-12-12 Nathan Sidwell PR c++/78252 diff --git a/gcc/testsuite/gcc.dg/format/pr78680.c b/gcc/testsuite/gcc.dg/format/pr78680.c new file mode 100644 index 0000000..0c599f3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/format/pr78680.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -Wall -Wextra -fdiagnostics-show-caret" } */ + +void fn1() { + __builtin_printf("\ + %ld.\n\ + 2\n"); }; +/* { dg-warning "expects a matching" "" { target *-*-* } .-3 } */ +/* { dg-begin-multiline-output "" } + __builtin_printf("\ + ^~ + %ld.\n\ + ~~~~~~~ + 2\n"); }; + ~~~~ + { dg-end-multiline-output "" } */ diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-expressions-1.c b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-expressions-1.c index 9372936..afbe0f7 100644 --- a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-expressions-1.c +++ b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-expressions-1.c @@ -689,3 +689,22 @@ void test_multiple_ordinary_maps (void) ~~ { dg-end-multiline-output "" } */ } + +/* Verify that we correctly handle a token that spans multiple + physical lines. */ + +const char *test_multiline_token (void) +{ + __emit_expression_range (0, "foo\ +bar\ +baz"); +/* { dg-warning "range" "" { target *-*-* } .-3 } */ +/* { dg-begin-multiline-output "" } + __emit_expression_range (0, "foo\ + ^~~~~ + bar\ + ~~~~ + baz"); + ~~~~ + { dg-end-multiline-output "" } */ +} diff --git a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c index 76a085e..03f042a 100644 --- a/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c +++ b/gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c @@ -272,3 +272,23 @@ test_terminator_location (void) ^ { dg-end-multiline-output "" } */ } + +/* Verify that we fail gracefully when a string literal token is split + across multiple physical lines. 
*/ + +void +test_backslash_continued_logical_lines (void) +{ + __emit_string_literal_range ("\ +01234\ +56789", 6, 6, 7); + /* { dg-error "unable to read substring location: range endpoints are on different lines" "" { target *-*-* } .-3 } */ + /* { dg-begin-multiline-output "" } + __emit_string_literal_range ("\ + ^~ + 01234\ + ~~~~~~ + 56789", 6, 6, 7); + ~~~~~~ + { dg-end-multiline-output "" } */ +} diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index 1a7e737..6368122 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,9 @@ +2016-12-12 David Malcolm + + PR preprocessor/78680 + * lex.c (_cpp_lex_direct): Ensure line notes are processed before + computing the end-point of the token. + 2016-11-23 Paolo Bonzini * include/cpplib.h (struct cpp_options): Add new member diff --git a/libcpp/lex.c b/libcpp/lex.c index cea8848..ae45892 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -3089,6 +3089,13 @@ _cpp_lex_direct (cpp_reader *pfile) break; } + /* Ensure that any line notes are processed, so that we have the + correct physical line/column for the end-point of the token even + when a logical line is split via one or more backslashes. */ + if (buffer->cur >= buffer->notes[buffer->cur_note].pos + && !pfile->overlaid_buffer) + _cpp_process_line_notes (pfile, false); + source_range tok_range; tok_range.m_start = result->src_loc; if (result->src_loc >= RESERVED_LOCATION_COUNT) -- 2.7.4