static void
test_escaping_bytes_1 (const line_table_case &case_)
{
- const char content[] = "before\0\1\2\3\r\x80\xff""after\n";
+ const char content[] = "before\0\1\2\3\v\x80\xff""after\n";
const size_t sz = sizeof (content);
temp_source_file tmp (SELFTEST_LOCATION, ".c", content, sz);
line_table_test ltt (case_);
if (finish > LINE_MAP_MAX_LOCATION_WITH_COLS)
return;
- /* Locations of the NUL and \r bytes. */
+ /* Locations of the NUL and \v bytes. */
location_t nul_loc
= linemap_position_for_line_and_column (line_table, ord_map, 1, 7);
- location_t r_loc
+ location_t v_loc
= linemap_position_for_line_and_column (line_table, ord_map, 1, 11);
gcc_rich_location richloc (nul_loc);
- richloc.add_range (r_loc);
+ richloc.add_range (v_loc);
{
test_diagnostic_context dc;
diagnostic_show_locus (&dc, &richloc, DK_ERROR);
- ASSERT_STREQ (" before \1\2\3 \x80\xff""after\n"
+ ASSERT_STREQ (" before \1\2\3\v\x80\xff""after\n"
" ^ ~\n",
pp_formatted_text (dc.printer));
}
dc.escape_format = DIAGNOSTICS_ESCAPE_FORMAT_UNICODE;
diagnostic_show_locus (&dc, &richloc, DK_ERROR);
ASSERT_STREQ
- (" before<U+0000><U+0001><U+0002><U+0003><U+000D><80><ff>after\n"
+ (" before<U+0000><U+0001><U+0002><U+0003><U+000B><80><ff>after\n"
" ^~~~~~~~ ~~~~~~~~\n",
pp_formatted_text (dc.printer));
}
test_diagnostic_context dc;
dc.escape_format = DIAGNOSTICS_ESCAPE_FORMAT_BYTES;
diagnostic_show_locus (&dc, &richloc, DK_ERROR);
- ASSERT_STREQ (" before<00><01><02><03><0d><80><ff>after\n"
+ ASSERT_STREQ (" before<00><01><02><03><0b><80><ff>after\n"
" ^~~~ ~~~~\n",
pp_formatted_text (dc.printer));
}
return read_data ();
}
+/* Helper function for file_cache_slot::get_next_line (), to find the end of
+ the next line. Scans the LEN bytes starting at S. Returns with the memchr
+ convention, i.e. nullptr if a line terminator was not found. We need to
+ determine line endings in the same manner that libcpp does: any of \n,
+ \r\n, or \r is a line ending.
+
+ When a terminator is found, the result points at its last byte: the \n
+ for \n and for \r\n, or the \r itself when the \r is not followed by \n.
+ A \r that is the final byte of the scanned range is deliberately not
+ reported, so nullptr is returned in that case as well. */
+
+static char *
+find_end_of_line (char *s, size_t len)
+{
+ for (const auto end = s + len; s != end; ++s)
+ {
+ if (*s == '\n')
+ return s;
+ if (*s == '\r')
+ {
+ const auto next = s + 1;
+ if (next == end)
+ {
+ /* Don't find the line ending if \r is the very last character
+ in the buffer; we do not know if it's the end of the file or
+ just the end of what has been read so far, and we wouldn't
+ want to break in the middle of what's actually a \r\n
+ sequence. Instead, we will handle the case of a file ending
+ in a \r later. Breaking out of the loop falls through to the
+ nullptr return below. */
+ break;
+ }
+ return (*next == '\n' ? next : s);
+ }
+ }
+ return nullptr;
+}
+
/* Read a new line from file FP, using C as a cache for the data
coming from the file. Upon successful completion, *LINE is set to
the beginning of the line found. *LINE points directly in the
char *next_line_start = NULL;
size_t len = 0;
- char *line_end = (char *) memchr (line_start, '\n', remaining_size);
+ char *line_end = find_end_of_line (line_start, remaining_size);
if (line_end == NULL)
{
- /* We haven't found the end-of-line delimiter in the cache.
- Fill the cache with more data from the file and look for the
- '\n'. */
+ /* We haven't found an end-of-line delimiter in the cache.
+ Fill the cache with more data from the file and look again. */
while (maybe_read_data ())
{
line_start = m_data + m_line_start_idx;
remaining_size = m_nb_read - m_line_start_idx;
- line_end = (char *) memchr (line_start, '\n', remaining_size);
+ line_end = find_end_of_line (line_start, remaining_size);
if (line_end != NULL)
{
next_line_start = line_end + 1;
}
if (line_end == NULL)
{
- /* We've loadded all the file into the cache and still no
- '\n'. Let's say the line ends up at one byte passed the
+ /* We've loaded all the file into the cache and still no
+ terminator. Let's say the line ends up at one byte past the
end of the file. This is to stay consistent with the case
- of when the line ends up with a '\n' and line_end points to
- that terminal '\n'. That consistency is useful below in
- the len calculation. */
- line_end = m_data + m_nb_read ;
- m_missing_trailing_newline = true;
+ of when the line ends up with a terminator and line_end points to
+ that. That consistency is useful below in the len calculation.
+
+ If the file ends in a \r, we didn't identify it as a line
+ terminator above, so do that now instead. */
+ line_end = m_data + m_nb_read;
+ if (m_nb_read && line_end[-1] == '\r')
+ {
+ --line_end;
+ m_missing_trailing_newline = false;
+ }
+ else
+ m_missing_trailing_newline = true;
}
else
m_missing_trailing_newline = false;
if (m_fp && ferror (m_fp))
return false;
- /* At this point, we've found the end of the of line. It either
- points to the '\n' or to one byte after the last byte of the
- file. */
+ /* At this point, we've found the end of the line. It either points to
+ the line terminator or to one byte after the last byte of the file. */
gcc_assert (line_end != NULL);
len = line_end - line_start;