From ea9746f7fe85320d3fb55aa7f3f8422e048b19fb Mon Sep 17 00:00:00 2001 From: Evan Martin Date: Tue, 27 Dec 2011 15:45:30 -0800 Subject: [PATCH] de-escape backslashes in depfiles while parsing --- src/depfile_parser.cc | 193 ++++++++++++++++++++++++++------------------- src/depfile_parser.h | 5 +- src/depfile_parser.in.cc | 96 ++++++++++++++-------- src/depfile_parser_test.cc | 23 +++++- 4 files changed, 198 insertions(+), 119 deletions(-) diff --git a/src/depfile_parser.cc b/src/depfile_parser.cc index b547661..832ad65 100644 --- a/src/depfile_parser.cc +++ b/src/depfile_parser.cc @@ -24,34 +24,45 @@ // How do you end a line with a backslash? The netbsd Make docs suggest // reading the result of a shell command echoing a backslash! // -// Rather than implement the above, we do the simpler thing here. +// Rather than implement all of above, we do a simpler thing here: +// Backslashes escape a set of characters (see "escapes" defined below), +// otherwise they are passed through verbatim. // If anyone actually has depfiles that rely on the more complicated // behavior we can adjust this. bool DepfileParser::Parse(string* content, string* err) { - char* p = &(*content)[0]; - char* end = p + content->size(); - for (;;) { - const char* start = p; - char yych; - + // in: current parser input point. + // end: end of input. + char* in = &(*content)[0]; + char* end = in + content->size(); + while (in < end) { + // out: current output point (typically same as in, but can fall behind + // as we de-escape backslashes). + char* out = in; + // filename: start of the current parsed filename. + char* filename = out; + for (;;) { + // start: beginning of the current parsed span. + const char* start = in; + char yych; + { static const unsigned char yybm[] = { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 128, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 0, 0, 0, 0, 0, - 0, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 0, 64, 0, 0, 64, - 0, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 64, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 0, 0, 0, 0, + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 0, 0, 0, 128, + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -70,86 +81,108 @@ bool DepfileParser::Parse(string* content, string* err) { 0, 0, 0, 0, 0, 0, 0, 0, }; - if ((end - p) < 2) break; - yych = *p; - if (yych <= '@') { + yych = *in; + if (yych <= '[') { + if (yych <= ':') { + if (yych <= '*') goto yy6; + goto yy4; + } else { + if (yych <= '@') goto yy6; + if (yych <= 'Z') goto yy4; + goto yy6; + } + } else { + if (yych <= '_') { + if (yych <= '\\') goto yy2; + if (yych <= '^') goto yy6; + goto yy4; + } else { + if (yych <= '`') goto yy6; + if (yych <= 'z') goto yy4; + goto yy6; + } + } +yy2: + ++in; + if ((yych = *in) <= '$') { if (yych <= 0x1F) { - if (yych == '\n') goto yy4; - goto yy7; + if (yych != '\n') goto yy9; } else { - if (yych <= ' ') goto yy4; - if (yych <= '*') goto yy7; - if (yych <= ':') goto yy6; - goto yy7; + if (yych <= ' ') goto yy11; + if (yych <= '"') goto yy9; + goto yy11; } } else { - if (yych <= '^') { - if (yych <= 'Z') goto yy6; - if (yych != '\\') goto yy7; + if (yych <= 'Z') { + if (yych == '*') goto yy11; + goto yy9; } else { - if (yych == '`') goto yy7; - if (yych <= 'z') goto yy6; - goto yy7; + if (yych <= '\\') goto yy11; + if (yych == '|') goto yy11; + goto yy9; } } - ++p; - if ((yych = *p) == '\n') goto yy13; - goto yy10; yy3: { - // Got a filename. - int len = p - start; - if (start[len - 1] == ':') - len--; // Strip off trailing colon, if any. - - if (len == 0) - continue; // Drop isolated colons. - - if (!out_.str_) { - out_ = StringPiece(start, len); - } else { - ins_.push_back(StringPiece(start, len)); + // For any other character (e.g. whitespace), swallow it here, + // allowing the outer logic to loop around again. + break; } - continue; - } yy4: - ++p; - yych = *p; - goto yy12; + ++in; + yych = *in; + goto yy8; yy5: - { continue; } -yy6: - yych = *++p; - goto yy10; -yy7: - ++p; { - *err = "BUG: depfile lexer encountered unknown state"; - return false; - } -yy9: - ++p; - if (end <= p) break; - yych = *p; -yy10: - if (yybm[0+yych] & 64) { - goto yy9; + // Got a span of plain text. Copy it to out if necessary. + int len = in - start; + if (out < start) + memmove(out, start, len); + out += len; + continue; } +yy6: + yych = *++in; goto yy3; -yy11: - ++p; - if (end <= p) break; - yych = *p; -yy12: +yy7: + ++in; + yych = *in; +yy8: if (yybm[0+yych] & 128) { - goto yy11; + goto yy7; } goto yy5; -yy13: - ++p; - { continue; } +yy9: + ++in; + { + // Let backslash before other characters through verbatim. + *out++ = '\\'; + *out++ = yych; + continue; + } +yy11: + ++in; + { + // De-escape backslashed character. + *out++ = yych; + continue; + } + } + } + int len = out - filename; + if (len > 0 && filename[len - 1] == ':') + len--; // Strip off trailing colon, if any. + + if (len == 0) + continue; + + if (!out_.str_) { + out_ = StringPiece(filename, len); + } else { + ins_.push_back(StringPiece(filename, len)); + } } return true; } diff --git a/src/depfile_parser.h b/src/depfile_parser.h index 08bf68a..c900956 100644 --- a/src/depfile_parser.h +++ b/src/depfile_parser.h @@ -20,8 +20,9 @@ using namespace std; /// Parser for the dependency information emitted by gcc's -M flags. struct DepfileParser { - /// Parse an input file. Warning: may mutate the content in-place - /// and parsed StringPieces are pointers within it. + /// Parse an input file. Input must be NUL-terminated. + /// Warning: may mutate the content in-place and parsed StringPieces are + /// pointers within it. bool Parse(string* content, string* err); StringPiece out_; diff --git a/src/depfile_parser.in.cc b/src/depfile_parser.in.cc index 7ac95c6..c469a2c 100644 --- a/src/depfile_parser.in.cc +++ b/src/depfile_parser.in.cc @@ -23,51 +23,79 @@ // How do you end a line with a backslash? The netbsd Make docs suggest // reading the result of a shell command echoing a backslash! // -// Rather than implement the above, we do the simpler thing here. +// Rather than implement all of above, we do a simpler thing here: +// Backslashes escape a set of characters (see "escapes" defined below), +// otherwise they are passed through verbatim. // If anyone actually has depfiles that rely on the more complicated // behavior we can adjust this. bool DepfileParser::Parse(string* content, string* err) { - char* p = &(*content)[0]; - char* end = p + content->size(); - for (;;) { - const char* start = p; - char yych; - /*!re2c - re2c:define:YYCTYPE = "char"; - re2c:define:YYCURSOR = p; - re2c:define:YYLIMIT = end; + // in: current parser input point. + // end: end of input. + char* in = &(*content)[0]; + char* end = in + content->size(); + while (in < end) { + // out: current output point (typically same as in, but can fall behind + // as we de-escape backslashes). + char* out = in; + // filename: start of the current parsed filename. + char* filename = out; + for (;;) { + // start: beginning of the current parsed span. + const char* start = in; + char yych; + /*!re2c + re2c:define:YYCTYPE = "char"; + re2c:define:YYCURSOR = in; + re2c:define:YYLIMIT = end; - re2c:yyfill:parameter = 0; - re2c:define:YYFILL = break; + re2c:yyfill:enable = 0; - re2c:indent:top = 2; - re2c:indent:string = " "; + re2c:indent:top = 2; + re2c:indent:string = " "; - re2c:yych:emit = 0; + re2c:yych:emit = 0; - '\\\n' { continue; } - [ \n]+ { continue; } - [a-zA-Z0-9+,/\\_:.-]+ { - // Got a filename. - int len = p - start; - if (start[len - 1] == ':') - len--; // Strip off trailing colon, if any. + escape = [ \\#*$[|]; - if (len == 0) - continue; // Drop isolated colons. - - if (!out_.str_) { - out_ = StringPiece(start, len); - } else { - ins_.push_back(StringPiece(start, len)); + '\\' escape { + // De-escape backslashed character. + *out++ = yych; + continue; } - continue; + '\\'. { + // Let backslash before other characters through verbatim. + *out++ = '\\'; + *out++ = yych; + continue; + } + [a-zA-Z0-9+,/_:.-]+ { + // Got a span of plain text. Copy it to out if necessary. + int len = in - start; + if (out < start) + memmove(out, start, len); + out += len; + continue; + } + [^] { + // For any other character (e.g. whitespace), swallow it here, + // allowing the outer logic to loop around again. + break; + } + */ } - [^] { - *err = "BUG: depfile lexer encountered unknown state"; - return false; + + int len = out - filename; + if (len > 0 && filename[len - 1] == ':') + len--; // Strip off trailing colon, if any. + + if (len == 0) + continue; + + if (!out_.str_) { + out_ = StringPiece(filename, len); + } else { + ins_.push_back(StringPiece(filename, len)); } - */ } return true; } diff --git a/src/depfile_parser_test.cc b/src/depfile_parser_test.cc index 3549f97..43e677c 100644 --- a/src/depfile_parser_test.cc +++ b/src/depfile_parser_test.cc @@ -73,15 +73,32 @@ TEST_F(DepfileParserTest, BackSlashes) { EXPECT_EQ(4u, parser_.ins_.size()); } -TEST_F(DepfileParserTest, DISABLED_Spaces) { +TEST_F(DepfileParserTest, Spaces) { string err; EXPECT_TRUE(Parse( -"foo\\ bar: a\\ b a b", +"a\\ bc\\ def: a\\ b c d", &err)); ASSERT_EQ("", err); - EXPECT_EQ("foo bar", + EXPECT_EQ("a bc def", parser_.out_.AsString()); ASSERT_EQ(3u, parser_.ins_.size()); EXPECT_EQ("a b", parser_.ins_[0].AsString()); + EXPECT_EQ("c", + parser_.ins_[1].AsString()); + EXPECT_EQ("d", + parser_.ins_[2].AsString()); +} + +TEST_F(DepfileParserTest, Escapes) { + // Put backslashes before a variety of characters, see which ones make + // it through. + string err; + EXPECT_TRUE(Parse( +"\\!\\@\\#\\$\\%\\^\\&\\\\", + &err)); + ASSERT_EQ("", err); + EXPECT_EQ("\\!\\@#$\\%\\^\\&\\", + parser_.out_.AsString()); + ASSERT_EQ(0u, parser_.ins_.size()); } -- 2.7.4