From 47ba90bd530cfd269f40e61e5d41e87298812ddb Mon Sep 17 00:00:00 2001 From: Evan Martin Date: Thu, 29 Dec 2011 12:44:15 -0800 Subject: [PATCH] handle nuls more carefully in re2c rules - '.' in re2c matches anything except \n, which means it matches \000. Be more careful about which characters we match. - The fallback rule [^] reads ahead another character, which means it can read past the trailing \000. Add a separate rule to match it specifically. This was found by Valgrind. --- src/depfile_parser.cc | 51 +++++++++++++++++++++++++++--------------------- src/depfile_parser.in.cc | 6 +++++- 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/src/depfile_parser.cc b/src/depfile_parser.cc index 832ad65..4e842f8 100644 --- a/src/depfile_parser.cc +++ b/src/depfile_parser.cc @@ -84,42 +84,44 @@ bool DepfileParser::Parse(string* content, string* err) { yych = *in; if (yych <= '[') { if (yych <= ':') { - if (yych <= '*') goto yy6; + if (yych <= 0x00) goto yy6; + if (yych <= '*') goto yy8; goto yy4; } else { - if (yych <= '@') goto yy6; + if (yych <= '@') goto yy8; if (yych <= 'Z') goto yy4; - goto yy6; + goto yy8; } } else { if (yych <= '_') { if (yych <= '\\') goto yy2; - if (yych <= '^') goto yy6; + if (yych <= '^') goto yy8; goto yy4; } else { - if (yych <= '`') goto yy6; + if (yych <= '`') goto yy8; if (yych <= 'z') goto yy4; - goto yy6; + goto yy8; } } yy2: ++in; if ((yych = *in) <= '$') { - if (yych <= 0x1F) { - if (yych != '\n') goto yy9; + if (yych <= '\n') { + if (yych <= 0x00) goto yy3; + if (yych <= '\t') goto yy11; } else { - if (yych <= ' ') goto yy11; - if (yych <= '"') goto yy9; - goto yy11; + if (yych == ' ') goto yy13; + if (yych <= '"') goto yy11; + goto yy13; } } else { if (yych <= 'Z') { - if (yych == '*') goto yy11; - goto yy9; + if (yych == '*') goto yy13; + goto yy11; } else { - if (yych <= '\\') goto yy11; - if (yych == '|') goto yy11; - goto yy9; + if (yych <= '\\') goto yy13; + if (yych == '|') goto yy13; + goto yy11; } } yy3: @@ -131,7 +133,7 @@ yy3: yy4: ++in; yych = *in; - goto yy8; + goto yy10; yy5: { // Got a span of plain text. Copy it to out if necessary. @@ -142,17 +144,22 @@ yy5: continue; } yy6: + ++in; + { + break; + } +yy8: yych = *++in; goto yy3; -yy7: +yy9: ++in; yych = *in; -yy8: +yy10: if (yybm[0+yych] & 128) { - goto yy7; + goto yy9; } goto yy5; -yy9: +yy11: ++in; { // Let backslash before other characters through verbatim. @@ -160,7 +167,7 @@ yy9: *out++ = yych; continue; } -yy11: +yy13: ++in; { // De-escape backslashed character. diff --git a/src/depfile_parser.in.cc b/src/depfile_parser.in.cc index c469a2c..b310c58 100644 --- a/src/depfile_parser.in.cc +++ b/src/depfile_parser.in.cc @@ -55,6 +55,7 @@ bool DepfileParser::Parse(string* content, string* err) { re2c:yych:emit = 0; + nul = "\000"; escape = [ \\#*$[|]; '\\' escape { @@ -62,7 +63,7 @@ bool DepfileParser::Parse(string* content, string* err) { *out++ = yych; continue; } - '\\'. { + '\\'[^\000\n] { // Let backslash before other characters through verbatim. *out++ = '\\'; *out++ = yych; @@ -76,6 +77,9 @@ bool DepfileParser::Parse(string* content, string* err) { out += len; continue; } + nul { + break; + } [^] { // For any other character (e.g. whitespace), swallow it here, // allowing the outer logic to loop around again. -- 2.7.4