1 /* Generated by re2c 1.3 */
2 // Copyright 2011 Google Inc. All Rights Reserved.
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
8 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
16 #include "depfile_parser.h"
21 DepfileParser::DepfileParser(DepfileParserOptions options)
26 // A note on backslashes in Makefiles, from reading the docs:
27 // Backslash-newline is the line continuation character.
28 // Backslash-# escapes a # (otherwise meaningful as a comment start).
29 // Backslash-% escapes a % (otherwise meaningful as a special).
30 // Finally, quoting the GNU manual, "Backslashes that are not in danger
31 // of quoting ‘%’ characters go unmolested."
32 // How do you end a line with a backslash? The netbsd Make docs suggest
33 // reading the result of a shell command echoing a backslash!
35 // Rather than implement all of the above, we follow what GCC/Clang produces:
36 // Backslashes escape a space or hash sign.
37 // When a space is preceded by 2N+1 backslashes, it represents N backslashes
39 // When a space is preceded by 2N backslashes, it represents 2N backslashes at
40 // the end of a filename.
41 // A hash sign is escaped by a single backslash. All other backslashes remain
44 // If anyone actually has depfiles that rely on the more complicated
45 // behavior we can adjust this.
46 bool DepfileParser::Parse(string* content, string* err) {
47 // in: current parser input point.
49 // parsing_targets: whether we are parsing targets or dependencies.
50 char* in = &(*content)[0];
51 char* end = in + content->size();
52 bool have_target = false;
53 bool parsing_targets = true;
54 bool poisoned_input = false;
56 bool have_newline = false;
57 // out: current output point (typically same as in, but can fall behind
58 // as we de-escape backslashes).
60 // filename: start of the current parsed filename.
63 // start: beginning of the current parsed span.
64 const char* start = in;
65 char* yymarker = NULL;
69 static const unsigned char yybm[] = {
70 0, 0, 0, 0, 0, 0, 0, 0,
71 0, 0, 0, 0, 0, 0, 0, 0,
72 0, 0, 0, 0, 0, 0, 0, 0,
73 0, 0, 0, 0, 0, 0, 0, 0,
74 0, 128, 0, 0, 0, 128, 0, 0,
75 128, 128, 0, 128, 128, 128, 128, 128,
76 128, 128, 128, 128, 128, 128, 128, 128,
77 128, 128, 128, 0, 0, 128, 0, 0,
78 128, 128, 128, 128, 128, 128, 128, 128,
79 128, 128, 128, 128, 128, 128, 128, 128,
80 128, 128, 128, 128, 128, 128, 128, 128,
81 128, 128, 128, 128, 0, 128, 0, 128,
82 0, 128, 128, 128, 128, 128, 128, 128,
83 128, 128, 128, 128, 128, 128, 128, 128,
84 128, 128, 128, 128, 128, 128, 128, 128,
85 128, 128, 128, 128, 0, 128, 128, 0,
86 128, 128, 128, 128, 128, 128, 128, 128,
87 128, 128, 128, 128, 128, 128, 128, 128,
88 128, 128, 128, 128, 128, 128, 128, 128,
89 128, 128, 128, 128, 128, 128, 128, 128,
90 128, 128, 128, 128, 128, 128, 128, 128,
91 128, 128, 128, 128, 128, 128, 128, 128,
92 128, 128, 128, 128, 128, 128, 128, 128,
93 128, 128, 128, 128, 128, 128, 128, 128,
94 128, 128, 128, 128, 128, 128, 128, 128,
95 128, 128, 128, 128, 128, 128, 128, 128,
96 128, 128, 128, 128, 128, 128, 128, 128,
97 128, 128, 128, 128, 128, 128, 128, 128,
98 128, 128, 128, 128, 128, 128, 128, 128,
99 128, 128, 128, 128, 128, 128, 128, 128,
100 128, 128, 128, 128, 128, 128, 128, 128,
101 128, 128, 128, 128, 128, 128, 128, 128,
104 if (yybm[0+yych] & 128) {
109 if (yych >= 0x01) goto yy4;
111 if (yych <= '\n') goto yy6;
112 if (yych <= '\f') goto yy4;
117 if (yych <= '#') goto yy4;
120 if (yych <= '?') goto yy4;
121 if (yych <= '\\') goto yy13;
133 // For any other character (e.g. whitespace), swallow it here,
134 // allowing the outer logic to loop around again.
140 // A newline ends the current file name and the current rule.
146 if (yych == '\n') goto yy6;
150 if (yybm[0+yych] & 128) {
155 // Got a span of plain text.
156 int len = (int)(in - start);
157 // Need to shift it over if we're overwriting backslashes.
159 memmove(out, start, len);
165 if (yych == '$') goto yy14;
168 yych = *(yymarker = ++in);
171 if (yych <= 0x00) goto yy5;
172 if (yych <= '\t') goto yy16;
175 if (yych == '\r') goto yy19;
176 if (yych <= 0x1F) goto yy16;
181 if (yych == '#') goto yy23;
184 if (yych <= ':') goto yy25;
185 if (yych == '\\') goto yy27;
192 // De-escape dollar character.
202 // A line continuation ends the current file name.
207 if (yych == '\n') goto yy17;
213 // 2N+1 backslashes plus space -> N backslashes plus space.
214 int len = (int)(in - start);
217 memset(out, '\\', n);
225 // De-escape hash sign, but preserve other leading backslashes.
226 int len = (int)(in - start);
227 if (len > 2 && out < start)
228 memset(out, '\\', len - 2);
236 if (yych <= 0x00) goto yy28;
237 if (yych <= 0x08) goto yy26;
238 if (yych <= '\n') goto yy28;
240 if (yych <= '\r') goto yy28;
241 if (yych == ' ') goto yy28;
245 // De-escape colon sign, but preserve other leading backslashes.
246 // Regular expression uses lookahead to make sure that no whitespace
247 // nor EOF follows. In that case it'd be the : at the end of a target
248 int len = (int)(in - start);
249 if (len > 2 && out < start)
250 memset(out, '\\', len - 2);
259 if (yych <= 0x00) goto yy11;
260 if (yych <= '\t') goto yy16;
263 if (yych == '\r') goto yy11;
264 if (yych <= 0x1F) goto yy16;
269 if (yych == '#') goto yy23;
272 if (yych <= ':') goto yy25;
273 if (yych == '\\') goto yy32;
280 // Backslash followed by : and whitespace.
281 // It is therefore normal text and not an escaped colon
282 int len = (int)(in - start - 1);
283 // Need to shift it over if we're overwriting backslashes.
285 memmove(out, start, len);
287 if (*(in - 1) == '\n')
294 // 2N backslashes plus space -> 2N backslashes, end of filename.
295 int len = (int)(in - start);
297 memset(out, '\\', len - 1);
305 if (yych <= 0x00) goto yy11;
306 if (yych <= '\t') goto yy16;
309 if (yych == '\r') goto yy11;
310 if (yych <= 0x1F) goto yy16;
315 if (yych == '#') goto yy23;
318 if (yych <= ':') goto yy25;
319 if (yych == '\\') goto yy27;
327 int len = (int)(out - filename);
328 const bool is_dependency = !parsing_targets;
329 if (len > 0 && filename[len - 1] == ':') {
330 len--; // Strip off trailing colon, if any.
331 parsing_targets = false;
336 StringPiece piece = StringPiece(filename, len);
337 // If we've seen this as an input before, skip it.
338 std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
339 if (pos == ins_.end()) {
341 if (poisoned_input) {
342 *err = "inputs may not also have inputs";
346 ins_.push_back(piece);
348 // Check for a new output.
349 if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
350 outs_.push_back(piece);
352 } else if (!is_dependency) {
353 // We've passed an input on the left side; reject new inputs.
354 poisoned_input = true;
359 // A newline ends a rule so the next filename will be a new target.
360 parsing_targets = true;
361 poisoned_input = false;
365 *err = "expected ':' in depfile";