2 Copyright (c) 2018-2019 Intel Corporation
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
8 http://www.apache.org/licenses/LICENSE-2.0
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
21 from mo.utils.error import Error
class SimpleProtoParser(object):
    """
    This is a simple Proto2 parser that has limited functionality and is intended to parse configuration files for the
    models created with Object Detection API only. The result of the parser is the dictionary.

    Supported syntax: "key: value" pairs, nested "key { ... }" blocks, "key: [x, y, z]" lists, double-quoted strings
    and whole-line "#" comments. A scalar key that occurs several times in one block is collected into a list, while
    a repeated block or list replaces the previous one. Quotes are dropped during tokenization, so a quoted value
    such as "1" is converted exactly like a bare 1.
    """

    # Single-character tokens that always terminate the token being accumulated.
    _DELIMITERS = frozenset(':{}[],')

    def __init__(self):
        self._tokens = list()
        self._result = dict()

    @staticmethod
    def _convert_value_to_correct_datatype(value: str):
        """
        Converts string representation of the token to a value with proper data type.
        :param value: string representation to be converted.
        :return: converted to a correct data type value.
        """
        # proto booleans are lower-case and therefore are not valid Python literals
        if value == 'true':
            return True
        if value == 'false':
            return False
        try:
            return ast.literal_eval(value)
        except Exception:  # if it is not possible to evaluate the value then consider it as a string
            return value

    @staticmethod
    def _convert_values_to_correct_datatypes(d: dict):
        """
        Convert dictionary with values to correct data types. The dictionary is updated in place.
        :param d: dictionary with values.
        :return: None
        """
        convert = SimpleProtoParser._convert_value_to_correct_datatype
        # only existing keys are re-assigned, so the dictionary size does not change during the iteration
        for key, value in d.items():
            if isinstance(value, dict):
                SimpleProtoParser._convert_values_to_correct_datatypes(value)
            elif isinstance(value, list):
                d[key] = [convert(item) for item in value]
            else:
                d[key] = convert(value)

    def _add_non_empty_token(self, token: str):
        """
        Add token to the list of tokens if it is non-empty.
        :param token: token to add
        :return: None
        """
        if token:
            self._tokens.append(token)

    def _parse_list(self, result: list, token_ind: int):
        """
        Parses the items of a list "[x, y, z...]" starting right after the opening bracket.
        :param result: list where to store the items (as strings).
        :param token_ind: index of the first token after '['.
        :return: token index to continue parsing from (the one after ']', or the end of the tokens list if the
        closing bracket is missing).
        """
        prev_token = '['
        while token_ind < len(self._tokens):
            cur_token = self._tokens[token_ind]
            if cur_token == ']':
                return token_ind + 1
            if cur_token == ',':
                # a comma must follow a value; a trailing comma right before ']' is tolerated
                if prev_token == ',' or prev_token == '[':
                    raise Error('Missing value in the list at position {}'.format(token_ind))
            else:
                result.append(cur_token)
            token_ind += 1
            prev_token = cur_token
        return token_ind

    def _parse_tokens(self, result: dict, token_ind: int, depth: int = 0):
        """
        Internal function that parses tokens.
        :param result: current dictionary where to store parse result.
        :param token_ind: index of the token from the tokens list to start parsing from.
        :param depth: nesting level of the block being parsed; 0 for the top level.
        :return: token index to continue parsing from.
        """
        tokens_count = len(self._tokens)
        while token_ind < tokens_count:
            cur_token = self._tokens[token_ind]
            if cur_token == ',':  # redundant commas that we simply ignore everywhere except list "[x, y, z...]"
                token_ind += 1
                continue
            if cur_token == '}':  # end of the current block
                return token_ind + 1
            # report truncated input explicitly instead of failing with a bare IndexError
            if token_ind + 1 >= tokens_count:
                raise Error('Unexpected end of input after "{}" in position {}'.format(cur_token, token_ind))
            next_token = self._tokens[token_ind + 1]
            if next_token == '{':
                result[cur_token] = dict()
                token_ind = self._parse_tokens(result[cur_token], token_ind + 2, depth + 1)
            elif next_token == ':':
                if token_ind + 2 >= tokens_count:
                    raise Error('Missing value for "{}" in position {}'.format(cur_token, token_ind))
                value_token = self._tokens[token_ind + 2]
                if value_token == '[':
                    result[cur_token] = list()
                    token_ind = self._parse_list(result[cur_token], token_ind + 3)
                else:
                    if cur_token not in result:
                        result[cur_token] = value_token
                    else:
                        # the key is repeated: collect all its values into a list
                        if not isinstance(result[cur_token], list):
                            result[cur_token] = [result[cur_token]]
                        result[cur_token].append(value_token)
                    token_ind += 3
            else:
                raise Error('Wrong character "{}" in position {}'.format(next_token, token_ind))
        # tokens are exhausted: this is fine for the top level only, a nested block must be closed with '}'
        if depth != 0:
            raise Error('Input/output braces mismatch.')
        return token_ind

    def _convert_tokens_to_dict(self):
        """
        Convert list of tokens into a dictionary with proper structure.
        Then converts values in the dictionary to values of correct data types. For example, 'false' -> False,
        'true' -> True, '0.004' -> 0.004, etc.
        :return: True if conversion is successful.
        """
        try:
            self._parse_tokens(self._result, 0)
        except Exception as ex:
            log.error('Failed to convert tokens to dictionary: {}'.format(str(ex)))
            return False
        self._convert_values_to_correct_datatypes(self._result)
        return True

    def _split_to_tokens(self, file_content: str):
        """
        The function gets file content as string and converts it to the list of tokens (all tokens are still strings).
        Tokens are appended to the internal tokens list.
        :param file_content: file content as a string
        :return: None
        """
        # the flag is not reset at the end of a line, so an unterminated string affects the following lines
        string_started = False
        for line in file_content.split('\n'):
            cur_token = ''
            line = line.strip()
            if line.startswith('#'):  # skip comments
                continue
            for char in line:
                if string_started:
                    if char == '"':  # string ended
                        # a quoted token is kept even if it is empty, otherwise 'key: ""' loses its value
                        self._tokens.append(cur_token)
                        cur_token = ''
                        string_started = False
                    else:
                        cur_token += char
                elif char == '"':  # string started
                    self._add_non_empty_token(cur_token)
                    cur_token = ''
                    string_started = True
                elif char.isspace():  # any whitespace outside of a string separates tokens
                    self._add_non_empty_token(cur_token)
                    cur_token = ''
                elif char in self._DELIMITERS:
                    self._add_non_empty_token(cur_token)
                    self._tokens.append(char)
                    cur_token = ''
                else:
                    cur_token += char
            # flush the token that ends the line; this is the only flush, so the token of the last line is not
            # added twice
            self._add_non_empty_token(cur_token)

    def parse_from_string(self, file_content: str):
        """
        Parses the proto text file passed as a string.
        :param file_content: content of the file.
        :return: dictionary with file content or None if the file cannot be parsed.
        """
        # start from a clean state so the same parser instance can be used for several inputs
        self._tokens = list()
        self._result = dict()
        self._split_to_tokens(file_content)
        if not self._convert_tokens_to_dict():
            log.error('Failed to generate dictionary representation of file.')
            return None
        return self._result

    def parse_file(self, file_name: str):
        """
        Parses the specified file and returns its representation as dictionary.
        :param file_name: file name to parse.
        :return: dictionary with file content or None if the file cannot be parsed.
        """
        if not os.path.exists(file_name):
            log.error('File {} does not exist'.format(file_name))
            return None
        try:
            with open(file_name) as file:
                file_content = file.read()
        except Exception as ex:
            log.error('Failed to read file {}: {}'.format(file_name, str(ex)))
            return None
        return self.parse_from_string(file_content)