model-optimizer/mo/utils/simple_proto_parser.py

   1 """
   2  Copyright (c) 2018-2019 Intel Corporation
   3
   4  Licensed under the Apache License, Version 2.0 (the "License");
   5  you may not use this file except in compliance with the License.
   6  You may obtain a copy of the License at
   7
   8       http://www.apache.org/licenses/LICENSE-2.0
   9
  10  Unless required by applicable law or agreed to in writing, software
  11  distributed under the License is distributed on an "AS IS" BASIS,
  12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  See the License for the specific language governing permissions and
  14  limitations under the License.
  15 """
  16
  17 import ast
  18 import logging as log
  19 import os
  20
  21 from mo.utils.error import Error
  22
  23
  24 class SimpleProtoParser(object):
  25     """
  26     This is a simple Proto2 parser that has limited functionality and is intended to parse configuration files for the
  27     models created with Object Detection API only. The result of the parser is the dictionary.
  28     """
  29
  30     _tokens = list()
  31     _result = dict()
  32
  33     def __init__(self):
  34         self._tokens = list()
  35         self._result = dict()
  36
  37     @staticmethod
  38     def _convert_value_to_correct_datatype(value: str):
  39         """
  40         Converts string representation of the token to a value with proper data type.
  41         :param value: string representation to be converted.
  42         :return: converted to a correct data type value.
  43         """
  44         if value == 'true':
  45             return True
  46         if value == 'false':
  47             return False
  48         try:
  49             result = ast.literal_eval(value)
  50             return result
  51         except Exception:  # if it is not possible to evaluate the value then consider it as a string
  52             return value
  53
  54     @staticmethod
  55     def _convert_values_to_correct_datatypes(d: dict):
  56         """
  57         Convert dictionary with values to correct data types.
  58         :param d: dictionary with values.
  59         :return: None
  60         """
  61         for key, value in d.items():
  62             if isinstance(value, dict):
  63                 __class__._convert_values_to_correct_datatypes(value)
  64             elif isinstance(value, list):
  65                 d[key] = [__class__._convert_value_to_correct_datatype(item) for item in value]
  66             else:
  67                 d[key] = __class__._convert_value_to_correct_datatype(value)
  68
  69     def _add_non_empty_token(self, token: str):
  70         """
  71         Add token to the list of tokens if it is non-empty.
  72         :param token: token to add
  73         :return: None
  74         """
  75         if token != "":
  76             self._tokens.append(token)
  77
  78     def _parse_list(self, result: list, token_ind: int):
  79         prev_token = '['
  80         while token_ind < len(self._tokens):
  81             cur_token = self._tokens[token_ind]
  82             if cur_token == ']':
  83                 return token_ind + 1
  84             if cur_token == ',':
  85                 if prev_token == ',' or prev_token == '[':
  86                     raise Error('Missing value in the list at position {}'.format(token_ind))
  87             else:
  88                 result.append(cur_token)
  89             token_ind += 1
  90             prev_token = cur_token
  91         return token_ind
  92
  93     def _parse_tokens(self, result: dict, token_ind: int, depth: int=0):
  94         """
  95         Internal function that parses tokens.
  96         :param result: current dictionary where to store parse result.
  97         :param token_ind: index of the token from the tokens list to start parsing from.
  98         :return: token index to continue parsing from.
  99         """
 100         while token_ind < len(self._tokens):
 101             cur_token = self._tokens[token_ind]
 102             if cur_token == ',':  # redundant commas that we simply ignore everywhere except list "[x, y, z...]"
 103                 token_ind += 1
 104                 continue
 105             if cur_token == '}':
 106                 return token_ind + 1
 107             next_token = self._tokens[token_ind + 1]
 108             if next_token == '{':
 109                 result[cur_token] = dict()
 110                 token_ind = self._parse_tokens(result[cur_token], token_ind + 2, depth + 1)
 111             elif next_token == ':':
 112                 next_next_token = self._tokens[token_ind + 2]
 113                 if next_next_token == '[':
 114                     result[cur_token] = list()
 115                     token_ind = self._parse_list(result[cur_token], token_ind + 3)
 116                 else:
 117                     if cur_token not in result:
 118                         result[cur_token] = self._tokens[token_ind + 2]
 119                     else:
 120                         if not isinstance(result[cur_token], list):
 121                             old_val = result[cur_token]
 122                             result[cur_token] = [old_val]
 123                         result[cur_token].append(self._tokens[token_ind + 2])
 124                     token_ind += 3
 125             else:
 126                 raise Error('Wrong character "{}" in position {}'.format(next_token, token_ind))
 127         if depth != 0:
 128             raise Error('Input/output braces mismatch.')
 129         return token_ind
 130
 131     def _convert_tokens_to_dict(self):
 132         """
 133         Convert list of tokens into a dictionary with proper structure.
 134         Then converts values in the dictionary to values of correct data types. For example, 'false' -> False,
 135         'true' -> true, '0.004' -> 0.004, etc.
 136         :return: True if conversion is successful.
 137         """
 138         try:
 139             self._parse_tokens(self._result, 0)
 140         except Exception as ex:
 141             log.error('Failed to convert tokens to dictionary: {}'.format(str(ex)))
 142             return False
 143         self._convert_values_to_correct_datatypes(self._result)
 144         return True
 145
 146     def _split_to_tokens(self, file_content: str):
 147         """
 148         The function gets file content as string and converts it to the list of tokens (all tokens are still strings).
 149         :param file_content: file content as a string
 150         """
 151         cur_token = ''
 152         string_started = False
 153         for line in file_content.split('\n'):
 154             cur_token = ''
 155             line = line.strip()
 156             if line.startswith('#'):  # skip comments
 157                 continue
 158             for char in line:
 159                 if string_started:
 160                     if char == '"':  # string ended
 161                         self._add_non_empty_token(cur_token)
 162                         cur_token = ''  # start of a new string
 163                         string_started = False
 164                     else:
 165                         cur_token += char
 166                 elif char == '"':
 167                     self._add_non_empty_token(cur_token)
 168                     cur_token = ''  # start of a new string
 169                     string_started = True
 170                 elif (char == " " and not string_started) or char == '\n':
 171                     self._add_non_empty_token(cur_token)
 172                     cur_token = ''
 173                 elif char in [':', '{', '}', '[', ']', ',']:
 174                     self._add_non_empty_token(cur_token)
 175                     self._tokens.append(char)
 176                     cur_token = ''
 177                 else:
 178                     cur_token += char
 179             self._add_non_empty_token(cur_token)
 180         self._add_non_empty_token(cur_token)
 181
 182     def parse_from_string(self, file_content: str):
 183         """
 184         Parses the proto text file passed as a string.
 185         :param file_content: content of the file.
 186         :return: dictionary with file content or None if the file cannot be parsed.
 187         """
 188         self._split_to_tokens(file_content)
 189         if not self._convert_tokens_to_dict():
 190             log.error('Failed to generate dictionary representation of file.')
 191             return None
 192         return self._result
 193
 194     def parse_file(self, file_name: str):
 195         """
 196         Parses the specified file and returns its representation as dictionary.
 197         :param file_name: file name to parse.
 198         :return: dictionary with file content or None if the file cannot be parsed.
 199         """
 200         if not os.path.exists(file_name):
 201             log.error('File {} does not exist'.format(file_name))
 202             return None
 203         try:
 204             with open(file_name) as file:
 205                 file_content = file.readlines()
 206         except Exception as ex:
 207             log.error('Failed to read file {}: {}'.format(file_name, str(ex)))
 208             return None
 209         return self.parse_from_string(''.join(file_content))