--- /dev/null
+#ifndef GRAMMAR_PORT_BUILD\r
+#error Do not build this file directly, build your grammar_XXX.c instead, which includes this file\r
+#endif\r
+\r
+/*\r
+ Last Modified: 2004-II-8\r
+*/\r
+\r
+/*\r
+ INTRODUCTION\r
+ ------------\r
+\r
+ The task is to check the syntax of an input string. Input string is a stream of ASCII\r
+ characters terminated with a null-character ('\0'). Checking it using C language is\r
+ difficult and hard to implement without bugs. It is hard to maintain and make changes when\r
+ the syntax changes.\r
+\r
+ This is because of a high redundancy of the C code. Large blocks of code are duplicated with\r
+ only small changes. Even use of macros does not solve the problem because macros cannot\r
+ erase the complexity of the problem.\r
+\r
+ The resolution is to create a new language that will be highly oriented to our task. Once\r
+ we describe a particular syntax, we are done. We can then focus on the code that implements\r
+ the language. Its size and complexity are relatively small compared to the code that directly\r
+ checks the syntax.\r
+\r
+ First, we must implement our new language. Here, the language is implemented in C, but it\r
+ could also be implemented in any other language. The code is listed below. We must take\r
+ good care that it is bug free. This is simple because the code is simple and clean.\r
+\r
+ Next, we must describe the syntax of our new language in itself. Once created and checked\r
+ manually that it is correct, we can use it to check other scripts.\r
+\r
+ Note that our new language loading code does not have to check the syntax. It is because we\r
+ assume that the script describing itself is correct, and other scripts can be syntactically\r
+ checked by the former script. The loading code must only do semantic checking which leads us to\r
+ simple resolving references.\r
+\r
+ THE LANGUAGE\r
+ ------------\r
+\r
+ Here I will describe the syntax of the new language (further called "Synek"). It is mainly a\r
+ sequence of declarations terminated by a semicolon. The declaration consists of a symbol,\r
+ which is an identifier, and its definition. A definition is in turn a sequence of specifiers\r
+ connected with ".and" or ".or" operator. These operators cannot be mixed together in one\r
+ definition. Specifier can be a symbol, string, character, character range or a special\r
+ keyword ".true" or ".false".\r
+\r
+ On the very beginning of the script there is a declaration of a root symbol and is in the form:\r
+ .syntax <root_symbol>;\r
+ The <root_symbol> must be one of the symbols in declaration sequence. The syntax is correct if\r
+ the root symbol evaluates to true. A symbol evaluates to true if the definition associated with\r
+ the symbol evaluates to true. Definition evaluation depends on the operator used to connect\r
+ specifiers in the definition. If ".and" operator is used, definition evaluates to true if and\r
+ only if all the specifiers evaluate to true. If ".or" operator is used, definition evaluates to\r
+ true if any of the specifiers evaluates to true. If definition contains only one specifier,\r
+ it is evaluated as if it was connected with ".true" keyword by ".and" operator.\r
+\r
+ If specifier is a ".true" keyword, it always evaluates to true.\r
+\r
+ If specifier is a ".false" keyword, it always evaluates to false. Specifier evaluates to false\r
+ when it does not evaluate to true.\r
+\r
+ Character range specifier is in the form:\r
+ '<first_character>' - '<second_character>'\r
+ If specifier is a character range, it evaluates to true if character in the stream is greater\r
+ or equal to <first_character> and less or equal to <second_character>. In that situation \r
+ the stream pointer is advanced to point to next character in the stream. All C-style escape\r
+ sequences are supported although trigraph sequences are not. The comparisons are performed\r
+ on 8-bit unsigned integers.\r
+\r
+ Character specifier is in the form:\r
+ '<single_character>'\r
+ It evaluates to true if the following character range specifier evaluates to true:\r
+ '<single_character>' - '<single_character>'\r
+\r
+ String specifier is in the form:\r
+ "<string>"\r
+ Let N be the number of characters in <string>. Let <string>[i] designate i-th character in\r
+ <string>. Then the string specifier evaluates to true if and only if for i in the range [0, N)\r
+ the following character specifier evaluates to true:\r
+ '<string>[i]'\r
+ If <string>[i] is a quotation mark, '<string>[i]' is replaced with '\<string>[i]'.\r
+\r
+ Symbol specifier can be optionally preceded by a ".loop" keyword in the form:\r
+ .loop <symbol> (1)\r
+ where <symbol> is defined as follows:\r
+ <symbol> <definition>; (2)\r
+ Construction (1) is replaced by the following code:\r
+ <symbol$1>\r
+ and declaration (2) is replaced by the following:\r
+ <symbol$1> <symbol$2> .or .true;\r
+ <symbol$2> <symbol> .and <symbol$1>;\r
+ <symbol> <definition>;\r
+\r
+ ESCAPE SEQUENCES\r
+ ----------------\r
+\r
+ Synek supports all escape sequences in character specifiers. The mapping table is listed below.\r
+ All occurrences of the characters in the first column are replaced with the corresponding\r
+ character in the second column.\r
+\r
+ Escape sequence Represents\r
+ ------------------------------------------------------------------------------------------------\r
+ \a Bell (alert)\r
+ \b Backspace\r
+ \f Formfeed\r
+ \n New line\r
+ \r Carriage return\r
+ \t Horizontal tab\r
+ \v Vertical tab\r
+ \' Single quotation mark\r
+ \" Double quotation mark\r
+ \\ Backslash\r
+ \? Literal question mark\r
+ \ooo ASCII character in octal notation\r
+ \xhhh ASCII character in hexadecimal notation\r
+ ------------------------------------------------------------------------------------------------\r
+\r
+ RAISING ERRORS\r
+ --------------\r
+\r
+ Any specifier can be followed by a special construction that is executed when the specifier\r
+ evaluates to false. The construction is in the form:\r
+ .error <ERROR_TEXT>\r
+ <ERROR_TEXT> is an identifier declared earlier by error text declaration. The declaration is\r
+ in the form:\r
+ .errtext <ERROR_TEXT> "<error_desc>"\r
+ When specifier evaluates to false and this construction is present, parsing is stopped\r
+ immediately and <error_desc> is returned as a result of parsing. The error position is also\r
+ returned and it is meant as an offset from the beginning of the stream to the character that\r
+ was valid so far. Example:\r
+\r
+ (**** syntax script ****)\r
+\r
+ .syntax program;\r
+ .errtext MISSING_SEMICOLON "missing ';'"\r
+ program declaration .and .loop space .and ';' .error MISSING_SEMICOLON .and\r
+ .loop space .and '\0';\r
+ declaration "declare" .and .loop space .and identifier;\r
+ space ' ';\r
+\r
+ (**** sample code ****)\r
+\r
+ declare foo ,\r
+\r
+ In the example above checking the sample code will result in error message "missing ';'" and\r
+ error position 12. The sample code is not correct. Note the presence of '\0' specifier to\r
+ assure that there is no code after semicolon - only spaces.\r
+ <error_desc> can optionally contain identifier surrounded by dollar signs $. In such a case,\r
+ the identifier and dollar signs are replaced by a string retrieved by invoking symbol with\r
+ the identifier name. The starting position is the error position. The length of the resulting\r
+ string is the position after invoking the symbol.\r
+\r
+ PRODUCTION\r
+ ----------\r
+\r
+ Synek not only checks the syntax but it can also produce (emit) bytes associated with specifiers\r
+ that evaluate to true. That is, every specifier and optional error construction can be followed\r
+ by a number of emit constructions that are in the form:\r
+ .emit <parameter>\r
+ <parameter> can be a HEX number, identifier, a star * or a dollar $. HEX number is preceded by\r
+ 0x or 0X. If <parameter> is an identifier, it must be earlier declared by emit code declaration\r
+ in the form:\r
+ .emtcode <identifier> <hex_number>\r
+\r
+ When given specifier evaluates to true, all emits associated with the specifier are output\r
+ in order they were declared. A star means that last-read character should be output instead\r
+ of constant value. Example:\r
+\r
+ (**** syntax script ****)\r
+\r
+ .syntax foobar;\r
+ .emtcode WORD_FOO 0x01\r
+ .emtcode WORD_BAR 0x02\r
+ foobar FOO .emit WORD_FOO .or BAR .emit WORD_BAR .or .true .emit 0x00;\r
+ FOO "foo" .and SPACE;\r
+ BAR "bar" .and SPACE;\r
+ SPACE ' ' .or '\0';\r
+\r
+ (**** sample text 1 ****)\r
+\r
+ foo\r
+\r
+ (**** sample text 2 ****)\r
+\r
+ foobar\r
+\r
+ For both samples the result will be one-element array. For first sample text it will be\r
+ value 1, for second - 0. Note that every text will be accepted because of presence of\r
+ .true as an alternative.\r
+\r
+ Another example:\r
+\r
+ (**** syntax script ****)\r
+\r
+ .syntax declaration;\r
+ .emtcode VARIABLE 0x01\r
+ declaration "declare" .and .loop space .and\r
+ identifier .emit VARIABLE .and (1)\r
+ .true .emit 0x00 .and (2)\r
+ .loop space .and ';';\r
+ space ' ' .or '\t';\r
+ identifier .loop id_char .emit *; (3)\r
+ id_char 'a'-'z' .or 'A'-'Z' .or '_';\r
+\r
+ (**** sample code ****)\r
+\r
+ declare fubar;\r
+\r
+ In specifier (1) symbol <identifier> is followed by .emit VARIABLE. If it evaluates to\r
+ true, VARIABLE constant and then production of the symbol is output. Specifier (2) is used\r
+ to terminate the string with null to signal when the string ends. Specifier (3) outputs\r
+ all characters that make declared identifier. The result of sample code will be the\r
+ following array:\r
+ { 1, 'f', 'u', 'b', 'a', 'r', 0 }\r
+\r
+ If .emit is followed by dollar $, it means that current position should be output. Current\r
+ position is a 32-bit unsigned integer distance from the very beginning of the parsed string to\r
+ first character consumed by the specifier associated with the .emit instruction. Current\r
+ position is stored in the output buffer in Little-Endian convention (the lowest byte comes\r
+ first).\r
+*/\r
+\r
+/* forward declaration - the error helpers below call mem_free () before its definition */\r
+static void mem_free (void **);\r
+\r
+/*\r
+ internal error messages\r
+\r
+ A '$' inside a message marks the place where error_param is inserted.\r
+*/\r
+static const byte *OUT_OF_MEMORY = (byte *) "internal error 1001: out of physical memory";\r
+static const byte *UNRESOLVED_REFERENCE = (byte *) "internal error 1002: unresolved reference '$'";\r
+static const byte *INVALID_GRAMMAR_ID = (byte *) "internal error 1003: invalid grammar object";\r
+static const byte *INVALID_REGISTER_NAME = (byte *) "internal error 1004: invalid register name: '$'";\r
+\r
+/* message of the pending error, NULL when no error is pending */\r
+static const byte *error_message = NULL;\r
+static byte *error_param = NULL; /* this is inserted into error_message in place of $ */\r
+/* position of the pending error, -1 when unset */\r
+static int error_position = -1;\r
+\r
+/* static placeholder stored in error_param when no parameter was supplied - never freed */\r
+static byte *unknown = (byte *) "???";\r
+\r
+/*\r
+ resets the global error state: no message, no parameter, position -1\r
+*/\r
+static void clear_last_error ()\r
+{\r
+    error_message = NULL;\r
+    error_position = -1;\r
+\r
+    /* the "???" placeholder is a static string - drop the reference without freeing it */\r
+    if (error_param == unknown)\r
+    {\r
+        error_param = NULL;\r
+        return;\r
+    }\r
+\r
+    /* any other parameter is released; mem_free () also resets the pointer to NULL */\r
+    mem_free ((void **) &error_param);\r
+}\r
+\r
+static void set_last_error (const byte *msg, byte *param, int pos)\r
+{\r
+ /* error message can only be set only once */\r
+ if (error_message != NULL)\r
+ {\r
+ mem_free (¶m);\r
+ return;\r
+ }\r
+\r
+ error_message = msg;\r
+\r
+ if (param != NULL)\r
+ error_param = param;\r
+ else\r
+ error_param = unknown;\r
+\r
+ error_position = pos;\r
+}\r
+\r
+/*\r
+ memory management routines\r
+*/\r
+/*\r
+ requests size bytes from grammar_alloc_malloc (),\r
+ returns the new block on success,\r
+ returns NULL after recording OUT_OF_MEMORY as the last error otherwise\r
+*/\r
+static void *mem_alloc (size_t size)\r
+{\r
+    void *block = grammar_alloc_malloc (size);\r
+\r
+    if (block != NULL)\r
+        return block;\r
+\r
+    set_last_error (OUT_OF_MEMORY, NULL, -1);\r
+    return NULL;\r
+}\r
+\r
+/*\r
+ forwards dst, src and size to grammar_memory_copy () and returns its result\r
+ (presumably a memcpy-like copy of size bytes from src to dst - see the port file)\r
+*/\r
+static void *mem_copy (void *dst, const void *src, size_t size)\r
+{\r
+    void *result = grammar_memory_copy (dst, src, size);\r
+\r
+    return result;\r
+}\r
+\r
+/*\r
+ hands the block *ptr to grammar_alloc_free () and resets *ptr to NULL\r
+*/\r
+static void mem_free (void **ptr)\r
+{\r
+    void *block = *ptr;\r
+\r
+    *ptr = NULL;\r
+    grammar_alloc_free (block);\r
+}\r
+\r
+/*\r
+ resizes the block ptr from old_size to new_size bytes through grammar_alloc_realloc (),\r
+ returns the resized block on success,\r
+ returns NULL after recording OUT_OF_MEMORY as the last error otherwise\r
+*/\r
+static void *mem_realloc (void *ptr, size_t old_size, size_t new_size)\r
+{\r
+    void *resized = grammar_alloc_realloc (ptr, old_size, new_size);\r
+\r
+    if (resized != NULL)\r
+        return resized;\r
+\r
+    set_last_error (OUT_OF_MEMORY, NULL, -1);\r
+    return NULL;\r
+}\r
+\r
+/*\r
+ forwards dst, src and max_len to grammar_string_copy_n () and returns its result\r
+*/\r
+static byte *str_copy_n (byte *dst, const byte *src, size_t max_len)\r
+{\r
+    byte *result = grammar_string_copy_n (dst, src, max_len);\r
+\r
+    return result;\r
+}\r
+\r
+/*\r
+ duplicates str through grammar_string_duplicate (),\r
+ returns the copy on success,\r
+ returns NULL after recording OUT_OF_MEMORY as the last error otherwise\r
+*/\r
+static byte *str_duplicate (const byte *str)\r
+{\r
+    byte *copy = grammar_string_duplicate (str);\r
+\r
+    if (copy != NULL)\r
+        return copy;\r
+\r
+    set_last_error (OUT_OF_MEMORY, NULL, -1);\r
+    return NULL;\r
+}\r
+\r
+/*\r
+ returns 1 if grammar_string_compare () reports 0 for the two strings,\r
+ returns 0 otherwise\r
+*/\r
+static int str_equal (const byte *str1, const byte *str2)\r
+{\r
+    if (grammar_string_compare (str1, str2) != 0)\r
+        return 0;\r
+\r
+    return 1;\r
+}\r
+\r
+/*\r
+ returns 1 if grammar_string_compare_n () reports 0 for the two strings and the limit n,\r
+ returns 0 otherwise\r
+*/\r
+static int str_equal_n (const byte *str1, const byte *str2, unsigned int n)\r
+{\r
+    if (grammar_string_compare_n (str1, str2, n) != 0)\r
+        return 0;\r
+\r
+    return 1;\r
+}\r
+\r
+/*\r
+ returns the length of str as reported by grammar_string_length ()\r
+*/\r
+static unsigned int str_length (const byte *str)\r
+{\r
+    unsigned int length = grammar_string_length (str);\r
+\r
+    return length;\r
+}\r
+\r
+/*\r
+ string to byte map typedef\r
+\r
+ The map is a singly linked list; each entry associates a key string with one byte of data.\r
+*/\r
+typedef struct map_byte_\r
+{\r
+ byte *key; /* entry name; released by map_byte_destroy () */\r
+ byte data; /* value associated with the key */\r
+ struct map_byte_ *next; /* next entry, NULL at the end of the list */\r
+} map_byte;\r
+\r
+/*\r
+ allocates an empty map entry (NULL key, zero data, no successor) and stores it in *ma,\r
+ *ma is NULL if the allocation failed\r
+*/\r
+static void map_byte_create (map_byte **ma)\r
+{\r
+    map_byte *entry = mem_alloc (sizeof (map_byte));\r
+\r
+    if (entry != NULL)\r
+    {\r
+        entry->key = NULL;\r
+        entry->data = '\0';\r
+        entry->next = NULL;\r
+    }\r
+\r
+    *ma = entry;\r
+}\r
+\r
+/*\r
+ destroys the whole list starting at *ma: every entry and its key are released,\r
+ *ma is NULL on return\r
+\r
+ The list is walked iteratively, so stack usage does not grow with the list length.\r
+*/\r
+static void map_byte_destroy (map_byte **ma)\r
+{\r
+    map_byte *entry = *ma;\r
+\r
+    while (entry != NULL)\r
+    {\r
+        /* remember the successor before the entry itself is released */\r
+        map_byte *following = entry->next;\r
+\r
+        mem_free ((void **) &entry->key);\r
+        mem_free ((void **) &entry);\r
+        entry = following;\r
+    }\r
+\r
+    *ma = NULL;\r
+}\r
+\r
+/*\r
+ links the entry (or list) *nm behind the last entry of the list *ma,\r
+ an empty list (*ma == NULL) simply becomes *nm\r
+*/\r
+static void map_byte_append (map_byte **ma, map_byte **nm)\r
+{\r
+    map_byte **link;\r
+\r
+    /* find the first NULL link of the list */\r
+    for (link = ma; *link != NULL; link = &(*link)->next)\r
+        ;\r
+\r
+    *link = *nm;\r
+}\r
+\r
+/*\r
+ looks up key in the map,\r
+ returns the first entry whose key equals it,\r
+ returns NULL and records UNRESOLVED_REFERENCE (with a copy of key as parameter) otherwise\r
+*/\r
+map_byte *map_byte_locate (map_byte **ma, const byte *key)\r
+{\r
+    map_byte *entry;\r
+\r
+    for (entry = *ma; entry != NULL; entry = entry->next)\r
+    {\r
+        if (str_equal (entry->key, key))\r
+            return entry;\r
+    }\r
+\r
+    set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);\r
+    return NULL;\r
+}\r
+\r
+/*\r
+ looks up key with map_byte_locate (),\r
+ on a match stores the byte associated with the key in *data and returns 0,\r
+ returns 1 and leaves *data untouched otherwise\r
+*/\r
+static int map_byte_find (map_byte **ma, const byte *key, byte *data)\r
+{\r
+    const map_byte *entry = map_byte_locate (ma, key);\r
+\r
+    if (entry == NULL)\r
+        return 1;\r
+\r
+    *data = entry->data;\r
+    return 0;\r
+}\r
+\r
+/*\r
+ regbyte context typedef\r
+\r
+ Each regbyte consists of its name and a default value. These are static and created at\r
+ grammar script compile-time, for example the following line:\r
+ .regbyte vertex_blend 0x00\r
+ adds a new regbyte named "vertex_blend" to the static list and initializes it to 0.\r
+ When the script is executed, this regbyte can be accessed by name for read and write. When a\r
+ particular regbyte is written, a new regbyte_ctx entry is added to the top of the regbyte_ctx\r
+ stack. The new entry contains information about which regbyte it references and its new value.\r
+ When a given regbyte is accessed for read, the stack is searched top-down to find an\r
+ entry that references the regbyte. The first matching entry is used to return the current\r
+ value it holds. If no entry is found, the default value is returned.\r
+*/\r
+typedef struct regbyte_ctx_\r
+{\r
+ map_byte *m_regbyte; /* regbyte (entry of the static list) this stack entry refers to */\r
+ byte m_current_value; /* value written to the regbyte */\r
+ struct regbyte_ctx_ *m_prev; /* entry below this one on the stack, NULL at the bottom */\r
+} regbyte_ctx;\r
+\r
+/*\r
+ allocates a regbyte stack entry and stores it in *re,\r
+ *re is NULL if the allocation failed\r
+\r
+ Every member is initialized, including m_current_value, so that an entry whose value is\r
+ never assigned afterwards reads as 0 instead of holding an indeterminate byte.\r
+*/\r
+static void regbyte_ctx_create (regbyte_ctx **re)\r
+{\r
+    *re = mem_alloc (sizeof (regbyte_ctx));\r
+    if (*re)\r
+    {\r
+        (**re).m_regbyte = NULL;\r
+        (**re).m_current_value = '\0';\r
+        (**re).m_prev = NULL;\r
+    }\r
+}\r
+\r
+/*\r
+ releases the single stack entry *re and resets *re to NULL,\r
+ entries reachable through m_prev are left untouched\r
+*/\r
+static void regbyte_ctx_destroy (regbyte_ctx **re)\r
+{\r
+    if (*re == NULL)\r
+        return;\r
+\r
+    mem_free ((void **) re);\r
+}\r
+\r
+/*\r
+ returns the current value of the regbyte reg:\r
+ the value held by the topmost stack entry that refers to reg,\r
+ or the default value stored in reg itself if the stack holds no such entry\r
+*/\r
+static byte regbyte_ctx_extract (regbyte_ctx **re, map_byte *reg)\r
+{\r
+    const regbyte_ctx *entry;\r
+\r
+    /* search the register stack top-down */\r
+    for (entry = *re; entry != NULL; entry = entry->m_prev)\r
+    {\r
+        if (entry->m_regbyte == reg)\r
+            return entry->m_current_value;\r
+    }\r
+\r
+    /* never written - fall back to the default value */\r
+    return reg->data;\r
+}\r
+\r
+/*\r
+ emit type typedef\r
+\r
+ selects the value produced by an emit\r
+*/\r
+typedef enum emit_type_\r
+{\r
+ et_byte, /* explicit number */\r
+ et_stream, /* eaten character */\r
+ et_position /* current position */\r
+} emit_type;\r
+\r
+/*\r
+ emit destination typedef\r
+\r
+ selects where the value produced by an emit goes\r
+*/\r
+typedef enum emit_dest_\r
+{\r
+ ed_output, /* write to the output buffer */\r
+ ed_regbyte /* write a particular regbyte */\r
+} emit_dest;\r
+\r
+/*\r
+ emit typedef\r
+\r
+ one node of a singly linked chain of emits; the chain is processed by barray_push ()\r
+*/\r
+typedef struct emit_\r
+{\r
+ emit_dest m_emit_dest;\r
+ emit_type m_emit_type; /* ed_output */\r
+ byte m_byte; /* et_byte */\r
+ map_byte *m_regbyte; /* ed_regbyte */\r
+ byte *m_regname; /* ed_regbyte - temporary */\r
+ struct emit_ *m_next; /* next emit of the chain, NULL at the end */\r
+} emit;\r
+\r
+/*\r
+ allocates an emit with default contents (ed_output, et_byte, byte 0, no regbyte,\r
+ no register name, no successor) and stores it in *em,\r
+ *em is NULL if the allocation failed\r
+*/\r
+static void emit_create (emit **em)\r
+{\r
+    emit *node = mem_alloc (sizeof (emit));\r
+\r
+    if (node != NULL)\r
+    {\r
+        node->m_emit_dest = ed_output;\r
+        node->m_emit_type = et_byte;\r
+        node->m_byte = '\0';\r
+        node->m_regbyte = NULL;\r
+        node->m_regname = NULL;\r
+        node->m_next = NULL;\r
+    }\r
+\r
+    *em = node;\r
+}\r
+\r
+/*\r
+ destroys the emit chain starting at *em: the tail first, then the register name and\r
+ the node itself; *em is NULL on return\r
+*/\r
+static void emit_destroy (emit **em)\r
+{\r
+    emit *node = *em;\r
+\r
+    if (node == NULL)\r
+        return;\r
+\r
+    emit_destroy (&node->m_next);\r
+    mem_free ((void **) &node->m_regname);\r
+    mem_free ((void **) em);\r
+}\r
+\r
+/*\r
+ error typedef\r
+\r
+ error construction attached to a specifier (see RAISING ERRORS at the top of the file)\r
+*/\r
+typedef struct error_\r
+{\r
+ byte *m_text; /* released by error_destroy (); presumably the <error_desc> text - confirm */\r
+ byte *m_token_name; /* released by error_destroy (); presumably the $identifier$ name - confirm */\r
+ struct rule_ *m_token; /* not released by error_destroy () */\r
+} error;\r
+\r
+static void error_create (error **er)\r
+{\r
+ *er = mem_alloc (sizeof (error));\r
+ if (*er)\r
+ {\r
+ (**er).m_text = NULL;\r
+ (**er).m_token_name = NULL;\r
+ (**er).m_token = NULL;\r
+ }\r
+}\r
+\r
+static void error_destroy (error **er)\r
+{\r
+ if (*er)\r
+ {\r
+ mem_free ((void **) &(**er).m_text);\r
+ mem_free ((void **) &(**er).m_token_name);\r
+ mem_free ((void **) er);\r
+ }\r
+}\r
+\r
+/* forward declarations - struct dict_ is defined later in the file, after the rule typedefs */\r
+struct dict_;\r
+static byte *error_get_token (error *, struct dict_ *, const byte *, unsigned int);\r
+\r
+/*\r
+ condition operand type typedef\r
+\r
+ tells which member of a cond_oper holds the operand\r
+*/\r
+typedef enum cond_oper_type_\r
+{\r
+ cot_byte, /* constant 8-bit unsigned integer */\r
+ cot_regbyte /* pointer to byte register containing the current value */\r
+} cond_oper_type;\r
+\r
+/*\r
+ condition operand typedef\r
+\r
+ one side of a condition; the trailing comments name the operand type each member belongs to\r
+*/\r
+typedef struct cond_oper_\r
+{\r
+ cond_oper_type m_type;\r
+ byte m_byte; /* cot_byte */\r
+ map_byte *m_regbyte; /* cot_regbyte */\r
+ byte *m_regname; /* cot_regbyte - temporary */\r
+} cond_oper;\r
+\r
+/*\r
+ condition type typedef\r
+\r
+ comparison applied to the two operands of a cond\r
+ (the evaluating code is not part of this section of the file)\r
+*/\r
+typedef enum cond_type_\r
+{\r
+ ct_equal,\r
+ ct_not_equal\r
+} cond_type;\r
+\r
+/*\r
+ condition typedef\r
+\r
+ a comparison type together with its two operands\r
+*/\r
+typedef struct cond_\r
+{\r
+ cond_type m_type;\r
+ cond_oper m_operands[2]; /* the register names of both operands are released by cond_destroy () */\r
+} cond;\r
+\r
+/*\r
+ allocates a condition and stores it in *co,\r
+ *co is NULL if the allocation failed\r
+\r
+ Every member gets a defined value: the condition type is ct_equal and both operands are\r
+ constant bytes of value 0 without a register or register name.\r
+*/\r
+static void cond_create (cond **co)\r
+{\r
+    *co = mem_alloc (sizeof (cond));\r
+    if (*co)\r
+    {\r
+        unsigned int i;\r
+\r
+        (**co).m_type = ct_equal;\r
+\r
+        for (i = 0; i < 2; i++)\r
+        {\r
+            cond_oper *operand = &(**co).m_operands[i];\r
+\r
+            operand->m_type = cot_byte;\r
+            operand->m_byte = '\0';\r
+            operand->m_regbyte = NULL;\r
+            operand->m_regname = NULL;\r
+        }\r
+    }\r
+}\r
+\r
+/*\r
+ releases the register names of both operands and the condition itself,\r
+ *co is NULL on return\r
+*/\r
+static void cond_destroy (cond **co)\r
+{\r
+    cond *object = *co;\r
+\r
+    if (object == NULL)\r
+        return;\r
+\r
+    mem_free ((void **) &object->m_operands[0].m_regname);\r
+    mem_free ((void **) &object->m_operands[1].m_regname);\r
+    mem_free ((void **) co);\r
+}\r
+\r
+/*\r
+ specifier type typedef\r
+\r
+ kind of a specifier; the comments in the spec typedef tell which spec members\r
+ belong to which kind\r
+*/\r
+typedef enum spec_type_\r
+{\r
+ st_false, /* default type assigned by spec_create () */\r
+ st_true,\r
+ st_byte,\r
+ st_byte_range,\r
+ st_string,\r
+ st_identifier,\r
+ st_identifier_loop,\r
+ st_debug\r
+} spec_type;\r
+\r
+/*\r
+ specifier typedef\r
+\r
+ one node of the singly linked specifier list of a rule\r
+*/\r
+typedef struct spec_\r
+{\r
+ spec_type m_spec_type;\r
+ byte m_byte[2]; /* st_byte, st_byte_range */\r
+ byte *m_string; /* st_string */\r
+ struct rule_ *m_rule; /* st_identifier, st_identifier_loop */\r
+ emit *m_emits; /* emit chain; destroyed by spec_destroy () */\r
+ error *m_errtext; /* error construction; destroyed by spec_destroy () */\r
+ cond *m_cond; /* condition; destroyed by spec_destroy () */\r
+ struct spec_ *m_next; /* next specifier, NULL at the end of the list */\r
+} spec;\r
+\r
+/*\r
+ allocates a specifier of type st_false with zeroed bytes and all pointers NULL\r
+ and stores it in *sp,\r
+ *sp is NULL if the allocation failed\r
+*/\r
+static void spec_create (spec **sp)\r
+{\r
+    spec *node = mem_alloc (sizeof (spec));\r
+\r
+    if (node != NULL)\r
+    {\r
+        node->m_spec_type = st_false;\r
+        node->m_byte[0] = '\0';\r
+        node->m_byte[1] = '\0';\r
+        node->m_string = NULL;\r
+        node->m_rule = NULL;\r
+        node->m_emits = NULL;\r
+        node->m_errtext = NULL;\r
+        node->m_cond = NULL;\r
+        node->m_next = NULL;\r
+    }\r
+\r
+    *sp = node;\r
+}\r
+\r
+/*\r
+ destroys the specifier list starting at *sp: the tail first, then the emits, the error\r
+ construction, the string, the condition and the node itself; *sp is NULL on return,\r
+ the rule referenced by m_rule is not touched\r
+*/\r
+static void spec_destroy (spec **sp)\r
+{\r
+    spec *node = *sp;\r
+\r
+    if (node == NULL)\r
+        return;\r
+\r
+    spec_destroy (&node->m_next);\r
+    emit_destroy (&node->m_emits);\r
+    error_destroy (&node->m_errtext);\r
+    mem_free ((void **) &node->m_string);\r
+    cond_destroy (&node->m_cond);\r
+    mem_free ((void **) sp);\r
+}\r
+\r
+/*\r
+ links the specifier (or list) *ns behind the last specifier of the list *sp,\r
+ an empty list (*sp == NULL) simply becomes *ns\r
+*/\r
+static void spec_append (spec **sp, spec **ns)\r
+{\r
+    spec **link;\r
+\r
+    /* find the first NULL link of the list */\r
+    for (link = sp; *link != NULL; link = &(*link)->m_next)\r
+        ;\r
+\r
+    *link = *ns;\r
+}\r
+\r
+/*\r
+ operator typedef\r
+\r
+ operator that connects the specifiers of a rule (".and" / ".or", see THE LANGUAGE above)\r
+*/\r
+typedef enum oper_\r
+{\r
+ op_none, /* default assigned by rule_create () */\r
+ op_and,\r
+ op_or\r
+} oper;\r
+\r
+/*\r
+ rule typedef\r
+\r
+ one node of the singly linked rule list: an operator and the specifiers it connects\r
+*/\r
+typedef struct rule_\r
+{\r
+ oper m_oper;\r
+ spec *m_specs; /* specifier list; destroyed by rule_destroy () */\r
+ struct rule_ *m_next; /* next rule, NULL at the end of the list */\r
+/* int m_referenced; */ /* for debugging purposes */\r
+} rule;\r
+\r
+/*\r
+ allocates a rule with operator op_none, no specifiers and no successor and stores it in *ru,\r
+ *ru is NULL if the allocation failed\r
+*/\r
+static void rule_create (rule **ru)\r
+{\r
+    rule *node = mem_alloc (sizeof (rule));\r
+\r
+    if (node != NULL)\r
+    {\r
+        node->m_oper = op_none;\r
+        node->m_specs = NULL;\r
+        node->m_next = NULL;\r
+/*      node->m_referenced = 0; */\r
+    }\r
+\r
+    *ru = node;\r
+}\r
+\r
+/*\r
+ destroys the rule list starting at *ru: the tail first, then the specifiers of the rule\r
+ and the rule itself; *ru is NULL on return\r
+*/\r
+static void rule_destroy (rule **ru)\r
+{\r
+    rule *node = *ru;\r
+\r
+    if (node == NULL)\r
+        return;\r
+\r
+    rule_destroy (&node->m_next);\r
+    spec_destroy (&node->m_specs);\r
+    mem_free ((void **) ru);\r
+}\r
+\r
+/*\r
+ links the rule (or list) *nr behind the last rule of the list *ru,\r
+ an empty list (*ru == NULL) simply becomes *nr\r
+*/\r
+static void rule_append (rule **ru, rule **nr)\r
+{\r
+    rule **link;\r
+\r
+    /* find the first NULL link of the list */\r
+    for (link = ru; *link != NULL; link = &(*link)->m_next)\r
+        ;\r
+\r
+    *link = *nr;\r
+}\r
+\r
+/*\r
+ returns unique grammar id\r
+\r
+ ids come from a counter that starts at 0 and is incremented before each use,\r
+ so the first id handed out is 1\r
+*/\r
+static grammar next_valid_grammar_id ()\r
+{\r
+    static grammar last_id = 0;\r
+\r
+    last_id++;\r
+    return last_id;\r
+}\r
+\r
+/*\r
+ dictionary typedef\r
+\r
+ one loaded grammar; dictionaries form a singly linked list and are looked up by id\r
+*/\r
+typedef struct dict_\r
+{\r
+ rule *m_rulez; /* head of the rule list; destroyed by dict_destroy () */\r
+ rule *m_syntax; /* not destroyed by dict_destroy (); presumably points into m_rulez - TODO confirm */\r
+ rule *m_string; /* not destroyed by dict_destroy (); presumably points into m_rulez - TODO confirm */\r
+ map_byte *m_regbytes; /* regbyte list; destroyed by dict_destroy () */\r
+ grammar m_id; /* id assigned in dict_create (); key used by dict_find () */\r
+ struct dict_ *m_next; /* next dictionary, NULL at the end of the list */\r
+} dict;\r
+\r
+/*\r
+ allocates an empty dictionary, gives it a fresh id from next_valid_grammar_id ()\r
+ and stores it in *di,\r
+ *di is NULL if the allocation failed (no id is consumed in that case)\r
+*/\r
+static void dict_create (dict **di)\r
+{\r
+    dict *object = mem_alloc (sizeof (dict));\r
+\r
+    if (object != NULL)\r
+    {\r
+        object->m_rulez = NULL;\r
+        object->m_syntax = NULL;\r
+        object->m_string = NULL;\r
+        object->m_regbytes = NULL;\r
+        object->m_id = next_valid_grammar_id ();\r
+        object->m_next = NULL;\r
+    }\r
+\r
+    *di = object;\r
+}\r
+\r
+/*\r
+ destroys the single dictionary *di: its rule list, its regbyte list and the object itself,\r
+ *di is NULL on return; the dictionary referenced by m_next is not touched\r
+*/\r
+static void dict_destroy (dict **di)\r
+{\r
+    dict *object = *di;\r
+\r
+    if (object == NULL)\r
+        return;\r
+\r
+    rule_destroy (&object->m_rulez);\r
+    map_byte_destroy (&object->m_regbytes);\r
+    mem_free ((void **) di);\r
+}\r
+\r
+/*\r
+ links the dictionary (or list) *nd behind the last dictionary of the list *di,\r
+ an empty list (*di == NULL) simply becomes *nd\r
+*/\r
+static void dict_append (dict **di, dict **nd)\r
+{\r
+    dict **link;\r
+\r
+    /* find the first NULL link of the list */\r
+    for (link = di; *link != NULL; link = &(*link)->m_next)\r
+        ;\r
+\r
+    *link = *nd;\r
+}\r
+\r
+/*\r
+ searches the list *di for the dictionary whose id equals key,\r
+ *data receives the first match, or NULL if there is none\r
+*/\r
+static void dict_find (dict **di, grammar key, dict **data)\r
+{\r
+    dict *candidate = *di;\r
+\r
+    /* stop at the first matching id or at the end of the list */\r
+    while (candidate != NULL && candidate->m_id != key)\r
+        candidate = candidate->m_next;\r
+\r
+    *data = candidate;\r
+}\r
+\r
+/* head of the global dictionary list, initially empty */\r
+static dict *g_dicts = NULL;\r
+\r
+/*\r
+ byte array typedef\r
+\r
+ XXX this class is going to be replaced by a faster one, soon\r
+*/\r
+typedef struct barray_\r
+{\r
+ byte *data; /* array storage, NULL while the array is empty */\r
+ unsigned int len; /* number of bytes in data */\r
+} barray;\r
+\r
+/*\r
+ allocates an empty byte array (no storage, length 0) and stores it in *ba,\r
+ *ba is NULL if the allocation failed\r
+*/\r
+static void barray_create (barray **ba)\r
+{\r
+    barray *array = mem_alloc (sizeof (barray));\r
+\r
+    if (array != NULL)\r
+    {\r
+        array->data = NULL;\r
+        array->len = 0;\r
+    }\r
+\r
+    *ba = array;\r
+}\r
+\r
+/*\r
+ releases the storage of the byte array and the array object itself,\r
+ *ba is NULL on return\r
+*/\r
+static void barray_destroy (barray **ba)\r
+{\r
+    barray *array = *ba;\r
+\r
+    if (array == NULL)\r
+        return;\r
+\r
+    mem_free ((void **) &array->data);\r
+    mem_free ((void **) ba);\r
+}\r
+\r
+/*\r
+ changes the length of the byte array to nlen bytes,\r
+ a length of 0 releases the storage altogether,\r
+ returns 0 on success,\r
+ returns 1 if the reallocation failed - the array is left unchanged in that case\r
+*/\r
+static int barray_resize (barray **ba, unsigned int nlen)\r
+{\r
+    barray *array = *ba;\r
+    byte *grown;\r
+\r
+    if (nlen == 0)\r
+    {\r
+        /* mem_free () resets the data pointer to NULL */\r
+        mem_free ((void **) &array->data);\r
+        array->len = 0;\r
+        return 0;\r
+    }\r
+\r
+    grown = mem_realloc (array->data, array->len * sizeof (byte), nlen * sizeof (byte));\r
+    if (grown == NULL)\r
+        return 1;\r
+\r
+    array->data = grown;\r
+    array->len = nlen;\r
+    return 0;\r
+}\r
+\r
+/*\r
+ appends the contents of the byte array *nb to the end of the byte array *ba,\r
+ returns 0 on success,\r
+ returns 1 if *ba could not be enlarged\r
+*/\r
+static int barray_append (barray **ba, barray **nb)\r
+{\r
+    barray *target = *ba;\r
+    const barray *source = *nb;\r
+    const unsigned int old_len = target->len;\r
+\r
+    if (barray_resize (ba, old_len + source->len))\r
+        return 1;\r
+\r
+    /* the appended bytes start right behind the previous contents */\r
+    mem_copy (target->data + old_len, source->data, source->len);\r
+\r
+    return 0;\r
+}\r
+\r
+/*\r
+ executes the emit chain em:\r
+ emits destined for the output are appended to the byte array *ba,\r
+ emits destined for a regbyte push a new entry on top of the regbyte stack *rbc,\r
+\r
+ c   - character written by et_stream emits\r
+ pos - position written by et_position emits (four bytes, lowest byte first)\r
+\r
+ returns 0 on success,\r
+ returns 1 if the array could not be enlarged or a stack entry could not be allocated\r
+*/\r
+static int barray_push (barray **ba, emit *em, byte c, unsigned int pos, regbyte_ctx **rbc)\r
+{\r
+    const emit *scan;\r
+    unsigned int extra = 0;\r
+    unsigned int at;\r
+\r
+    /* first pass: count the bytes that go to the output buffer */\r
+    for (scan = em; scan != NULL; scan = scan->m_next)\r
+    {\r
+        if (scan->m_emit_dest != ed_output)\r
+            continue;\r
+\r
+        /* position is a 32-bit unsigned integer, everything else is a single byte */\r
+        if (scan->m_emit_type == et_position)\r
+            extra += 4;\r
+        else\r
+            extra++;\r
+    }\r
+\r
+    if (barray_resize (ba, (**ba).len + extra))\r
+        return 1;\r
+\r
+    /* index of the first newly added byte */\r
+    at = (**ba).len - extra;\r
+\r
+    /* second pass: execute the emits in the order they were declared */\r
+    for (; em != NULL; em = em->m_next)\r
+    {\r
+        if (em->m_emit_dest == ed_output)\r
+        {\r
+            byte *out = (**ba).data;\r
+\r
+            if (em->m_emit_type == et_byte)\r
+            {\r
+                out[at++] = em->m_byte;\r
+            }\r
+            else if (em->m_emit_type == et_stream)\r
+            {\r
+                out[at++] = c;\r
+            }\r
+            else\r
+            {\r
+                /* et_position */\r
+                out[at++] = (byte) pos;\r
+                out[at++] = (byte) (pos >> 8);\r
+                out[at++] = (byte) (pos >> 16);\r
+                out[at++] = (byte) (pos >> 24);\r
+            }\r
+        }\r
+        else\r
+        {\r
+            regbyte_ctx *top;\r
+\r
+            regbyte_ctx_create (&top);\r
+            if (top == NULL)\r
+                return 1;\r
+\r
+            /* the new entry becomes the top of the regbyte stack */\r
+            top->m_prev = *rbc;\r
+            top->m_regbyte = em->m_regbyte;\r
+            *rbc = top;\r
+\r
+            /* an et_position emit assigns no value to the new entry */\r
+            if (em->m_emit_type == et_byte)\r
+                top->m_current_value = em->m_byte;\r
+            else if (em->m_emit_type == et_stream)\r
+                top->m_current_value = c;\r
+        }\r
+    }\r
+\r
+    return 0;\r
+}\r
+\r
+/*\r
+ string to string map typedef\r
+\r
+ The map is a singly linked list; each entry associates a key string with a data string.\r
+*/\r
+typedef struct map_str_\r
+{\r
+ byte *key; /* entry name; released by map_str_destroy () */\r
+ byte *data; /* string associated with the key; released by map_str_destroy () */\r
+ struct map_str_ *next; /* next entry, NULL at the end of the list */\r
+} map_str;\r
+\r
+/*\r
+ allocates an empty map entry (NULL key, NULL data, no successor) and stores it in *ma,\r
+ *ma is NULL if the allocation failed\r
+*/\r
+static void map_str_create (map_str **ma)\r
+{\r
+    map_str *entry = mem_alloc (sizeof (map_str));\r
+\r
+    if (entry != NULL)\r
+    {\r
+        entry->key = NULL;\r
+        entry->data = NULL;\r
+        entry->next = NULL;\r
+    }\r
+\r
+    *ma = entry;\r
+}\r
+\r
+/*\r
+ destroys the list starting at *ma: the tail first, then the key, the data string\r
+ and the entry itself; *ma is NULL on return\r
+*/\r
+static void map_str_destroy (map_str **ma)\r
+{\r
+    map_str *entry = *ma;\r
+\r
+    if (entry == NULL)\r
+        return;\r
+\r
+    map_str_destroy (&entry->next);\r
+    mem_free ((void **) &entry->key);\r
+    mem_free ((void **) &entry->data);\r
+    mem_free ((void **) ma);\r
+}\r
+\r
+/*\r
+ links the entry (or list) *nm behind the last entry of the list *ma,\r
+ an empty list (*ma == NULL) simply becomes *nm\r
+*/\r
+static void map_str_append (map_str **ma, map_str **nm)\r
+{\r
+    map_str **link;\r
+\r
+    /* find the first NULL link of the list */\r
+    for (link = ma; *link != NULL; link = &(*link)->next)\r
+        ;\r
+\r
+    *link = *nm;\r
+}\r
+\r
+/*\r
+ searches the map for specified key,\r
+ if the key is matched, *data receives a newly allocated copy of the string associated\r
+ with the key - the caller is responsible for later freeing it,\r
+ returns 0 if the key is matched and the copy was made,\r
+ returns 1 if the copy failed,\r
+ returns 1 and records UNRESOLVED_REFERENCE if the key is not in the map\r
+*/\r
+static int map_str_find (map_str **ma, const byte *key, byte **data)\r
+{\r
+    const map_str *entry;\r
+\r
+    for (entry = *ma; entry != NULL; entry = entry->next)\r
+    {\r
+        if (!str_equal (entry->key, key))\r
+            continue;\r
+\r
+        *data = str_duplicate (entry->data);\r
+        return (*data == NULL) ? 1 : 0;\r
+    }\r
+\r
+    set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);\r
+    return 1;\r
+}\r
+\r
+/*\r
+ string to rule map typedef\r
+\r
+ The map is a singly linked list; each entry associates a key string with a rule.\r
+*/\r
+typedef struct map_rule_\r
+{\r
+ byte *key; /* entry name; released by map_rule_destroy () */\r
+ rule *data; /* rule associated with the key; not released by map_rule_destroy () */\r
+ struct map_rule_ *next; /* next entry, NULL at the end of the list */\r
+} map_rule;\r
+\r
+/*\r
+ allocates an empty map entry (NULL key, NULL rule, no successor) and stores it in *ma,\r
+ *ma is NULL if the allocation failed\r
+*/\r
+static void map_rule_create (map_rule **ma)\r
+{\r
+    map_rule *entry = mem_alloc (sizeof (map_rule));\r
+\r
+    if (entry != NULL)\r
+    {\r
+        entry->key = NULL;\r
+        entry->data = NULL;\r
+        entry->next = NULL;\r
+    }\r
+\r
+    *ma = entry;\r
+}\r
+\r
+/*\r
+ destroys the list starting at *ma: the tail first, then the key and the entry itself,\r
+ *ma is NULL on return; the rules the entries refer to are not touched\r
+*/\r
+static void map_rule_destroy (map_rule **ma)\r
+{\r
+    map_rule *entry = *ma;\r
+\r
+    if (entry == NULL)\r
+        return;\r
+\r
+    map_rule_destroy (&entry->next);\r
+    mem_free ((void **) &entry->key);\r
+    mem_free ((void **) ma);\r
+}\r
+\r
+/*\r
+ links the entry (or list) *nm behind the last entry of the list *ma,\r
+ an empty list (*ma == NULL) simply becomes *nm\r
+*/\r
+static void map_rule_append (map_rule **ma, map_rule **nm)\r
+{\r
+    map_rule **link;\r
+\r
+    /* find the first NULL link of the list */\r
+    for (link = ma; *link != NULL; link = &(*link)->next)\r
+        ;\r
+\r
+    *link = *nm;\r
+}\r
+\r
+/*\r
+ searches the map for specified key,\r
+ if the key is matched, *data is filled with the rule associated with the key,\r
+ returns 0 if the key is matched,\r
+ returns 1 and records UNRESOLVED_REFERENCE otherwise\r
+*/\r
+static int map_rule_find (map_rule **ma, const byte *key, rule **data)\r
+{\r
+    const map_rule *entry;\r
+\r
+    for (entry = *ma; entry != NULL; entry = entry->next)\r
+    {\r
+        if (str_equal (entry->key, key))\r
+        {\r
+            *data = entry->data;\r
+            return 0;\r
+        }\r
+    }\r
+\r
+    set_last_error (UNRESOLVED_REFERENCE, str_duplicate (key), -1);\r
+    return 1;\r
+}\r
+\r
+/*\r
+ returns 1 if given character is a white space: blank, horizontal tab, new line\r
+ or carriage return,\r
+ returns 0 otherwise\r
+*/\r
+static int is_space (byte c)\r
+{\r
+    switch (c)\r
+    {\r
+    case ' ':\r
+    case '\t':\r
+    case '\n':\r
+    case '\r':\r
+        return 1;\r
+    default:\r
+        return 0;\r
+    }\r
+}\r
+\r
+/*\r
+ steps over one white space character, if *text points to one,\r
+ returns 1 if a space has been eaten,\r
+ returns 0 and leaves *text unchanged otherwise\r
+*/\r
+static int eat_space (const byte **text)\r
+{\r
+    if (!is_space (**text))\r
+        return 0;\r
+\r
+    (*text)++;\r
+    return 1;\r
+}\r
+\r
+/*\r
+ returns 1 if text points to the two characters '/' and '*' that start a C-style comment,\r
+ returns 0 otherwise\r
+*/\r
+static int is_comment_start (const byte *text)\r
+{\r
+    /* the second character is examined only when the first one matches */\r
+    if (text[0] != '/')\r
+        return 0;\r
+\r
+    return text[1] == '*';\r
+}\r
+\r
+/*\r
+ advances text pointer to first character after C-style comment block - if any,\r
+ returns 1 if C-style comment block has been encountered and eaten,\r
+ returns 0 otherwise\r
+\r
+ A comment that is not closed before the end of the text is eaten up to the terminating\r
+ null character; the text pointer never moves past that character.\r
+*/\r
+static int eat_comment (const byte **text)\r
+{\r
+    const byte *cursor;\r
+\r
+    if (!is_comment_start (*text))\r
+        return 0;\r
+\r
+    /* *text points to comment block - skip two characters to enter comment body */\r
+    cursor = *text + 2;\r
+\r
+    /* skip any character except consecutive '*' and '/', but stop at the end of the text */\r
+    while (*cursor != '\0' && !(cursor[0] == '*' && cursor[1] == '/'))\r
+        cursor++;\r
+\r
+    /* skip those two terminating characters - an unterminated comment has none */\r
+    if (*cursor != '\0')\r
+        cursor += 2;\r
+\r
+    *text = cursor;\r
+    return 1;\r
+}\r
+\r
+/*\r
+ advances text pointer to first character that is neither space nor C-style comment block\r
+*/\r
+static void eat_spaces (const byte **text)\r
+{\r
+    for (;;)\r
+    {\r
+        if (eat_space (text))\r
+            continue;\r
+\r
+        /* a comment is tried only when no space could be eaten */\r
+        if (eat_comment (text))\r
+            continue;\r
+\r
+        break;\r
+    }\r
+}\r
+\r
+/*\r
+ makes room in the string *ptr of length *len and appends character c to it,\r
+ a null character c appends nothing - the string is only (re)terminated,\r
+ *ptr may be NULL, in which case the string is allocated,\r
+ returns 0 on success,\r
+ returns 1 if the string could not be enlarged - *ptr and *len are unchanged then\r
+*/\r
+static int string_grow (byte **ptr, unsigned int *len, byte c)\r
+{\r
+    const unsigned int used = *len;\r
+    const int block_full = (used & 0x0F) == 0x0F;\r
+\r
+    /* the storage grows in 16-byte increments: when there is none yet or the last block is full */\r
+    if (*ptr == NULL || block_full)\r
+    {\r
+        const size_t old_bytes = ((used + 1) & ~0x0F) * sizeof (byte);\r
+        const size_t new_bytes = ((used + 1 + 0x10) & ~0x0F) * sizeof (byte);\r
+        byte *grown = mem_realloc (*ptr, old_bytes, new_bytes);\r
+\r
+        if (grown == NULL)\r
+            return 1;\r
+\r
+        *ptr = grown;\r
+    }\r
+\r
+    if (c)\r
+    {\r
+        /* append given character */\r
+        (*ptr)[*len] = c;\r
+        (*len)++;\r
+    }\r
+\r
+    /* keep the string null-terminated */\r
+    (*ptr)[*len] = '\0';\r
+\r
+    return 0;\r
+}\r
+\r
+/*\r
+ returns 1 if given character is a valid identifier character a-z, A-Z, 0-9 or _\r
+ returns 0 otherwise\r
+*/\r
+static int is_identifier (byte c)\r
+{\r
+    if (c >= 'a' && c <= 'z')\r
+        return 1;\r
+    if (c >= 'A' && c <= 'Z')\r
+        return 1;\r
+    if (c >= '0' && c <= '9')\r
+        return 1;\r
+\r
+    return c == '_';\r
+}\r
+\r
+/*\r
+ copies characters from *text to a newly allocated string until non-identifier character\r
+ is encountered and stores the string in *id,\r
+ assumes that *id points to NULL object - caller is responsible for later freeing the string,\r
+ text pointer is advanced to point past the copied identifier,\r
+ returns 0 if identifier was successfully copied,\r
+ returns 1 otherwise - *text and *id are left unchanged then\r
+*/\r
+static int get_identifier (const byte **text, byte **id)\r
+{\r
+    const byte *cursor = *text;\r
+    byte *name = NULL;\r
+    unsigned int name_len = 0;\r
+\r
+    /* start with an allocated empty string, so that the result is never NULL on success */\r
+    if (string_grow (&name, &name_len, '\0'))\r
+        return 1;\r
+\r
+    /* loop while next character in buffer is valid for identifiers */\r
+    for (; is_identifier (*cursor); cursor++)\r
+    {\r
+        if (string_grow (&name, &name_len, *cursor))\r
+        {\r
+            mem_free ((void **) &name);\r
+            return 1;\r
+        }\r
+    }\r
+\r
+    *text = cursor;\r
+    *id = name;\r
+\r
+    return 0;\r
+}\r
+\r
+/*\r
+ returns 1 if given character is HEX digit 0-9, A-F or a-f,\r
+ returns 0 otherwise\r
+*/\r
+static int is_hex (byte c)\r
+{\r
+    if (c >= '0' && c <= '9')\r
+        return 1;\r
+    if (c >= 'A' && c <= 'F')\r
+        return 1;\r
+\r
+    return c >= 'a' && c <= 'f';\r
+}\r
+\r
+/*\r
+ returns value of passed character as if it was HEX digit,\r
+ any character outside 0-9 and A-F is treated as a lower case digit a-f\r
+*/\r
+static unsigned int hex2dec (byte c)\r
+{\r
+    const int is_decimal = c >= '0' && c <= '9';\r
+    const int is_upper = c >= 'A' && c <= 'F';\r
+\r
+    if (is_decimal)\r
+        return c - '0';\r
+\r
+    return is_upper ? c - 'A' + 10 : c - 'a' + 10;\r
+}\r
+\r
+/*\r
+ converts sequence of HEX digits pointed by *text until non-HEX digit is encountered,\r
+ advances text pointer past the converted sequence,\r
+ returns the converted value, which is 0 if *text does not start with a HEX digit\r
+*/\r
+static unsigned int hex_convert (const byte **text)\r
+{\r
+    const byte *cursor;\r
+    unsigned int result = 0;\r
+\r
+    /* every digit shifts the accumulated value by one HEX place */\r
+    for (cursor = *text; is_hex (*cursor); cursor++)\r
+        result = result * 0x10 + hex2dec (*cursor);\r
+\r
+    *text = cursor;\r
+    return result;\r
+}\r
+\r
+/*\r
+    tells whether a character is an octal digit,\r
+    returns 1 for 0-7,\r
+    returns 0 for every other character\r
+*/\r
+static int is_oct (byte c)\r
+{\r
+    return !(c < '0' || c > '7');\r
+}\r
+\r
+/*\r
+    returns the numeric value of an octal digit,\r
+    the character is not validated\r
+*/\r
+static int oct2dec (byte c)\r
+{\r
+    const int digit = c - '0';\r
+\r
+    return digit;\r
+}\r
+\r
+/*\r
+    decodes a C-like escape sequence,\r
+    assumes that *text points to the '\' character that starts the sequence,\r
+    understands the simple escapes (\' \" \? \\ \a \b \f \n \r \t \v), \x followed by\r
+    HEX digits and up to three OCT digits,\r
+    for any other character after the backslash only the backslash is consumed and 0\r
+    is returned,\r
+    *text is advanced past the consumed characters,\r
+    returns the decoded byte\r
+*/\r
+static byte get_escape_sequence (const byte **text)\r
+{\r
+    /* step over the backslash */\r
+    const byte *cursor = *text + 1;\r
+    const byte selector = *cursor;\r
+    byte mapped;\r
+    int octal = 0;\r
+    int digits;\r
+\r
+    switch (selector)\r
+    {\r
+    case '\'':\r
+    case '"':\r
+    case '?':\r
+    case '\\':\r
+        /* these characters stand for themselves */\r
+        mapped = selector;\r
+        break;\r
+    case 'a':\r
+        mapped = '\a';\r
+        break;\r
+    case 'b':\r
+        mapped = '\b';\r
+        break;\r
+    case 'f':\r
+        mapped = '\f';\r
+        break;\r
+    case 'n':\r
+        mapped = '\n';\r
+        break;\r
+    case 'r':\r
+        mapped = '\r';\r
+        break;\r
+    case 't':\r
+        mapped = '\t';\r
+        break;\r
+    case 'v':\r
+        mapped = '\v';\r
+        break;\r
+    case 'x':\r
+        /* skip the 'x' and let hex_convert consume the digits */\r
+        *text = cursor + 1;\r
+        return (byte) hex_convert (text);\r
+    default:\r
+        /* at most three octal digits; the selector itself is the first candidate */\r
+        for (digits = 0; digits < 3 && is_oct (*cursor); digits++)\r
+            octal = octal * 010 + oct2dec (*cursor++);\r
+\r
+        *text = cursor;\r
+        return (byte) octal;\r
+    }\r
+\r
+    /* a simple escape consumes exactly one character after the backslash */\r
+    *text = cursor + 1;\r
+    return mapped;\r
+}\r
+\r
+/*\r
+    copies characters from *text to *str until the terminating " or ' character is\r
+    encountered, decoding escape sequences on the way,\r
+    assumes that *text points to the " or ' character that starts the string; the same\r
+    character terminates it,\r
+    on success *str receives a newly allocated, NUL-terminated string (the caller is\r
+    responsible for freeing it) and *text is advanced past the closing character,\r
+    on failure neither *text nor *str is modified,\r
+    returns 0 if the string was successfully copied,\r
+    returns 1 if memory ran out or the input ended before the closing character\r
+*/\r
+static int get_string (const byte **text, byte **str)\r
+{\r
+    const byte *t = *text;\r
+    byte *p = NULL;\r
+    unsigned int len = 0;\r
+    byte term_char;\r
+\r
+    /* nothing to read - stepping over the terminator would leave the buffer */\r
+    if (*t == '\0')\r
+        return 1;\r
+\r
+    if (string_grow (&p, &len, '\0'))\r
+        return 1;\r
+\r
+    /* read " or ' character that starts the string */\r
+    term_char = *t++;\r
+    /* while next character is not the terminating character */\r
+    while (*t && *t != term_char)\r
+    {\r
+        byte c;\r
+\r
+        if (*t == '\\')\r
+            c = get_escape_sequence (&t);\r
+        else\r
+            c = *t++;\r
+\r
+        if (string_grow (&p, &len, c))\r
+        {\r
+            mem_free ((void **) &p);\r
+            return 1;\r
+        }\r
+    }\r
+\r
+    /* the input ended inside the string - do not step over the NUL terminator */\r
+    if (*t != term_char)\r
+    {\r
+        mem_free ((void **) &p);\r
+        return 1;\r
+    }\r
+\r
+    /* skip " or ' character that ends the string */\r
+    t++;\r
+\r
+    *text = t;\r
+    *str = p;\r
+    return 0;\r
+}\r
+\r
+/*\r
+    reads the body of an emit code declaration, the syntax is:\r
+    ".emtcode" " " <symbol> " " (("0x" | "0X") <hex_value> | "'" <character> "'")\r
+    assumes that *text already points to <symbol>,\r
+    on success *ma receives a new map entry (key = symbol, data = byte value) and *text\r
+    is advanced past the declaration and any following spaces,\r
+    returns 0 if emit code is successfully read,\r
+    returns 1 otherwise\r
+*/\r
+static int get_emtcode (const byte **text, map_byte **ma)\r
+{\r
+    const byte *cursor = *text;\r
+    map_byte *entry = NULL;\r
+\r
+    map_byte_create (&entry);\r
+    if (entry == NULL)\r
+        return 1;\r
+\r
+    if (get_identifier (&cursor, &entry->key) != 0)\r
+        goto fail;\r
+    eat_spaces (&cursor);\r
+\r
+    if (*cursor != '\'')\r
+    {\r
+        /* numeric form: step over the two-character "0x" or "0X" prefix */\r
+        cursor += 2;\r
+        entry->data = (byte) hex_convert (&cursor);\r
+    }\r
+    else\r
+    {\r
+        /* character form: only the first byte of the quoted string is used */\r
+        byte *literal = NULL;\r
+\r
+        if (get_string (&cursor, &literal) != 0)\r
+            goto fail;\r
+\r
+        entry->data = (byte) literal[0];\r
+        mem_free ((void **) &literal);\r
+    }\r
+\r
+    eat_spaces (&cursor);\r
+\r
+    *ma = entry;\r
+    *text = cursor;\r
+    return 0;\r
+\r
+fail:\r
+    map_byte_destroy (&entry);\r
+    return 1;\r
+}\r
+\r
+/*\r
+    reads the body of a regbyte declaration, the syntax is:\r
+    ".regbyte" " " <symbol> " " ("0x" | "0X") <hex_value>\r
+    a regbyte declaration has the same form as an emit code, so the work is delegated\r
+    to get_emtcode,\r
+    assumes that *text already points to <symbol>,\r
+    returns 0 if regbyte is successfully read,\r
+    returns 1 otherwise\r
+*/\r
+static int get_regbyte (const byte **text, map_byte **ma)\r
+{\r
+    const int status = get_emtcode (text, ma);\r
+\r
+    return status;\r
+}\r
+\r
+/*\r
+    reads the body of an error text declaration: <symbol> followed by a quoted string,\r
+    assumes that *text already points to <symbol>,\r
+    on success *ma receives a new map entry (key = symbol, data = string) and *text is\r
+    advanced past the declaration and any following spaces,\r
+    returns 0 on success,\r
+    returns 1 otherwise\r
+*/\r
+static int get_errtext (const byte **text, map_str **ma)\r
+{\r
+    const byte *cursor = *text;\r
+    map_str *entry = NULL;\r
+\r
+    map_str_create (&entry);\r
+    if (entry == NULL)\r
+        return 1;\r
+\r
+    if (get_identifier (&cursor, &entry->key) != 0)\r
+        goto fail;\r
+    eat_spaces (&cursor);\r
+\r
+    if (get_string (&cursor, &entry->data) != 0)\r
+        goto fail;\r
+    eat_spaces (&cursor);\r
+\r
+    *ma = entry;\r
+    *text = cursor;\r
+    return 0;\r
+\r
+fail:\r
+    map_str_destroy (&entry);\r
+    return 1;\r
+}\r
+\r
+/*\r
+    reads an optional ".error" clause: ".error" followed by either a quoted message or\r
+    the name of a previously declared error text (looked up in maps),\r
+    if *text does not start with ".error" nothing is consumed, *er is left untouched\r
+    and 0 is returned,\r
+    the message is then post-processed: "$$" becomes a single "$", and the name\r
+    enclosed in "$name$" is moved to m_token_name leaving a single "$" placeholder in\r
+    the message; a "$name" without the closing dollar sign extends to the end of the\r
+    message,\r
+    on success *er holds the new error object and *text is advanced past the clause,\r
+    returns 0 on success,\r
+    returns 1 otherwise (out of memory or unknown error text name)\r
+*/\r
+static int get_error (const byte **text, error **er, map_str *maps)\r
+{\r
+    const byte *t = *text;\r
+    byte *temp = NULL;\r
+    byte *processed = NULL;\r
+    unsigned int len = 0, i = 0, text_len;\r
+\r
+    if (*t != '.')\r
+        return 0;\r
+\r
+    t++;\r
+    if (get_identifier (&t, &temp))\r
+        return 1;\r
+    eat_spaces (&t);\r
+\r
+    /* some other dot keyword - leave it for the caller */\r
+    if (!str_equal ((byte *) "error", temp))\r
+    {\r
+        mem_free ((void **) &temp);\r
+        return 0;\r
+    }\r
+\r
+    mem_free ((void **) &temp);\r
+\r
+    error_create (er);\r
+    if (*er == NULL)\r
+        return 1;\r
+\r
+    if (*t == '\"')\r
+    {\r
+        /* inline message */\r
+        if (get_string (&t, &(**er).m_text))\r
+            goto fail;\r
+        eat_spaces (&t);\r
+    }\r
+    else\r
+    {\r
+        /* reference to an .errtext declaration */\r
+        if (get_identifier (&t, &temp))\r
+            goto fail;\r
+        eat_spaces (&t);\r
+\r
+        if (map_str_find (&maps, temp, &(**er).m_text))\r
+        {\r
+            mem_free ((void **) &temp);\r
+            goto fail;\r
+        }\r
+\r
+        mem_free ((void **) &temp);\r
+    }\r
+\r
+    /* try to extract "token" from "...$token$..." */\r
+    if (string_grow (&processed, &len, '\0'))\r
+        goto fail;\r
+\r
+    /* the message is not modified while it is scanned, so measure it once */\r
+    text_len = str_length ((**er).m_text);\r
+\r
+    while (i < text_len)\r
+    {\r
+        const byte *src = (**er).m_text;\r
+\r
+        if (src[i] == '$' && src[i + 1] == '$')\r
+        {\r
+            /* the dollar sign is repeated - emit one and skip both */\r
+            if (string_grow (&processed, &len, '$'))\r
+                goto fail;\r
+\r
+            i += 2;\r
+        }\r
+        else if (src[i] != '$')\r
+        {\r
+            if (string_grow (&processed, &len, src[i]))\r
+                goto fail;\r
+\r
+            i++;\r
+        }\r
+        else\r
+        {\r
+            /* length of token being extracted */\r
+            unsigned int tlen = 0;\r
+\r
+            /* keep a single dollar sign as the placeholder for the token */\r
+            if (string_grow (&processed, &len, '$'))\r
+                goto fail;\r
+\r
+            if (string_grow (&(**er).m_token_name, &tlen, '\0'))\r
+                goto fail;\r
+\r
+            /* skip the opening dollar sign */\r
+            i++;\r
+\r
+            /* stop at the closing dollar sign, but never run past the end of the message */\r
+            while (src[i] != '$' && src[i] != '\0')\r
+            {\r
+                if (string_grow (&(**er).m_token_name, &tlen, src[i]))\r
+                    goto fail;\r
+\r
+                i++;\r
+            }\r
+\r
+            /* skip the closing dollar sign, if there is one */\r
+            if (src[i] == '$')\r
+                i++;\r
+        }\r
+    }\r
+\r
+    mem_free ((void **) &(**er).m_text);\r
+    (**er).m_text = processed;\r
+\r
+    *text = t;\r
+    return 0;\r
+\r
+fail:\r
+    if (processed != NULL)\r
+        mem_free ((void **) &processed);\r
+    error_destroy (er);\r
+    return 1;\r
+}\r
+\r
+/*\r
+    reads an optional chain of ".emit" and ".load" instructions found at *text,\r
+    ".load" is followed by a register name; the operand of either instruction is one\r
+    of: 0xNN, '*', '$', a quoted character or the name of an emit code (looked up in\r
+    mapb),\r
+    the function calls itself to read the following instruction, linking it through\r
+    m_next,\r
+    if *text does not start with one of the two keywords nothing is consumed, *em is\r
+    left untouched and 0 is returned,\r
+    returns 0 on success,\r
+    returns 1 otherwise\r
+*/\r
+static int get_emits (const byte **text, emit **em, map_byte *mapb)\r
+{\r
+    const byte *cursor = *text;\r
+    byte *word = NULL;\r
+    emit *node = NULL;\r
+    emit_dest target = ed_output;\r
+    int recognized = 1;\r
+\r
+    /* every instruction starts with a dot */\r
+    if (*cursor != '.')\r
+        return 0;\r
+    cursor++;\r
+\r
+    if (get_identifier (&cursor, &word) != 0)\r
+        return 1;\r
+    eat_spaces (&cursor);\r
+\r
+    if (str_equal ((byte *) "emit", word))\r
+        target = ed_output;         /* .emit */\r
+    else if (str_equal ((byte *) "load", word))\r
+        target = ed_regbyte;        /* .load */\r
+    else\r
+        recognized = 0;             /* some other keyword - not ours */\r
+\r
+    mem_free ((void **) &word);\r
+\r
+    if (!recognized)\r
+        return 0;\r
+\r
+    emit_create (&node);\r
+    if (node == NULL)\r
+        return 1;\r
+\r
+    node->m_emit_dest = target;\r
+\r
+    /* .load names the destination register first */\r
+    if (target == ed_regbyte)\r
+    {\r
+        if (get_identifier (&cursor, &node->m_regname) != 0)\r
+            goto fail;\r
+        eat_spaces (&cursor);\r
+    }\r
+\r
+    switch (*cursor)\r
+    {\r
+    case '0':\r
+        /* 0xNN - skip the two-character prefix */\r
+        cursor += 2;\r
+        node->m_byte = (byte) hex_convert (&cursor);\r
+        node->m_emit_type = et_byte;\r
+        break;\r
+    case '*':\r
+        cursor++;\r
+        node->m_emit_type = et_stream;\r
+        break;\r
+    case '$':\r
+        cursor++;\r
+        node->m_emit_type = et_position;\r
+        break;\r
+    case '\'':\r
+        /* 'c' - only the first byte of the quoted string is used */\r
+        if (get_string (&cursor, &word) != 0)\r
+            goto fail;\r
+\r
+        node->m_byte = (byte) word[0];\r
+        mem_free ((void **) &word);\r
+        node->m_emit_type = et_byte;\r
+        break;\r
+    default:\r
+        /* the name of an emit code */\r
+        if (get_identifier (&cursor, &word) != 0)\r
+            goto fail;\r
+\r
+        if (map_byte_find (&mapb, word, &node->m_byte) != 0)\r
+        {\r
+            mem_free ((void **) &word);\r
+            goto fail;\r
+        }\r
+\r
+        mem_free ((void **) &word);\r
+        node->m_emit_type = et_byte;\r
+        break;\r
+    }\r
+\r
+    eat_spaces (&cursor);\r
+\r
+    /* read the rest of the chain, if any */\r
+    if (get_emits (&cursor, &node->m_next, mapb) != 0)\r
+        goto fail;\r
+\r
+    *em = node;\r
+    *text = cursor;\r
+    return 0;\r
+\r
+fail:\r
+    emit_destroy (&node);\r
+    return 1;\r
+}\r
+\r
+/*\r
+    reads one specifier of a rule: an optional ".if (reg ==|!= 0xNN)" condition, then\r
+    one of 'c', 'c'-'d', "string", .true, .false, .debug, .loop <symbol> or <symbol>,\r
+    then an optional ".error" clause and an optional chain of ".emit"/".load"\r
+    instructions,\r
+    maps and mapb are the error text and emit code maps used to resolve names,\r
+    on success *sp receives the new specifier and *text is advanced past it,\r
+    returns 0 on success,\r
+    returns 1 otherwise,\r
+*/\r
+static int get_spec (const byte **text, spec **sp, map_str *maps, map_byte *mapb)\r
+{\r
+    const byte *t = *text;\r
+    spec *s = NULL;\r
+\r
+    spec_create (&s);\r
+    if (s == NULL)\r
+        return 1;\r
+\r
+    /* first - read optional .if statement */\r
+    if (*t == '.')\r
+    {\r
+        /* scan with a copy so that t only moves when the keyword really is "if" */\r
+        const byte *u = t;\r
+        byte *keyword = NULL;\r
+\r
+        /* skip the dot */\r
+        u++;\r
+\r
+        if (get_identifier (&u, &keyword))\r
+        {\r
+            spec_destroy (&s);\r
+            return 1;\r
+        }\r
+\r
+        /* .if */\r
+        if (str_equal ((byte *) "if", keyword))\r
+        {\r
+            cond_create (&s->m_cond);\r
+            if (s->m_cond == NULL)\r
+            {\r
+                /* keyword is still owned here - release it before bailing out */\r
+                mem_free ((void **) &keyword);\r
+                spec_destroy (&s);\r
+                return 1;\r
+            }\r
+\r
+            /* skip the left paren */\r
+            eat_spaces (&u);\r
+            u++;\r
+\r
+            /* get the left operand */\r
+            eat_spaces (&u);\r
+            if (get_identifier (&u, &s->m_cond->m_operands[0].m_regname))\r
+            {\r
+                mem_free ((void **) &keyword);\r
+                spec_destroy (&s);\r
+                return 1;\r
+            }\r
+            s->m_cond->m_operands[0].m_type = cot_regbyte;\r
+\r
+            /* get the operator (!= or ==) */\r
+            eat_spaces (&u);\r
+            if (*u == '!')\r
+                s->m_cond->m_type = ct_not_equal;\r
+            else\r
+                s->m_cond->m_type = ct_equal;\r
+            u += 2;\r
+\r
+            /* skip the 0x prefix */\r
+            eat_spaces (&u);\r
+            u += 2;\r
+\r
+            /* get the right operand */\r
+            s->m_cond->m_operands[1].m_byte = hex_convert (&u);\r
+            s->m_cond->m_operands[1].m_type = cot_byte;\r
+\r
+            /* skip the right paren */\r
+            eat_spaces (&u);\r
+            u++;\r
+\r
+            eat_spaces (&u);\r
+\r
+            t = u;\r
+        }\r
+\r
+        mem_free ((void **) &keyword);\r
+    }\r
+\r
+    if (*t == '\'')\r
+    {\r
+        /* 'c' or 'c'-'d' */\r
+        byte *temp = NULL;\r
+\r
+        if (get_string (&t, &temp))\r
+        {\r
+            spec_destroy (&s);\r
+            return 1;\r
+        }\r
+        eat_spaces (&t);\r
+\r
+        if (*t == '-')\r
+        {\r
+            byte *temp2 = NULL;\r
+\r
+            /* skip the '-' character */\r
+            t++;\r
+            eat_spaces (&t);\r
+\r
+            if (get_string (&t, &temp2))\r
+            {\r
+                mem_free ((void **) &temp);\r
+                spec_destroy (&s);\r
+                return 1;\r
+            }\r
+            eat_spaces (&t);\r
+\r
+            s->m_spec_type = st_byte_range;\r
+            s->m_byte[0] = *temp;\r
+            s->m_byte[1] = *temp2;\r
+\r
+            mem_free ((void **) &temp2);\r
+        }\r
+        else\r
+        {\r
+            s->m_spec_type = st_byte;\r
+            *s->m_byte = *temp;\r
+        }\r
+\r
+        mem_free ((void **) &temp);\r
+    }\r
+    else if (*t == '"')\r
+    {\r
+        /* "string" */\r
+        if (get_string (&t, &s->m_string))\r
+        {\r
+            spec_destroy (&s);\r
+            return 1;\r
+        }\r
+        eat_spaces (&t);\r
+\r
+        s->m_spec_type = st_string;\r
+    }\r
+    else if (*t == '.')\r
+    {\r
+        byte *keyword = NULL;\r
+\r
+        /* skip the dot */\r
+        t++;\r
+\r
+        if (get_identifier (&t, &keyword))\r
+        {\r
+            spec_destroy (&s);\r
+            return 1;\r
+        }\r
+        eat_spaces (&t);\r
+\r
+        /* .true */\r
+        if (str_equal ((byte *) "true", keyword))\r
+        {\r
+            s->m_spec_type = st_true;\r
+        }\r
+        /* .false */\r
+        else if (str_equal ((byte *) "false", keyword))\r
+        {\r
+            s->m_spec_type = st_false;\r
+        }\r
+        /* .debug */\r
+        else if (str_equal ((byte *) "debug", keyword))\r
+        {\r
+            s->m_spec_type = st_debug;\r
+        }\r
+        /* .loop */\r
+        else if (str_equal ((byte *) "loop", keyword))\r
+        {\r
+            if (get_identifier (&t, &s->m_string))\r
+            {\r
+                mem_free ((void **) &keyword);\r
+                spec_destroy (&s);\r
+                return 1;\r
+            }\r
+            eat_spaces (&t);\r
+\r
+            s->m_spec_type = st_identifier_loop;\r
+        }\r
+\r
+        mem_free ((void **) &keyword);\r
+    }\r
+    else\r
+    {\r
+        /* a reference to another rule */\r
+        if (get_identifier (&t, &s->m_string))\r
+        {\r
+            spec_destroy (&s);\r
+            return 1;\r
+        }\r
+        eat_spaces (&t);\r
+\r
+        s->m_spec_type = st_identifier;\r
+    }\r
+\r
+    if (get_error (&t, &s->m_errtext, maps))\r
+    {\r
+        spec_destroy (&s);\r
+        return 1;\r
+    }\r
+\r
+    if (get_emits (&t, &s->m_emits, mapb))\r
+    {\r
+        spec_destroy (&s);\r
+        return 1;\r
+    }\r
+\r
+    *text = t;\r
+    *sp = s;\r
+    return 0;\r
+}\r
+\r
+/*\r
+    reads the body of a rule: a specifier followed by any number of\r
+    (".and" | ".or") <specifier> pairs, terminated with a semicolon,\r
+    the first operator encountered becomes the operator of the whole rule; a rule with\r
+    a single specifier keeps op_none,\r
+    on success *ru receives the new rule and *text is advanced past the semicolon and\r
+    any following spaces,\r
+    returns 0 on success,\r
+    returns 1 otherwise (including input that ends before the semicolon),\r
+*/\r
+static int get_rule (const byte **text, rule **ru, map_str *maps, map_byte *mapb)\r
+{\r
+    const byte *t = *text;\r
+    rule *r = NULL;\r
+\r
+    rule_create (&r);\r
+    if (r == NULL)\r
+        return 1;\r
+\r
+    if (get_spec (&t, &r->m_specs, maps, mapb))\r
+    {\r
+        rule_destroy (&r);\r
+        return 1;\r
+    }\r
+\r
+    while (*t != ';')\r
+    {\r
+        byte *op = NULL;\r
+        spec *sp = NULL;\r
+\r
+        /* the text ended before the semicolon - do not step over the NUL terminator */\r
+        if (*t == '\0')\r
+        {\r
+            rule_destroy (&r);\r
+            return 1;\r
+        }\r
+\r
+        /* skip the dot that precedes "and" or "or" */\r
+        t++;\r
+\r
+        /* read "and" or "or" keyword */\r
+        if (get_identifier (&t, &op))\r
+        {\r
+            rule_destroy (&r);\r
+            return 1;\r
+        }\r
+        eat_spaces (&t);\r
+\r
+        /* only the first operator decides; anything but "and" counts as "or" */\r
+        if (r->m_oper == op_none)\r
+        {\r
+            /* .and */\r
+            if (str_equal ((byte *) "and", op))\r
+                r->m_oper = op_and;\r
+            /* .or */\r
+            else\r
+                r->m_oper = op_or;\r
+        }\r
+\r
+        mem_free ((void **) &op);\r
+\r
+        if (get_spec (&t, &sp, maps, mapb))\r
+        {\r
+            rule_destroy (&r);\r
+            return 1;\r
+        }\r
+\r
+        spec_append (&r->m_specs, &sp);\r
+    }\r
+\r
+    /* skip the semicolon */\r
+    t++;\r
+    eat_spaces (&t);\r
+\r
+    *text = t;\r
+    *ru = r;\r
+    return 0;\r
+}\r
+\r
+/*\r
+    resolves a symbol name to its rule: looks symbol up in mapr and stores the result\r
+    through ru,\r
+    returns 0 on success,\r
+    returns 1 if the lookup fails,\r
+*/\r
+static int update_dependency (map_rule *mapr, byte *symbol, rule **ru)\r
+{\r
+    /* marking the rule as referenced is currently disabled: */\r
+    /* (**ru).m_referenced = 1; */\r
+\r
+    return map_rule_find (&mapr, symbol, ru) ? 1 : 0;\r
+}\r
+\r
+/*\r
+    replaces the symbolic names collected while loading with pointers to the objects\r
+    they refer to, freeing every name once it has been resolved:\r
+    - the root (*syntax_symbol) and optional lexer (*string_symbol) rule names,\r
+    - rule names used by identifier and identifier-loop specifiers,\r
+    - the rule name referenced by an error text ($name$),\r
+    - register names used by .if conditions and .load instructions (looked up in\r
+      regbytes),\r
+    returns 0 on success,\r
+    returns 1 if any name cannot be resolved,\r
+*/\r
+static int update_dependencies (dict *di, map_rule *mapr, byte **syntax_symbol,\r
+    byte **string_symbol, map_byte *regbytes)\r
+{\r
+    rule *rulez = di->m_rulez;\r
+\r
+    /* update dependencies for the root and lexer symbols */\r
+    if (update_dependency (mapr, *syntax_symbol, &di->m_syntax) ||\r
+        (*string_symbol != NULL && update_dependency (mapr, *string_symbol, &di->m_string)))\r
+        return 1;\r
+\r
+    mem_free ((void **) syntax_symbol);\r
+    mem_free ((void **) string_symbol);\r
+\r
+    /* update dependencies for the rest of the rules */\r
+    while (rulez)\r
+    {\r
+        spec *sp = rulez->m_specs;\r
+\r
+        /* iterate through all the specifiers */\r
+        while (sp)\r
+        {\r
+            /* update dependency for identifier */\r
+            if (sp->m_spec_type == st_identifier || sp->m_spec_type == st_identifier_loop)\r
+            {\r
+                if (update_dependency (mapr, sp->m_string, &sp->m_rule))\r
+                    return 1;\r
+\r
+                mem_free ((void **) &sp->m_string);\r
+            }\r
+\r
+            /* some errtexts reference to a rule */\r
+            if (sp->m_errtext && sp->m_errtext->m_token_name)\r
+            {\r
+                if (update_dependency (mapr, sp->m_errtext->m_token_name, &sp->m_errtext->m_token))\r
+                    return 1;\r
+\r
+                mem_free ((void **) &sp->m_errtext->m_token_name);\r
+            }\r
+\r
+            /* update dependency for condition */\r
+            if (sp->m_cond)\r
+            {\r
+                int i;\r
+                for (i = 0; i < 2; i++)\r
+                    if (sp->m_cond->m_operands[i].m_type == cot_regbyte)\r
+                    {\r
+                        sp->m_cond->m_operands[i].m_regbyte = map_byte_locate (&regbytes,\r
+                            sp->m_cond->m_operands[i].m_regname);\r
+\r
+                        if (sp->m_cond->m_operands[i].m_regbyte == NULL)\r
+                            return 1;\r
+\r
+                        mem_free ((void **) &sp->m_cond->m_operands[i].m_regname);\r
+                    }\r
+            }\r
+\r
+            /* update dependency for all .load instructions */\r
+            if (sp->m_emits)\r
+            {\r
+                emit *em = sp->m_emits;\r
+                while (em != NULL)\r
+                {\r
+                    if (em->m_emit_dest == ed_regbyte)\r
+                    {\r
+                        em->m_regbyte = map_byte_locate (&regbytes, em->m_regname);\r
+\r
+                        if (em->m_regbyte == NULL)\r
+                            return 1;\r
+\r
+                        mem_free ((void **) &em->m_regname);\r
+                    }\r
+\r
+                    em = em->m_next;\r
+                }\r
+            }\r
+\r
+            sp = sp->m_next;\r
+        }\r
+\r
+        rulez = rulez->m_next;\r
+    }\r
+\r
+    /* NOTE: unreferenced symbols are not detected - update_dependency does not mark */\r
+    /* the rules it resolves */\r
+    return 0;\r
+}\r
+\r
+/*\r
+    evaluates the condition attached to a specifier,\r
+    each of the two operands is either a constant byte or a register whose current\r
+    value is extracted from the register context ctx,\r
+    returns 1 if there is no condition (co == NULL) or the comparison holds,\r
+    returns 0 otherwise\r
+*/\r
+static int satisfies_condition (cond *co, regbyte_ctx *ctx)\r
+{\r
+    byte operand_values[2];\r
+    int slot = 0;\r
+\r
+    /* a specifier without a condition is always enabled */\r
+    if (co == NULL)\r
+        return 1;\r
+\r
+    while (slot < 2)\r
+    {\r
+        if (co->m_operands[slot].m_type == cot_byte)\r
+            operand_values[slot] = co->m_operands[slot].m_byte;\r
+        else if (co->m_operands[slot].m_type == cot_regbyte)\r
+            operand_values[slot] = regbyte_ctx_extract (&ctx, co->m_operands[slot].m_regbyte);\r
+\r
+        slot++;\r
+    }\r
+\r
+    if (co->m_type == ct_equal)\r
+        return operand_values[0] == operand_values[1];\r
+    if (co->m_type == ct_not_equal)\r
+        return operand_values[0] != operand_values[1];\r
+\r
+    return 0;\r
+}\r
+\r
+/*\r
+    destroys the entries of a register context stack, walking from top through the\r
+    m_prev links until limit is reached; limit itself is not destroyed\r
+*/\r
+static void free_regbyte_ctx_stack (regbyte_ctx *top, regbyte_ctx *limit)\r
+{\r
+    regbyte_ctx *current;\r
+    regbyte_ctx *below;\r
+\r
+    for (current = top; current != limit; current = below)\r
+    {\r
+        /* remember the link before the entry goes away */\r
+        below = current->m_prev;\r
+        regbyte_ctx_destroy (&current);\r
+    }\r
+}\r
+\r
+typedef enum match_result_ /* outcome reported by match() */\r
+{\r
+ mr_not_matched, /* the examined string does not match */\r
+ mr_matched, /* the examined string matches */\r
+ mr_error_raised, /* mr_not_matched + error has been raised */\r
+ mr_dont_emit, /* used by identifier loops only */\r
+ mr_internal_error /* an internal error has occurred such as out of memory */\r
+} match_result;\r
+\r
+/*\r
+ This function does the main job. It parses the text and generates output data.\r
+\r
+ It walks the specifiers of rule ru, trying each one against text at the current\r
+ position. For an ".and" rule every specifier must match; for an ".or" rule the first\r
+ matching specifier wins. A specifier whose condition is not satisfied counts as not\r
+ matched.\r
+\r
+ di - dictionary the rule belongs to; di->m_string, if set, is the rule used to\r
+ prefilter the stream before a string specifier is compared\r
+ text - the text being examined\r
+ index - in: position in text to start at; out: position past the matched part,\r
+ written only when mr_matched is returned\r
+ ru - the rule to match\r
+ ba - array that receives the emitted data\r
+ filtering_string - non-zero while the prefilter rule itself is being matched, which\r
+ disables further prefiltering\r
+ rbc - in: top of the register context stack; out: the new top, written only when\r
+ mr_matched is returned; on every other result the entries above *rbc are freed\r
+\r
+ Returns mr_matched, mr_not_matched, mr_error_raised (an ".and" specifier carrying an\r
+ error text failed, or a nested match raised an error) or mr_internal_error (an\r
+ allocation or array operation failed). mr_dont_emit is used internally for\r
+ identifier loops and is never returned.\r
+\r
+ XXX optimize it - the barray seems to be the bottleneck\r
+*/\r
+static match_result match (dict *di, const byte *text, unsigned int *index, rule *ru, barray **ba,\r
+ int filtering_string, regbyte_ctx **rbc)\r
+{\r
+ unsigned int ind = *index;\r
+ match_result status = mr_not_matched;\r
+ spec *sp = ru->m_specs;\r
+ regbyte_ctx *ctx = *rbc; /* working top of the stack; *rbc is only updated on success */\r
+\r
+ /* for every specifier in the rule */\r
+ while (sp)\r
+ {\r
+ unsigned int i, len, save_ind = ind; /* save_ind: position where this specifier starts */\r
+ barray *array = NULL; /* output of a nested rule, appended to ba only if it matches */\r
+\r
+ if (satisfies_condition (sp->m_cond, ctx))\r
+ {\r
+ switch (sp->m_spec_type)\r
+ {\r
+ case st_identifier:\r
+ barray_create (&array);\r
+ if (array == NULL)\r
+ {\r
+ free_regbyte_ctx_stack (ctx, *rbc);\r
+ return mr_internal_error;\r
+ }\r
+\r
+ status = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);\r
+ if (status == mr_internal_error)\r
+ {\r
+ free_regbyte_ctx_stack (ctx, *rbc);\r
+ barray_destroy (&array);\r
+ return mr_internal_error;\r
+ }\r
+ break;\r
+ case st_string:\r
+ len = str_length (sp->m_string);\r
+\r
+ /* prefilter the stream */\r
+ if (!filtering_string && di->m_string)\r
+ {\r
+ barray *ba; /* scratch output of the filter rule; shadows the ba parameter */\r
+ unsigned int filter_index = 0;\r
+ match_result result;\r
+ regbyte_ctx *null_ctx = NULL;\r
+\r
+ barray_create (&ba);\r
+ if (ba == NULL)\r
+ {\r
+ free_regbyte_ctx_stack (ctx, *rbc);\r
+ return mr_internal_error;\r
+ }\r
+\r
+ result = match (di, text + ind, &filter_index, di->m_string, &ba, 1, &null_ctx);\r
+\r
+ if (result == mr_internal_error)\r
+ {\r
+ free_regbyte_ctx_stack (ctx, *rbc);\r
+ barray_destroy (&ba);\r
+ return mr_internal_error;\r
+ }\r
+\r
+ if (result != mr_matched)\r
+ {\r
+ barray_destroy (&ba);\r
+ status = mr_not_matched;\r
+ break;\r
+ }\r
+\r
+ barray_destroy (&ba);\r
+\r
+ if (filter_index != len || !str_equal_n (sp->m_string, text + ind, len)) /* the filter must consume exactly the literal */\r
+ {\r
+ status = mr_not_matched;\r
+ break;\r
+ }\r
+\r
+ status = mr_matched;\r
+ ind += len;\r
+ }\r
+ else\r
+ {\r
+ status = mr_matched;\r
+ for (i = 0; status == mr_matched && i < len; i++)\r
+ if (text[ind + i] != sp->m_string[i])\r
+ status = mr_not_matched;\r
+ if (status == mr_matched)\r
+ ind += len;\r
+ }\r
+ break;\r
+ case st_byte:\r
+ status = text[ind] == *sp->m_byte ? mr_matched : mr_not_matched;\r
+ if (status == mr_matched)\r
+ ind++;\r
+ break;\r
+ case st_byte_range:\r
+ status = (text[ind] >= sp->m_byte[0] && text[ind] <= sp->m_byte[1]) ?\r
+ mr_matched : mr_not_matched;\r
+ if (status == mr_matched)\r
+ ind++;\r
+ break;\r
+ case st_true:\r
+ status = mr_matched;\r
+ break;\r
+ case st_false:\r
+ status = mr_not_matched;\r
+ break;\r
+ case st_debug:\r
+ status = ru->m_oper == op_and ? mr_matched : mr_not_matched; /* neutral: lets either kind of rule carry on */\r
+ break;\r
+ case st_identifier_loop:\r
+ barray_create (&array);\r
+ if (array == NULL)\r
+ {\r
+ free_regbyte_ctx_stack (ctx, *rbc);\r
+ return mr_internal_error;\r
+ }\r
+\r
+ status = mr_dont_emit; /* the loop emits for each iteration itself, see below */\r
+ for (;;)\r
+ {\r
+ match_result result;\r
+\r
+ save_ind = ind;\r
+ result = match (di, text, &ind, sp->m_rule, &array, filtering_string, &ctx);\r
+\r
+ if (result == mr_error_raised)\r
+ {\r
+ status = result;\r
+ break;\r
+ }\r
+ else if (result == mr_matched)\r
+ {\r
+ if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx) ||\r
+ barray_append (ba, &array))\r
+ {\r
+ free_regbyte_ctx_stack (ctx, *rbc);\r
+ barray_destroy (&array);\r
+ return mr_internal_error;\r
+ }\r
+ barray_destroy (&array);\r
+ barray_create (&array);\r
+ if (array == NULL)\r
+ {\r
+ free_regbyte_ctx_stack (ctx, *rbc);\r
+ return mr_internal_error;\r
+ }\r
+ }\r
+ else if (result == mr_internal_error)\r
+ {\r
+ free_regbyte_ctx_stack (ctx, *rbc);\r
+ barray_destroy (&array);\r
+ return mr_internal_error;\r
+ }\r
+ else\r
+ break; /* first iteration that does not match ends the loop; zero iterations is fine */\r
+ }\r
+ break;\r
+ }\r
+ }\r
+ else\r
+ {\r
+ status = mr_not_matched;\r
+ }\r
+\r
+ if (status == mr_error_raised)\r
+ {\r
+ free_regbyte_ctx_stack (ctx, *rbc);\r
+ barray_destroy (&array);\r
+\r
+ return mr_error_raised;\r
+ }\r
+\r
+ if (ru->m_oper == op_and && status != mr_matched && status != mr_dont_emit)\r
+ {\r
+ free_regbyte_ctx_stack (ctx, *rbc);\r
+ barray_destroy (&array);\r
+\r
+ if (sp->m_errtext)\r
+ {\r
+ set_last_error (sp->m_errtext->m_text, error_get_token (sp->m_errtext, di, text,\r
+ ind), ind);\r
+\r
+ return mr_error_raised;\r
+ }\r
+\r
+ return mr_not_matched;\r
+ }\r
+\r
+ if (status == mr_matched)\r
+ {\r
+ if (sp->m_emits)\r
+ if (barray_push (ba, sp->m_emits, text[ind - 1], save_ind, &ctx)) /* NOTE(review): ind - 1 wraps when nothing has been consumed yet (ind == 0) - confirm */\r
+ {\r
+ free_regbyte_ctx_stack (ctx, *rbc);\r
+ barray_destroy (&array);\r
+ return mr_internal_error;\r
+ }\r
+\r
+ if (array)\r
+ if (barray_append (ba, &array))\r
+ {\r
+ free_regbyte_ctx_stack (ctx, *rbc);\r
+ barray_destroy (&array);\r
+ return mr_internal_error;\r
+ }\r
+ }\r
+\r
+ barray_destroy (&array);\r
+\r
+ /* if the rule operator is a logical or, we pick up the first matching specifier */\r
+ if (ru->m_oper == op_or && (status == mr_matched || status == mr_dont_emit))\r
+ {\r
+ *index = ind;\r
+ *rbc = ctx;\r
+ return mr_matched;\r
+ }\r
+\r
+ sp = sp->m_next;\r
+ }\r
+\r
+ /* everything went fine - all specifiers match up */\r
+ if (ru->m_oper == op_and && (status == mr_matched || status == mr_dont_emit))\r
+ {\r
+ *index = ind;\r
+ *rbc = ctx;\r
+ return mr_matched;\r
+ }\r
+\r
+ free_regbyte_ctx_stack (ctx, *rbc);\r
+ return mr_not_matched;\r
+}\r
+\r
+/*\r
+    extracts the offending token for an error message: if the error references a\r
+    token rule (er->m_token), that rule is matched against text starting at ind and\r
+    the matched characters are copied,\r
+    returns a newly allocated, NUL-terminated copy of the token (the caller takes\r
+    ownership),\r
+    returns NULL if the error has no token rule, the rule does not match, it matches\r
+    an empty string, or memory runs out\r
+*/\r
+static byte *error_get_token (error *er, dict *di, const byte *text, unsigned int ind)\r
+{\r
+    byte *str = NULL;\r
+    barray *ba = NULL;\r
+    unsigned int filter_index = 0;\r
+    regbyte_ctx *ctx = NULL;\r
+\r
+    if (er->m_token == NULL)\r
+        return NULL;\r
+\r
+    /* the emitted data is of no interest here, but match needs somewhere to put it */\r
+    barray_create (&ba);\r
+    if (ba == NULL)\r
+        return NULL;\r
+\r
+    if (match (di, text + ind, &filter_index, er->m_token, &ba, 0, &ctx) == mr_matched &&\r
+        filter_index)\r
+    {\r
+        str = mem_alloc (filter_index + 1);\r
+        if (str != NULL)\r
+        {\r
+            str_copy_n (str, text + ind, filter_index);\r
+            str[filter_index] = '\0';\r
+        }\r
+    }\r
+\r
+    /* a successful match hands its register context stack over to the caller; */\r
+    /* release it the same way grammar_check does (ctx is still NULL otherwise) */\r
+    free_regbyte_ctx_stack (ctx, NULL);\r
+    barray_destroy (&ba);\r
+\r
+    return str;\r
+}\r
+\r
+typedef struct grammar_load_state_ /* working state of grammar_load_from_text() */\r
+{\r
+ dict *di; /* dictionary being built; set to NULL once it is handed over to g_dicts */\r
+ byte *syntax_symbol; /* name of the root rule read after ".syntax" */\r
+ byte *string_symbol; /* name of the rule read after ".string", NULL if there is none */\r
+ map_str *maps; /* .errtext declarations */\r
+ map_byte *mapb; /* .emtcode declarations */\r
+ map_rule *mapr; /* rule name to rule map, used to resolve references */\r
+} grammar_load_state;\r
+\r
+/*\r
+    allocates a load state with every member set to NULL and stores it in *gr,\r
+    *gr is NULL if the allocation fails\r
+*/\r
+static void grammar_load_state_create (grammar_load_state **gr)\r
+{\r
+    grammar_load_state *state = mem_alloc (sizeof (grammar_load_state));\r
+\r
+    if (state != NULL)\r
+    {\r
+        state->di = NULL;\r
+        state->syntax_symbol = NULL;\r
+        state->string_symbol = NULL;\r
+        state->maps = NULL;\r
+        state->mapb = NULL;\r
+        state->mapr = NULL;\r
+    }\r
+\r
+    *gr = state;\r
+}\r
+\r
+/*\r
+    destroys everything the load state still owns and then the state itself,\r
+    does nothing if *gr is NULL\r
+*/\r
+static void grammar_load_state_destroy (grammar_load_state **gr)\r
+{\r
+    grammar_load_state *state = *gr;\r
+\r
+    if (state == NULL)\r
+        return;\r
+\r
+    dict_destroy (&state->di);\r
+    mem_free ((void **) &state->syntax_symbol);\r
+    mem_free ((void **) &state->string_symbol);\r
+    map_str_destroy (&state->maps);\r
+    map_byte_destroy (&state->mapb);\r
+    map_rule_destroy (&state->mapr);\r
+    mem_free ((void **) gr);\r
+}\r
+\r
+/*\r
+ the API\r
+*/\r
+\r
+/*\r
+    loads a grammar from its textual description and registers it in g_dicts,\r
+    the text must start with ".syntax <symbol>;" and is followed by any number of\r
+    .emtcode, .regbyte, .errtext and .string declarations and rule definitions\r
+    ("<symbol> <rule body>;"),\r
+    the syntax of the text is not verified - only references are resolved,\r
+    clears the last error on entry,\r
+    returns the id of the new grammar on success,\r
+    returns 0 on failure (out of memory, unresolved reference, repeated .string)\r
+*/\r
+grammar grammar_load_from_text (const byte *text)\r
+{\r
+    grammar_load_state *g = NULL;\r
+    grammar id = 0;\r
+\r
+    clear_last_error ();\r
+\r
+    grammar_load_state_create (&g);\r
+    if (g == NULL)\r
+        return 0;\r
+\r
+    dict_create (&g->di);\r
+    if (g->di == NULL)\r
+    {\r
+        grammar_load_state_destroy (&g);\r
+        return 0;\r
+    }\r
+\r
+    eat_spaces (&text);\r
+\r
+    /* skip ".syntax" keyword */\r
+    text += 7;\r
+    eat_spaces (&text);\r
+\r
+    /* retrieve root symbol */\r
+    if (get_identifier (&text, &g->syntax_symbol))\r
+    {\r
+        grammar_load_state_destroy (&g);\r
+        return 0;\r
+    }\r
+    eat_spaces (&text);\r
+\r
+    /* skip semicolon */\r
+    text++;\r
+    eat_spaces (&text);\r
+\r
+    while (*text)\r
+    {\r
+        byte *symbol = NULL;\r
+        int is_dot = *text == '.';\r
+\r
+        if (is_dot)\r
+            text++;\r
+\r
+        if (get_identifier (&text, &symbol))\r
+        {\r
+            grammar_load_state_destroy (&g);\r
+            return 0;\r
+        }\r
+        eat_spaces (&text);\r
+\r
+        /* .emtcode */\r
+        if (is_dot && str_equal (symbol, (byte *) "emtcode"))\r
+        {\r
+            map_byte *ma = NULL;\r
+\r
+            mem_free ((void **) &symbol);\r
+\r
+            if (get_emtcode (&text, &ma))\r
+            {\r
+                grammar_load_state_destroy (&g);\r
+                return 0;\r
+            }\r
+\r
+            map_byte_append (&g->mapb, &ma);\r
+        }\r
+        /* .regbyte */\r
+        else if (is_dot && str_equal (symbol, (byte *) "regbyte"))\r
+        {\r
+            map_byte *ma = NULL;\r
+\r
+            mem_free ((void **) &symbol);\r
+\r
+            if (get_regbyte (&text, &ma))\r
+            {\r
+                grammar_load_state_destroy (&g);\r
+                return 0;\r
+            }\r
+\r
+            map_byte_append (&g->di->m_regbytes, &ma);\r
+        }\r
+        /* .errtext */\r
+        else if (is_dot && str_equal (symbol, (byte *) "errtext"))\r
+        {\r
+            map_str *ma = NULL;\r
+\r
+            mem_free ((void **) &symbol);\r
+\r
+            if (get_errtext (&text, &ma))\r
+            {\r
+                grammar_load_state_destroy (&g);\r
+                return 0;\r
+            }\r
+\r
+            map_str_append (&g->maps, &ma);\r
+        }\r
+        /* .string */\r
+        else if (is_dot && str_equal (symbol, (byte *) "string"))\r
+        {\r
+            mem_free ((void **) &symbol);\r
+\r
+            /* only one .string is allowed; m_string is resolved later by */\r
+            /* update_dependencies, so the pending name reveals a repeated one */\r
+            if (g->di->m_string != NULL || g->string_symbol != NULL)\r
+            {\r
+                grammar_load_state_destroy (&g);\r
+                return 0;\r
+            }\r
+\r
+            if (get_identifier (&text, &g->string_symbol))\r
+            {\r
+                grammar_load_state_destroy (&g);\r
+                return 0;\r
+            }\r
+\r
+            /* skip semicolon */\r
+            eat_spaces (&text);\r
+            text++;\r
+            eat_spaces (&text);\r
+        }\r
+        else\r
+        {\r
+            /* anything else is a rule definition; symbol is its name */\r
+            rule *ru = NULL;\r
+            map_rule *ma = NULL;\r
+\r
+            if (get_rule (&text, &ru, g->maps, g->mapb))\r
+            {\r
+                /* the name has no owner yet - release it here */\r
+                mem_free ((void **) &symbol);\r
+                grammar_load_state_destroy (&g);\r
+                return 0;\r
+            }\r
+\r
+            rule_append (&g->di->m_rulez, &ru);\r
+\r
+            /* if a rule consist of only one specifier, give it an ".and" operator */\r
+            if (ru->m_oper == op_none)\r
+                ru->m_oper = op_and;\r
+\r
+            map_rule_create (&ma);\r
+            if (ma == NULL)\r
+            {\r
+                /* the rule already belongs to the dictionary, the name does not */\r
+                mem_free ((void **) &symbol);\r
+                grammar_load_state_destroy (&g);\r
+                return 0;\r
+            }\r
+\r
+            /* the map entry takes ownership of the name */\r
+            ma->key = symbol;\r
+            ma->data = ru;\r
+            map_rule_append (&g->mapr, &ma);\r
+        }\r
+    }\r
+\r
+    if (update_dependencies (g->di, g->mapr, &g->syntax_symbol, &g->string_symbol,\r
+        g->di->m_regbytes))\r
+    {\r
+        grammar_load_state_destroy (&g);\r
+        return 0;\r
+    }\r
+\r
+    /* hand the dictionary over to the global list so that destroying the load */\r
+    /* state does not destroy it */\r
+    dict_append (&g_dicts, &g->di);\r
+    id = g->di->m_id;\r
+    g->di = NULL;\r
+\r
+    grammar_load_state_destroy (&g);\r
+\r
+    return id;\r
+}\r
+\r
+/*\r
+    sets the value of the register called name in the grammar identified by id,\r
+    clears the last error on entry,\r
+    returns 1 on success,\r
+    returns 0 and sets the last error if the grammar id or the register name is\r
+    unknown\r
+*/\r
+int grammar_set_reg8 (grammar id, const byte *name, byte value)\r
+{\r
+    dict *owner = NULL;\r
+    map_byte *slot = NULL;\r
+\r
+    clear_last_error ();\r
+\r
+    dict_find (&g_dicts, id, &owner);\r
+    if (owner != NULL)\r
+    {\r
+        slot = map_byte_locate (&owner->m_regbytes, name);\r
+        if (slot != NULL)\r
+        {\r
+            slot->data = value;\r
+            return 1;\r
+        }\r
+\r
+        set_last_error (INVALID_REGISTER_NAME, str_duplicate (name), -1);\r
+        return 0;\r
+    }\r
+\r
+    set_last_error (INVALID_GRAMMAR_ID, NULL, -1);\r
+    return 0;\r
+}\r
+\r
+/*\r
+    checks text against the grammar identified by id, starting from its root rule,\r
+    on success *prod receives a newly allocated copy of the produced data (the caller\r
+    takes ownership) and *size its length in bytes,\r
+    clears the last error on entry,\r
+    returns 1 on success,\r
+    returns 0 if the grammar id is unknown, the text does not match or memory runs\r
+    out; *prod and *size are reset to NULL and 0 unless the failure happens before the\r
+    matching starts\r
+*/\r
+int grammar_check (grammar id, const byte *text, byte **prod, unsigned int *size)\r
+{\r
+    dict *di = NULL;\r
+    barray *output = NULL;\r
+    unsigned int position = 0;\r
+    regbyte_ctx *registers = NULL;\r
+    int matched;\r
+    int ok = 0;\r
+\r
+    clear_last_error ();\r
+\r
+    dict_find (&g_dicts, id, &di);\r
+    if (di == NULL)\r
+    {\r
+        set_last_error (INVALID_GRAMMAR_ID, NULL, -1);\r
+        return 0;\r
+    }\r
+\r
+    barray_create (&output);\r
+    if (output == NULL)\r
+        return 0;\r
+\r
+    *prod = NULL;\r
+    *size = 0;\r
+\r
+    matched = match (di, text, &position, di->m_syntax, &output, 0, &registers) == mr_matched;\r
+\r
+    /* the register contexts are only needed while matching */\r
+    free_regbyte_ctx_stack (registers, NULL);\r
+\r
+    if (matched)\r
+    {\r
+        *prod = mem_alloc (output->len * sizeof (byte));\r
+        if (*prod != NULL)\r
+        {\r
+            mem_copy (*prod, output->data, output->len * sizeof (byte));\r
+            *size = output->len;\r
+            ok = 1;\r
+        }\r
+    }\r
+\r
+    barray_destroy (&output);\r
+\r
+    return ok;\r
+}\r
+\r
+/*\r
+    unlinks the grammar identified by id from g_dicts and destroys it,\r
+    clears the last error on entry,\r
+    returns 1 on success,\r
+    returns 0 and sets the last error if there is no grammar with that id\r
+*/\r
+int grammar_destroy (grammar id)\r
+{\r
+    dict **link;\r
+\r
+    clear_last_error ();\r
+\r
+    /* link always points at the pointer that refers to the dictionary examined */\r
+    for (link = &g_dicts; *link != NULL; link = &(**link).m_next)\r
+    {\r
+        dict *victim = *link;\r
+\r
+        if (victim->m_id != id)\r
+            continue;\r
+\r
+        *link = victim->m_next;\r
+        dict_destroy (&victim);\r
+        return 1;\r
+    }\r
+\r
+    set_last_error (INVALID_GRAMMAR_ID, NULL, -1);\r
+    return 0;\r
+}\r
+\r
+void grammar_get_last_error (byte *text, unsigned int size, int *pos)\r
+{\r
+ unsigned int len = 0, dots_made = 0;\r
+ const byte *p = error_message;\r
+\r
+ *text = '\0';\r
+\r
+#define APPEND_CHARACTER(x) if (dots_made == 0) {\\r
+ if (len < size - 1) {\\r
+ text[len++] = (x); text[len] = '\0';\\r
+ } else {\\r
+ int i;\\r
+ for (i = 0; i < 3; i++)\\r
+ if (--len >= 0)\\r
+ text[len] = '.';\\r
+ dots_made = 1;\\r
+ }\\r
+ }\r
+\r
+ if (p)\r
+ while (*p)\r
+ if (*p == '$')\r
+ {\r
+ const byte *r = error_param;\r
+\r
+ while (*r)\r
+ {\r
+ APPEND_CHARACTER(*r)\r
+ r++;\r
+ }\r
+\r
+ p++;\r
+ }\r
+ else\r
+ {\r
+ APPEND_CHARACTER(*p)\r
+ p++;\r
+ }\r
+\r
+ *pos = error_position;\r
+\r
+#undef APPEND_CHARACTER\r
+\r
+}\r
+\r