2 * lens.h: Representation of lenses
4 * Copyright (C) 2007-2016 David Lutterkort
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * Author: David Lutterkort <dlutter@redhat.com>
30 /* keep in sync with tag name table */
32 L_DEL = 42, /* Shift tag values so we fail fast(er) on bad pointers */
48 /* A lens. The way the type information is computed is a little
49 * delicate. There are various regexps involved to form the final type:
51 * CTYPE - the concrete type, used to parse file -> tree
52 * ATYPE - the abstract type, used to parse tree -> file
53 * KTYPE - the 'key' type, matching the label that this lens
54 * can produce, or NULL if no label is produced
55 * VTYPE - the 'value' type, matching the value that this lens
56 * can produce, or NULL if no value is produced
58 * We distinguish between regular and recursive (context-free) lenses. Only
59 * L_REC and the combinators can be marked recursive.
61 * Types are computed at different times, depending on whether the lens is
62 * recursive or not. For non-recursive lenses, types are computed when the
63 * lens is constructed by one of the LNS_MAKE_* functions; for recursive
64 * lenses, we never compute an explicit ctype (since regular approximations
65 * of it are pretty much useless), we do however compute regular
66 * approximations of the ktype, vtype, and atype in LNS_CHECK_REC. That
67 * means that recursive lenses accept context free languages in the string
68 * -> tree direction, but only regular tree languages in the tree -> string
71 * Any lens that uses a recursive lens somehow is marked as recursive
78 struct regexp *ctype; /* NULL when recursive == 1 */
82 struct jmt *jmt; /* When recursive == 1, might have jmt */
83 unsigned int value : 1;
85 unsigned int recursive : 1;
86 unsigned int consumes_value : 1;
87 /* Whether we are inside a recursive lens or outside */
88 unsigned int rec_internal : 1;
89 unsigned int ctype_nullable : 1;
91 /* Primitive lenses */
92 struct { /* L_DEL uses both */
93 struct regexp *regexp; /* L_STORE, L_KEY */
94 struct string *string; /* L_VALUE, L_LABEL, L_SEQ, L_COUNTER */
97 struct lens *child; /* L_SUBTREE, L_STAR, L_MAYBE, L_SQUARE */
98 struct { /* L_UNION, L_CONCAT */
99 unsigned int nchildren;
100 struct lens **children;
103 struct lens *body; /* L_REC */
104 /* We represent a recursive lens as two instances of struct
105 * lens with L_REC. One has rec_internal set to 1, the other
106 * has it set to 0. The one with rec_internal is used within
107 * the body, the other is what is used from the 'outside'. This
108 * is necessary to break the cycles inherent in recursive
109 * lenses with reference counting. The link through alias is
110 * set up in lns_check_rec, and not reference counted.
112 * Generally, any lens used in the body of a recursive lens is
113 * marked with rec_internal == 1; lenses that use the recursive
114 * lens 'from the outside' are marked with rec_internal ==
115 * 0. In the latter case, we can assign types right away,
116 * except for the ctype, which we never have for any recursive
124 /* Constructors for various lens types. Each constructor assumes ownership of
125 * its arguments without incrementing their reference counts. Caller owns returned lenses.
127 * The return type is VALUE instead of LENS so that we can return an
128 * exception if typechecking fails.
130 struct value *lns_make_prim(enum lens_tag tag, struct info *info,
131 struct regexp *regexp, struct string *string);
132 struct value *lns_make_union(struct info *, struct lens *, struct lens *,
134 struct value *lns_make_concat(struct info *, struct lens *, struct lens *,
136 struct value *lns_make_subtree(struct info *, struct lens *);
137 struct value *lns_make_star(struct info *, struct lens *,
139 struct value *lns_make_plus(struct info *, struct lens *,
141 struct value *lns_make_maybe(struct info *, struct lens *,
143 struct value *lns_make_square(struct info *, struct lens *, struct lens *,
144 struct lens *lens, int check);
147 /* Pretty-print a lens */
148 char *format_lens(struct lens *l);
150 /* Pretty-print the atype of a lens. Allocates BUF, which must be freed by
152 int lns_format_atype(struct lens *, char **buf);
154 /* Recursive lenses */
155 struct value *lns_make_rec(struct info *info);
156 struct value *lns_check_rec(struct info *info,
157 struct lens *body, struct lens *rec,
160 /* Auxiliary data structures used during get/put/create */
165 char *text; /* L_DEL */
166 struct skel *skels; /* L_CONCAT, L_STAR, L_SQUARE */
168 /* Also tag == L_SUBTREE, with no data in the union */
173 struct lens *last; /* The last lens that matched */
174 struct lens *next; /* The next lens that should match but doesn't */
175 int pos; /* Errors from get/parse */
176 char *path; /* Errors from put, pos will be -1 */
180 struct dict *make_dict(char *key, struct skel *skel, struct dict *subdict);
181 void dict_lookup(const char *key, struct dict *dict,
182 struct skel **skel, struct dict **subdict);
183 int dict_append(struct dict **dict, struct dict *d2);
184 void free_skel(struct skel *skel);
185 void free_dict(struct dict *dict);
186 void free_lns_error(struct lns_error *err);
188 /* Parse text TEXT with LENS. INFO indicates where TEXT was read from.
190 * If ERR is non-NULL, *ERR is set to NULL on success, and to an error
191 * message on failure; the constructed tree is always returned. If ERR is
192 * NULL, return the tree on success, and NULL on failure.
194 * ENABLE_SPAN indicates whether span information should be collected or not
196 struct tree *lns_get(struct info *info, struct lens *lens, const char *text,
197 int enable_span, struct lns_error **err);
198 struct skel *lns_parse(struct lens *lens, const char *text,
199 struct dict **dict, struct lns_error **err);
201 /* Write tree TREE that was initially read from TEXT (but might have been
202 * modified) into file OUT using LENS.
204 * If ERR is non-NULL, *ERR is set to NULL on success, and to an error
205 * message on failure.
207 * INFO indicates where we are writing to, and its flags indicate whether
208 * to update spans or not.
210 void lns_put(struct info *info, FILE *out, struct lens *lens, struct tree *tree,
211 const char *text, int enable_span, struct lns_error **err);
213 /* Free up temporary data structures, most importantly compiled
214 regular expressions */
215 void lens_release(struct lens *lens);
216 void free_lens(struct lens *lens);
219 * Encoding of tree levels into strings
222 /* Special characters used when encoding one level of the tree as a string.
223 * We encode one tree node as KEY . ENC_EQ . VALUE . ENC_SLASH; if KEY or
224 * VALUE are NULL, we use ENC_NULL, which is the empty string. This has the
225 * effect that NULL strings are treated the same as empty strings.
227 * This encoding is used both for actual trees in the put direction, and to
228 * produce regular expressions describing one level in the tree (we
229 * disregard subtrees)
231 * For this to work, neither ENC_EQ nor ENC_SLASH can be allowed in a
232 * VALUE; we do this behind the scenes by rewriting regular expressions for
235 #define ENC_EQ "\003"
236 #define ENC_SLASH "\004"
238 #define ENC_EQ_CH (ENC_EQ[0])
239 #define ENC_SLASH_CH (ENC_SLASH[0])
241 /* The reserved range of characters that we do not allow in user-supplied
242 regular expressions, since we need them for internal bookkeeping.
244 This range must include the ENC_* characters
246 #define RESERVED_FROM "\001"
247 #define RESERVED_TO ENC_SLASH
248 #define RESERVED_FROM_CH (RESERVED_FROM[0])
249 #define RESERVED_TO_CH ENC_SLASH_CH
250 /* The range of reserved chars as it appears in a regex */
251 #define RESERVED_RANGE_RX RESERVED_FROM "-" RESERVED_TO
252 /* The equivalent of "." in a regexp for display */
253 #define RESERVED_DOT_RX "[^" RESERVED_RANGE_RX "\n]"
255 /* The length of the string S encoded; a NULL S counts as ENC_NULL. S is expanded more than once, so it must be free of side effects */
256 #define ENCLEN(s) ((s) == NULL ? strlen(ENC_NULL) : strlen(s))
257 #define ENCSTR(s) ((s) == NULL ? ENC_NULL : (s)) /* the string S encoded: ENC_NULL when S is NULL, S itself otherwise; S is parenthesized so expression arguments expand safely */
259 /* helper to access first and last child: both index the children array of L */
260 #define child_first(l) (l)->children[0]
261 #define child_last(l) (l)->children[(l)->nchildren - 1] /* L is expanded twice; the index is nchildren - 1, so L needs at least one child */
263 /* Format an encoded level as
264 * { key1 = value1 } { key2 = value2 } .. { keyN = valueN }
266 char *enc_format(const char *e, size_t len);
267 /* Format an encoded level similar to ENC_FORMAT, but put each tree node
268 * on a new line indented by INDENT spaces. If INDENT is negative, produce the
269 * same output as ENC_FORMAT
270 * { key1 = value1 } { key2 = value2 } .. { keyN = valueN }
272 char *enc_format_indent(const char *e, size_t len, int indent);
275 void dump_lens_tree(struct lens *lens);
276 void dump_lens(FILE *out, struct lens *lens);
284 * indent-tabs-mode: nil