doc/lenses.tex

   1 \documentclass[12pt,fleqn]{amsart}
   2
   3 \usepackage{amsmath}
   4 \usepackage{xspace}
   5 \usepackage{amssymb}
   6 \usepackage{bcprules}
   7
   8 \newcommand{\ensmath}[1]{\ensuremath{#1}\xspace}
   9
  10 \newcommand{\opnam}[1]{\ensmath{\operatorname{\mathit{#1}}}}
  11 \newcommand{\nget}{\opnam{get}}
  12 \newcommand{\nput}{\opnam{put}}
  13 \newcommand{\nparse}{\opnam{parse}}
  14 \newcommand{\ncreate}{\opnam{create}}
  15 \newcommand{\nkey}{\opnam{key}}
  16 \newcommand{\lget}[1]{\opnam{get}{#1}}
  17 \newcommand{\lparse}[1]{\nparse{#1}}
  18 \newcommand{\lput}[2]{\opnam{put}{#1}\,{(#2)}}
  19 \newcommand{\lcreate}[2]{\opnam{create}{#1}\,{(#2)}}
  20 \newcommand{\lkey}[1]{\nkey{#1}}
  21
  22 \newcommand{\suff}{\ensmath{\operatorname{suff}}}
  23 \newcommand{\pref}{\ensmath{\operatorname{pref}}}
  24 \newcommand{\lenstype}[3][K]{\ensmath{{#2}\stackrel{#1}{\Longleftrightarrow}{#3}}}
  25 \newcommand{\tree}[1]{\ensmath{[#1]}}
  26 \newcommand{\niltree}{\ensmath{[]}}
  27 \newcommand{\nildict}{\ensmath{\{\!\}}}
  28 \newcommand{\Regexp}{\ensmath{\mathcal R}}
  29 \newcommand{\reglang}[1]{\ensmath{[\![{#1}]\!]}}
  30 \newcommand{\lens}[1]{\opnam{#1}}
  31 \newcommand{\eps}{\ensmath{\epsilon}}
  32 \newcommand{\conc}[2]{\ensmath{#1\cdot #2}}
  33 \newcommand{\uaconc}[2]{\ensmath{#1\cdot^{!} #2}}
  34 \newcommand{\xconc}[2]{\ensmath{#1\odot #2}}
  35 \newcommand{\dconc}[2]{\ensmath{#1\oplus #2}}
  36 \newcommand{\alt}[2]{\ensmath{#1\,|\,#2}}
  37 \newcommand{\cstar}[1]{\ensmath{#1^*}}
  38 \newcommand{\uastar}[1]{\ensmath{#1^{!*}}}
  39 \newcommand{\Trees}{\ensmath{\mathcal T}}
  40 \newcommand{\Words}{\ensmath{\Sigma^*_{\rhd}}}
  41 \newcommand{\tmap}[2]{\ensmath{#1\mapsto #2}}
  42 \newcommand{\tmapv}[3]{\ensmath{#1 = #2\mapsto #3}}
  43 \newcommand{\tmaptt}[2]{\ensmath{{\mathtt #1}\mapsto {\mathtt #2}}}
  44 \newcommand{\dom}[1]{\ensmath{\mathrm{dom}(#1)}}
  45 \newcommand{\List}[1]{\ensmath{\mathtt{List(#1)}}}
  46 \newcommand{\redigits}{\ensmath{\mathtt{D}}}
  47 \newcommand{\Skel}{\ensmath{\mathcal{S}}}
  48 \newcommand{\Powerset}{\ensmath{\mathcal{P}}}
  49 \newcommand{\Keys}{\ensmath{\mathcal{K}}}
  50 \newcommand{\key}[1]{\ensmath{\kappa(#1)}}
  51 \newcommand{\lto}{\ensmath{\longrightarrow}}
  52 \newcommand{\Dictlangs}{\ensmath{\List{\Powerset(\Skel)}}}
  53 \newcommand{\lookup}{\opnam{lookup}}
  54
  55 \newtheorem{theorem}{Theorem}
  56 \newtheorem{lemma}[theorem]{Lemma}
  57 {\theoremstyle{definition}
  58   \newtheorem{defn}{Definition}
  59   \newtheorem*{remark}{Remark}
  60   \newtheorem*{example}{Example}}
  61
  62 \begin{document}
  63
  64 \section{Trees}
  65 We use trees as the abstract view for our lenses; since the concrete data
  66 structure, strings, is ordered, and to support some properties of lenses
  67 that seem sensible intuitively, the trees differ from garden-variety trees
  68 in a number of ways:
  69 \begin{itemize}
  70   \item a tree node consists of three pieces of data: a \emph{label}, a
  71     \emph{value} and an ordered list of \emph{children}, each of them a
  72     tree by themselves.
  73   \item the labels for tree nodes are either words not containing a slash
  74     {\tt /} or the special symbol $\rhd$; in the implementation $\rhd$
  75     corresponds to {\tt NULL}. The latter is used to indicate
  76     that an entry in the tree corresponds to text that was deleted.
  77   \item the children of a tree node form a list of subtrees, i.e. are
  78     ordered. In addition, several subtrees in such a list may use the same
  79     label. This makes it possible to accommodate concrete files where
  80     entries that are logically connected are stored scattered between
  81     unrelated entries like the {\tt AcceptEnv} entries in {\tt
  82       sshd\_config}.
  83 \end{itemize}
  84
  85 We write $\tmapv{k}{v}{t}$ for a tree node with label $k$, value $v$ and
  86 children $t$. If it is clear from the context, or unimportant, $v$ will
  87 often be omitted.
  88
  89 \subsection{Tree labels}
  90 We take the tree labels from the set of \emph{path components} $\Keys =
  91 (\Sigma \setminus \{ {\mathtt /} \})^+ \cup \{ \rhd \}$, that is,
  92 a tree label is any word not containing a backslash or the special symbol
  93 $\rhd$. For tree labels, we define a partial concatenation operator
  94 $\odot$, as
  95 \begin{equation*}
  96   \xconc{k_1}{k_2} = \begin{cases}
  97     k_1 & \text{if } k_2 = \rhd\\
  98     k_2 & \text{if } k_1 = \rhd\\
  99     \text{undefined} & \text{otherwise}
 100   \end{cases}
 101 \end{equation*}
 102
 103 Defining tree labels in this way (1) guarantees that there is a
 104 one--to--one correspondence between a tree label and the word it came from
 105 in the concrete text and (2) avoids any pain in splitting tree labels in
 106 the \nput direction.
 107
 108 \subsection{Trees}
 109 The set of \emph{ordered trees} \Trees over $\Words$ is recursively
 110 defined as
 111 \begin{itemize}
 112 \item The empty tree $\rhd$
 113 \item For any words $k\in\Keys$, $v\in\Words$ and any tree $t\in\Trees$,
 114   $\tree{\tmapv{k}{v}{t}}$ is in \Trees
 115 \item For any $n$ and trees $\tree{\tmapv{k_i}{v_i}{t_i}} \in Trees$,
 116   the list
 117   $[\tmapv{k_1}{v_1}{t_1};\tmapv{k_2}{v_2}{t_2};\ldots;\tmapv{k_n}{v_n}{t_n}]$
 118   is in \Trees
 119 \end{itemize}
 120
 121 Note that this allows the same key to be used multiple times in a tree; for
 122 example, $[\tmaptt{a}{x}; \tmaptt{a}{x}]$ is a valid tree and
 123 different from $[\tmaptt{a}{x}]$.
 124
 125 The domain of a tree $\dom{t}$ is the list of all its labels, i.e. an
 126 element of $\List{\Keys}$; for a tree $t =
 127 \tree{\tmap{k_1}{t_1};\ldots;\tmap{k_n}{t_n}}$, $\dom{t} =
 128      [k_1;\ldots;k_n].$
 129
 130 %% Why ?
 131 %A tree $t$ can also be viewed as a total map from \Words to
 132 %$\List{\Trees\cup\Words}$, defined as
 133 %\begin{equation*}
 134 %  t(k) =
 135 %  \begin{cases}
 136 %    v & \text{for } t = \tree{v}\\
 137 %    [t_1] :: t_2(k) & \text{for } t = t' :: t_2
 138 %                      \text{ and } t' = \tree{\tmap{k}{t_1}}\\
 139 %    t_2(k) & \text{for } t = t' :: t_2
 140 %             \text{ and } t' = \tree{\tmap{k'}{t_1}} \text{with } k\neq k'
 141 %  \end{cases}
 142 %\end{equation*}
 143 %
 144 %Note that this definition implies that for a tree $t = \tree{\tmap{k}{v}}$
 145 %with $k,v\in\Words$, $t(k)=[v].$ For the tree $t_{\mathtt{aa}} = [\tmaptt{a}{x};
 146 %  \tmaptt{a}{x}]$, $t_{\mathtt{aa}}(\mathtt a) = [\mathtt x; \mathtt x]$.
 147
 148 The concatenation of trees $\conc{t_1}{t_2}$ is simply list concatenation.
 149
 150 For sets $K\subset\Keys$ and $T\subset\Trees$, $\tree{\tmap{K}{T}}$
 151 denotes the set of all trees $t = \tree{\tmap{k}{t'}}$ with $k\in K$, $t'
 152 \in T$.
 153
 154 \subsection{Concatenation and iteration}
 155 For a tree $t\in \Trees$, we define its underlying \emph{key language}
 156 $\key{t}$ by
 157 \begin{equation*}
 158   \key{t} = \begin{cases}
 159     {\tt /} & \text{if } t = \rhd \text{ or } t = \tree{\tmap{\rhd}{t_1}}\\
 160     k \cdot {\mathtt /} & \text{if } t = \tree{\tmap{k}{t_1}}\\
 161     k\cdot {\mathtt /}\cdot \key{t_2} & \text{for } t = \tree{\tmap{k}{t_1};t_2}
 162   \end{cases}
 163 \end{equation*}
 164 where $\conc{k_1}{k_2}$ is normal string concatenation. The key language of
 165 a set of trees $\key{T}$ is defined as $\{\key{t} | t \in T\}$.
 166
 167 In analogy to languages, we call two tree sets $T_1, T_2 \subseteq \Trees$
 168 \emph{unambiguously concatenable} if the key languages $\key{T_1}$ and
 169 $\key{T_2}$ are unambiguously concatenable. A tree set $T\in\Trees$ is
 170 \emph{unambiguously iterable} if the underlying key language $\key{T}$ is
 171 unambiguously iterable.
 172
 173 \subsection{Public tree operations}
 174 We need the public API to support the following operations. The set
 175 $P\subseteq \Sigma^*$ are paths
 176
 177 \begin{itemize}
 178   \item $\opnam{lookup}(p, t): P \times \Trees \lto \Trees$ finds the tree
 179     with path $p$
 180   \item $\opnam{assign}(p, v, t): P \times \Sigma^* \times \Trees \lto
 181     \Trees$ assigns the value $v$ to the tree node $p$
 182   \item $\opnam{remove}(p, t): P \times \Trees \lto \Trees$ removes the
 183     subtree denoted by $p$
 184   \item $\opnam{get}(p, t): P \times \Trees \lto \Sigma^*$ looks up the
 185     value associated with $p$
 186     \item $\opnam{ls}(p, t): P \times \Trees \lto \List{\Trees}$ lists all
 187       the subtrees underneath $p$
 188 \end{itemize}
 189
 190 \section{Lenses}
 191
 192 Lenses map between strings in the regular language $C$ and trees
 193 $T\subseteq\Trees$. They can also produce keys from a regular language $K$;
 194 these keys are used by the $\lens{subtree}$ lens to construct new
 195 trees.
 196
 197 A lens $l$ consists of the functions $\nget$, $\nput$, $\ncreate$, and
 198 $\nparse$.
 199
 200 Lenses here are written as $l:\lenstype[K,S,L]{C}{T}$ where $K$ and $C$ are
 201 regular languages and $T\subseteq\Trees$.  The skeletons $S\subseteq\Skel$
 202 and dictionary type specifications $L$ are as for Boomerang (really ??)
 203 Intuitively, the notation says that $l$ is a lens that takes strings from
 204 $C$ and transforms them to trees in $T$. Generally,
 205
 206 \infrule{C\subseteq\Words \andalso K\subseteq\List{\Keys}
 207            \andalso T\subseteq\Trees
 208            \andalso S\subseteq\Skel \andalso L \in \Dictlangs}
 209         {\lens{l} \in \lenstype[K,S,L]{C}{T}}
 210
 211 \begin{align*}
 212   \nget &\in C \lto T\\
 213   \nparse &\in C \lto K \times S \times D(L)\\
 214   \nput &\in T \lto K \times S \times D(L) \lto C \times D(L)\\
 215   \ncreate &\in T \lto K \times D(L) \lto C \times D(L)
 216 \end{align*}
 217
 218 \subsection{const}
 219
 220 The $\lens{const} E\,t\,v$ maps words matching $E$ in the \nget direction
 221 to a fixed tree $t$ and maps that fixed tree $t$ back in the \nput
 222 direction. When text needs to be created from $t$, it produces the default
 223 word $v$.
 224
 225 \infrule{E\in\Regexp \andalso t\in T\andalso u\in\reglang{E} \andalso L \in \Dictlangs}
 226         {\lens{const} E\,t\,u \in \lenstype[\rhd,\reglang{E},L]{\reglang{E}}{\{t\}}}
 227
 228 \begin{align*}
 229   \lget{c} &= t\\
 230   \lparse{c} &= \rhd, c, \nildict\\
 231   \lput{t}{k,s,d} &= s, d\\
 232   \lcreate{t}{k,d} &= u, d
 233 \end{align*}
 234
 235 The \lens{del} lens is syntactic sugar: $\lens{del} E\,u = \lens{const}
 236 E\,\niltree\,u$.
 237
 238 \subsection{copy}
 239
 240 Copies a word into a leaf.
 241
 242 \infrule{E\in\Regexp \andalso L\in\Dictlangs}
 243         {\lens{copy}\in\lenstype[\rhd,\reglang{E},L]{\reglang{E}}{\tree{\reglang{E}}}}
 244
 245 \begin{align*}
 246   \lget{c} &= \tree{c}\\
 247   \lparse{c} &= \rhd, c, \nildict\\
 248   \lput{\tree{v}}{k,s,d} &= v, d\\
 249   \lcreate{\tree{v}}{k,d} &= v, d
 250 \end{align*}
 251
 252 \subsection{seq}
 253
 254 Gets the next value from a sequence as the key. We assume there's a
 255 generator $\mathtt{nextval}: \Sigma^* \to \mathbb{N}$ that returns
 256 successive numbers on each invocation. \redigits is the regular expression
 257 {\tt [0-9]+} that matches positive numbers.
 258
 259 \infrule{w\in\Sigma^* \andalso L\in\Dictlangs \andalso n = \mathtt{nextval}(w)}
 260         {\lens{seq} w\in\lenstype[\reglang{\redigits},\eps,L]{\eps}{\niltree}}
 261
 262 \begin{align*}
 263   \lget{\eps} &= \niltree\\
 264   \lparse{\eps} &= n, \eps, \nildict\\
 265   \lput{\niltree}{k,\eps,d} &= \eps, d\\
 266   \lcreate{\niltree}{k,d} &= \eps,d
 267 \end{align*}
 268
 269 \subsection{label}
 270
 271 Uses a fixed tree label
 272
 273 \infrule{w\in\Sigma^* \andalso L\in\Dictlangs}
 274         {\lens{label} w\in\lenstype[w,\eps,L]{\eps}{\niltree}}
 275
 276 \begin{align*}
 277   \lget{\eps} &= \niltree\\
 278   \lparse{\eps} &= w,\eps,\nildict\\
 279   \lput{\niltree}{w,\eps,d} &= \eps,d\\
 280   \lcreate{\niltree}{k,d} &= \eps,d
 281 \end{align*}
 282
 283 \subsection{key}
 284
 285 Uses a parsed tree label
 286
 287 \infrule{E\in\Regexp \andalso L\in\Dictlangs}
 288         {\lens{key} E\in\lenstype[\reglang{E},\eps,L]{\reglang{E}}{\niltree}}
 289
 290 \begin{align*}
 291   \lget{c} &= \niltree\\
 292   \lparse{c} &= c,\eps,\nildict\\
 293   \lput{\niltree}{c,\eps,d} &= c,d\\
 294   \lcreate{\niltree}{c, d} &= c,d
 295 \end{align*}
 296
 297 \subsection{subtree}
 298
 299 The subtree combinator $[l]$ constructs a subtree from $l$
 300
 301 \infrule{l\in\lenstype[K,S,L]{C}{T}}
 302         {[l]\in\lenstype[\rhd,\square,S::L]{C}{\tree{\tmap{K}{T}}}}
 303
 304 \begin{align*}
 305   \lget{c} &= \tree{\tmap{l.\lkey{c}}{l.\lget{c}}}\\
 306   \lparse{c} &= \rhd, \square, \{\tmap{l.\lkey{c}}{[l.\lparse{c}]}\}\\
 307   \lput{\tree{\tmap{k}{t}}}{k',\square,d} &=
 308   \begin{cases}
 309     \pi_1\left(l.\lput{t}{k,\bar{s},\bar{d}}\right),d' & \text{if } (\bar{k}, \bar{s}, \bar{d}), d' = \lookup(k, d)\\
 310     \pi_1\left(l.\lcreate{t}{k,\nildict}\right), d & \text{if } \lookup(k,
 311     d) \text{ undefined}
 312   \end{cases}\\
 313   \lcreate{\tree{\tmap{k}{t}}}{k',d} &= \lput{\tree{\tmap{k}{t}}}{k',\square,d}
 314 \end{align*}
 315
 316 We store a triple $(k,s,d)$ in dictionaries, but we don't use the stored
 317 key $k$.
 318
 319 \subsection{concat}
 320
 321 The concat combinator $\conc{l_1}{l_2}$ joins two trees.
 322
 323 \infrule{l_1\in\lenstype[K_1,S_1,L]{C_1}{T_1}
 324          \andalso l_2\in\lenstype[K_2,S_2,L]{C_2}{T_2}
 325          \andalso \uaconc{C_1}{C_2}
 326          \andalso \uaconc{\key{T_1}}{\key{T_2}}}
 327         {\conc{l_1}{l_2}\in\lenstype[\conc{K_1}{K_2},S_1\times S_2, L]{\conc{C_1}{C_2}}{\conc{T_1}{T_2}}}
 328
 329 \begin{align*}
 330   \lget{(\conc{c_1}{c_2})} &= \conc{(l_1.\lget{c_1})}{(l_2.\lget{c_2})}\\
 331   \lparse{\conc{c_1}{c_2}} &= \xconc{k_1}{k_2}, (s_1,s_2), \dconc{d_1}{d_2} \\
 332   \lput{\conc{t_1}{t_2}}{k, (s_1,s_2), d_1} &= \conc{c_1}{c_2}, d_3 \\
 333   & \text{where } c_i, d_{i+1} = l_i.\lput{t_i}{k, s_i, d_i} \\
 334   \lcreate{\conc{t_1}{t_2}}{k, d_1} &= \conc{c_1}{c_2}, d_3\\
 335     & \text{where } c_i, d_{i+1} = l_i.\lcreate{t_i}{k, d_i}
 336 \end{align*}
 337
 338 \subsection{union}
 339
 340 The union combinator $\alt{l_1}{l_2}$ chooses.
 341
 342 \infrule{l_i\in\lenstype[K_i,S_i,L]{C_i}{T_i}\text{ for } i=1,2
 343   \andalso C_1 \cap C_2 = \emptyset
 344   \andalso S_1 \cap S_2 = \emptyset
 345   \andalso \key{T_1} \cap \key{T_2} = \emptyset}
 346   {\alt{l_1}{l_2}\in\lenstype[K_1\cup K_2, S_1\cup S_2, L]{C_1 \cup C_2}{T_1 \cup T_2}}
 347
 348 \begin{align*}
 349   \lget{c} &=
 350      \begin{cases}
 351        l_1.\lget{c} & \text{if } c \in C_1\\
 352        l_2.\lget{c} & \text{if } c \in C_2\\
 353      \end{cases}
 354   \\
 355   \lparse{c} &=
 356      \begin{cases}
 357        l_1.\lparse{c} & \text{if } c \in C_1\\
 358        l_2.\lparse{c} & \text{if } c \in C_2\\
 359      \end{cases}
 360   \\
 361   \lput{t}{k, s, d} &=
 362      \begin{cases}
 363        l_1.\lput{t}{k, s, d} & \text{if } t, s \in T_1 \times S_1 \\
 364        l_2.\lput{t}{k, s, d} & \text{if } t, s \in T_2 \times S_2 \\
 365        l_1.\lcreate{t}{k, d} & \text{if } t, s \in (T_1\setminus T_2)
 366        \times S_2\\
 367        l_2.\lcreate{t}{k, d} & \text{if } t, s \in (T_2\setminus T_1)
 368        \times S_1
 369      \end{cases}\\
 370   \lcreate{t}{k, d} &=
 371      \begin{cases}
 372        l_1.\lcreate{t}{k, d} & \text{if } t\in T_1\\
 373        l_2.\lcreate{t}{k, d} & \text{if } t\in T_2\setminus T_1
 374      \end{cases}
 375 \end{align*}
 376
 377 \subsection{star}
 378
 379 The star combinator $\cstar{l}$ iterates.
 380
 381 \infrule{l\in\lenstype{C}{T} \andalso \uastar{C} \andalso \uastar{\key{T}}}
 382         {\cstar{l}\in\lenstype[\cstar{K}]{\cstar{C}}{\cstar{T}}}
 383
 384 \begin{align*}
 385   \lget{c_1\cdots c_n} &= (l.\lget{c_1}) \cdots (l.\lget{c_n})\\
 386   \lparse{c_1\cdots c_n} &= k_1\odot\ldots\odot k_n, [s_1;\ldots;s_n],
 387                             d_1\oplus\ldots\oplus d_n\\
 388                             & \text{where } k_i,s_i,d_i = l.\lparse{c_i}\\
 389   \lput{t_1\cdots t_n}{k, [s_1;\cdots;s_m], d_1} &= (c_1 \cdots c_n), d_{n+1}\\
 390   \text{where } &c_i, d_{i+1} =
 391     \begin{cases}
 392       l.\lput{t_i}{k, s_i, d_i} & \text{for } 1 \leq i \leq \min(m,n)\\
 393       l.\lcreate{t_i}{k, d_i} & m+1 \leq i \leq n
 394     \end{cases}\\
 395   \lcreate{t_1 \cdots t_n}{k, d_1} &= (c_1\cdots c_n), d_{n+1}\\
 396   & \text{where } c_i, d_{i+1} = l.\lcreate{t_i}{k, d_i}
 397 \end{align*}
 398
 399 Want reordering and insertion in the middle to be reflected. If
 400 $\lget{\conc{c_1}{c_2}} = \conc{t_1}{t_2}$, want
 401 $\lput{(\conc{t_2}{t_1})}{\conc{c_1}{c_2}} =
 402 \conc{l.\lput{t_2}{c_2}}{l.\lput{t_1}{c_1}}$ This can only happen if the
 403 information to be reordered is in subtrees. In particular, comment lines
 404 need to become their own subtree, with some support from the language to
 405 create `hidden' entries. Simplest: allow {\tt NULL} as the key for a
 406 subtree and ignore such tree entries in the public API.
 407
 408 Need to split a tree $t\in T$ into subtrees according to ?? Keeping a fake
 409 `slot' $\rhd$ around for text that didn't produce a tree should help with
 410 that.
 411
 412 For $K^*$ to make any sense, must have $\rhd\in K$ and the application of
 413 $(l^*).\nparse$ must return $\rhd$ for all except at most one application.
 414
 415 \section{Regular expressions and languages}
 416
 417 For type checking, we need to compute the following properties of regular
 418 languages $R, R_1, R_2$
 419 \begin{itemize}
 420 \item decide unambiguous concatenation $\uaconc{R_1}{R_2}$ and compute
 421   $\conc{R_1}{R_2}$
 422 \item decide unambiguous iteration $\uastar{R}$ and compute $R^*$
 423 \item disjointness $R_1 \cap R_2 = \emptyset$ (we don't need general
 424   intersection, though I don't know of a quicker way to decide
 425   disjointness)
 426 \item compute the regular language $R = \reglang{E}$ for a regular
 427   expression $E\in\Regexp$
 428 \end{itemize}
 429 \end{document}
 430
 431 %% TeX-parse-self: t
 432 %% TeX-auto-save: t
 433
 434 %% Local Variables:
 435 %% TeX-master: "lenses.tex"
 436 %% compile-command: "pdflatex -file-line-error -halt-on-error lenses.tex"
 437 %% End: