source: Deliverables/D1.2/CompilerProofOutline/outline.tex @ 1726

Last change on this file since 1726 was 1726, checked in by mulligan, 8 years ago

Finished description of LTL to LIN

File size: 11.5 KB
Line 
1\documentclass[a4paper, 10pt]{article}
2
3\usepackage{a4wide}
4\usepackage{amsfonts}
5\usepackage{amsmath}
6\usepackage{amssymb}
7\usepackage[english]{babel}
8\usepackage{color}
9\usepackage{diagrams}
10\usepackage{graphicx}
11\usepackage[colorlinks]{hyperref}
12\usepackage[utf8x]{inputenc}
13\usepackage{listings}
14\usepackage{microtype}
15\usepackage{skull}
16\usepackage{stmaryrd}
17
18\lstdefinelanguage{matita-ocaml} {
19  mathescape=true
20}
21\lstset{
22  language=matita-ocaml,basicstyle=\tt,columns=flexible,breaklines=false,
23  showspaces=false, showstringspaces=false, extendedchars=false,
24  inputencoding=utf8x, tabsize=2
25}
26
27\title{Proof outline for the correctness of the CerCo compiler}
28\date{\today}
29\author{The CerCo team}
30
31\begin{document}
32
33\maketitle
34
35\section{Introduction}
36\label{sect.introduction}
37
38\section{The RTLabs to RTL translation}
39\label{sect.rtlabs.rtl.translation}
40
41% dpm: type system and casting load (???)
42We require a map, $\sigma$, between \texttt{Values} of the front-end memory model to lists of \texttt{BEValues} of the back-end memory model:
43
44\begin{displaymath}
45\mathtt{Value} ::= \bot \mid \mathtt{int(size)} \mid \mathtt{float} \mid \mathtt{null} \mid \mathtt{ptr} \quad\stackrel{\sigma}{\longrightarrow}\quad \mathtt{BEValue} ::= \bot \mid \mathtt{byte} \mid \mathtt{int}_i \mid \mathtt{ptr}_i
46\end{displaymath}
47
48We further require a map, $\sigma$, which maps the front-end \texttt{Memory} and the back-end's notion of \texttt{BEMemory}.
49
50\begin{displaymath}
51\mathtt{Mem}\ \alpha = \mathtt{Block} \rightarrow (\mathbb{Z} \rightarrow \alpha)
52\end{displaymath}
53
54where
55
56\begin{displaymath}
57\mathtt{Block} ::= \mathtt{Region} \cup \mathtt{ID}
58\end{displaymath}
59
60\begin{displaymath}
61\mathtt{BEMem} = \mathtt{Mem} \mathtt{Value}
62\end{displaymath}
63
64\begin{displaymath}
65\mathtt{Address} = \mathtt{BEValue} \times  \mathtt{BEValue} \\
66\end{displaymath}
67
68\begin{displaymath}
69\mathtt{Mem} = \mathtt{Block} \rightarrow (\mathbb{Z} \rightarrow \mathtt{Cont} \mid \mathtt{Value} \times \mathtt{Size} \mid \mathtt{null})
70\end{displaymath}
71
72\begin{center}
73\begin{picture}(2, 2)
74% picture of sigma mapping memory to memory
75TODO
76\end{picture}
77\end{center}
78
79\begin{displaymath}
80\mathtt{load}\ s\ a\ M = \mathtt{Some}\ v \rightarrow \forall i \leq s.\ \mathtt{load}\ s\ (a + i)\ \sigma(M) = \mathtt{Some}\ v_i
81\end{displaymath}
82
83\begin{displaymath}
84\sigma(\mathtt{store}\ v\ M) = \mathtt{store}\ \sigma(v)\ \sigma(M)
85\end{displaymath}
86
87\begin{displaymath}
88\begin{array}{rll}
89\mathtt{State} & ::=  & (\mathtt{State} : \mathtt{Frame}^* \times \mathtt{Frame} \\
90               & \mid & \mathtt{Call} : \mathtt{Frame}^* \times \mathtt{Args} \times \mathtt{Return} \times \mathtt{Fun} \\
91               & \mid & \mathtt{Return} : \mathtt{Frame}^* \times \mathtt{Value} \times \mathtt{Return}) \times \mathtt{Mem}
92\end{array}
93\end{displaymath}
94
95\begin{displaymath}
96\mathtt{State} ::= \mathtt{Frame}^* \times \mathtt{PC} \times \mathtt{SP} \times \mathtt{ISP} \times \mathtt{CARRY} \times \mathtt{REGS}
97\end{displaymath}
98
99\begin{displaymath}
100\mathtt{State} \stackrel{\sigma}{\longrightarrow} \mathtt{State}
101\end{displaymath}
102
103\begin{displaymath}
104\sigma(\mathtt{State} (\mathtt{Frame}^* \times \mathtt{Frame})) \longrightarrow ((\sigma(\mathtt{Frame}^*), \sigma(\mathtt{PC}), \sigma(\mathtt{SP}), 0, 0, \sigma(\mathtt{REGS})), \sigma(\mathtt{Mem}))
105\end{displaymath}
106
107\begin{displaymath}
108\sigma(\mathtt{Return}(-)) \longrightarrow \sigma \circ \text{return one step}
109\end{displaymath}
110
111\begin{displaymath}
112\sigma(\mathtt{Call}(-)) \longrightarrow \sigma \circ \text{call one step}
113\end{displaymath}
114
115Return one step commuting diagram:
116
117\begin{displaymath}
118\begin{diagram}
119s & \rTo^{\text{one step of execution}} & s'   \\
120  & \rdTo                             & \dTo \\
121  &                                   & \llbracket s'' \rrbracket
122\end{diagram}
123\end{displaymath}
124
125Call one step commuting diagram:
126
127\begin{displaymath}
128\begin{diagram}
129s & \rTo^{\text{one step of execution}} & s'   \\
130  & \rdTo                             & \dTo \\
131  &                                   & \llbracket s'' \rrbracket
132\end{diagram}
133\end{displaymath}
134
135\begin{displaymath}
136\begin{diagram}
137\mathtt{CALL}(\mathtt{id},\ \mathtt{args},\ \mathtt{dst},\ \mathtt{pc}),\ \mathtt{State}(\mathtt{Frame},\ \mathtt{Frames}) & \rTo & \mathtt{Call}(FINISH ME)
138\end{diagram}
139\end{displaymath}
140
141\begin{displaymath}
142\begin{array}{rcl}
143\mathtt{CALL}(\mathtt{id}, \mathtt{args}, \mathtt{dst}, \mathtt{pc}) & \longrightarrow & \mathtt{CALL\_ID}(\mathtt{id}, \sigma'(\mathtt{args}), \sigma(\mathtt{dst}), \mathtt{pc}) \\
144\mathtt{RETURN}                                                      & \longrightarrow & \mathtt{RETURN} \\
145\end{array} 
146\end{displaymath}
147
148\begin{displaymath}
149\begin{array}{rcl}
150\sigma & : & \mathtt{register} \rightarrow \mathtt{list\ register} \\
151\sigma' & : & \mathtt{list\ register} \rightarrow \mathtt{list\ register}
152\end{array} 
153\end{displaymath}
154
155\section{The RTL to ERTL translation}
156\label{sect.rtl.ertl.translation}
157
158\begin{displaymath}
159\begin{diagram}
160& & \llbracket \mathtt{CALL\_ID}(\mathtt{id}, \mathtt{args}, \mathtt{dst}, \mathtt{pc})\rrbracket & & \\
161& \ldTo^{\text{external}} & & \rdTo^{\text{internal}} & \\
162\skull & & & & \mathtt{regs} = [\mathtt{params}/-] \\
163& & & & \mathtt{sp} = \mathtt{ALLOC} \\
164& & & & \mathtt{PUSH}(\mathtt{carry}, \mathtt{regs}, \mathtt{dst}, \mathtt{return\_addr}), \mathtt{pc}_{0}, \mathtt{regs}, \mathtt{sp} \\
165\end{diagram}
166\end{displaymath}
167
168\begin{align*}
169\llbracket \mathtt{RETURN} \rrbracket \\
170\mathtt{return\_addr} & := \mathtt{top}(\mathtt{stack}) \\
171v*                    & := m(\mathtt{rv\_regs}) \\
172\mathtt{dst}, \mathtt{sp}, \mathtt{carry}, \mathtt{regs} & := \mathtt{pop} \\
173\mathtt{regs}[v* / \mathtt{dst}] \\
174\end{align*}
175
176\begin{displaymath}
177\begin{diagram}
178s    & \rTo^1 & s' & \rTo^1 & s'' \\
179\dTo &        &    & \rdTo  & \dTo \\
180\llbracket s \rrbracket & \rTo(1,3)^1 & & & \llbracket s'' \rrbracket \\ 
181\mathtt{CALL} \\
182\end{diagram}
183\end{displaymath}
184
185\begin{displaymath}
186\begin{diagram}
187s    & \rTo^1 & s' & \rTo^1 & s'' \\
188\dTo &        &    & \rdTo  & \dTo \\
189\  & \rTo(1,3) & & & \ \\
190\mathtt{RETURN} \\
191\end{diagram}
192\end{displaymath}
193
194\begin{displaymath}
195\mathtt{b\_graph\_translate}: (\mathtt{label} \rightarrow \mathtt{blist'})
196\rightarrow \mathtt{graph} \rightarrow \mathtt{graph}
197\end{displaymath}
198
199\begin{align*}
200\mathtt{theorem} &\ \mathtt{b\_graph\_translate\_ok}: \\
201& \forall  f.\forall G_{i}.\mathtt{let}\ G_{\sigma} := \mathtt{b\_graph\_translate}\ f\ G_{i}\ \mathtt{in} \\
202&       \forall l \in G_{i}.\mathtt{subgraph}\ (f\ l)\ l\ (\mathtt{next}\ l\ G_{i})\ G_{\sigma}
203\end{align*}
204
205\begin{align*}
206\mathtt{lemma} &\ \mathtt{execute\_1\_step\_ok}: \\
207&       \forall s.  \mathtt{let}\ s' := s\ \sigma\ \mathtt{in} \\
208&       \mathtt{let}\ l := pc\ s\ \mathtt{in} \\
209&       s \stackrel{1}{\rightarrow} s^{*} \Rightarrow \exists n. s' \stackrel{n}{\rightarrow} s'^{*} \wedge s'^{*} = s'\ \sigma
210\end{align*}
211
212\section{The ERTL to LTL translation}
213\label{sect.ertl.ltl.translation}
214
215\section{The LTL to LIN translation}
216\label{sect.ltl.lin.translation}
217
218We require a map, $\sigma$, from LTL statuses, where program counters are represented as labels in a graph data structure, to LIN statuses, where program counters are natural numbers:
219\begin{displaymath}
220\mathtt{pc : label} \stackrel{\sigma}{\longrightarrow} \mathbb{N}
221\end{displaymath}
222
223The LTL to LIN translation pass also linearises the graph data structure into a list of instructions.
224Pseudocode for the linearisation process is as follows:
225
226\begin{lstlisting}
227let rec linearise graph visited required generated todo :=
228  match todo with
229  | l::todo ->
230    if l $\in$ visited then
231      let generated := generated $\cup\ \{$ Goto l $\}$ in
232      let required := required $\cup$ l in
233        linearise graph visited required generated todo
234    else
235      -- Get the instruction at label `l' in the graph
236      let lookup := graph(l) in
237      let generated := generated $\cup\ \{$ lookup $\}$ in
238      -- Find the successor of the instruction at label `l' in the graph
239      let successor := succ(l, graph) in
240      let todo := successor::todo in
241        linearise graph visited required generated todo
242  | []      -> (required, generated)
243\end{lstlisting}
244
245It is easy to see that this linearisation process eventually terminates.
246In particular, the size of the visited label set is monotonically increasing, and is bounded above by the size of the graph that we are linearising.
247
248The initial call to \texttt{linearise} sees the \texttt{visited}, \texttt{required} and \texttt{generated} sets set to the empty set, and \texttt{todo} initialized with the singleton list consisting of the entry point of the graph.
249We envisage needing to prove the following invariants on the linearisation function above:
250
251\begin{enumerate}
252\item
253$\mathtt{visited} \approx \mathtt{generated}$, where $\approx$ is \emph{multiset} equality, as \texttt{generated} is a set of instructions where instructions may mention labels multiple times, and \texttt{visited} is a set of labels,
254\item
255$\forall \mathtt{l} \in \mathtt{generated}.\ \mathtt{succ(l,\ graph)} \subseteq \mathtt{required} \cup \mathtt{todo}$,
256\item
257$\mathtt{required} \subseteq \mathtt{visited}$,
258\item
259$\mathtt{visited} \cap \mathtt{todo} = \emptyset$.
260\end{enumerate}
261
262The invariants collectively imply the following properties, crucial to correctness, about the linearisation process:
263
264\begin{enumerate}
265\item
266Every graph node is visited at most once,
267\item
268Every instruction that is generated is generated due to some graph node being visited,
269\item
270The successor instruction of every instruction that has been visited already will eventually be visited too.
271\end{enumerate}
272
273Note, because the LTL to LIN transformation is the first time the program is linearised, we must discover a notion of `well formed program' suitable for linearised forms.
274In particular, we see the notion of well formedness (yet to be formally defined) resting on the following conditions:
275
276\begin{enumerate}
277\item
278For every jump to a label in a linearised program, the target label exists at some point in the program,
279\item
280Each label is unique, appearing only once in the program,
281\item
282The final instruction of a program must be a return.
283\end{enumerate}
284
285We assume that these properties will be easy consequences of the invariants on the linearisation function defined above.
286
287The final condition above is potentially a little opaque, so we explain further.
288First, the only instructions that can reasonably appear in final position at the end of a program are returns or backward jumps, as any other instruction would cause execution to `fall out' of the end of the program (for example, when a function invoked with \texttt{CALL} returns, it returns to the next instruction past the \texttt{CALL} that invoked it).
289However, in LIN, though each function's graph has been linearised, the entire program is not yet fully linearised into a list of instructions, but rather, a list of `functions', each consisting of a linearised body along with other data.
290Each well-formed function must end with a call to \texttt{RET}, and therefore the only correct instruction that can terminate a LIN program is a \texttt{RET} instruction.
291
292\section{The LIN to ASM and ASM to MCS-51 machine code translations}
293\label{sect.lin.asm.translation}
294
295The LIN to ASM translation step is trivial, being almost the identity function.
296The only non-trivial feature of the LIN to ASM translation is that all labels are `named apart' so that there is no chance of freshly generated labels from different namespaces clashing with labels from another namespace.
297
298The ASM to MCS-51 machine code translation step, and the required statements of correctness, are found in an unpublished manuscript attached to this document.
299
300\end{document}
Note: See TracBrowser for help on using the repository browser.