source: Deliverables/D1.2/CompilerProofOutline/outline.tex @ 1727

Last change on this file since 1727 was 1727, checked in by boender, 8 years ago
  • added last RTL - ERTL diagrams
File size: 13.0 KB
Line 
1\documentclass[a4paper, 10pt]{article}
2
3\usepackage{a4wide}
4\usepackage{amsfonts}
5\usepackage{amsmath}
6\usepackage{amssymb}
7\usepackage[english]{babel}
8\usepackage{color}
9\usepackage{diagrams}
10\usepackage{graphicx}
11\usepackage[colorlinks]{hyperref}
12\usepackage[utf8x]{inputenc}
13\usepackage{listings}
14\usepackage{microtype}
15\usepackage{skull}
16\usepackage{stmaryrd}
17
18\lstdefinelanguage{matita-ocaml} {
19  mathescape=true
20}
21\lstset{
22  language=matita-ocaml,basicstyle=\tt,columns=flexible,breaklines=false,
23  showspaces=false, showstringspaces=false, extendedchars=false,
24  inputencoding=utf8x, tabsize=2
25}
26
27\title{Proof outline for the correctness of the CerCo compiler}
28\date{\today}
29\author{The CerCo team}
30
31\begin{document}
32
33\maketitle
34
35\section{Introduction}
36\label{sect.introduction}
37
38\section{The RTLabs to RTL translation}
39\label{sect.rtlabs.rtl.translation}
40
41% dpm: type system and casting load (???)
42We require a map, $\sigma$, between \texttt{Values} of the front-end memory model to lists of \texttt{BEValues} of the back-end memory model:
43
44\begin{displaymath}
45\mathtt{Value} ::= \bot \mid \mathtt{int(size)} \mid \mathtt{float} \mid \mathtt{null} \mid \mathtt{ptr} \quad\stackrel{\sigma}{\longrightarrow}\quad \mathtt{BEValue} ::= \bot \mid \mathtt{byte} \mid \mathtt{int}_i \mid \mathtt{ptr}_i
46\end{displaymath}
47
48We further require a map, $\sigma$, which maps the front-end \texttt{Memory} and the back-end's notion of \texttt{BEMemory}.
49
50\begin{displaymath}
51\mathtt{Mem}\ \alpha = \mathtt{Block} \rightarrow (\mathbb{Z} \rightarrow \alpha)
52\end{displaymath}
53
54where
55
56\begin{displaymath}
57\mathtt{Block} ::= \mathtt{Region} \cup \mathtt{ID}
58\end{displaymath}
59
60\begin{displaymath}
61\mathtt{BEMem} = \mathtt{Mem} \mathtt{Value}
62\end{displaymath}
63
64\begin{displaymath}
65\mathtt{Address} = \mathtt{BEValue} \times  \mathtt{BEValue} \\
66\end{displaymath}
67
68\begin{displaymath}
69\mathtt{Mem} = \mathtt{Block} \rightarrow (\mathbb{Z} \rightarrow \mathtt{Cont} \mid \mathtt{Value} \times \mathtt{Size} \mid \mathtt{null})
70\end{displaymath}
71
72\begin{center}
73\begin{picture}(2, 2)
74% picture of sigma mapping memory to memory
75TODO
76\end{picture}
77\end{center}
78
79\begin{displaymath}
80\mathtt{load}\ s\ a\ M = \mathtt{Some}\ v \rightarrow \forall i \leq s.\ \mathtt{load}\ s\ (a + i)\ \sigma(M) = \mathtt{Some}\ v_i
81\end{displaymath}
82
83\begin{displaymath}
84\sigma(\mathtt{store}\ v\ M) = \mathtt{store}\ \sigma(v)\ \sigma(M)
85\end{displaymath}
86
87\begin{displaymath}
88\begin{array}{rll}
89\mathtt{State} & ::=  & (\mathtt{State} : \mathtt{Frame}^* \times \mathtt{Frame} \\
90               & \mid & \mathtt{Call} : \mathtt{Frame}^* \times \mathtt{Args} \times \mathtt{Return} \times \mathtt{Fun} \\
91               & \mid & \mathtt{Return} : \mathtt{Frame}^* \times \mathtt{Value} \times \mathtt{Return}) \times \mathtt{Mem}
92\end{array}
93\end{displaymath}
94
95\begin{displaymath}
96\mathtt{State} ::= \mathtt{Frame}^* \times \mathtt{PC} \times \mathtt{SP} \times \mathtt{ISP} \times \mathtt{CARRY} \times \mathtt{REGS}
97\end{displaymath}
98
99\begin{displaymath}
100\mathtt{State} \stackrel{\sigma}{\longrightarrow} \mathtt{State}
101\end{displaymath}
102
103\begin{displaymath}
104\sigma(\mathtt{State} (\mathtt{Frame}^* \times \mathtt{Frame})) \longrightarrow ((\sigma(\mathtt{Frame}^*), \sigma(\mathtt{PC}), \sigma(\mathtt{SP}), 0, 0, \sigma(\mathtt{REGS})), \sigma(\mathtt{Mem}))
105\end{displaymath}
106
107\begin{displaymath}
108\sigma(\mathtt{Return}(-)) \longrightarrow \sigma \circ \text{return one step}
109\end{displaymath}
110
111\begin{displaymath}
112\sigma(\mathtt{Call}(-)) \longrightarrow \sigma \circ \text{call one step}
113\end{displaymath}
114
115Return one step commuting diagram:
116
117\begin{displaymath}
118\begin{diagram}
119s & \rTo^{\text{one step of execution}} & s'   \\
120  & \rdTo                             & \dTo \\
121  &                                   & \llbracket s'' \rrbracket
122\end{diagram}
123\end{displaymath}
124
125Call one step commuting diagram:
126
127\begin{displaymath}
128\begin{diagram}
129s & \rTo^{\text{one step of execution}} & s'   \\
130  & \rdTo                             & \dTo \\
131  &                                   & \llbracket s'' \rrbracket
132\end{diagram}
133\end{displaymath}
134
135\begin{displaymath}
136\begin{diagram}
137\mathtt{CALL}(\mathtt{id},\ \mathtt{args},\ \mathtt{dst},\ \mathtt{pc}),\ \mathtt{State}(\mathtt{Frame},\ \mathtt{Frames}) & \rTo & \mathtt{Call}(FINISH ME)
138\end{diagram}
139\end{displaymath}
140
141\begin{displaymath}
142\begin{array}{rcl}
143\mathtt{CALL}(\mathtt{id}, \mathtt{args}, \mathtt{dst}, \mathtt{pc}) & \longrightarrow & \mathtt{CALL\_ID}(\mathtt{id}, \sigma'(\mathtt{args}), \sigma(\mathtt{dst}), \mathtt{pc}) \\
144\mathtt{RETURN}                                                      & \longrightarrow & \mathtt{RETURN} \\
145\end{array} 
146\end{displaymath}
147
148\begin{displaymath}
149\begin{array}{rcl}
150\sigma & : & \mathtt{register} \rightarrow \mathtt{list\ register} \\
151\sigma' & : & \mathtt{list\ register} \rightarrow \mathtt{list\ register}
152\end{array} 
153\end{displaymath}
154
155\section{The RTL to ERTL translation}
156\label{sect.rtl.ertl.translation}
157
158\begin{displaymath}
159\begin{diagram}
160& & \llbracket \mathtt{CALL\_ID}(\mathtt{id}, \mathtt{args}, \mathtt{dst}, \mathtt{pc})\rrbracket & & \\
161& \ldTo^{\text{external}} & & \rdTo^{\text{internal}} & \\
162\skull & & & & \mathtt{regs} = [\mathtt{params}/-] \\
163& & & & \mathtt{sp} = \mathtt{ALLOC} \\
164& & & & \mathtt{PUSH}(\mathtt{carry}, \mathtt{regs}, \mathtt{dst}, \mathtt{return\_addr}), \mathtt{pc}_{0}, \mathtt{regs}, \mathtt{sp} \\
165\end{diagram}
166\end{displaymath}
167
168\begin{align*}
169\llbracket \mathtt{RETURN} \rrbracket \\
170\mathtt{return\_addr} & := \mathtt{top}(\mathtt{stack}) \\
171v*                    & := m(\mathtt{rv\_regs}) \\
172\mathtt{dst}, \mathtt{sp}, \mathtt{carry}, \mathtt{regs} & := \mathtt{pop} \\
173\mathtt{regs}[v* / \mathtt{dst}] \\
174\end{align*}
175
176\begin{displaymath}
177\begin{diagram}
178s    & \rTo^1 & s' & \rTo^1 & s'' \\
179\dTo &        &    & \rdTo  & \dTo \\
180\llbracket s \rrbracket & \rTo(1,3)^1 & & & \llbracket s'' \rrbracket \\ 
181\mathtt{CALL} \\
182\end{diagram}
183\end{displaymath}
184
185\begin{displaymath}
186\begin{diagram}
187s    & \rTo^1 & s' & \rTo^1 & s'' \\
188\dTo &        &    & \rdTo  & \dTo \\
189\  & \rTo(1,3) & & & \ \\
190\mathtt{RETURN} \\
191\end{diagram}
192\end{displaymath}
193
194\begin{displaymath}
195\mathtt{b\_graph\_translate}: (\mathtt{label} \rightarrow \mathtt{blist'})
196\rightarrow \mathtt{graph} \rightarrow \mathtt{graph}
197\end{displaymath}
198
199\begin{align*}
200\mathtt{theorem} &\ \mathtt{b\_graph\_translate\_ok}: \\
201& \forall  f.\forall G_{i}.\mathtt{let}\ G_{\sigma} := \mathtt{b\_graph\_translate}\ f\ G_{i}\ \mathtt{in} \\
202&       \forall l \in G_{i}.\mathtt{subgraph}\ (f\ l)\ l\ (\mathtt{next}\ l\ G_{i})\ G_{\sigma}
203\end{align*}
204
205\begin{align*}
206\mathtt{lemma} &\ \mathtt{execute\_1\_step\_ok}: \\
207&       \forall s.  \mathtt{let}\ s' := s\ \sigma\ \mathtt{in} \\
208&       \mathtt{let}\ l := pc\ s\ \mathtt{in} \\
209&       s \stackrel{1}{\rightarrow} s^{*} \Rightarrow \exists n. s' \stackrel{n}{\rightarrow} s'^{*} \wedge s'^{*} = s'\ \sigma
210\end{align*}
211
212\begin{align*}
213\mathrm{RTL\ status} & \ \ \mathrm{ERTL\ status} \\
214\mathtt{sp} & = \mathtt{spl} / \mathtt{sph} \\
215\mathtt{graph} & \mapsto \mathtt{graph} + \mathtt{prologue}(s) + \mathtt{epilogue}(s) \\
216& \mathrm{where}\ s = \mathrm{callee\ saved} + \nu \mathrm{RA} \\
217\end{align*}
218
219\begin{displaymath}
220\begin{diagram}
221\mathtt{CALL} & \rTo^1 & \mathtt{inside\ function} \\
222\dTo & & \dTo \\
223\underbrace{\ldots}_{\llbracket \mathtt{CALL} \rrbracket} & \rTo &
224\underbrace{\ldots}_{\mathtt{prologue}} \\
225\end{diagram}
226\end{displaymath}
227
228\begin{displaymath}
229\begin{diagram}
230\mathtt{RETURN} & \rTo^1 & \mathtt{.} \\
231\dTo & & \dTo \\
232\underbrace{\ldots}_{\mathtt{epilogue}} & \rTo &
233\underbrace{\ldots} \\
234\end{diagram}
235\end{displaymath}
236
237\begin{align*}
238\mathtt{prologue}(s) = & \mathtt{create\_new\_frame}; \\
239                       & \mathtt{pop\ ra}; \\
240                       & \mathtt{save\ callee\_saved}; \\
241                                                                                         & \mathtt{get\_params} \\
242                                                                                         & \ \ \mathtt{reg\_params}: \mathtt{move} \\
243                                                                                         & \ \ \mathtt{stack\_params}: \mathtt{push}/\mathtt{pop}/\mathtt{move} \\
244\end{align*}
245
246\begin{align*}
247\mathtt{epilogue}(s) = & \mathtt{save\ return\ to\ tmp\ real\ regs}; \\
248                                                                                         & \mathtt{restore\_registers}; \\
249                       & \mathtt{push\ ra}; \\
250                       & \mathtt{delete\_frame}; \\
251                       & \mathtt{save return} \\
252\end{align*}
253
254\begin{displaymath}
255\mathtt{CALL} id \mapsto \mathtt{set\_params}; \mathtt{CALL} id; \mathtt{fetch\_result}
256\end{displaymath}
257
258\section{The ERTL to LTL translation}
259\label{sect.ertl.ltl.translation}
260
261\section{The LTL to LIN translation}
262\label{sect.ltl.lin.translation}
263
264We require a map, $\sigma$, from LTL statuses, where program counters are represented as labels in a graph data structure, to LIN statuses, where program counters are natural numbers:
265\begin{displaymath}
266\mathtt{pc : label} \stackrel{\sigma}{\longrightarrow} \mathbb{N}
267\end{displaymath}
268
269The LTL to LIN translation pass also linearises the graph data structure into a list of instructions.
270Pseudocode for the linearisation process is as follows:
271
272\begin{lstlisting}
273let rec linearise graph visited required generated todo :=
274  match todo with
275  | l::todo ->
276    if l $\in$ visited then
277      let generated := generated $\cup\ \{$ Goto l $\}$ in
278      let required := required $\cup$ l in
279        linearise graph visited required generated todo
280    else
281      -- Get the instruction at label `l' in the graph
282      let lookup := graph(l) in
283      let generated := generated $\cup\ \{$ lookup $\}$ in
284      -- Find the successor of the instruction at label `l' in the graph
285      let successor := succ(l, graph) in
286      let todo := successor::todo in
287        linearise graph visited required generated todo
288  | []      -> (required, generated)
289\end{lstlisting}
290
291It is easy to see that this linearisation process eventually terminates.
292In particular, the size of the visited label set is monotonically increasing, and is bounded above by the size of the graph that we are linearising.
293
294The initial call to \texttt{linearise} sees the \texttt{visited}, \texttt{required} and \texttt{generated} sets set to the empty set, and \texttt{todo} initialized with the singleton list consisting of the entry point of the graph.
295We envisage needing to prove the following invariants on the linearisation function above:
296
297\begin{enumerate}
298\item
299$\mathtt{visited} \approx \mathtt{generated}$, where $\approx$ is \emph{multiset} equality, as \texttt{generated} is a set of instructions where instructions may mention labels multiple times, and \texttt{visited} is a set of labels,
300\item
301$\forall \mathtt{l} \in \mathtt{generated}.\ \mathtt{succ(l,\ graph)} \subseteq \mathtt{required} \cup \mathtt{todo}$,
302\item
303$\mathtt{required} \subseteq \mathtt{visited}$,
304\item
305$\mathtt{visited} \cap \mathtt{todo} = \emptyset$.
306\end{enumerate}
307
308The invariants collectively imply the following properties, crucial to correctness, about the linearisation process:
309
310\begin{enumerate}
311\item
312Every graph node is visited at most once,
313\item
314Every instruction that is generated is generated due to some graph node being visited,
315\item
316The successor instruction of every instruction that has been visited already will eventually be visited too.
317\end{enumerate}
318
319Note, because the LTL to LIN transformation is the first time the program is linearised, we must discover a notion of `well formed program' suitable for linearised forms.
320In particular, we see the notion of well formedness (yet to be formally defined) resting on the following conditions:
321
322\begin{enumerate}
323\item
324For every jump to a label in a linearised program, the target label exists at some point in the program,
325\item
326Each label is unique, appearing only once in the program,
327\item
328The final instruction of a program must be a return.
329\end{enumerate}
330
331We assume that these properties will be easy consequences of the invariants on the linearisation function defined above.
332
333The final condition above is potentially a little opaque, so we explain further.
334First, the only instructions that can reasonably appear in final position at the end of a program are returns or backward jumps, as any other instruction would cause execution to `fall out' of the end of the program (for example, when a function invoked with \texttt{CALL} returns, it returns to the next instruction past the \texttt{CALL} that invoked it).
335However, in LIN, though each function's graph has been linearised, the entire program is not yet fully linearised into a list of instructions, but rather, a list of `functions', each consisting of a linearised body along with other data.
336Each well-formed function must end with a call to \texttt{RET}, and therefore the only correct instruction that can terminate a LIN program is a \texttt{RET} instruction.
337
338\section{The LIN to ASM and ASM to MCS-51 machine code translations}
339\label{sect.lin.asm.translation}
340
341The LIN to ASM translation step is trivial, being almost the identity function.
342The only non-trivial feature of the LIN to ASM translation is that all labels are `named apart' so that there is no chance of freshly generated labels from different namespaces clashing with labels from another namespace.
343
344The ASM to MCS-51 machine code translation step, and the required statements of correctness, are found in an unpublished manuscript attached to this document.
345
346\end{document}
Note: See TracBrowser for help on using the repository browser.