source: Deliverables/D1.1/Presentations/Paris presentation March 2012/WP4-presentation.tex @ 1813

Last change on this file since 1813 was 1813, checked in by mulligan, 8 years ago

Added presentation that I have been working on for Paris meeting next week.

File size: 14.7 KB
Line 
1\documentclass[serif]{beamer}
2
3\usepackage[english]{babel}
4\usepackage{listings}
5\usepackage{microtype}
6
7\author{Dominic Mulligan}
8\title{CerCo Work Package 4}
9\date{CerCo project review meeting\\March 2012}
10
11\lstdefinelanguage{matita-ocaml}
12  {keywords={definition,coercion,lemma,theorem,remark,inductive,record,qed,let,in,rec,match,return,with,Type,try},
13   morekeywords={[2]whd,normalize,elim,cases,destruct},
14   morekeywords={[3]type,of},
15   mathescape=true,
16  }
17
18\lstset{language=matita-ocaml,basicstyle=\scriptsize\tt,columns=flexible,breaklines=false,
19        keywordstyle=\color{red}\bfseries,
20        keywordstyle=[2]\color{blue},
21        keywordstyle=[3]\color{blue}\bfseries,
22        commentstyle=\color{green},
23        stringstyle=\color{blue},
24        showspaces=false,showstringspaces=false}
25
26\begin{document}
27
28\begin{frame}
29\maketitle
30\end{frame}
31
32\begin{frame}
33\frametitle{Summary I}
34Relevant tasks: T4.2 and T4.3 (from the CerCo Contract):
35\begin{quotation}
36\textbf{Task 4.2}
37Functional encoding in the Calculus of Inductive Construction (indicative effort: UNIBO: 8; UDP: 2; UEDIN: 0)
38\end{quotation}
39
40\begin{quotation}
41\textbf{Task 4.3}
42Formal semantics of intermediate languages (indicative effort: UNIBO: 4; UDP: 0; UEDIN: 0)
43\end{quotation}
44\end{frame}
45
46\begin{frame}
47\frametitle{Summary II}
48Task 4.2 in detail (from the CerCo Contract):
49\begin{quotation}
50\textbf{CIC encoding: Back-end}:
51Functional Specification in the internal language of the Proof Assistant (the Calculus of Inductive Construction) of the back end of the compiler.
52This unit is meant to be composable with the front-end of deliverable D3.2, to obtain a full working compiler for Milestone M2.
53A first validation of the design principles and implementation choices for the Untrusted Cost-annotating OCaml Compiler D2.2 is achieved and reported in the deliverable, possibly triggering updates of the Untrusted Cost-annotating OCaml Compiler sources.
54\end{quotation}
55\end{frame}
56
57\begin{frame}
58\frametitle{Summary III}
59Task 4.3 in detail (from the CerCo contract):
60\begin{quotation}
61\textbf{Executable Formal Semantics of back-end intermediate languages}:
62This prototype is the formal counterpart of deliverable D2.1 for the back end side of the compiler and validates it.
63\end{quotation}
64\end{frame}
65
66\begin{frame}
67\frametitle{Backend intermediate languages I}
68\begin{itemize}
69\item
70O'Caml prototype has five backend intermediate languages: RTLabs, RTL, ERTL, LTL, LIN
71\item
72RTLabs is the `frontier' between backend and frontend, last abstract language
73\item
74RTLabs, RTL, ERTL and LTL are graph based languages: functions represented as graphs of statements, with entry and exit points
75\item
76LIN is a linearised form of LTL, and is the exit point of the compiler's backend
77\end{itemize}
78\end{frame}
79
80\begin{frame}
81\frametitle{Backend intermediate languages II}
82\begin{small}
83\begin{tabbing}
84\quad \=\,\quad \= \\
85\textsf{RTLabs}\\
86\> $\downarrow$ \> copy propagation (an endo-transformation, yet to be ported) \\
87\> $\downarrow$ \> instruction selection\\
88\> $\downarrow$ \> change of memory models in compiler\\
89\textsf{RTL}\\
90\> $\downarrow$ \> constant propagation (an endo-transformation, yet to be ported) \\
91\> $\downarrow$ \> calling convention made explicit \\
92\> $\downarrow$ \> layout of activation records \\
93\textsf{ERTL}\\
94\> $\downarrow$ \> register allocation and spilling\\
95\> $\downarrow$ \> dead code elimination\\
96\textsf{LTL}\\
97\> $\downarrow$ \> function linearisation\\
98\> $\downarrow$ \> branch compression (an endo-transformation) \\
99\textsf{LIN}\\
100\> $\downarrow$ \> relabeling\\
101\end{tabbing}
102\end{small}
103\end{frame}
104
105\begin{frame}
106\frametitle{\texttt{Joint}: a new approach}
107XXX: todo
108\end{frame}
109
110\begin{frame}
111\frametitle{Commuting passes}
112XXX: todo
113\end{frame}
114
115\begin{frame}
116\frametitle{Instruction selection in RTLabs}
117XXX: todo
118\end{frame}
119
120\begin{frame}
121\frametitle{Semantics of \texttt{Joint} I}
122\end{frame}
123
124\begin{frame}
125\frametitle{Semantics of \texttt{Joint} II}
126\end{frame}
127
128\begin{frame}
129\frametitle{A new intermediate language}
130\begin{itemize}
131\item
132Matita backend includes a new intermediate language: RTLntc
133\item
134Sits between RTL and ERTL
135\item
136RTLntc is the RTL language where all tailcalls have been eliminated
137\item
138This language is `implicit' in the O'Caml compiler
139\item
140There, the RTL to ERTL transformation eliminates tailcalls as part of translation
141\item
142But including an extra, explicit intermediate language is `almost free' using the \texttt{Joint} language approach
143\end{itemize}
144\end{frame}
145
146\begin{frame}
147\frametitle{The LTL to LIN transform I}
148\begin{itemize}
149\item
150The order of transformations in O'Caml prototype is fixed
151\item
152Linearisation takes place at a fixed place, in the translation between LTL and LIN
153\item
154The Matita compiler is different: linearisation is a generic process
155\item
156Any graph-based language can now be linearised
157\end{itemize}
158\end{frame}
159
160\begin{frame}
161\frametitle{The LTL to LIN transform II}
162\begin{itemize}
163\item
164The CompCert backend linearises much sooner than CerCo's
165\item
166We can now experiment with linearising much earlier
167\item
168Many transformations and optimisations can work fine on a linearised form
169\end{itemize}
170\end{frame}
171
172\begin{frame}
173\frametitle{`Free time'}
174\begin{itemize}
175\item
176We had six months of `free time', time not allotted in the Contract to working on any explicit deliverable
177\item
178We invested this time working on:
179\begin{itemize}
180\item
181The proof of correctness for the assembler
182\item
183A notion of `structured traces', used throughout the compiler formalisation, as a means of eventually proving that the compiler correctly preserves costs
184\item
185Structured traces were defined in collaboration with the team at UEDIN
186\end{itemize}
187\item
188I now explain in more detail how this time was used
189\end{itemize}
190\end{frame}
191
192\begin{frame}
193\frametitle{Assembler}
194\begin{itemize}
195\item
196After LIN, compiler spits out assembly language for MCS-51
197\item
198Assembler has pseudoinstructions similar to many commercial assembly languages
199\item
200For instance, instead of computed jumps (e.g. \texttt{SJMP} to a specific address), compiler can simply spit out a generic jump instruction to a label
201\item
202Simplifies the compiler, at the expense of introducing more proof obligations
203\end{itemize}
204\end{frame}
205
206\begin{frame}
207\frametitle{A problem: jump expansion}
208\begin{itemize}
209\item
210`Jump expansion' is our name for the following (standard) problem
211\item
212Given a pseudojump to a label $l$, how best can this be expanded into an assembly instruction \texttt{SJMP}, \texttt{AJMP} or \texttt{LJMP} to a concrete address?
213\item
214Problem also applies to conditional jumps
215\item
216Problem especially relevant for MCS-51 as it has a small code memory, therefore aggressive expansion of jumps into smallest possible concrete jump instruction needed
217\item
218But a known hard problem (NP-complete depending on architecture), and easy to imagine knotty configurations where size of jumps are interdependent
219\end{itemize}
220\end{frame}
221
222\begin{frame}
223\frametitle{Jump expansion I}
224\begin{itemize}
225\item
226We employed the following tactic: split the decision over how any particular pseudoinstruction is expanded from pseudoinstruction expansion
227\item
228Call the decision maker a `policy'
229\item
230We started the proof of correctness for the assembler based on the premise that a correct policy exists
231\item
232Further, we know that the assembler only fails to assemble a program if a good policy does not exist (a side-effect of using dependent types)
233\item
234A bad policy is a function that expands a given pseudojump into a concrete jump instruction that is `too small' for the distance to be jumped
235\end{itemize}
236\end{frame}
237
238\begin{frame}
239\frametitle{Jump expansion II}
240\begin{itemize}
241\item
242Boender at UNIBO has been working on a verified implementation of a good jump expansion policy for the MCS-51
243\item
244The strategy initially translates all pseudojumps as \texttt{SJMP} and then increases their size if necessary
245\item
246Termination of the procedure is proved, as well as a safety property, stating that jumps are not expanded into jumps that are too long
247\item
248Optimality is not proved
249\item
250Boender's work is the first formal treatment of the `jump expansion problem'
251\end{itemize}
252\end{frame}
253
254\begin{frame}
255\frametitle{Assembler correctness proof}
256\begin{itemize}
257\item
258Assuming the existence of a good jump expansion property, we completed about 50\% of the correctness proof for the assembler
259\item
260Boender's work constitutes a large missing piece (modulo a few missing lemmas), but holes still remain in the main proof
261\item
262We abandoned this work to start on other tasks
263\item
264However, we intend to return to the proof, and publishing an account of the work (possibly) as a journal paper
265\end{itemize}
266\end{frame}
267
268\begin{frame}
269\frametitle{Structured traces I}
270\begin{itemize}
271\item
272We introduced a notion of `structured traces'
273\item
274These are intended to statically capture the execution trace of a program
275\item
276Come in two variants: inductive and coinductive
277\item
278Inductive is meant to capture program execution traces that eventually halt
279\item
280Coinductive is meant to capture program execution traces that diverge
281\end{itemize}
282\end{frame}
283
284\begin{frame}
285\frametitle{Structured traces II}
286\begin{itemize}
287\item
288Here, I focus on the inductive variety
289\item
290Used the most (for now) in the backend
291\item
292In particular, used in the proof that static and dynamic cost computations coincide
293\item
294This will be explained later
295\end{itemize}
296\end{frame}
297
298\begin{frame}
299\frametitle{Structured traces III}
300\begin{itemize}
301\item
302Central insight is that program execution is always in the body of some function (from \texttt{main} onwards)
303\item
304A well formed program must have labels appearing at certain spots
305\item
306Similarly, the final instruction executed when executing a function must be a \texttt{RET} instruction
307\item
308These invariants, and others, are crystalised in the specific syntactic form of a structured trace
309\item
310Some examples...
311\end{itemize}
312\end{frame}
313
314\begin{frame}[fragile]
315\frametitle{Structured trace examples I}
316Traces end with a return being executed, or not:
317\begin{lstlisting}
318inductive trace_ends_with_ret: Type[0] :=
319 | ends_with_ret: trace_ends_with_ret
320 | doesnt_end_with_ret: trace_ends_with_ret.
321\end{lstlisting}
322A trace starting with a label and ending with a return:
323\begin{lstlisting}
324inductive trace_label_return (S: abstract_status): S $\rightarrow$ S $\rightarrow$ Type[0] :=
325 | tlr_base:
326  $\forall$status_before: S.
327  $\forall$status_after: S.
328   trace_label_label S ends_with_ret status_before status_after $\rightarrow$
329    trace_label_return S status_before status_after
330  ...
331\end{lstlisting}
332\end{frame}
333
334\begin{frame}[fragile]
335\frametitle{Structured trace examples II}
336A trace starting and ending with a cost label:
337\begin{lstlisting}
338...
339with trace_label_label: trace_ends_with_ret $\rightarrow$ S $\rightarrow$ S $\rightarrow$ Type[0] :=
340 | tll_base:
341  $\forall$ends_flag: trace_ends_with_ret.
342  $\forall$start_status: S.
343  $\forall$end_status: S.
344   trace_any_label S ends_flag start_status end_status $\rightarrow$
345   as_costed S start_status $\rightarrow$
346    trace_label_label S ends_flag start_status end_status
347...
348\end{lstlisting}
349\end{frame}
350
351\begin{frame}[fragile]
352\frametitle{Structured trace examples III}
353A call followed by a label on return:
354\begin{lstlisting}
355...
356with trace_any_label: trace_ends_with_ret $\rightarrow$ S $\rightarrow$ S $\rightarrow$ Type[0] :=
357...
358 | tal_base_call:
359  $\forall$status_pre_fun_call: S.   
360  $\forall$status_start_fun_call: S.
361  $\forall$status_final: S.
362   as_execute S status_pre_fun_call status_start_fun_call $\rightarrow$
363   $\forall$H:as_classifier S status_pre_fun_call cl_call.
364    as_after_return S (mk_Sig ?? status_pre_fun_call H) status_final $\rightarrow$
365    trace_label_return S status_start_fun_call status_final $\rightarrow$
366    as_costed S status_final $\rightarrow$
367     trace_any_label S doesnt_end_with_ret status_pre_fun_call status_final
368...
369\end{lstlisting}
370\end{frame}
371
372\begin{frame}[fragile]
373\frametitle{Structured trace examples IV}
374A call followed by a non-empty trace:
375\begin{lstlisting}
376...
377| tal_step_call:
378 $\forall$end_flag: trace_ends_with_ret.
379 $\forall$status_pre_fun_call: S.
380 $\forall$status_start_fun_call: S.
381 $\forall$status_after_fun_call: S.
382 $\forall$status_final: S.
383  as_execute S status_pre_fun_call status_start_fun_call $\rightarrow$
384  $\forall$H:as_classifier S status_pre_fun_call cl_call.
385   as_after_return S (mk_Sig ?? status_pre_fun_call H) status_after_fun_call $\rightarrow$
386   trace_label_return S status_start_fun_call status_after_fun_call $\rightarrow$
387   ¬ as_costed S status_after_fun_call $\rightarrow$
388   trace_any_label S end_flag status_after_fun_call status_final $\rightarrow$
389    trace_any_label S end_flag status_pre_fun_call status_final
390...
391\end{lstlisting}
392\end{frame}
393
394\begin{frame}
395\frametitle{Static and dynamic costs I}
396\begin{itemize}
397\item
398Given a structured trace, we can compute its associated cost
399\item
400This is the \emph{static} cost of a program execution
401\item
402Similarly, given a program counter and a code memory (corresponding to the trace), we can compute a \emph{dynamic cost} of a simple block
403\item
404Do this by repeatedly fetching, obtaining the next instruction, and a new program counter
405\item
406This requires some predicates defining what a `good program' and what a `good program counter' are
407\end{itemize}
408\end{frame}
409
410\begin{frame}
411\frametitle{Static and dynamic costs II}
412\begin{itemize}
413\item
414We aim to prove that the dynamic and static cost calculations coincide
415\item
416This would imply that the static cost computation is correct
417\item
418This proof is surprisingly tricky to complete (about 3 man months of work so far)
419\item
420Is now about 75\% complete
421\end{itemize}
422\end{frame}
423
424\begin{frame}
425\frametitle{Changes ported to O'Caml prototype}
426XXX: todo
427\end{frame}
428
429\begin{frame}
430\frametitle{Improvements in Matita}
431\begin{itemize}
432\item
433Part of the motivation for using Matita was for CerCo to act a `stress test'
434\item
435The proofs talked about in this talk have done this
436\item
437Many improvements to Matita have been made since the last project meeting
438\item
439These include major speed-ups of e.g. printing large goals, bug fixes, the porting of CerCo code to standard library, and more options for passing to tactics
440\end{itemize}
441\end{frame}
442
443\begin{frame}
444\frametitle{Summary}
445We have:
446\begin{itemize}
447\item
448Translated the O'Caml prototype's backend intermediate languages into Matita
449\item
450Implemented the translations between languages, and given the intermediate languages a semantics
451\item
452Refactored many of the backend intermediate languages into a common, parametric `joint' language, that is later specialised
453\item
454Spotted opportunities for commuting backend translation passes
455\item
456Used six months of `free time' to define structured traces and start the proof of correctness for the assembler
457\end{itemize}
458\end{frame}
459
460\end{document}
Note: See TracBrowser for help on using the repository browser.