source: Deliverables/D1.2/Presentations/WP4-dominic.tex @ 1860

Last change on this file since 1860 was 1860, checked in by sacerdot, 8 years ago

...

File size: 17.8 KB
Line 
1\documentclass{beamer}
2
3\usepackage{amssymb}
4\usepackage[english]{babel}
5\usepackage{listings}
6\usepackage{microtype}
7
8\usetheme{Frankfurt}
9\logo{\includegraphics[height=1.0cm]{fetopen.png}}
10
11\author{Dominic Mulligan \\ Postdoc, University of Bologna}
12\title{CerCo Work Package 4}
13\date{CerCo project review meeting\\March 2012}
14
15\AtBeginSection[]
16{
17  \begin{frame}<beamer>
18    \frametitle{Outline}
19    \tableofcontents[currentsection]
20  \end{frame}
21}
22
23\lstdefinelanguage{matita-ocaml}
24  {keywords={definition,coercion,lemma,theorem,remark,inductive,record,qed,let,in,rec,match,return,with,Type,try},
25   morekeywords={[2]whd,normalize,elim,cases,destruct},
26   morekeywords={[3]type,of},
27   mathescape=true,
28  }
29
30\lstset{language=matita-ocaml,basicstyle=\scriptsize\tt,columns=flexible,breaklines=false,
31        keywordstyle=\color{blue}\bfseries,
32        keywordstyle=[2]\color{blue},
33        keywordstyle=[3]\color{blue}\bfseries,
34        commentstyle=\color{green},
35        stringstyle=\color{blue},
36        showspaces=false,showstringspaces=false,escapechar=\%}
37
38\begin{document}
39
40\begin{frame}
41\maketitle
42\end{frame}
43
44\begin{frame}
45\frametitle{Overview of progress}
46Status at end of first period:
47\begin{center}
48{\large Executable semantics of MCS-51 in OCaml and Matita}
49\end{center}
50
51~\\~\\Goals for the end of second period:
52\begin{center}
53{\large Executable semantics of back-end intermediate languages\\~\\
54Encoding of compiler back-end in CIC}
55\end{center}
56\end{frame}
57
58\begin{frame}
59\frametitle{Contents}
60\tableofcontents
61\end{frame}
62
63\section{Rationalisation of backend languages}
64
65\begin{frame}
66\frametitle{Backend intermediate languages}
67\vspace{-1em}
68\begin{small}
69\begin{tabbing}
70\quad \=\,\quad \=\ \\
71\textsf{RTLabs}\\
72\> $\downarrow$ \> copy propagation \color{red}{$\times$} \\
73\> $\downarrow$ \> instruction selection \color{green}{{\checkmark}} \\
74\> $\downarrow$ \> change of memory models in compiler \color{green}{{\checkmark}} \\
75\textsf{RTL}\\
76\> $\downarrow$ \> constant propagation \color{red}{$\times$} \\
77\> $\downarrow$ \> calling convention made explicit \color{green}{{\checkmark}} \\
78\> $\downarrow$ \> layout of activation records \color{green}{{\checkmark}} \\
79\textsf{ERTL}\\
80\> $\downarrow$ \> register allocation and spilling \color{green}{{\checkmark}} \\
81\> $\downarrow$ \> dead code elimination \color{green}{{\checkmark}} \\
82\textsf{LTL}\\
83\> $\downarrow$ \> function linearisation \color{green}{{\checkmark}} \\
84\> $\downarrow$ \> branch compression \color{red}{$\times$} \\
85\textsf{LIN}\\
86\> $\downarrow$ \> relabeling \color{green}{{\checkmark}} \\
87\textsf{ASM}
88\end{tabbing}
89\end{small}
90\end{frame}
91
92\begin{frame}
93\frametitle{\texttt{Joint}: a new approach I}
94\begin{itemize}
95\item
96Consecutive languages in backend must be similar
97\item
98Transformations between languages translate away some small specific set of features
99\item
100But looking at OCaml code, not clear precisely what differences between languages are, as code is repeated
101\item
102Not clear if translation passes can commute, for instance
103\item
104CerCo passes are in a different order to CompCert (calling convention and register allocation done in different places)
105\item
106Instruction selection done early: changing subset of instructions used would require instructions to be duplicated everywhere in backend
107\end{itemize}
108\end{frame}
109
110\begin{frame}
111\frametitle{\texttt{Joint}: a new approach II}
112\begin{itemize}
113\item
114Idea: all of these languages are just instances of a single language
115\item
116This language \texttt{Joint} is parameterised by a type of registers to be used in instructions, and so forth
117\item
118Each language after RTLabs is now just defined as the \texttt{Joint} language instantiated with some concrete types
119\item
120Similarly for semantics: common definitions that take e.g. type representing program counters as parameters
121\end{itemize}
122\end{frame}
123
124\begin{frame}[fragile]
125\frametitle{\texttt{Joint}: a new approach III}
126\texttt{Joint} instructions allow us to embed language-specific instructions:
127\begin{lstlisting}
128inductive joint_instruction (p: params__) (globals: list ident): Type[0] :=
129 | COMMENT: String $\rightarrow$ joint_instruction p globals
130 | COST_LABEL: costlabel $\rightarrow$ joint_instruction p globals
131 ...
132 | COND: acc_a_reg p $\rightarrow$ label $\rightarrow$ joint_instruction p globals
133 | extension: extend_statements p $\rightarrow$ joint_instruction p globals.
134\end{lstlisting}
135
136\begin{lstlisting}
137inductive ertl_statement_extension: Type[0] :=
138 | ertl_st_ext_new_frame: ertl_statement_extension
139 | ertl_st_ext_del_frame: ertl_statement_extension
140 | ertl_st_ext_frame_size: register $\rightarrow$ ertl_statement_extension.
141\end{lstlisting}
142\end{frame}
143
144\begin{frame}
145\frametitle{\texttt{Joint}: a new approach IV}
146\begin{itemize}
147\item
148Languages that provide extensions need to provide translations and semantics for those extensions
149\item
150Everything else can be handled at the \texttt{Joint}-level
151\item
152This modularises the handling of these languages
153\end{itemize}
154\end{frame}
155
156\begin{frame}
157\frametitle{\texttt{Joint}: advantages I}
158\begin{itemize}
159\item
160We can recover the concrete OCaml languages by instantiating parameterized types
161\item
162Why use \texttt{Joint}?
163\item
164Reduces repeated code (fewer bugs, or places to change)
165\item
166Unify some proofs, making correctness proof easier
167\item
168Generic optimizations (e.g. constant propagation)
169\end{itemize}
170\end{frame}
171
172\begin{frame}
173\frametitle{\texttt{Joint}: advantages II}
174\begin{itemize}
175\item
176Easier to add new intermediate languages as needed
177\item
178Easier to see relationship between consecutive languages at a glance
179\item
180MCS-51 instruction set embedded in \texttt{Joint} syntax
181\item
182Simplifies instruction selection
183\item
184We can investigate which translation passes commute much more easily
185\end{itemize}
186\end{frame}
187
188\begin{frame}
189\frametitle{Semantics of \texttt{Joint} I}
190\begin{itemize}
191\item
192As mentioned, use of \texttt{Joint} also unifies semantics of these languages
193\item
194We use several sets of records, which represent the state that a program is in
195\item
196These records are parametric in representations for e.g. frames
197\end{itemize}
198\end{frame}
199
200\begin{frame}
201\frametitle{A new intermediate language}
202\begin{itemize}
203\item
204Matita backend includes a new intermediate language: RTLntc
205\item
206Sits between RTL and ERTL
207\item
208RTLntc is the RTL language where all tailcalls have been eliminated
209\item
210This language is `implicit' in the OCaml compiler
211\item
212There, the RTL to ERTL transformation eliminates tailcalls as part of translation
213\item
214But including an extra, explicit intermediate language is `almost free' using the \texttt{Joint} language approach
215\end{itemize}
216\end{frame}
217
218\begin{frame}
219\frametitle{The LTL to LIN transform I}
220\begin{itemize}
221\item
222\texttt{Joint} clearly separates fetching from program execution
223\item
224We can vary how one works whilst fixing the other
225\item
226Linearisation is moving from fetching from a graph-based language to fetching from a list-based program representation
227\item
228The order of transformations in OCaml prototype is fixed
229\item
230Linearisation takes place at a fixed place, in the translation between LTL and LIN
231\item
232The Matita compiler is different: linearisation is a generic process
233\item
234Any graph-based language can now be linearised
235\end{itemize}
236\end{frame}
237
238\begin{frame}
239\frametitle{The LTL to LIN transform II}
240\begin{itemize}
241\item
242CompCert backend linearises much sooner than CerCo's
243\item
244Can now experiment with linearising much earlier
245\item
246Many transformations and optimisations can work fine on a linearised form
247\item
248Only place in the (current) backend that requires a graph-based language is in the ERTL pass, where we do a dataflow analysis
249\end{itemize}
250\end{frame}
251
252\section{Optimizing assembler correctness proof}
253
254\begin{frame}
255\frametitle{Time not reported}
256\begin{itemize}
257\item
258We had six months of time which is not reported on in any deliverable
259\item
260We invested this time working on:
261\begin{itemize}
262\item
263The global proof sketch
264\item
265The setup of `proof infrastructure', common definitions, lemmas, invariants etc. required for main body of proof
266\item
267The proof of correctness for the assembler
268\item
269A notion of `structured traces', used throughout the compiler formalisation, as a means of eventually proving that the compiler correctly preserves costs
270\item
271Structured traces were defined in collaboration with the team at UEDIN
272\end{itemize}
273\end{itemize}
274\end{frame}
275
276\begin{frame}
277\frametitle{Assembler}
278\begin{itemize}
279\item
280After LIN, compiler spits out assembly language for MCS-51
281\item
282Assembler has pseudoinstructions similar to many commercial assembly languages
283\item
284For instance, instead of computed jumps (e.g. \texttt{SJMP} to a specific address), compiler can simply spit out a generic jump instruction to a label
285\item
286Simplifies the compiler, at the expense of introducing more proof obligations
287\item
288Now need a formalized assembler (a step further than CompCert)
289\end{itemize}
290\end{frame}
291
292\begin{frame}
293\frametitle{A problem: jump expansion}
294\begin{itemize}
295\item
296`Jump expansion' is our name for the standard `branch displacement' problem
297\item
298Given a pseudojump to a label $l$, how best can this be expanded into an assembly instruction \texttt{SJMP}, \texttt{AJMP} or \texttt{LJMP} to a concrete address?
299\item
300Problem also applies to conditional jumps
301\item
302Problem especially relevant for MCS-51 as it has a small code memory, therefore aggressive expansion of jumps into smallest possible concrete jump instruction needed
303\item
304But a known hard problem (NP-complete depending on architecture), and easy to imagine knotty configurations where size of jumps are interdependent
305\end{itemize}
306\end{frame}
307
308\begin{frame}
309\frametitle{Jump expansion I}
310\begin{itemize}
311\item
312We employed the following tactic: split the decision over how any particular pseudoinstruction is expanded from pseudoinstruction expansion
313\item
314Call the decision maker a `policy'
315\item
316We started the proof of correctness for the assembler based on the premise that a correct policy exists
317\item
318Further, we know that the assembler only fails to assemble a program if a good policy does not exist (a side-effect of using dependent types)
319\item
320A bad policy is a function that expands a given pseudojump into a concrete jump instruction that is `too small' for the distance to be jumped, or makes the program consume too much memory
321\end{itemize}
322\end{frame}
323
324\begin{frame}
325\frametitle{Jump expansion II}
326\begin{itemize}
327\item
328Jaap Boender at UNIBO has been working on a verified implementation of a good jump expansion policy for the MCS-51
329\item
330The strategy initially translates all pseudojumps as \texttt{SJMP} and then increases their size if necessary
331\item
332Termination of the procedure is proved, as well as a safety property, stating that jumps are not expanded into jumps that are too long
333\item
334His strategy is not optimal (though the computed solution is optimal for the strategy employed)
335\item
336Jaap's work is the first formal treatment of the `jump expansion problem'
337\end{itemize}
338\end{frame}
339
340\begin{frame}
341\frametitle{Assembler correctness proof}
342\begin{itemize}
343\item
344Assuming the existence of a good jump expansion property, we completed about 75\% of the correctness proof for the assembler
345\item
346Jaap's work has just been completed (modulo a few missing lemmas)
347\item
348Postponed the remainder of main assembler proof to start work on other tasks (and for Jaap to finish)
349\item
350We intend to return to proof, and publish an account of the work (possibly) as a journal paper
351\end{itemize}
352\end{frame}
353
354\section{Structured traces}
355
356
357\begin{frame}[fragile]
358\frametitle{Who pays? I}
359\begin{columns}
360\begin{column}[b]{0.5\linewidth}
361\centering
362In C:
363\begin{lstlisting}
364int main(int argc, char** argv) {
365  cost_label1:
366    ...
367    some_function();
368  cost_label2:
369    ...
370}
371\end{lstlisting}
372\end{column}
373\begin{column}[b]{0.5\linewidth}
374\centering
375In ASM:
376\begin{lstlisting}
377    ...
378  main:
379    ...
380  cost_label1:
381    ...
382    LCALL some_function
383  cost_label2:
384    ...
385\end{lstlisting}
386\end{column}
387\end{columns}
388\begin{itemize}
389\item
390Where do we put cost labels to capture execution costs?
391\item
392Proof obligations complicated by panoply of labels
393\item
394Doesn't work well with \texttt{g(h() + 2 + f())}
395\item
396Is \texttt{cost\_label2} ever reached?
397\item
398\texttt{some\_function()} may not return correctly
399\end{itemize}
400\end{frame}
401
402\begin{frame}
403\frametitle{Who pays? II}
404\begin{itemize}
405\item
406Solution: omit \texttt{cost\_label2} and just keep \texttt{cost\_label1}
407\item
408We pay for everything `up front' when entering a function
409\item
410No need to prove \texttt{some\_function()} terminates
411\item
412But now execution of functions in CerCo takes a particular form
413\item
414Functions begin with a label, call other functions that begin with a label, eventually return, but \emph{return} to the correct place
415\item
416`Recursive structure'
417\end{itemize}
418\end{frame}
419
420\begin{frame}
421\frametitle{Structured traces I}
422\begin{itemize}
423\item
424We introduced a notion of `structured traces'
425\item
426These are intended to statically capture the (good) execution traces of a program
427\item
428To borrow a slogan: they are the `computational content of a well-formed program's execution'
429\item
430Come in two variants: inductive and coinductive
431\item
432Inductive captures program execution traces that eventually halt, coinductive ones that diverge
433\end{itemize}
434\end{frame}
435
436\begin{frame}
437\frametitle{Structured traces II}
438\begin{itemize}
439\item
440I focus on the inductive variety, as used the most (for now) in the backend
441\item
442In particular, used in the proof that static and dynamic cost computations coincide
443\item
444Traces preserved by backend compilation, initially created at RTL
445\item
446This will be explained later
447\end{itemize}
448\end{frame}
449
450\begin{frame}
451\frametitle{Structured traces III}
452\begin{itemize}
453\item
454Central insight is that program execution is always in the body of some function (from \texttt{main} onwards)
455\item
456A well formed program must have labels appearing at certain spots
457\item
458Similarly, the final instruction executed when executing a function must be a \texttt{RET}
459\item
460Execution must then continue in body of calling function, at correct place
461\item
462These invariants, and others, are crystalised in the specific syntactic form of a structured trace
463\end{itemize}
464\end{frame}
465
466\begin{frame}
467\frametitle{Recursive structure of `good' execution I}
468\begin{itemize}
469\item
470All calls return to the correct place\\
471\item
472\texttt{CALL}s and \texttt{JMP}s only to labels
473\end{itemize}
474\begin{center}
475\includegraphics[scale=0.33]{recursive_structure.png}
476\end{center}
477\end{frame}
478
479\begin{frame}
480\frametitle{Static and dynamic costs}
481\begin{center}
482\includegraphics[scale=0.33]{recursive_structure.png}
483\begin{tabular}[b]{ll}
484    & emit(l1) \\
485    & MOV r1 0\\
486    & ADD r1 r2\\
487    & CALL f \\
488    & ADD r2 r2\\
489    & MOV r2 0\\
490    & RET \\ \\ \\ \\
491\end{tabular}
492\end{center}
493\makebox[0pt][l]{k($l_1$) = k(MOV) + k (ADD) + \ldots + k(RET)}\\
494Static-cost(trace) = k($l_1$) + \ldots + k($l_4$)\\
495Dynamic-cost(trace) = \texttt{clock}(Final$_1$) - \texttt{clock}(Start$_1$)\\
496\alert{Theorem: Static-cost(trace) = Dynamic-cost(trace)}
497\end{frame}
498
499\begin{frame}
500\frametitle{Static and dynamic costs II}
501This proof is surprisingly tricky to complete ($\approx$ 3 man months)
502\begin{itemize}
503\item Issues
504 \begin{itemize}
505  \item Finite memory\\
506   the PC can overflow during fetching and/or execution
507  \item Variable length instructions\\
508    not all addresses corresponds to instruction boundaries
509 \end{itemize}
510\item
511Requires predicates defining `good program' and `good program counter'
512\item
513Is now about 95\% complete
514\end{itemize}
515\end{frame}
516
517\section{Changes to tools and prototypes, looking forward}
518
519\begin{frame}
520\frametitle{Changes ported to OCaml prototype}
521\begin{itemize}
522\item
523Bug fixes spotted in the formalisation so far have been merged back into the OCaml compiler
524\item
525Larger changes like the \texttt{Joint} machinery have so far not
526\item
527It is unclear whether they will be
528\item
529Just a generalisation of what is already there
530\item
531Supposed to make formalisation easier
532\item
533Further, we want to ensure that the untrusted compiler is as correct as possible, for experiments in e.g. Frama-C
534\item
535Porting a large change to the untrusted compiler would jeopardise these experiments
536\end{itemize}
537\end{frame}
538
539\begin{frame}
540\frametitle{Improvements in Matita}
541\begin{itemize}
542\item
543Part of the motivation for using Matita was for CerCo to act a `stress test'
544\item
545The proofs talked about in this talk have done this
546\item
547Many improvements to Matita have been made since the last project meeting
548\item
549These include major speed-ups of e.g. printing large goals, bug fixes, the porting of CerCo code to standard library, and more options for passing to tactics
550\end{itemize}
551\end{frame}
552
553\begin{frame}
554\frametitle{The next period}
555UNIBO has following pool of remaining manpower (postdocs):
556\begin{center}
557\begin{tabular}{ll}
558Person & Man months remaining \\
559\hline
560Boender & 10 months \\
561Mulligan & 6 months \\
562Tranquilli & 10 months \\
563\end{tabular}
564\end{center}
565\begin{itemize}
566\item
567Boender finishing assembler correctness proof
568\item
569Mulligan proofs of correctness for 1 intermediate language
570\item
571Tranquilli proofs of correctness for 2 intermediate languages
572\item
573Sacerdoti Coen `floating'
574\item
575Believe we have enough manpower to complete backend (required 21 man months)
576\end{itemize}
577\end{frame}
578
579\begin{frame}
580\frametitle{Summary}
581We have:
582\begin{itemize}
583\item
584Translated the OCaml prototype's backend intermediate languages into Matita
585\item
586Implemented the translations between languages, and given the intermediate languages a semantics
587\item
588Refactored many of the backend intermediate languages into a common, parametric `joint' language, that is later specialised
589\item
590Spotted opportunities for possibly commuting backend translation passes
591\item
592Used six months to define structured traces and start the proof of correctness for the assembler
593\item
594Distinguished our proof from CompCert's by heavy use of dependent types throughout whole compiler
595\end{itemize}
596\end{frame}
597
598\end{document}
Note: See TracBrowser for help on using the repository browser.