source: Deliverables/D1.2/Presentations/WP4-dominic.tex @ 1847

Last change on this file since 1847 was 1847, checked in by mulligan, 8 years ago

More changes to presentation based on comments

File size: 18.1 KB
Line 
1\documentclass{beamer}
2
3\usepackage{amssymb}
4\usepackage[english]{babel}
5\usepackage{listings}
6\usepackage{microtype}
7
8\usetheme{Frankfurt}
9\logo{\includegraphics[height=1.0cm]{fetopen.png}}
10
11\author{Dominic Mulligan}
12\title{CerCo Work Package 4}
13\date{CerCo project review meeting\\March 2012}
14
15\lstdefinelanguage{matita-ocaml}
16  {keywords={definition,coercion,lemma,theorem,remark,inductive,record,qed,let,in,rec,match,return,with,Type,try},
17   morekeywords={[2]whd,normalize,elim,cases,destruct},
18   morekeywords={[3]type,of},
19   mathescape=true,
20  }
21
22\lstset{language=matita-ocaml,basicstyle=\scriptsize\tt,columns=flexible,breaklines=false,
23        keywordstyle=\color{red}\bfseries,
24        keywordstyle=[2]\color{blue},
25        keywordstyle=[3]\color{blue}\bfseries,
26        commentstyle=\color{green},
27        stringstyle=\color{blue},
28        showspaces=false,showstringspaces=false}
29
30\begin{document}
31
32\begin{frame}
33\maketitle
34\end{frame}
35
36\begin{frame}
37\frametitle{Summary}
38Relevant tasks: T4.2 and T4.3 (from the CerCo Contract):
39\begin{quotation}
40\textbf{Task 4.2}
41Functional encoding in the Calculus of Inductive Construction (indicative effort: UNIBO: 8; UDP: 2; UEDIN: 0)
42\end{quotation}
43
44\begin{quotation}
45\textbf{Task 4.3}
46Formal semantics of intermediate languages (indicative effort: UNIBO: 4; UDP: 0; UEDIN: 0)
47\end{quotation}
48\end{frame}
49
50\begin{frame}
51\frametitle{Contents}
52\tableofcontents
53\end{frame}
54
55\section{Rationalisation of backend languages}
56
57\begin{frame}
58\frametitle{Backend intermediate languages I}
59\begin{itemize}
60\item
61OCaml prototype has five backend intermediate languages: RTLabs, RTL, ERTL, LTL, LIN
62\item
63RTLabs is the `frontier' between backend and frontend, last abstract language
64\item
65RTLabs, RTL, ERTL and LTL are graph based languages: functions represented as graphs of statements, with entry and exit points
66\item
67LIN is a linearised form of LTL, and is the exit point of the compiler's backend
68\item
69In contrast to frontend, backend is very different to CompCert's
70\end{itemize}
71\end{frame}
72
73\begin{frame}
74\frametitle{Backend intermediate languages II}
75\vspace{-1em}
76\begin{small}
77\begin{tabbing}
78\quad \=\,\quad \=\ \\
79\textsf{RTLabs}\\
80\> $\downarrow$ \> copy propagation \color{red}{$\times$} \\
81\> $\downarrow$ \> instruction selection \color{green}{{\checkmark}} \\
82\> $\downarrow$ \> change of memory models in compiler \color{green}{{\checkmark}} \\
83\textsf{RTL}\\
84\> $\downarrow$ \> constant propagation \color{red}{$\times$} \\
85\> $\downarrow$ \> calling convention made explicit \color{green}{{\checkmark}} \\
86\> $\downarrow$ \> layout of activation records \color{green}{{\checkmark}} \\
87\textsf{ERTL}\\
88\> $\downarrow$ \> register allocation and spilling \color{green}{{\checkmark}} \\
89\> $\downarrow$ \> dead code elimination \color{green}{{\checkmark}} \\
90\textsf{LTL}\\
91\> $\downarrow$ \> function linearisation \color{green}{{\checkmark}} \\
92\> $\downarrow$ \> branch compression \color{red}{$\times$} \\
93\textsf{LIN}\\
94\> $\downarrow$ \> relabeling \color{green}{{\checkmark}} \\
95\textsf{ASM}
96\end{tabbing}
97\end{small}
98\end{frame}
99
100\begin{frame}
101\frametitle{\texttt{Joint}: a new approach I}
102\begin{itemize}
103\item
104Consecutive languages in backend must be similar
105\item
106Transformations between languages translate away some small specific set of features
107\item
108But looking at OCaml code, not clear precisely what differences between languages are, as code is repeated
109\item
110Not clear if translation passes can commute, for instance
111\item
112CerCo passes are in a different order to CompCert (calling convention and register allocation done in different places)
113\item
114Instruction selection done early: changing subset of instructions used would require instructions to be duplicated everywhere in backend
115\end{itemize}
116\end{frame}
117
118\begin{frame}
119\frametitle{\texttt{Joint}: a new approach II}
120\begin{itemize}
121\item
122Idea: all of these languages are just instances of a single language
123\item
124This language \texttt{Joint} is parameterised by a type of registers to be used in instructions, and so forth
125\item
126Each language after RTLabs is now just defined as the \texttt{Joint} language instantiated with some concrete types
127\item
128Similarly for semantics: common definitions that take e.g. type representing program counters as parameters
129\end{itemize}
130\end{frame}
131
132\begin{frame}[fragile]
133\frametitle{\texttt{Joint}: a new approach III}
134\texttt{Joint} instructions allow us to embed language-specific instructions:
135\begin{lstlisting}
136inductive joint_instruction (p: params__) (globals: list ident): Type[0] :=
137 | COMMENT: String $\rightarrow$ joint_instruction p globals
138 | COST_LABEL: costlabel $\rightarrow$ joint_instruction p globals
139 ...
140 | COND: acc_a_reg p $\rightarrow$ label $\rightarrow$ joint_instruction p globals
141 | extension: extend_statements p $\rightarrow$ joint_instruction p globals.
142\end{lstlisting}
143
144\begin{lstlisting}
145inductive ertl_statement_extension: Type[0] :=
146 | ertl_st_ext_new_frame: ertl_statement_extension
147 | ertl_st_ext_del_frame: ertl_statement_extension
148 | ertl_st_ext_frame_size: register $\rightarrow$ ertl_statement_extension.
149\end{lstlisting}
150\end{frame}
151
152\begin{frame}
153\frametitle{\texttt{Joint}: a new approach IV}
154\begin{itemize}
155\item
156Languages that provide extensions need to provide translations and semantics for those extensions
157\item
158Everything else can be handled at the \texttt{Joint}-level
159\item
160This modularises the handling of these languages
161\end{itemize}
162\end{frame}
163
164\begin{frame}
165\frametitle{\texttt{Joint}: advantages I}
166\begin{itemize}
167\item
168We can recover the concrete OCaml languages by instantiating parameterized types
169\item
170Why use \texttt{Joint}?
171\item
172Reduces repeated code (fewer bugs, or places to change)
173\item
174Unify some proofs, making correctness proof easier
175\end{itemize}
176\end{frame}
177
178\begin{frame}
179\frametitle{\texttt{Joint}: advantages II}
180\begin{itemize}
181\item
182Easier to add new intermediate languages as needed
183\item
184Easier to see relationship between consecutive languages at a glance
185\item
186MCS-51 instruction set embedded in \texttt{Joint} syntax
187\item
188Simplifies instruction selection
189\item
190We can investigate which translation passes commute much more easily
191\end{itemize}
192\end{frame}
193
194\begin{frame}
195\frametitle{Semantics of \texttt{Joint} I}
196\begin{itemize}
197\item
198As mentioned, use of \texttt{Joint} also unifies semantics of these languages
199\item
200We use several sets of records, which represent the state that a program is in
201\item
202These records are parametric in representations for e.g. frames
203\end{itemize}
204\end{frame}
205
206\begin{frame}
207\frametitle{A new intermediate language}
208\begin{itemize}
209\item
210Matita backend includes a new intermediate language: RTLntc
211\item
212Sits between RTL and ERTL
213\item
214RTLntc is the RTL language where all tailcalls have been eliminated
215\item
216This language is `implicit' in the OCaml compiler
217\item
218There, the RTL to ERTL transformation eliminates tailcalls as part of translation
219\item
220But including an extra, explicit intermediate language is `almost free' using the \texttt{Joint} language approach
221\end{itemize}
222\end{frame}
223
224\begin{frame}
225\frametitle{The LTL to LIN transform I}
226\begin{itemize}
227\item
228\texttt{Joint} clearly separates fetching from program execution
229\item
230We can vary how one works whilst fixing the other
231\item
232Linearisation is moving from fetching from a graph-based language to fetching from a list-based program representation
233\item
234The order of transformations in OCaml prototype is fixed
235\item
236Linearisation takes place at a fixed place, in the translation between LTL and LIN
237\item
238The Matita compiler is different: linearisation is a generic process
239\item
240Any graph-based language can now be linearised
241\end{itemize}
242\end{frame}
243
244\begin{frame}
245\frametitle{The LTL to LIN transform II}
246\begin{itemize}
247\item
248CompCert backend linearises much sooner than CerCo's
249\item
250Can now experiment with linearising much earlier
251\item
252Many transformations and optimisations can work fine on a linearised form
253\item
254Only place in the (current) backend that requires a graph-based language is in the ERTL pass, where we do a dataflow analysis
255\end{itemize}
256\end{frame}
257
258\section{Assembler correctness proof and structured traces}
259
260\begin{frame}
261\frametitle{Time not reported}
262\begin{itemize}
263\item
264We had six months of time which is not reported on in any deliverable
265\item
266We invested this time working on:
267\begin{itemize}
268\item
269The global proof sketch
270\item
271The setup of `proof infrastructure', common definitions, lemmas, invariants etc. required for main body of proof
272\item
273The proof of correctness for the assembler
274\item
275A notion of `structured traces', used throughout the compiler formalisation, as a means of eventually proving that the compiler correctly preserves costs
276\item
277Structured traces were defined in collaboration with the team at UEDIN
278\end{itemize}
279\end{itemize}
280\end{frame}
281
282\begin{frame}
283\frametitle{Assembler}
284\begin{itemize}
285\item
286After LIN, compiler spits out assembly language for MCS-51
287\item
288Assembler has pseudoinstructions similar to many commercial assembly languages
289\item
290For instance, instead of computed jumps (e.g. \texttt{SJMP} to a specific address), compiler can simply spit out a generic jump instruction to a label
291\item
292Simplifies the compiler, at the expense of introducing more proof obligations
293\item
294Now need a formalized assembler (a step further than CompCert)
295\end{itemize}
296\end{frame}
297
298\begin{frame}
299\frametitle{A problem: jump expansion}
300\begin{itemize}
301\item
302`Jump expansion' is our name for the standard `branch displacement' problem
303\item
304Given a pseudojump to a label $l$, how best can this be expanded into an assembly instruction \texttt{SJMP}, \texttt{AJMP} or \texttt{LJMP} to a concrete address?
305\item
306Problem also applies to conditional jumps
307\item
308Problem especially relevant for MCS-51 as it has a small code memory, therefore aggressive expansion of jumps into smallest possible concrete jump instruction needed
309\item
310But a known hard problem (NP-complete depending on architecture), and easy to imagine knotty configurations where size of jumps are interdependent
311\end{itemize}
312\end{frame}
313
314\begin{frame}
315\frametitle{Jump expansion I}
316\begin{itemize}
317\item
318We employed the following tactic: split the decision over how any particular pseudoinstruction is expanded from pseudoinstruction expansion
319\item
320Call the decision maker a `policy'
321\item
322We started the proof of correctness for the assembler based on the premise that a correct policy exists
323\item
324Further, we know that the assembler only fails to assemble a program if a good policy does not exist (a side-effect of using dependent types)
325\item
326A bad policy is a function that expands a given pseudojump into a concrete jump instruction that is `too small' for the distance to be jumped, or makes the program consume too much memory
327\end{itemize}
328\end{frame}
329
330\begin{frame}
331\frametitle{Jump expansion II}
332\begin{itemize}
333\item
334Jaap Boender at UNIBO has been working on a verified implementation of a good jump expansion policy for the MCS-51
335\item
336The strategy initially translates all pseudojumps as \texttt{SJMP} and then increases their size if necessary
337\item
338Termination of the procedure is proved, as well as a safety property, stating that jumps are not expanded into jumps that are too long
339\item
340His strategy is not optimal (though the computed solution is optimal for the strategy employed)
341\item
342Jaap's work is the first formal treatment of the `jump expansion problem'
343\end{itemize}
344\end{frame}
345
346\begin{frame}
347\frametitle{Assembler correctness proof}
348\begin{itemize}
349\item
350Assuming the existence of a good jump expansion property, we completed about 75\% of the correctness proof for the assembler
351\item
352Jaap's work has just been completed (modulo a few missing lemmas)
353\item
354Postponed the remainder of main assembler proof to start work on other tasks (and for Jaap to finish)
355\item
356We intend to return to proof, and publish an account of the work (possibly) as a journal paper
357\end{itemize}
358\end{frame}
359
360\begin{frame}[fragile]
361\frametitle{Who pays? I}
362\begin{columns}
363\begin{column}[b]{0.5\linewidth}
364\centering
365In C:
366\begin{lstlisting}
367int main(int argc, char** argv) {
368  cost_label1:
369    ...
370    some_function();
371  cost_label2:
372    ...
373}
374\end{lstlisting}
375\end{column}
376\begin{column}[b]{0.5\linewidth}
377\centering
378In ASM:
379\begin{lstlisting}
380    ...
381  main:
382    ...
383  cost_label1:
384    ...
385    LCALL some_function
386  cost_label2:
387    ...
388\end{lstlisting}
389\end{column}
390\end{columns}
391\begin{itemize}
392\item
393Where do we put cost labels to capture execution costs?
394\item
395Proof obligations complicated by panoply of labels
396\item
397Doesn't work well with \texttt{g(h() + 2 + f())}
398\item
399Is \texttt{cost\_label2} ever reached?
400\item
401\texttt{some\_function()} may not return correctly
402\end{itemize}
403\end{frame}
404
405\begin{frame}
406\frametitle{Who pays? II}
407\begin{itemize}
408\item
409Solution: omit \texttt{cost\_label2} and just keep \texttt{cost\_label1}
410\item
411We pay for everything `up front' when entering a function
412\item
413No need to prove \texttt{some\_function()} terminates
414\item
415But now execution of functions in CerCo takes a particular form
416\item
417Functions begin with a label, call other functions that begin with a label, eventually return, but \emph{return} to the correct place
418\item
419`Recursive structure'
420\end{itemize}
421\end{frame}
422
423\begin{frame}
424\frametitle{Structured traces I}
425\begin{itemize}
426\item
427We introduced a notion of `structured traces'
428\item
429These are intended to statically capture the (good) execution traces of a program
430\item
431To borrow a slogan: they are the `computational content of a well-formed program's execution'
432\item
433Come in two variants: inductive and coinductive
434\item
435Inductive captures program execution traces that eventually halt, coinductive ones that diverge
436\end{itemize}
437\end{frame}
438
439\begin{frame}
440\frametitle{Structured traces II}
441\begin{itemize}
442\item
443I focus on the inductive variety, as used the most (for now) in the backend
444\item
445In particular, used in the proof that static and dynamic cost computations coincide
446\item
447Traces preserved by backend compilation, initially created at RTL
448\item
449This will be explained later
450\end{itemize}
451\end{frame}
452
453\begin{frame}
454\frametitle{Structured traces III}
455\begin{itemize}
456\item
457Central insight is that program execution is always in the body of some function (from \texttt{main} onwards)
458\item
459A well formed program must have labels appearing at certain spots
460\item
461Similarly, the final instruction executed when executing a function must be a \texttt{RET}
462\item
463Execution must then continue in body of calling function, at correct place
464\item
465These invariants, and others, are crystalised in the specific syntactic form of a structured trace
466\end{itemize}
467\end{frame}
468
469\begin{frame}
470\frametitle{Recursive structure of `good' execution}
471Structure captured by structured traces:
472\begin{center}
473\includegraphics[scale=0.33]{recursive_structure.png}
474\end{center}
475\end{frame}
476
477\begin{frame}
478\frametitle{Static and dynamic costs I}
479\begin{itemize}
480\item
481Given a structured trace, we can compute its associated cost
482\item
483In previous slide, cost of trace is cost assigned to \texttt{label\_1} + \texttt{label\_2} + \texttt{label\_3} (+ \texttt{label\_4})
484\item
485This is the \emph{static} cost of a program execution
486\item
487Similarly, given a program counter and a code memory (corresponding to the trace), we can compute a \emph{dynamic cost} of a simple block
488\item
489Do this by repeatedly fetching, obtaining the next instruction, and a new program counter
490\item
491This requires some predicates defining what a `good program' and what a `good program counter' are
492\item
493Want program counters on instruction boundaries
494\end{itemize}
495\end{frame}
496
497\begin{frame}
498\frametitle{Static and dynamic costs II}
499\begin{itemize}
500\item
501We aim to prove that the dynamic and static cost calculations coincide
502\item
503This would imply that the static cost computation is correct
504\item
505This proof is surprisingly tricky to complete (about 3 man months of work so far)
506\item
507Is now about 95\% complete
508\end{itemize}
509\end{frame}
510
511\section{Changes to tools and prototypes, looking forward}
512
513\begin{frame}
514\frametitle{Changes ported to OCaml prototype}
515\begin{itemize}
516\item
517Bug fixes spotted in the formalisation so far have been merged back into the OCaml compiler
518\item
519Larger changes like the \texttt{Joint} machinery have so far not
520\item
521It is unclear whether they will be
522\item
523Just a generalisation of what is already there
524\item
525Supposed to make formalisation easier
526\item
527Further, we want to ensure that the untrusted compiler is as correct as possible, for experiments in e.g. Frama-C
528\item
529Porting a large change to the untrusted compiler would jeopardise these experiments
530\end{itemize}
531\end{frame}
532
533\begin{frame}
534\frametitle{Improvements in Matita}
535\begin{itemize}
536\item
537Part of the motivation for using Matita was for CerCo to act a `stress test'
538\item
539The proofs talked about in this talk have done this
540\item
541Many improvements to Matita have been made since the last project meeting
542\item
543These include major speed-ups of e.g. printing large goals, bug fixes, the porting of CerCo code to standard library, and more options for passing to tactics
544\end{itemize}
545\end{frame}
546
547\begin{frame}
548\frametitle{The next period}
549UNIBO has following pool of remaining manpower (postdocs):
550\begin{center}
551\begin{tabular}{ll}
552Person & Man months remaining \\
553\hline
554Boender & 10 months \\
555Mulligan & 6 months \\
556Tranquilli & 10 months \\
557\end{tabular}
558\end{center}
559\begin{itemize}
560\item
561Boender finishing assembler correctness proof
562\item
563Mulligan proofs of correctness for 1 intermediate language
564\item
565Tranquilli proofs of correctness for 2 intermediate languages
566\item
567Sacerdoti Coen `floating'
568\item
569Believe we have enough manpower to complete backend (required 21 man months)
570\end{itemize}
571\end{frame}
572
573\begin{frame}
574\frametitle{Summary}
575We have:
576\begin{itemize}
577\item
578Translated the OCaml prototype's backend intermediate languages into Matita
579\item
580Implemented the translations between languages, and given the intermediate languages a semantics
581\item
582Refactored many of the backend intermediate languages into a common, parametric `joint' language, that is later specialised
583\item
584Spotted opportunities for possibly commuting backend translation passes
585\item
586Used six months to define structured traces and start the proof of correctness for the assembler
587\item
588Distinguished our proof from CompCert's by heavy use of dependent types throughout whole compiler
589\end{itemize}
590\end{frame}
591
592\end{document}
Note: See TracBrowser for help on using the repository browser.