source: Deliverables/D1.2/Presentations/WP4-dominic.tex @ 1849

Last change on this file since 1849 was 1849, checked in by mulligan, 8 years ago

Added title pages to split talk into three separate sections

File size: 18.4 KB
Line 
1\documentclass{beamer}
2
3\usepackage{amssymb}
4\usepackage[english]{babel}
5\usepackage{listings}
6\usepackage{microtype}
7
8\usetheme{Frankfurt}
9\logo{\includegraphics[height=1.0cm]{fetopen.png}}
10
11\author{Dominic Mulligan}
12\title{CerCo Work Package 4}
13\date{CerCo project review meeting\\March 2012}
14
15\lstdefinelanguage{matita-ocaml}
16  {keywords={definition,coercion,lemma,theorem,remark,inductive,record,qed,let,in,rec,match,return,with,Type,try},
17   morekeywords={[2]whd,normalize,elim,cases,destruct},
18   morekeywords={[3]type,of},
19   mathescape=true,
20  }
21
22\lstset{language=matita-ocaml,basicstyle=\scriptsize\tt,columns=flexible,breaklines=false,
23        keywordstyle=\color{red}\bfseries,
24        keywordstyle=[2]\color{blue},
25        keywordstyle=[3]\color{blue}\bfseries,
26        commentstyle=\color{green},
27        stringstyle=\color{blue},
28        showspaces=false,showstringspaces=false}
29
30\begin{document}
31
32\begin{frame}
33\maketitle
34\end{frame}
35
36\begin{frame}
37\frametitle{Summary}
38Relevant tasks: T4.2 and T4.3 (from the CerCo Contract):
39\begin{quotation}
40\textbf{Task 4.2}
41Functional encoding in the Calculus of Inductive Construction (indicative effort: UNIBO: 8; UDP: 2; UEDIN: 0)
42\end{quotation}
43
44\begin{quotation}
45\textbf{Task 4.3}
46Formal semantics of intermediate languages (indicative effort: UNIBO: 4; UDP: 0; UEDIN: 0)
47\end{quotation}
48\end{frame}
49
50\begin{frame}
51\frametitle{Contents}
52\tableofcontents
53\end{frame}
54
55\section{Rationalisation of backend languages}
56
57\begin{frame}
58\begin{center}
59Rationalisation of backend languages
60\end{center}
61\end{frame}
62
63\begin{frame}
64\frametitle{Backend intermediate languages I}
65\begin{itemize}
66\item
67OCaml prototype has five backend intermediate languages: RTLabs, RTL, ERTL, LTL, LIN
68\item
69RTLabs is the `frontier' between backend and frontend, last abstract language
70\item
71RTLabs, RTL, ERTL and LTL are graph based languages: functions represented as graphs of statements, with entry and exit points
72\item
73LIN is a linearised form of LTL, and is the exit point of the compiler's backend
74\item
75In contrast to frontend, backend is very different to CompCert's
76\end{itemize}
77\end{frame}
78
79\begin{frame}
80\frametitle{Backend intermediate languages II}
81\vspace{-1em}
82\begin{small}
83\begin{tabbing}
84\quad \=\,\quad \=\ \\
85\textsf{RTLabs}\\
86\> $\downarrow$ \> copy propagation \color{red}{$\times$} \\
87\> $\downarrow$ \> instruction selection \color{green}{{\checkmark}} \\
88\> $\downarrow$ \> change of memory models in compiler \color{green}{{\checkmark}} \\
89\textsf{RTL}\\
90\> $\downarrow$ \> constant propagation \color{red}{$\times$} \\
91\> $\downarrow$ \> calling convention made explicit \color{green}{{\checkmark}} \\
92\> $\downarrow$ \> layout of activation records \color{green}{{\checkmark}} \\
93\textsf{ERTL}\\
94\> $\downarrow$ \> register allocation and spilling \color{green}{{\checkmark}} \\
95\> $\downarrow$ \> dead code elimination \color{green}{{\checkmark}} \\
96\textsf{LTL}\\
97\> $\downarrow$ \> function linearisation \color{green}{{\checkmark}} \\
98\> $\downarrow$ \> branch compression \color{red}{$\times$} \\
99\textsf{LIN}\\
100\> $\downarrow$ \> relabeling \color{green}{{\checkmark}} \\
101\textsf{ASM}
102\end{tabbing}
103\end{small}
104\end{frame}
105
106\begin{frame}
107\frametitle{\texttt{Joint}: a new approach I}
108\begin{itemize}
109\item
110Consecutive languages in backend must be similar
111\item
112Transformations between languages translate away some small specific set of features
113\item
114But looking at OCaml code, not clear precisely what differences between languages are, as code is repeated
115\item
116Not clear if translation passes can commute, for instance
117\item
118CerCo passes are in a different order to CompCert (calling convention and register allocation done in different places)
119\item
120Instruction selection done early: changing subset of instructions used would require instructions to be duplicated everywhere in backend
121\end{itemize}
122\end{frame}
123
124\begin{frame}
125\frametitle{\texttt{Joint}: a new approach II}
126\begin{itemize}
127\item
128Idea: all of these languages are just instances of a single language
129\item
130This language \texttt{Joint} is parameterised by a type of registers to be used in instructions, and so forth
131\item
132Each language after RTLabs is now just defined as the \texttt{Joint} language instantiated with some concrete types
133\item
134Similarly for semantics: common definitions that take e.g. type representing program counters as parameters
135\end{itemize}
136\end{frame}
137
138\begin{frame}[fragile]
139\frametitle{\texttt{Joint}: a new approach III}
140\texttt{Joint} instructions allow us to embed language-specific instructions:
141\begin{lstlisting}
142inductive joint_instruction (p: params__) (globals: list ident): Type[0] :=
143 | COMMENT: String $\rightarrow$ joint_instruction p globals
144 | COST_LABEL: costlabel $\rightarrow$ joint_instruction p globals
145 ...
146 | COND: acc_a_reg p $\rightarrow$ label $\rightarrow$ joint_instruction p globals
147 | extension: extend_statements p $\rightarrow$ joint_instruction p globals.
148\end{lstlisting}
149
150\begin{lstlisting}
151inductive ertl_statement_extension: Type[0] :=
152 | ertl_st_ext_new_frame: ertl_statement_extension
153 | ertl_st_ext_del_frame: ertl_statement_extension
154 | ertl_st_ext_frame_size: register $\rightarrow$ ertl_statement_extension.
155\end{lstlisting}
156\end{frame}
157
158\begin{frame}
159\frametitle{\texttt{Joint}: a new approach IV}
160\begin{itemize}
161\item
162Languages that provide extensions need to provide translations and semantics for those extensions
163\item
164Everything else can be handled at the \texttt{Joint}-level
165\item
166This modularises the handling of these languages
167\end{itemize}
168\end{frame}
169
170\begin{frame}
171\frametitle{\texttt{Joint}: advantages I}
172\begin{itemize}
173\item
174We can recover the concrete OCaml languages by instantiating parameterized types
175\item
176Why use \texttt{Joint}?
177\item
178Reduces repeated code (fewer bugs, or places to change)
179\item
180Unify some proofs, making correctness proof easier
181\end{itemize}
182\end{frame}
183
184\begin{frame}
185\frametitle{\texttt{Joint}: advantages II}
186\begin{itemize}
187\item
188Easier to add new intermediate languages as needed
189\item
190Easier to see relationship between consecutive languages at a glance
191\item
192MCS-51 instruction set embedded in \texttt{Joint} syntax
193\item
194Simplifies instruction selection
195\item
196We can investigate which translation passes commute much more easily
197\end{itemize}
198\end{frame}
199
200\begin{frame}
201\frametitle{Semantics of \texttt{Joint} I}
202\begin{itemize}
203\item
204As mentioned, use of \texttt{Joint} also unifies semantics of these languages
205\item
206We use several sets of records, which represent the state that a program is in
207\item
208These records are parametric in representations for e.g. frames
209\end{itemize}
210\end{frame}
211
212\begin{frame}
213\frametitle{A new intermediate language}
214\begin{itemize}
215\item
216Matita backend includes a new intermediate language: RTLntc
217\item
218Sits between RTL and ERTL
219\item
220RTLntc is the RTL language where all tailcalls have been eliminated
221\item
222This language is `implicit' in the OCaml compiler
223\item
224There, the RTL to ERTL transformation eliminates tailcalls as part of translation
225\item
226But including an extra, explicit intermediate language is `almost free' using the \texttt{Joint} language approach
227\end{itemize}
228\end{frame}
229
230\begin{frame}
231\frametitle{The LTL to LIN transform I}
232\begin{itemize}
233\item
234\texttt{Joint} clearly separates fetching from program execution
235\item
236We can vary how one works whilst fixing the other
237\item
238Linearisation is moving from fetching from a graph-based language to fetching from a list-based program representation
239\item
240The order of transformations in OCaml prototype is fixed
241\item
242Linearisation takes place at a fixed place, in the translation between LTL and LIN
243\item
244The Matita compiler is different: linearisation is a generic process
245\item
246Any graph-based language can now be linearised
247\end{itemize}
248\end{frame}
249
250\begin{frame}
251\frametitle{The LTL to LIN transform II}
252\begin{itemize}
253\item
254CompCert backend linearises much sooner than CerCo's
255\item
256Can now experiment with linearising much earlier
257\item
258Many transformations and optimisations can work fine on a linearised form
259\item
260Only place in the (current) backend that requires a graph-based language is in the ERTL pass, where we do a dataflow analysis
261\end{itemize}
262\end{frame}
263
264\section{Assembler correctness proof and structured traces}
265
266\begin{frame}
267\begin{center}
268Assembler correctness proof and structured traces
269\end{center}
270\end{frame}
271
272\begin{frame}
273\frametitle{Time not reported}
274\begin{itemize}
275\item
276We had six months of time which is not reported on in any deliverable
277\item
278We invested this time working on:
279\begin{itemize}
280\item
281The global proof sketch
282\item
283The setup of `proof infrastructure', common definitions, lemmas, invariants etc. required for main body of proof
284\item
285The proof of correctness for the assembler
286\item
287A notion of `structured traces', used throughout the compiler formalisation, as a means of eventually proving that the compiler correctly preserves costs
288\item
289Structured traces were defined in collaboration with the team at UEDIN
290\end{itemize}
291\end{itemize}
292\end{frame}
293
294\begin{frame}
295\frametitle{Assembler}
296\begin{itemize}
297\item
298After LIN, compiler spits out assembly language for MCS-51
299\item
300Assembler has pseudoinstructions similar to many commercial assembly languages
301\item
302For instance, instead of computed jumps (e.g. \texttt{SJMP} to a specific address), compiler can simply spit out a generic jump instruction to a label
303\item
304Simplifies the compiler, at the expense of introducing more proof obligations
305\item
306Now need a formalized assembler (a step further than CompCert)
307\end{itemize}
308\end{frame}
309
310\begin{frame}
311\frametitle{A problem: jump expansion}
312\begin{itemize}
313\item
314`Jump expansion' is our name for the standard `branch displacement' problem
315\item
316Given a pseudojump to a label $l$, how best can this be expanded into an assembly instruction \texttt{SJMP}, \texttt{AJMP} or \texttt{LJMP} to a concrete address?
317\item
318Problem also applies to conditional jumps
319\item
320Problem especially relevant for MCS-51 as it has a small code memory, therefore aggressive expansion of jumps into smallest possible concrete jump instruction needed
321\item
322But a known hard problem (NP-complete depending on architecture), and easy to imagine knotty configurations where size of jumps are interdependent
323\end{itemize}
324\end{frame}
325
326\begin{frame}
327\frametitle{Jump expansion I}
328\begin{itemize}
329\item
330We employed the following tactic: split the decision over how any particular pseudoinstruction is expanded from pseudoinstruction expansion
331\item
332Call the decision maker a `policy'
333\item
334We started the proof of correctness for the assembler based on the premise that a correct policy exists
335\item
336Further, we know that the assembler only fails to assemble a program if a good policy does not exist (a side-effect of using dependent types)
337\item
338A bad policy is a function that expands a given pseudojump into a concrete jump instruction that is `too small' for the distance to be jumped, or makes the program consume too much memory
339\end{itemize}
340\end{frame}
341
342\begin{frame}
343\frametitle{Jump expansion II}
344\begin{itemize}
345\item
346Jaap Boender at UNIBO has been working on a verified implementation of a good jump expansion policy for the MCS-51
347\item
348The strategy initially translates all pseudojumps as \texttt{SJMP} and then increases their size if necessary
349\item
350Termination of the procedure is proved, as well as a safety property, stating that jumps are not expanded into jumps that are too long
351\item
352His strategy is not optimal (though the computed solution is optimal for the strategy employed)
353\item
354Jaap's work is the first formal treatment of the `jump expansion problem'
355\end{itemize}
356\end{frame}
357
358\begin{frame}
359\frametitle{Assembler correctness proof}
360\begin{itemize}
361\item
362Assuming the existence of a good jump expansion property, we completed about 75\% of the correctness proof for the assembler
363\item
364Jaap's work has just been completed (modulo a few missing lemmas)
365\item
366Postponed the remainder of main assembler proof to start work on other tasks (and for Jaap to finish)
367\item
368We intend to return to proof, and publish an account of the work (possibly) as a journal paper
369\end{itemize}
370\end{frame}
371
372\begin{frame}[fragile]
373\frametitle{Who pays? I}
374\begin{columns}
375\begin{column}[b]{0.5\linewidth}
376\centering
377In C:
378\begin{lstlisting}
379int main(int argc, char** argv) {
380  cost_label1:
381    ...
382    some_function();
383  cost_label2:
384    ...
385}
386\end{lstlisting}
387\end{column}
388\begin{column}[b]{0.5\linewidth}
389\centering
390In ASM:
391\begin{lstlisting}
392    ...
393  main:
394    ...
395  cost_label1:
396    ...
397    LCALL some_function
398  cost_label2:
399    ...
400\end{lstlisting}
401\end{column}
402\end{columns}
403\begin{itemize}
404\item
405Where do we put cost labels to capture execution costs?
406\item
407Proof obligations complicated by panoply of labels
408\item
409Doesn't work well with \texttt{g(h() + 2 + f())}
410\item
411Is \texttt{cost\_label2} ever reached?
412\item
413\texttt{some\_function()} may not return correctly
414\end{itemize}
415\end{frame}
416
417\begin{frame}
418\frametitle{Who pays? II}
419\begin{itemize}
420\item
421Solution: omit \texttt{cost\_label2} and just keep \texttt{cost\_label1}
422\item
423We pay for everything `up front' when entering a function
424\item
425No need to prove \texttt{some\_function()} terminates
426\item
427But now execution of functions in CerCo takes a particular form
428\item
429Functions begin with a label, call other functions that begin with a label, eventually return, but \emph{return} to the correct place
430\item
431`Recursive structure'
432\end{itemize}
433\end{frame}
434
435\begin{frame}
436\frametitle{Structured traces I}
437\begin{itemize}
438\item
439We introduced a notion of `structured traces'
440\item
441These are intended to statically capture the (good) execution traces of a program
442\item
443To borrow a slogan: they are the `computational content of a well-formed program's execution'
444\item
445Come in two variants: inductive and coinductive
446\item
447Inductive captures program execution traces that eventually halt, coinductive ones that diverge
448\end{itemize}
449\end{frame}
450
451\begin{frame}
452\frametitle{Structured traces II}
453\begin{itemize}
454\item
455I focus on the inductive variety, as used the most (for now) in the backend
456\item
457In particular, used in the proof that static and dynamic cost computations coincide
458\item
459Traces preserved by backend compilation, initially created at RTL
460\item
461This will be explained later
462\end{itemize}
463\end{frame}
464
465\begin{frame}
466\frametitle{Structured traces III}
467\begin{itemize}
468\item
469Central insight is that program execution is always in the body of some function (from \texttt{main} onwards)
470\item
471A well formed program must have labels appearing at certain spots
472\item
473Similarly, the final instruction executed when executing a function must be a \texttt{RET}
474\item
475Execution must then continue in body of calling function, at correct place
476\item
477These invariants, and others, are crystalised in the specific syntactic form of a structured trace
478\end{itemize}
479\end{frame}
480
481\begin{frame}
482\frametitle{Recursive structure of `good' execution}
483Structure captured by structured traces:
484\begin{center}
485\includegraphics[scale=0.33]{recursive_structure.png}
486\end{center}
487\end{frame}
488
489\begin{frame}
490\frametitle{Static and dynamic costs I}
491\begin{itemize}
492\item
493Given a structured trace, we can compute its associated cost
494\item
495In previous slide, cost of trace is cost assigned to \texttt{label\_1} + \texttt{label\_2} + \texttt{label\_3} (+ \texttt{label\_4})
496\item
497This is the \emph{static} cost of a program execution
498\item
499Similarly, given a program counter and a code memory (corresponding to the trace), we can compute a \emph{dynamic cost} of a simple block
500\item
501Do this by repeatedly fetching, obtaining the next instruction, and a new program counter
502\item
503This requires some predicates defining what a `good program' and what a `good program counter' are
504\item
505Want program counters on instruction boundaries
506\end{itemize}
507\end{frame}
508
509\begin{frame}
510\frametitle{Static and dynamic costs II}
511\begin{itemize}
512\item
513We aim to prove that the dynamic and static cost calculations coincide
514\item
515This would imply that the static cost computation is correct
516\item
517This proof is surprisingly tricky to complete (about 3 man months of work so far)
518\item
519Is now about 95\% complete
520\end{itemize}
521\end{frame}
522
523\section{Changes to tools and prototypes, looking forward}
524
525\begin{frame}
526\begin{center}
527Changes to tools and prototypes, looking forward
528\end{center}
529\end{frame}
530
531\begin{frame}
532\frametitle{Changes ported to OCaml prototype}
533\begin{itemize}
534\item
535Bug fixes spotted in the formalisation so far have been merged back into the OCaml compiler
536\item
537Larger changes like the \texttt{Joint} machinery have so far not
538\item
539It is unclear whether they will be
540\item
541Just a generalisation of what is already there
542\item
543Supposed to make formalisation easier
544\item
545Further, we want to ensure that the untrusted compiler is as correct as possible, for experiments in e.g. Frama-C
546\item
547Porting a large change to the untrusted compiler would jeopardise these experiments
548\end{itemize}
549\end{frame}
550
551\begin{frame}
552\frametitle{Improvements in Matita}
553\begin{itemize}
554\item
555Part of the motivation for using Matita was for CerCo to act a `stress test'
556\item
557The proofs talked about in this talk have done this
558\item
559Many improvements to Matita have been made since the last project meeting
560\item
561These include major speed-ups of e.g. printing large goals, bug fixes, the porting of CerCo code to standard library, and more options for passing to tactics
562\end{itemize}
563\end{frame}
564
565\begin{frame}
566\frametitle{The next period}
567UNIBO has following pool of remaining manpower (postdocs):
568\begin{center}
569\begin{tabular}{ll}
570Person & Man months remaining \\
571\hline
572Boender & 10 months \\
573Mulligan & 6 months \\
574Tranquilli & 10 months \\
575\end{tabular}
576\end{center}
577\begin{itemize}
578\item
579Boender finishing assembler correctness proof
580\item
581Mulligan proofs of correctness for 1 intermediate language
582\item
583Tranquilli proofs of correctness for 2 intermediate languages
584\item
585Sacerdoti Coen `floating'
586\item
587Believe we have enough manpower to complete backend (required 21 man months)
588\end{itemize}
589\end{frame}
590
591\begin{frame}
592\frametitle{Summary}
593We have:
594\begin{itemize}
595\item
596Translated the OCaml prototype's backend intermediate languages into Matita
597\item
598Implemented the translations between languages, and given the intermediate languages a semantics
599\item
600Refactored many of the backend intermediate languages into a common, parametric `joint' language, that is later specialised
601\item
602Spotted opportunities for possibly commuting backend translation passes
603\item
604Used six months to define structured traces and start the proof of correctness for the assembler
605\item
606Distinguished our proof from CompCert's by heavy use of dependent types throughout whole compiler
607\end{itemize}
608\end{frame}
609
610\end{document}
Note: See TracBrowser for help on using the repository browser.