source: Deliverables/D1.2/Presentations/WP4-dominic.tex @ 1859

Last change on this file since 1859 was 1859, checked in by mulligan, 8 years ago

Added a new slide with the dynamic cost computation on it

File size: 18.1 KB
Line 
1\documentclass{beamer}
2
3\usepackage{amssymb}
4\usepackage[english]{babel}
5\usepackage{listings}
6\usepackage{microtype}
7
8\usetheme{Frankfurt}
9\logo{\includegraphics[height=1.0cm]{fetopen.png}}
10
11\author{Dominic Mulligan \\ Postdoc, University of Bologna}
12\title{CerCo Work Package 4}
13\date{CerCo project review meeting\\March 2012}
14
15\AtBeginSection[]
16{
17  \begin{frame}<beamer>
18    \frametitle{Outline}
19    \tableofcontents[currentsection]
20  \end{frame}
21}
22
23\lstdefinelanguage{matita-ocaml}
24  {keywords={definition,coercion,lemma,theorem,remark,inductive,record,qed,let,in,rec,match,return,with,Type,try},
25   morekeywords={[2]whd,normalize,elim,cases,destruct},
26   morekeywords={[3]type,of},
27   mathescape=true,
28  }
29
30\lstset{language=matita-ocaml,basicstyle=\scriptsize\tt,columns=flexible,breaklines=false,
31        keywordstyle=\color{blue}\bfseries,
32        keywordstyle=[2]\color{blue},
33        keywordstyle=[3]\color{blue}\bfseries,
34        commentstyle=\color{green},
35        stringstyle=\color{blue},
36        showspaces=false,showstringspaces=false,escapechar=\%}
37
38\begin{document}
39
40\begin{frame}
41\maketitle
42\end{frame}
43
44\begin{frame}
45\frametitle{Overview of progress}
46Status at end of first period:
47\begin{center}
48{\large Executable semantics of MCS-51 in OCaml and Matita}
49\end{center}
50
51~\\~\\Goals for the end of second period:
52\begin{center}
53{\large Executable semantics of back-end intermediate languages\\~\\
54Encoding of compiler back-end in CIC}
55\end{center}
56\end{frame}
57
58\begin{frame}
59\frametitle{Contents}
60\tableofcontents
61\end{frame}
62
63\section{Rationalisation of backend languages}
64
65\begin{frame}
66\frametitle{Backend intermediate languages}
67\vspace{-1em}
68\begin{small}
69\begin{tabbing}
70\quad \=\,\quad \=\ \\
71\textsf{RTLabs}\\
72\> $\downarrow$ \> copy propagation \color{red}{$\times$} \\
73\> $\downarrow$ \> instruction selection \color{green}{{\checkmark}} \\
74\> $\downarrow$ \> change of memory models in compiler \color{green}{{\checkmark}} \\
75\textsf{RTL}\\
76\> $\downarrow$ \> constant propagation \color{red}{$\times$} \\
77\> $\downarrow$ \> calling convention made explicit \color{green}{{\checkmark}} \\
78\> $\downarrow$ \> layout of activation records \color{green}{{\checkmark}} \\
79\textsf{ERTL}\\
80\> $\downarrow$ \> register allocation and spilling \color{green}{{\checkmark}} \\
81\> $\downarrow$ \> dead code elimination \color{green}{{\checkmark}} \\
82\textsf{LTL}\\
83\> $\downarrow$ \> function linearisation \color{green}{{\checkmark}} \\
84\> $\downarrow$ \> branch compression \color{red}{$\times$} \\
85\textsf{LIN}\\
86\> $\downarrow$ \> relabeling \color{green}{{\checkmark}} \\
87\textsf{ASM}
88\end{tabbing}
89\end{small}
90\end{frame}
91
92\begin{frame}
93\frametitle{\texttt{Joint}: a new approach I}
94\begin{itemize}
95\item
96Consecutive languages in backend must be similar
97\item
98Transformations between languages translate away some small specific set of features
99\item
100But looking at OCaml code, not clear precisely what differences between languages are, as code is repeated
101\item
102Not clear if translation passes can commute, for instance
103\item
104CerCo passes are in a different order to CompCert (calling convention and register allocation done in different places)
105\item
106Instruction selection done early: changing subset of instructions used would require instructions to be duplicated everywhere in backend
107\end{itemize}
108\end{frame}
109
110\begin{frame}
111\frametitle{\texttt{Joint}: a new approach II}
112\begin{itemize}
113\item
114Idea: all of these languages are just instances of a single language
115\item
116This language \texttt{Joint} is parameterised by a type of registers to be used in instructions, and so forth
117\item
118Each language after RTLabs is now just defined as the \texttt{Joint} language instantiated with some concrete types
119\item
120Similarly for semantics: common definitions that take e.g. type representing program counters as parameters
121\end{itemize}
122\end{frame}
123
124\begin{frame}[fragile]
125\frametitle{\texttt{Joint}: a new approach III}
126\texttt{Joint} instructions allow us to embed language-specific instructions:
127\begin{lstlisting}
128inductive joint_instruction (p: params__) (globals: list ident): Type[0] :=
129 | COMMENT: String $\rightarrow$ joint_instruction p globals
130 | COST_LABEL: costlabel $\rightarrow$ joint_instruction p globals
131 ...
132 | COND: acc_a_reg p $\rightarrow$ label $\rightarrow$ joint_instruction p globals
133 | extension: extend_statements p $\rightarrow$ joint_instruction p globals.
134\end{lstlisting}
135
136\begin{lstlisting}
137inductive ertl_statement_extension: Type[0] :=
138 | ertl_st_ext_new_frame: ertl_statement_extension
139 | ertl_st_ext_del_frame: ertl_statement_extension
140 | ertl_st_ext_frame_size: register $\rightarrow$ ertl_statement_extension.
141\end{lstlisting}
142\end{frame}
143
144\begin{frame}
145\frametitle{\texttt{Joint}: a new approach IV}
146\begin{itemize}
147\item
148Languages that provide extensions need to provide translations and semantics for those extensions
149\item
150Everything else can be handled at the \texttt{Joint}-level
151\item
152This modularises the handling of these languages
153\end{itemize}
154\end{frame}
155
156\begin{frame}
157\frametitle{\texttt{Joint}: advantages I}
158\begin{itemize}
159\item
160We can recover the concrete OCaml languages by instantiating parameterized types
161\item
162Why use \texttt{Joint}?
163\item
164Reduces repeated code (fewer bugs, or places to change)
165\item
166Unify some proofs, making correctness proof easier
167\item
168Generic optimizations (e.g. constant propagation)
169\end{itemize}
170\end{frame}
171
172\begin{frame}
173\frametitle{\texttt{Joint}: advantages II}
174\begin{itemize}
175\item
176Easier to add new intermediate languages as needed
177\item
178Easier to see relationship between consecutive languages at a glance
179\item
180MCS-51 instruction set embedded in \texttt{Joint} syntax
181\item
182Simplifies instruction selection
183\item
184We can investigate which translation passes commute much more easily
185\end{itemize}
186\end{frame}
187
188\begin{frame}
189\frametitle{Semantics of \texttt{Joint} I}
190\begin{itemize}
191\item
192As mentioned, use of \texttt{Joint} also unifies semantics of these languages
193\item
194We use several sets of records, which represent the state that a program is in
195\item
196These records are parametric in representations for e.g. frames
197\end{itemize}
198\end{frame}
199
200\begin{frame}
201\frametitle{A new intermediate language}
202\begin{itemize}
203\item
204Matita backend includes a new intermediate language: RTLntc
205\item
206Sits between RTL and ERTL
207\item
208RTLntc is the RTL language where all tailcalls have been eliminated
209\item
210This language is `implicit' in the OCaml compiler
211\item
212There, the RTL to ERTL transformation eliminates tailcalls as part of translation
213\item
214But including an extra, explicit intermediate language is `almost free' using the \texttt{Joint} language approach
215\end{itemize}
216\end{frame}
217
218\begin{frame}
219\frametitle{The LTL to LIN transform I}
220\begin{itemize}
221\item
222\texttt{Joint} clearly separates fetching from program execution
223\item
224We can vary how one works whilst fixing the other
225\item
226Linearisation is moving from fetching from a graph-based language to fetching from a list-based program representation
227\item
228The order of transformations in OCaml prototype is fixed
229\item
230Linearisation takes place at a fixed place, in the translation between LTL and LIN
231\item
232The Matita compiler is different: linearisation is a generic process
233\item
234Any graph-based language can now be linearised
235\end{itemize}
236\end{frame}
237
238\begin{frame}
239\frametitle{The LTL to LIN transform II}
240\begin{itemize}
241\item
242CompCert backend linearises much sooner than CerCo's
243\item
244Can now experiment with linearising much earlier
245\item
246Many transformations and optimisations can work fine on a linearised form
247\item
248Only place in the (current) backend that requires a graph-based language is in the ERTL pass, where we do a dataflow analysis
249\end{itemize}
250\end{frame}
251
252\section{Optimizing assembler correctness proof}
253
254\begin{frame}
255\frametitle{Time not reported}
256\begin{itemize}
257\item
258We had six months of time which is not reported on in any deliverable
259\item
260We invested this time working on:
261\begin{itemize}
262\item
263The global proof sketch
264\item
265The setup of `proof infrastructure', common definitions, lemmas, invariants etc. required for main body of proof
266\item
267The proof of correctness for the assembler
268\item
269A notion of `structured traces', used throughout the compiler formalisation, as a means of eventually proving that the compiler correctly preserves costs
270\item
271Structured traces were defined in collaboration with the team at UEDIN
272\end{itemize}
273\end{itemize}
274\end{frame}
275
276\begin{frame}
277\frametitle{Assembler}
278\begin{itemize}
279\item
280After LIN, compiler spits out assembly language for MCS-51
281\item
282Assembler has pseudoinstructions similar to many commercial assembly languages
283\item
284For instance, instead of computed jumps (e.g. \texttt{SJMP} to a specific address), compiler can simply spit out a generic jump instruction to a label
285\item
286Simplifies the compiler, at the expense of introducing more proof obligations
287\item
288Now need a formalized assembler (a step further than CompCert)
289\end{itemize}
290\end{frame}
291
292\begin{frame}
293\frametitle{A problem: jump expansion}
294\begin{itemize}
295\item
296`Jump expansion' is our name for the standard `branch displacement' problem
297\item
298Given a pseudojump to a label $l$, how best can this be expanded into an assembly instruction \texttt{SJMP}, \texttt{AJMP} or \texttt{LJMP} to a concrete address?
299\item
300Problem also applies to conditional jumps
301\item
302Problem especially relevant for MCS-51 as it has a small code memory, therefore aggressive expansion of jumps into smallest possible concrete jump instruction needed
303\item
304But a known hard problem (NP-complete depending on architecture), and easy to imagine knotty configurations where size of jumps are interdependent
305\end{itemize}
306\end{frame}
307
308\begin{frame}
309\frametitle{Jump expansion I}
310\begin{itemize}
311\item
312We employed the following tactic: split the decision over how any particular pseudoinstruction is expanded from pseudoinstruction expansion
313\item
314Call the decision maker a `policy'
315\item
316We started the proof of correctness for the assembler based on the premise that a correct policy exists
317\item
318Further, we know that the assembler only fails to assemble a program if a good policy does not exist (a side-effect of using dependent types)
319\item
320A bad policy is a function that expands a given pseudojump into a concrete jump instruction that is `too small' for the distance to be jumped, or makes the program consume too much memory
321\end{itemize}
322\end{frame}
323
324\begin{frame}
325\frametitle{Jump expansion II}
326\begin{itemize}
327\item
328Jaap Boender at UNIBO has been working on a verified implementation of a good jump expansion policy for the MCS-51
329\item
330The strategy initially translates all pseudojumps as \texttt{SJMP} and then increases their size if necessary
331\item
332Termination of the procedure is proved, as well as a safety property, stating that jumps are not expanded into jumps that are too long
333\item
334His strategy is not optimal (though the computed solution is optimal for the strategy employed)
335\item
336Jaap's work is the first formal treatment of the `jump expansion problem'
337\end{itemize}
338\end{frame}
339
340\begin{frame}
341\frametitle{Assembler correctness proof}
342\begin{itemize}
343\item
344Assuming the existence of a good jump expansion property, we completed about 75\% of the correctness proof for the assembler
345\item
346Jaap's work has just been completed (modulo a few missing lemmas)
347\item
348Postponed the remainder of main assembler proof to start work on other tasks (and for Jaap to finish)
349\item
350We intend to return to proof, and publish an account of the work (possibly) as a journal paper
351\end{itemize}
352\end{frame}
353
354\section{Structured traces}
355
356
357\begin{frame}[fragile]
358\frametitle{Who pays? I}
359\begin{columns}
360\begin{column}[b]{0.5\linewidth}
361\centering
362In C:
363\begin{lstlisting}
364int main(int argc, char** argv) {
365  cost_label1:
366    ...
367    some_function();
368  cost_label2:
369    ...
370}
371\end{lstlisting}
372\end{column}
373\begin{column}[b]{0.5\linewidth}
374\centering
375In ASM:
376\begin{lstlisting}
377    ...
378  main:
379    ...
380  cost_label1:
381    ...
382    LCALL some_function
383  cost_label2:
384    ...
385\end{lstlisting}
386\end{column}
387\end{columns}
388\begin{itemize}
389\item
390Where do we put cost labels to capture execution costs?
391\item
392Proof obligations complicated by panoply of labels
393\item
394Doesn't work well with \texttt{g(h() + 2 + f())}
395\item
396Is \texttt{cost\_label2} ever reached?
397\item
398\texttt{some\_function()} may not return correctly
399\end{itemize}
400\end{frame}
401
402\begin{frame}
403\frametitle{Who pays? II}
404\begin{itemize}
405\item
406Solution: omit \texttt{cost\_label2} and just keep \texttt{cost\_label1}
407\item
408We pay for everything `up front' when entering a function
409\item
410No need to prove \texttt{some\_function()} terminates
411\item
412But now execution of functions in CerCo takes a particular form
413\item
414Functions begin with a label, call other functions that begin with a label, eventually return, but \emph{return} to the correct place
415\item
416`Recursive structure'
417\end{itemize}
418\end{frame}
419
420\begin{frame}
421\frametitle{Structured traces I}
422\begin{itemize}
423\item
424We introduced a notion of `structured traces'
425\item
426These are intended to statically capture the (good) execution traces of a program
427\item
428To borrow a slogan: they are the `computational content of a well-formed program's execution'
429\item
430Come in two variants: inductive and coinductive
431\item
432Inductive captures program execution traces that eventually halt, coinductive ones that diverge
433\end{itemize}
434\end{frame}
435
436\begin{frame}
437\frametitle{Structured traces II}
438\begin{itemize}
439\item
440I focus on the inductive variety, as used the most (for now) in the backend
441\item
442In particular, used in the proof that static and dynamic cost computations coincide
443\item
444Traces preserved by backend compilation, initially created at RTL
445\item
446This will be explained later
447\end{itemize}
448\end{frame}
449
450\begin{frame}
451\frametitle{Structured traces III}
452\begin{itemize}
453\item
454Central insight is that program execution is always in the body of some function (from \texttt{main} onwards)
455\item
456A well formed program must have labels appearing at certain spots
457\item
458Similarly, the final instruction executed when executing a function must be a \texttt{RET}
459\item
460Execution must then continue in body of calling function, at correct place
461\item
462These invariants, and others, are crystalised in the specific syntactic form of a structured trace
463\end{itemize}
464\end{frame}
465
466\begin{frame}
467\frametitle{Recursive structure of `good' execution I}
468\begin{itemize}
469\item
470All calls return to the correct place\\
471\item
472\texttt{CALL}s and \texttt{JMP}s only to labels
473\end{itemize}
474\begin{center}
475\includegraphics[scale=0.33]{recursive_structure.png}
476\end{center}
477Static cost = k($l_1$) + \ldots + k($l_4$)
478\end{frame}
479
480\begin{frame}
481\frametitle{Recursive structure of `good' execution II}
482\begin{center}
483\includegraphics[scale=0.3]{dynamic_cost.png}
484\end{center}
485Dynamic cost = \texttt{clock}(Final$_1$) - \texttt{clock}(Start$_1$)
486\end{frame}
487
488\begin{frame}
489\frametitle{Static and dynamic costs I}
490\begin{itemize}
491\item
492Given a structured trace, we can compute its associated cost
493\item
494In previous slide, cost of trace is cost assigned to \texttt{label\_1} + \texttt{label\_2} + \texttt{label\_3} (+ \texttt{label\_4})
495\item
496This is the \emph{static} cost of a program execution
497\item
498Similarly, given a program counter and a code memory (corresponding to the trace), we can compute a \emph{dynamic cost} of a simple block
499\item
500Do this by repeatedly fetching, obtaining the next instruction, and a new program counter
501\item
502This requires some predicates defining what a `good program' and what a `good program counter' are
503\item
504Want program counters on instruction boundaries
505\end{itemize}
506\end{frame}
507
508\begin{frame}
509\frametitle{Static and dynamic costs II}
510\begin{itemize}
511\item
512We aim to prove that the dynamic and static cost calculations coincide
513\item
514This would imply that the static cost computation is correct
515\item
516This proof is surprisingly tricky to complete (about 3 man months of work so far)
517\item
518Is now about 95\% complete
519\end{itemize}
520\end{frame}
521
522\section{Changes to tools and prototypes, looking forward}
523
524\begin{frame}
525\frametitle{Changes ported to OCaml prototype}
526\begin{itemize}
527\item
528Bug fixes spotted in the formalisation so far have been merged back into the OCaml compiler
529\item
530Larger changes like the \texttt{Joint} machinery have so far not
531\item
532It is unclear whether they will be
533\item
534Just a generalisation of what is already there
535\item
536Supposed to make formalisation easier
537\item
538Further, we want to ensure that the untrusted compiler is as correct as possible, for experiments in e.g. Frama-C
539\item
540Porting a large change to the untrusted compiler would jeopardise these experiments
541\end{itemize}
542\end{frame}
543
544\begin{frame}
545\frametitle{Improvements in Matita}
546\begin{itemize}
547\item
548Part of the motivation for using Matita was for CerCo to act a `stress test'
549\item
550The proofs talked about in this talk have done this
551\item
552Many improvements to Matita have been made since the last project meeting
553\item
554These include major speed-ups of e.g. printing large goals, bug fixes, the porting of CerCo code to standard library, and more options for passing to tactics
555\end{itemize}
556\end{frame}
557
558\begin{frame}
559\frametitle{The next period}
560UNIBO has following pool of remaining manpower (postdocs):
561\begin{center}
562\begin{tabular}{ll}
563Person & Man months remaining \\
564\hline
565Boender & 10 months \\
566Mulligan & 6 months \\
567Tranquilli & 10 months \\
568\end{tabular}
569\end{center}
570\begin{itemize}
571\item
572Boender finishing assembler correctness proof
573\item
574Mulligan proofs of correctness for 1 intermediate language
575\item
576Tranquilli proofs of correctness for 2 intermediate languages
577\item
578Sacerdoti Coen `floating'
579\item
580Believe we have enough manpower to complete backend (required 21 man months)
581\end{itemize}
582\end{frame}
583
584\begin{frame}
585\frametitle{Summary}
586We have:
587\begin{itemize}
588\item
589Translated the OCaml prototype's backend intermediate languages into Matita
590\item
591Implemented the translations between languages, and given the intermediate languages a semantics
592\item
593Refactored many of the backend intermediate languages into a common, parametric `joint' language, that is later specialised
594\item
595Spotted opportunities for possibly commuting backend translation passes
596\item
597Used six months to define structured traces and start the proof of correctness for the assembler
598\item
599Distinguished our proof from CompCert's by heavy use of dependent types throughout whole compiler
600\end{itemize}
601\end{frame}
602
603\end{document}
Note: See TracBrowser for help on using the repository browser.