source: Deliverables/D1.2/Presentations/WP4-dominic.tex

Last change on this file was 1867, checked in by mulligan, 8 years ago

Large changes following comments by IS, JMc and CSC

File size: 10.0 KB
Line 
1\documentclass{beamer}
2
3\usepackage{amssymb}
4\usepackage[english]{babel}
5\usepackage{listings}
6\usepackage{microtype}
7
8\usetheme{Frankfurt}
9\setbeamertemplate{navigation symbols}{}
10\logo{\includegraphics[height=1.0cm]{fetopen.png}}
11
12\author{Dominic Mulligan \\ University of Bologna}
13\title{CerCo Work Package 4 \\ Verified compiler --- Back-end}
14\date{CerCo project review meeting\\March 2012}
15
16\AtBeginSection[]
17{
18  \begin{frame}<beamer>
19    \frametitle{Outline}
20    \tableofcontents[currentsection]
21  \end{frame}
22}
23
24\lstdefinelanguage{matita-ocaml}
25  {keywords={definition,coercion,lemma,theorem,remark,inductive,record,qed,let,in,rec,match,return,with,Type,try},
26   morekeywords={[2]whd,normalize,elim,cases,destruct},
27   morekeywords={[3]type,of},
28   mathescape=true,
29  }
30
31\lstset{language=matita-ocaml,basicstyle=\scriptsize\tt,columns=flexible,breaklines=false,
32        keywordstyle=\color{blue}\bfseries,
33        keywordstyle=[2]\color{blue},
34        keywordstyle=[3]\color{blue}\bfseries,
35        commentstyle=\color{green},
36        stringstyle=\color{blue},
37        showspaces=false,showstringspaces=false,escapechar=\%}
38
39\begin{document}
40
41\begin{frame}
42\maketitle
43\end{frame}
44
45\begin{frame}
46\frametitle{Achievements in period 2}
47
48\begin{description}[T4.2]
49\item[T4.2] \alert{Completed}: Matita encoding of compiler back-end
50\item[T4.3] \alert{Completed}: Executable semantics for intermediate languages
51\item[T4.4] \alert{In progress}: Correctness proofs
52\end{description}
53
54\bigskip
55Deliverables D4.2 and D4.3 submitted.
56\end{frame}
57
58\begin{frame}
59\frametitle{Contents}
60\tableofcontents
61\end{frame}
62
63\section{Unification of back-end languages}
64
65\begin{frame}
66\frametitle{Back-end compilation stages}
67\vspace{-1em}
68\begin{small}
69\begin{tabbing}
70\quad \=\,\quad \=\ \\
71\textsf{RTLabs}\\
72\> $\downarrow$ \> copy propagation \color{red}{$\times$} \\
73\> $\downarrow$ \> instruction selection \color{green}{{\checkmark}} \\
74\> $\downarrow$ \> change of memory models in compiler \color{green}{{\checkmark}} \\
75\textsf{RTL}\\
76\> $\downarrow$ \> constant propagation \color{red}{$\times$} \\
77\> $\downarrow$ \> calling convention made explicit \color{green}{{\checkmark}} \\
78\> $\downarrow$ \> layout of activation records \color{green}{{\checkmark}} \\
79\textsf{ERTL}\\
80\> $\downarrow$ \> register allocation and spilling \color{green}{{\checkmark}} \\
81\> $\downarrow$ \> dead code elimination \color{green}{{\checkmark}} \\
82\textsf{LTL}\\
83\> $\downarrow$ \> function linearisation \color{green}{{\checkmark}} \\
84\> $\downarrow$ \> branch compression \color{red}{$\times$} \\
85\textsf{LIN}\\
86\> $\downarrow$ \> relabeling \color{green}{{\checkmark}} \\
87\textsf{ASM}
88\end{tabbing}
89\end{small}
90\end{frame}
91
92\begin{frame}
93\frametitle{\texttt{Joint}: unifying intermediate languages}
94In OCaml prototype each intermediate language is defined separately, but:
95\medskip
96\begin{itemize}
97\item
98Back-end languages are all closely related
99\item
100Compiler stages make small changes
101\item
102Some stages may be repeated or exchanged
103\end{itemize}
104\bigskip
105\texttt{Joint} is a parameterized language which streamlines the back-end
106\begin{itemize}
107\item
108Each back-end language is an instance of \texttt{Joint}
109\item
110Definitions, functions and proofs are shared
111\item
112Each \texttt{Joint} instance can add custom instructions
113\end{itemize}
114\end{frame}
115
116\begin{frame}
117\frametitle{\texttt{Joint}: benefits}
118\begin{itemize}
119\item
120Reduces repeated code
121\item
122Optimizations generalize to multiple languages
123\item
124Unifies proofs, making correctness proof easier
125\item
126Allows a more flexible approach to compiler translation order
127\end{itemize}
128\begin{itemize}
129\item
130New intermediate language \structure{RTLntc} \alert{removes tailcalls}
131\item
132With \texttt{Joint}, incorporating RTLntc is cheap
133\end{itemize}
134\begin{itemize}
135\item
136The prototype, and CompCert, fix where linearisation occurs
137\item
138The Matita compiler is different: any graph-based language can be linearised
139\end{itemize}
140\end{frame}
141
142\section{Correctness proof for optimizing assembler}
143
144\begin{frame}
145\frametitle{Assembler correctness}
146\begin{itemize}
147\item
148We compute costs directly on object code
149\item
150So, unlike CompCert, we formalise the assembler and machine code (75\% complete)
151\end{itemize}
152\begin{itemize}
153\item
154Forces us to address a hard problem: branch displacement
155\item
156Solution: isolate \alert{expansion policy} from generic correctness proof
157\item
158Separately (Boender): well-defined and verified expansion policies (99\% complete)
159\item
160Jaap's work is the first machine formalised treatment of branch displacement
161\end{itemize}
162\end{frame}
163
164\section{Structured traces}
165
166
167\begin{frame}[fragile]
168\frametitle{Who pays? I}
169\begin{columns}
170\begin{column}[b]{0.5\linewidth}
171\centering
172In C:
173\begin{lstlisting}
174int main(int argc, char** argv) {
175  cost_label1:
176    ...
177    some_function();
178  cost_label2:
179    ...
180}
181\end{lstlisting}
182\end{column}
183\begin{column}[b]{0.5\linewidth}
184\centering
185In ASM:
186\begin{lstlisting}
187    ...
188  main:
189    ...
190  cost_label1:
191    ...
192    LCALL some_function
193  cost_label2:
194    ...
195\end{lstlisting}
196\end{column}
197\end{columns}
198\begin{itemize}
199\item
200Where do we put cost labels to capture execution costs?
201\item
202Proof obligations complicated by panoply of labels
203\item
204Doesn't work well with \texttt{g(h() + 2 + f())}
205\item
206Is \texttt{cost\_label2} ever reached?
207\item
208\texttt{some\_function()} may not return correctly
209\end{itemize}
210\end{frame}
211
212\begin{frame}
213\frametitle{Who pays? II}
214\begin{itemize}
215\item
216Solution: omit \texttt{cost\_label2} and just keep \texttt{cost\_label1}
217\item
218We pay for everything `up front' when entering a function
219\item
220No need to prove \texttt{some\_function()} terminates
221\item
222But now execution of functions in CerCo takes a particular form
223\item
224Functions begin with a label, call other functions that begin with a label, eventually return, but \emph{return} to the correct place
225\item
226`Recursive structure'
227\end{itemize}
228\end{frame}
229
230\begin{frame}
231\frametitle{Structured traces}
232\begin{itemize}
233\item
234We introduced a notion of `structured traces'
235\item
236These are needed to statically capture the (good) execution traces of a program
237\item
238They are the `computational content of the proof of well-labelledness'
239\item
240Translation passes \alert{must} preserve structure of traces, not just their flattening
241\end{itemize}
242\end{frame}
243
244\begin{frame}
245\frametitle{Recursive structure of `good' execution}
246\begin{itemize}
247\item
248All calls return to the correct place\\
249\item
250\texttt{CALL}s and \texttt{JMP}s only to labels
251\end{itemize}
252\begin{center}
253\includegraphics[scale=0.33]{recursive_structure.png}
254\end{center}
255\end{frame}
256
257\begin{frame}
258\frametitle{Static and dynamic costs I}
259\begin{center}
260\includegraphics[scale=0.33]{recursive_structure.png}
261\begin{tabular}[b]{ll}
262    & \texttt{emit(L1)} \\
263    & \texttt{MOV r1 0}\\
264    & \texttt{ADD r1 r2}\\
265    & \alert{\texttt{CALL f}} \\
266    & \texttt{ADD r2 r2}\\
267    & \texttt{MOV r2 0}\\
268    & \texttt{RET} \\ \\ \\ \\
269\end{tabular}
270\end{center}
271\makebox[0pt][l]{k($L_1$) = k(\texttt{MOV}) + k (\texttt{ADD}) + \ldots + k(\texttt{RET})}\\
272static-cost(trace) = k($L_1$) + \ldots + k($L_4$)\\
273dynamic-cost(trace) = \texttt{clock}(Final$_1$) - \texttt{clock}(Start$_1$)\\
274\alert{Theorem: static-cost(trace) = dynamic-cost(trace)}
275\end{frame}
276
277\begin{frame}
278\frametitle{Static and dynamic costs II}
279This proof is surprisingly tricky to complete ($\approx$ 3 man months)
280\begin{itemize}
281\item Issues
282 \begin{itemize}
283  \item Finite memory\\
284   the PC can overflow during fetching and/or execution
285  \item Variable length instructions\\
286    not all addresses corresponds to instruction boundaries
287 \end{itemize}
288\item
289Requires predicates defining `good program' and `good program counter'
290\item
291Is now about 95\% complete
292\end{itemize}
293\end{frame}
294
295\section{Feedback to Matita and the OCaml prototype}
296
297\begin{frame}
298\frametitle{Changes ported to OCaml prototype}
299\begin{itemize}
300\item
301Bug fixes spotted in the formalisation so far have been merged back into the OCaml prototype compiler
302\item
303Larger changes like the \texttt{Joint} machinery have so far not
304\item
305Unclear whether they will be: their real value is in streamlining formalisation
306\item
307Further, we want to ensure that the untrusted compiler is as correct as possible, for experiments in e.g. Frama-C
308\item
309Porting a large change to the untrusted compiler would jeopardise these experiments
310\end{itemize}
311\end{frame}
312
313\begin{frame}
314\frametitle{Improvements in Matita}
315\begin{itemize}
316\item
317Part of the motivation for using Matita was for CerCo to act a `stress test'
318\item
319The proofs talked about in this talk have done this
320\item
321Many improvements to Matita have been made since the last project meeting
322\item
323These include major speed-ups of e.g. printing large goals, bug fixes, the porting of CerCo code to standard library, and more options for passing to tactics
324\end{itemize}
325\end{frame}
326
327\begin{frame}
328\frametitle{The next period}
329UNIBO has following pool of remaining manpower (postdocs):
330\begin{center}
331\begin{tabular}{ll}
332Person & Man months remaining \\
333\hline
334Boender & 10 months \\
335Mulligan & 6 months \\
336Tranquilli & 10 months \\
337\end{tabular}
338\end{center}
339\begin{itemize}
340\item
341Boender finishing assembler correctness proof
342\item
343Mulligan proofs of correctness for 1 intermediate language
344\item
345Tranquilli proofs of correctness for 2 intermediate languages
346\item
347Permanents `floating'
348\item
349Believe we have enough manpower to complete back-end (required 21 man months)
350\end{itemize}
351\end{frame}
352
353\begin{frame}
354\frametitle{Summary}
355We have:
356\begin{itemize}
357\item
358Translated the OCaml prototype's back-end intermediate languages into Matita
359\item
360Implemented the translations between languages, and given the intermediate languages a semantics
361\item
362Refactored many of the back-end intermediate languages into a common, parametric `joint' language, that is later specialised
363\item
364Spotted opportunities for possibly commuting back-end translation passes
365\item
366Used six months to define structured traces and start the proof of correctness for the assembler
367\item
368Distinguished our proof from CompCert's by heavy use of dependent types throughout whole compiler
369\end{itemize}
370\end{frame}
371
372\end{document}
Note: See TracBrowser for help on using the repository browser.