source: Deliverables/D1.2/Presentations/WP4-dominic.tex @ 1867

Last change on this file since 1867 was 1867, checked in by mulligan, 8 years ago

Large changes following comments by IS, JMc and CSC

File size: 10.0 KB
RevLine 
[1844]1\documentclass{beamer}
[1813]2
[1840]3\usepackage{amssymb}
[1813]4\usepackage[english]{babel}
5\usepackage{listings}
6\usepackage{microtype}
7
[1822]8\usetheme{Frankfurt}
[1867]9\setbeamertemplate{navigation symbols}{}
[1822]10\logo{\includegraphics[height=1.0cm]{fetopen.png}}
11
[1867]12\author{Dominic Mulligan \\ University of Bologna}
13\title{CerCo Work Package 4 \\ Verified compiler --- Back-end}
[1813]14\date{CerCo project review meeting\\March 2012}
15
[1854]16\AtBeginSection[]
17{
18  \begin{frame}<beamer>
19    \frametitle{Outline}
20    \tableofcontents[currentsection]
21  \end{frame}
22}
23
[1813]24\lstdefinelanguage{matita-ocaml}
25  {keywords={definition,coercion,lemma,theorem,remark,inductive,record,qed,let,in,rec,match,return,with,Type,try},
26   morekeywords={[2]whd,normalize,elim,cases,destruct},
27   morekeywords={[3]type,of},
28   mathescape=true,
29  }
30
31\lstset{language=matita-ocaml,basicstyle=\scriptsize\tt,columns=flexible,breaklines=false,
[1854]32        keywordstyle=\color{blue}\bfseries,
[1813]33        keywordstyle=[2]\color{blue},
34        keywordstyle=[3]\color{blue}\bfseries,
35        commentstyle=\color{green},
36        stringstyle=\color{blue},
[1854]37        showspaces=false,showstringspaces=false,escapechar=\%}
[1813]38
39\begin{document}
40
41\begin{frame}
42\maketitle
43\end{frame}
44
45\begin{frame}
[1867]46\frametitle{Achievements in period 2}
[1813]47
[1867]48\begin{description}[T4.2]
49\item[T4.2] \alert{Completed}: Matita encoding of compiler back-end
50\item[T4.3] \alert{Completed}: Executable semantics for intermediate languages
51\item[T4.4] \alert{In progress}: Correctness proofs
52\end{description}
53
54\bigskip
55Deliverables D4.2 and D4.3 submitted.
[1813]56\end{frame}
57
58\begin{frame}
[1840]59\frametitle{Contents}
60\tableofcontents
[1813]61\end{frame}
62
[1867]63\section{Unification of back-end languages}
[1813]64
65\begin{frame}
[1867]66\frametitle{Back-end compilation stages}
[1847]67\vspace{-1em}
[1813]68\begin{small}
69\begin{tabbing}
[1840]70\quad \=\,\quad \=\ \\
[1813]71\textsf{RTLabs}\\
[1847]72\> $\downarrow$ \> copy propagation \color{red}{$\times$} \\
73\> $\downarrow$ \> instruction selection \color{green}{{\checkmark}} \\
74\> $\downarrow$ \> change of memory models in compiler \color{green}{{\checkmark}} \\
[1813]75\textsf{RTL}\\
[1847]76\> $\downarrow$ \> constant propagation \color{red}{$\times$} \\
77\> $\downarrow$ \> calling convention made explicit \color{green}{{\checkmark}} \\
78\> $\downarrow$ \> layout of activation records \color{green}{{\checkmark}} \\
[1813]79\textsf{ERTL}\\
[1847]80\> $\downarrow$ \> register allocation and spilling \color{green}{{\checkmark}} \\
81\> $\downarrow$ \> dead code elimination \color{green}{{\checkmark}} \\
[1813]82\textsf{LTL}\\
[1847]83\> $\downarrow$ \> function linearisation \color{green}{{\checkmark}} \\
84\> $\downarrow$ \> branch compression \color{red}{$\times$} \\
[1813]85\textsf{LIN}\\
[1847]86\> $\downarrow$ \> relabeling \color{green}{{\checkmark}} \\
[1840]87\textsf{ASM}
[1813]88\end{tabbing}
89\end{small}
90\end{frame}
91
92\begin{frame}
[1867]93\frametitle{\texttt{Joint}: unifying intermediate languages}
94In OCaml prototype each intermediate language is defined separately, but:
95\medskip
[1815]96\begin{itemize}
97\item
[1867]98Back-end languages are all closely related
[1815]99\item
[1867]100Compiler stages make small changes
[1815]101\item
[1867]102Some stages may be repeated or exchanged
[1815]103\end{itemize}
[1867]104\bigskip
105\texttt{Joint} is a parameterized language which streamlines the back-end
[1815]106\begin{itemize}
107\item
[1867]108Each back-end language is an instance of \texttt{Joint}
[1815]109\item
[1867]110Definitions, functions and proofs are shared
[1815]111\item
[1867]112Each \texttt{Joint} instance can add custom instructions
[1815]113\end{itemize}
[1813]114\end{frame}
115
[1840]116\begin{frame}
[1867]117\frametitle{\texttt{Joint}: benefits}
[1824]118\begin{itemize}
119\item
[1867]120Reduces repeated code
[1824]121\item
[1867]122Optimizations generalize to multiple languages
[1840]123\item
[1867]124Unifies proofs, making correctness proof easier
[1815]125\item
[1867]126Allows a more flexible approach to compiler translation order
[1840]127\end{itemize}
128\begin{itemize}
[1815]129\item
[1867]130New intermediate language \structure{RTLntc} \alert{removes tailcalls}
[1815]131\item
[1867]132With \texttt{Joint}, incorporating RTLntc is cheap
[1815]133\end{itemize}
134\begin{itemize}
135\item
[1867]136The prototype, and CompCert, fix where linearisation occurs
[1816]137\item
[1867]138The Matita compiler is different: any graph-based language can be linearised
[1815]139\end{itemize}
[1813]140\end{frame}
141
[1867]142\section{Correctness proof for optimizing assembler}
[1813]143
144\begin{frame}
[1867]145\frametitle{Assembler correctness}
[1813]146\begin{itemize}
147\item
[1867]148We compute costs directly on object code
[1824]149\item
[1867]150So, unlike CompCert, we formalise the assembler and machine code (75\% complete)
[1813]151\end{itemize}
152\begin{itemize}
153\item
[1867]154Forces us to address a hard problem: branch displacement
[1813]155\item
[1867]156Solution: isolate \alert{expansion policy} from generic correctness proof
[1813]157\item
[1867]158Separately (Boender): well-defined and verified expansion policies (99\% complete)
[1824]159\item
[1867]160Jaap's work is the first machine formalised treatment of branch displacement
[1813]161\end{itemize}
162\end{frame}
163
[1854]164\section{Structured traces}
165
166
[1829]167\begin{frame}[fragile]
168\frametitle{Who pays? I}
[1840]169\begin{columns}
170\begin{column}[b]{0.5\linewidth}
171\centering
[1847]172In C:
[1829]173\begin{lstlisting}
174int main(int argc, char** argv) {
[1840]175  cost_label1:
[1829]176    ...
177    some_function();
[1840]178  cost_label2:
[1829]179    ...
180}
181\end{lstlisting}
[1840]182\end{column}
183\begin{column}[b]{0.5\linewidth}
184\centering
[1847]185In ASM:
[1840]186\begin{lstlisting}
[1847]187    ...
[1840]188  main:
189    ...
190  cost_label1:
191    ...
192    LCALL some_function
193  cost_label2:
194    ...
195\end{lstlisting}
196\end{column}
197\end{columns}
[1829]198\begin{itemize}
199\item
[1847]200Where do we put cost labels to capture execution costs?
[1829]201\item
202Proof obligations complicated by panoply of labels
203\item
204Doesn't work well with \texttt{g(h() + 2 + f())}
205\item
[1847]206Is \texttt{cost\_label2} ever reached?
[1829]207\item
208\texttt{some\_function()} may not return correctly
209\end{itemize}
210\end{frame}
211
[1813]212\begin{frame}
[1829]213\frametitle{Who pays? II}
214\begin{itemize}
215\item
[1840]216Solution: omit \texttt{cost\_label2} and just keep \texttt{cost\_label1}
[1829]217\item
218We pay for everything `up front' when entering a function
219\item
220No need to prove \texttt{some\_function()} terminates
221\item
222But now execution of functions in CerCo takes a particular form
223\item
224Functions begin with a label, call other functions that begin with a label, eventually return, but \emph{return} to the correct place
225\item
226`Recursive structure'
227\end{itemize}
228\end{frame}
229
230\begin{frame}
[1867]231\frametitle{Structured traces}
[1813]232\begin{itemize}
233\item
234We introduced a notion of `structured traces'
235\item
[1867]236These are needed to statically capture the (good) execution traces of a program
[1813]237\item
[1867]238They are the `computational content of the proof of well-labelledness'
[1829]239\item
[1867]240Translation passes \alert{must} preserve structure of traces, not just their flattening
[1813]241\end{itemize}
242\end{frame}
243
244\begin{frame}
[1867]245\frametitle{Recursive structure of `good' execution}
[1813]246\begin{itemize}
247\item
[1859]248All calls return to the correct place\\
249\item
250\texttt{CALL}s and \texttt{JMP}s only to labels
251\end{itemize}
[1847]252\begin{center}
253\includegraphics[scale=0.33]{recursive_structure.png}
254\end{center}
255\end{frame}
256
257\begin{frame}
[1867]258\frametitle{Static and dynamic costs I}
[1859]259\begin{center}
[1860]260\includegraphics[scale=0.33]{recursive_structure.png}
261\begin{tabular}[b]{ll}
[1867]262    & \texttt{emit(L1)} \\
[1861]263    & \texttt{MOV r1 0}\\
264    & \texttt{ADD r1 r2}\\
[1867]265    & \alert{\texttt{CALL f}} \\
[1861]266    & \texttt{ADD r2 r2}\\
267    & \texttt{MOV r2 0}\\
268    & \texttt{RET} \\ \\ \\ \\
[1860]269\end{tabular}
[1859]270\end{center}
[1867]271\makebox[0pt][l]{k($L_1$) = k(\texttt{MOV}) + k (\texttt{ADD}) + \ldots + k(\texttt{RET})}\\
272static-cost(trace) = k($L_1$) + \ldots + k($L_4$)\\
[1861]273dynamic-cost(trace) = \texttt{clock}(Final$_1$) - \texttt{clock}(Start$_1$)\\
274\alert{Theorem: static-cost(trace) = dynamic-cost(trace)}
[1859]275\end{frame}
276
277\begin{frame}
[1813]278\frametitle{Static and dynamic costs II}
[1860]279This proof is surprisingly tricky to complete ($\approx$ 3 man months)
[1813]280\begin{itemize}
[1860]281\item Issues
282 \begin{itemize}
283  \item Finite memory\\
284   the PC can overflow during fetching and/or execution
285  \item Variable length instructions\\
286    not all addresses corresponds to instruction boundaries
287 \end{itemize}
[1813]288\item
[1860]289Requires predicates defining `good program' and `good program counter'
[1813]290\item
[1824]291Is now about 95\% complete
[1813]292\end{itemize}
293\end{frame}
294
[1867]295\section{Feedback to Matita and the OCaml prototype}
[1840]296
[1813]297\begin{frame}
[1840]298\frametitle{Changes ported to OCaml prototype}
[1821]299\begin{itemize}
300\item
[1867]301Bug fixes spotted in the formalisation so far have been merged back into the OCaml prototype compiler
[1821]302\item
303Larger changes like the \texttt{Joint} machinery have so far not
304\item
[1867]305Unclear whether they will be: their real value is in streamlining formalisation
[1821]306\item
[1824]307Further, we want to ensure that the untrusted compiler is as correct as possible, for experiments in e.g. Frama-C
308\item
309Porting a large change to the untrusted compiler would jeopardise these experiments
[1821]310\end{itemize}
[1813]311\end{frame}
312
313\begin{frame}
314\frametitle{Improvements in Matita}
315\begin{itemize}
316\item
317Part of the motivation for using Matita was for CerCo to act a `stress test'
318\item
319The proofs talked about in this talk have done this
320\item
321Many improvements to Matita have been made since the last project meeting
322\item
323These include major speed-ups of e.g. printing large goals, bug fixes, the porting of CerCo code to standard library, and more options for passing to tactics
324\end{itemize}
325\end{frame}
326
327\begin{frame}
[1824]328\frametitle{The next period}
[1829]329UNIBO has following pool of remaining manpower (postdocs):
330\begin{center}
331\begin{tabular}{ll}
332Person & Man months remaining \\
333\hline
[1840]334Boender & 10 months \\
[1829]335Mulligan & 6 months \\
336Tranquilli & 10 months \\
337\end{tabular}
338\end{center}
339\begin{itemize}
340\item
341Boender finishing assembler correctness proof
342\item
343Mulligan proofs of correctness for 1 intermediate language
344\item
345Tranquilli proofs of correctness for 2 intermediate languages
346\item
[1861]347Permanents `floating'
[1829]348\item
[1867]349Believe we have enough manpower to complete back-end (required 21 man months)
[1829]350\end{itemize}
[1824]351\end{frame}
352
353\begin{frame}
[1813]354\frametitle{Summary}
355We have:
356\begin{itemize}
357\item
[1867]358Translated the OCaml prototype's back-end intermediate languages into Matita
[1813]359\item
360Implemented the translations between languages, and given the intermediate languages a semantics
361\item
[1867]362Refactored many of the back-end intermediate languages into a common, parametric `joint' language, that is later specialised
[1813]363\item
[1867]364Spotted opportunities for possibly commuting back-end translation passes
[1813]365\item
[1824]366Used six months to define structured traces and start the proof of correctness for the assembler
367\item
368Distinguished our proof from CompCert's by heavy use of dependent types throughout whole compiler
[1813]369\end{itemize}
370\end{frame}
371
372\end{document}
Note: See TracBrowser for help on using the repository browser.