source: Deliverables/D1.2/Presentations/WP3.tex

Last change on this file was 1866, checked in by campbell, 8 years ago

Drop "extra detail" from WP3.

File size: 16.9 KB
RevLine 
[1830]1\documentclass[nopanel]{beamer}
[1845]2\usepackage{graphicx,color}
[1825]3
4\usetheme{Frankfurt}
5\logo{\includegraphics[height=1.0cm]{fetopen.png}}
6
7\usepackage[english]{babel}
8%\usepackage{inputenc}
9
10
11%\documentclass[nopanel,ucs]{beamer}
12\usepackage[utf8x]{inputenc}
13%\usepackage[T1,OT1]{fontenc}
14%\usepackage{pifont}
15%\usepackage{aeguill}
16% using aeguill forces us to T1 everywhere, resulting in horribleness
17%  \def\selectguillfont{\fontencoding{T1}\fontfamily{cmtt}\selectfont}%
18%\def\guillemotleft{{\selectguillfont\symbol{19}}}%
19%\def\guillemotright{{\selectguillfont\symbol{20}}}%
20%\SetUnicodeOption{postscript}
21%\usepackage{hyperref}
22\usepackage{latexsym}
23\usepackage{listings}
24
25\def\cerco{CerCo}
26\def\ocaml{OCaml}
27
28\newcommand{\red}[1]{\textcolor{red}{#1}}
29\newcommand{\blue}[1]{\textcolor{blue}{#1}}
30\definecolor{darkgreen}{rgb}{0,0.4,0}
31\newcommand{\green}[1]{\textcolor{darkgreen}{#1}}
32\newcommand{\gray}[1]{\textcolor{gray}{#1}}
33
34%\setbeamertemplate{footline}[page number]
35%\setbeamertemplate{sidebar right}{}
[1852]36\setbeamertemplate{navigation symbols}{}
[1825]37
38\lstdefinelanguage{coq}
39  {keywords={Definition,Lemma,Theorem,Remark,Qed,Save,Inductive,Record},
40   morekeywords={[2]if,then,else},
41  }
42
43\lstdefinelanguage{matita}
44  {keywords={definition,lemma,theorem,remark,inductive,record,qed,let,rec,match,with,Type,and,on,return},
45   morekeywords={[2]whd,normalize,elim,cases,destruct},
46   mathescape=true,
47   morecomment=[n]{(*}{*)},
48  }
49
50\lstset{language=matita,basicstyle=\small\tt,columns=flexible,breaklines=false,
51        keywordstyle=\color{red}\bfseries,
52        keywordstyle=[2]\color{blue},
[1858]53        commentstyle=\color{darkgreen},
[1825]54        stringstyle=\color{blue},
55        showspaces=false,showstringspaces=false}
56
57\lstset{extendedchars=false}
58\lstset{inputencoding=utf8x}
59\DeclareUnicodeCharacter{8797}{:=}
60\DeclareUnicodeCharacter{10746}{++}
61\DeclareUnicodeCharacter{9001}{\ensuremath{\langle}}
62\DeclareUnicodeCharacter{9002}{\ensuremath{\rangle}}
63
64
65\begin{document}
66
67\title{CerCo Work Package 3\\
68Verified Compiler --- Front-end}
69\author{Brian Campbell\\ \scriptsize\smallskip
70\href{mailto:Brian.Campbell@ed.ac.uk}{\texttt{Brian.Campbell@ed.ac.uk}}}
71\institute{Laboratory for Foundations of Computer Science\\
72The University of Edinburgh}
73\date{March 16, 2011}
74\frame{\titlepage}
75
76
77\frame{
[1852]78\frametitle{Achievements in period 2}
[1825]79
[1852]80\begin{description}[T3.2]
81\item[T3.2] \alert{Completed}: Matita encoding of compiler front-end
82\item[T3.3] \alert{Completed}: Executable semantics for intermediate  languages
83\item[T3.4] \alert{In progress}: Correctness proofs
[1825]84\end{description}
85
86\bigskip
[1852]87Deliverables D3.2 and D3.3 submitted.
[1825]88
[1852]89\bigskip
90This talk is an interleaved presentation of these results.
91
92
[1825]93}
94
95
96\frame{
97\frametitle{Outline}
98
99\begin{itemize}
[1852]100\item Shared definitions
[1863]101%\item The front-end:
[1852]102%\end{itemize}
103%\vspace{-3ex}
104\item \begin{tabbing}
[1825]105\quad \= $\downarrow$ \quad \= \kill
106\gray{\textsf{C} (unformalized)}\\
107\> \gray{$\downarrow$} \> \gray{CIL parser (unformalized \ocaml)}\\
108\textsf{Clight}\\
109\> $\downarrow$ \> cast removal\\
[1836]110\> \gray{$\downarrow$} \> \gray{add runtime functions}\\
111\> $\downarrow$ \> cost labelling\\
[1825]112\> $\downarrow$ \> stack variable allocation and control structure
113 simplification\\
114\textsf{Cminor}\\
115\> $\downarrow$ \> generate global variable initialisation code\\
116\> $\downarrow$ \> transform to RTL graph\\
117\textsf{RTLabs}\\
[1863]118\> \,\gray{\vdots} \> \gray{start of target specific back-end}
[1825]119\end{tabbing}
[1852]120%\vspace{-3ex}
121%\begin{itemize}
[1836]122\item Structured traces
[1825]123\end{itemize}
124
125}
126
[1852]127\section{Shared}
[1825]128
129\begin{frame}[fragile]
[1852]130\frametitle{Shared definitions}
[1825]131
[1852]132Identifiers: variables, cost labels, CFG labels, \dots
[1825]133
134\begin{itemize}
135\item Represented by arbitrarily large binary numbers and trees
136\begin{itemize}
[1852]137\item D3.3 `lazy failure' approach reusing existing structures
138\item Invariant's types may depend on success of name generation
139%\item Use this CompCert-like system instead
[1825]140\end{itemize}
141\item Tags for some type safety:
142\begin{lstlisting}[language=matita]
143inductive identifier (tag:String) : Type[0] ≝
144  an_identifier : Word $\rightarrow$ identifier tag.
145\end{lstlisting}
146\end{itemize}
147
[1852]148Memory, global environments from D3.1\\
149Bit vector based arithmetic from D4.1
[1825]150  \begin{itemize}
151  \item added extra operations, increased efficiency
152  \end{itemize}
[1852]153%Common record type for small-step executable semantics
154%  \begin{itemize}
155%  \item used for animation
156%  \item intended to be used when defining simulations
157%  \end{itemize}
[1825]158
159
[1852]160
[1825]161\textsf{Cminor} and \textsf{RTLabs} share operations on values.
162
163
[1852]164\end{frame}
[1825]165
[1830]166\section{Clight}
[1825]167\frame{
168\frametitle{Clight: syntax and semantics}
169
[1863]170Modest evolution from D3.1:
[1825]171
172\medskip
173\begin{itemize}
174\item use precise bit vectors instead of integer and range proof
175\item adapting to changes in common definitions
176\item added function to produce fresh name generator
177\begin{itemize}
178\item used for adding temporary variables
179\item works by finding largest existing identifier
180\item have shown necessary freshness proof
181\end{itemize}
182\end{itemize}
183}
184
185\begin{frame}[fragile]
186\frametitle{Clight: cast simplification}
187
[1852]188C insists on arithmetic promotion
[1825]189\begin{itemize}
190\item CIL-based \ocaml{} parser adds suitable casts
191\item bad for our target (32-bit ops instead of 8-bit)
192\end{itemize}
193
[1852]194\medskip
195Prototype recognises fixed patterns to simplify:
[1825]196\[ (t)((t_1)e_1\ op\ (t_2)e_2) \]
[1852]197\vspace{-4ex}
[1825]198\begin{itemize}
199\item Deep pattern matching is awkward in Matita
200\item Misses (e.g.) \lstinline[language=C]'(char)((int)a+(int)b+(int)c)'
201\end{itemize}
202
[1852]203\medskip
204\red{Instead}: recursive `coerce to desired type' approach.
205
206Have done some preliminary proofs that this
207works.
[1845]208% (Doesn't get some things,
209% e.g., \lstinline'==')
[1825]210\end{frame}
211
212\frame{
[1836]213\frametitle{Clight: cost labelling}
[1825]214
215Adds cost labels to Clight program.
216
217\begin{itemize}
[1852]218\item a simple recursive function, like prototype
[1825]219\item uses common identifiers definition to produce fresh cost labels
220\end{itemize}
221
222The cost labelling is sound and precise, so we will be able to prove
223some syntactic properties required later:
224\begin{enumerate}
225\item every function starts with a cost label
226\item every branching instruction is followed by a cost label
227\item the head of every loop (including \lstinline'goto's) is a cost label
228\end{enumerate}
229
230}
231
[1830]232\section{Cminor}
[1825]233\begin{frame}[fragile]
234\frametitle{Cminor syntax and semantics}
235
[1852]236Similar to CompCert's --- but developed from prototype.
[1825]237
238\begin{itemize}
239\item Some type enforcement for expressions:
240\end{itemize}
241
[1836]242\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt]
[1825]243inductive expr : typ $\rightarrow$ Type[0] ≝
244| Id : $\forall$t. ident $\rightarrow$ expr t
245| Cst : $\forall$t. constant $\rightarrow$ expr t
[1836]246| Op1 : $\forall$t,t'. unary_operation t t' $\rightarrow$ expr t $\rightarrow$ expr t'
[1825]247| Op2 : $\forall$t1,t2,t'. binary_operation $\rightarrow$ expr t1 $\rightarrow$ expr t2 $\rightarrow$ expr t'
248| Mem : $\forall$t,r. memory_chunk $\rightarrow$ expr (ASTptr r) $\rightarrow$ expr t
249| Cond : $\forall$sz,sg,t. expr (ASTint sz sg) $\rightarrow$ expr t $\rightarrow$ expr t $\rightarrow$ expr t
250| Ecost : $\forall$t. costlabel $\rightarrow$ expr t $\rightarrow$ expr t.
251\end{lstlisting}
252\begin{itemize}
[1863]253%\item enforces some checking during production
254\item indirectly forces checking that temporaries are fresh
255\item[$\star$] reduces failure cases in translation to RTLabs
[1825]256\end{itemize}
257
258Semantics unexciting small-step function.
259\end{frame}
260
261\begin{frame}
262\frametitle{Adding invariants}
263
264Lots of sources of failure in front-end due to badly formed programs:
265\begin{itemize}
266\item missing variables, graph nodes, \dots
267\item badly structured programs
268\end{itemize}
269
270\bigskip
271Have been refining the intermediate languages to rule these out:
272\begin{enumerate}
273\item \gray{Indexing Cminor syntax by nesting depth}
274\item adding type constraints
275\item adding invariants asserting presence of variables, etc.
276\end{enumerate}
277
278\end{frame}
279
280\begin{frame}[fragile]
281\frametitle{Invariants for identifiers}
282
283
[1852]284Invariants change between languages and compilation:
285
[1825]286\begin{description}
287\item[syntax] constraints between parts of function body and other information
288about the function
289\item[compilation] constraints between statements and expressions and data structures in the translation
290\end{description}
291
[1852]292When using or creating function definitions have proved that we can switch between
293invariants:
[1836]294\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt]
[1825]295lemma populates_env : $\forall$l,e,u,l',e',u'.
[1836]296  distinct_env ?? l $\rightarrow$                    (* distinct names in l *)
297  populate_env e u l = $\langle$l',e',u'$\rangle$ $\rightarrow$ (* build register mapping *)
298  $\forall$i,t. Exists ? ($\lambda$x.x = $\langle$i,t$\rangle$) l $\rightarrow$ (* Anything in the environment... *)
[1852]299      Env_has e' i t.      (* maps to something of the correct type *)
[1825]300\end{lstlisting}
301
[1852]302\vfill
[1825]303\end{frame}
304
305\begin{frame}[fragile]
306\frametitle{Invariants in Cminor}
307
308Embedded invariant in the function record:
[1852]309\begin{lstlisting}[language=matita,basicstyle=\scriptsize\tt,morekeywords=f_inv]
[1825]310record internal_function : Type[0] ≝
311{ f_return     : option typ
312; f_params     : list (ident $\times$ typ)
313; f_vars       : list (ident $\times$ typ)
314; f_stacksize : nat
315; f_body       : stmt
316; f_inv        :
317  stmt_P ($\lambda$s.
318    stmt_vars   ($\lambda$i,t.Exists ? ($\lambda$x. x = $\langle$i,t$\rangle$) (f_params @ f_vars)) s $\wedge$
319    stmt_labels ($\lambda$l.Exists ? ($\lambda$l'.l' = l) (labels_of f_body))     s)
320  f_body
321}.
322\end{lstlisting}
323\begin{enumerate}
324\item All variables are in the parameters or locals lists\\
[1836]325\quad --- with the correct type
[1825]326\item All \lstinline'goto' labels mentioned are defined in the body
327\end{enumerate}
328
329\end{frame}
330
331
[1830]332\section{Clight to Cminor}
333
[1825]334\begin{frame}
335\frametitle{Clight to Cminor}
336
337Two main jobs:
338\begin{enumerate}
339\item make memory allocation of variables explicit
340\item use simpler control structures
341\end{enumerate}
342Again, based on prototype rather than CompCert.
343
344\bigskip
345Added type checking:
346\begin{itemize}
347\item satisfies the restrictions for Cminor expressions
348\item could separate out, have a `nice Clight' language
349\end{itemize}
[1852]350Similarly, code checks
351\begin{itemize}
352\item variable environments are well-formed
353\item all \lstinline'goto' labels are translated
354\end{itemize}
[1825]355\end{frame}
356
357\begin{frame}
358\frametitle{Clight to Cminor proofs}
359
[1852]360Beyond the invariants already shown, we will prove:
361
362\medskip
[1825]363\begin{enumerate}
[1852]364\item a simulation relation
[1825]365  \begin{itemize}
[1852]366  \item between statement and continuation pairs
367  \item using memory injection (similar to CompCert)
[1825]368  \end{itemize}
369\item syntactic cost labelling properties are preserved
370  \begin{itemize}
371  \item structural induction on function body
372  \end{itemize}
373\end{enumerate}
374\end{frame}
375
[1830]376\section{Initialisation}
[1825]377\begin{frame}
378\frametitle{Cminor: initialization}
379
380Replace initialization data by code in the initial function.
381
382\begin{itemize}
383\item straightforward to define
384\item instantiate a slightly different version of the semantics for it
385\begin{itemize}
386\item Can't accidentally forget the pass
387\end{itemize}
388\end{itemize}
389
390\bigskip
391Correctness should follow because the state after the initialisation will
392be the same as the initial state of the original.
393\end{frame}
394
[1830]395\section{RTLabs}
[1825]396\begin{frame}[fragile]
397\frametitle{RTLabs: syntax and semantics}
398
399Register Transfer Language with front-end operations.
400
401Control flow graph implemented by generic identifiers map:
402
[1836]403\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt]
[1852]404definition label := identifier LabelTag.
405definition graph : Type[0] $\rightarrow$ Type[0] := identifier_map LabelTag.
[1825]406
407inductive statement : Type[0] ≝
408| St_skip : label $\rightarrow$ statement
409| St_const : register $\rightarrow$ constant $\rightarrow$ label $\rightarrow$ statement
410| St_op1 : $\forall$t,t'. unary_operation t' t $\rightarrow$ register
411   $\rightarrow$ register $\rightarrow$ label $\rightarrow$ statement
412...
413\end{lstlisting}
[1852]414\vspace{-1.5ex}
[1825]415\begin{itemize}
416\item Shares basic operations (including semantics) with Cminor.
417\item Tags prevent confusion of labels (graph vs.~goto vs.~cost).
418\end{itemize}
419
[1852]420Semantics straightforward interpretation of statements.
[1825]421\end{frame}
422
423\begin{frame}[fragile]
424\frametitle{RTLabs: syntax and semantics}
425
426\begin{lstlisting}[language=matita]
427record internal_function : Type[0] ≝
428{ f_labgen    : universe LabelTag
429; f_reggen    : universe RegisterTag
430...
431; f_graph     : graph statement
432; f_closed    : graph_closed f_graph
433; f_typed     : graph_typed (f_locals @ f_params) f_graph
434...
435\end{lstlisting}
436
437Enforce that
438\begin{itemize}
439\item every statement's successor labels are present in the CFG
440\item every statement should be well-typed (for limited type system)
441\end{itemize}
442
443\end{frame}
444
[1830]445\section{Cminor to RTLabs}
[1825]446\begin{frame}
447\frametitle{Cminor to RTLabs}
448
449Break down statements and expressions into RTL graph.
450
451\begin{itemize}
452\item Incrementally build function backwards
453\item all state is in function record, like prototype
454\end{itemize}
455
[1852]456%\bigskip
457%May investigate whether a state monad makes invariant management easier.
[1825]458
459\bigskip
460Showing graph closure requires
461\begin{itemize}
462\item monotonicity of graph construction
463\item eventual insertion of all \lstinline'goto' destinations
464\end{itemize}
465
466Carry these along in the partially built function record.
467
468\end{frame}
469
470% \begin{frame}[fragile]
471% \frametitle{Cminor nesting depth}
472
473% Want to prevent
474% \begin{lstlisting}[language=C]
475% void f() {
476%   block { loop { exit 5; } }
477% }
478% \end{lstlisting}
479
480% Index statements by block depth
481% \begin{lstlisting}[language=matita]
482% inductive stmt : $\forall$blockdepth:nat. Type[0] ≝
483% | St_skip : $\forall$n. stmt n
484% ...
485% | St_block : $\forall$n. stmt (S n) $\rightarrow$ stmt n
486% | St_exit : $\forall$n. Fin n $\rightarrow$ stmt n
487% ...
488% \end{lstlisting}
489
490% \begin{itemize}
491% \item In semantics index continuations too
492% \item Use \lstinline[language=matita]'Fin n' to make \lstinline'k_exit'
493% a total function
494% \end{itemize}
495
496% (May be able to remove entirely if we redo \lstinline[language=C]{switch}?)
497% \end{frame}
498
499\begin{frame}[fragile]
500\frametitle{Establishing RTLabs invariants}
501
502Use dependent pairs to show invariant along with results.
503
[1836]504\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt,escapechar=\%]
[1852]505let rec add_expr ... (e:expr ty)
[1858]506   (Env:expr_vars ty e (present ?? env))   %\quad%  (* invariant *)
[1825]507   (f:partial_fn le)
508 on e: $\Sigma$f':partial_fn le. fn_graph_included le f f' ≝
509
510match e return
[1852]511   $\lambda$ty,e.expr_vars ty e (present ?? env) $\rightarrow$ ... with
[1825]512[ ...
513| Cst _ c $\Rightarrow$ $\lambda$_. %\guillemotleft%add_fresh_to_graph ? (St_const dst c) f ?, ?%\guillemotright%
514...
515\end{lstlisting}
516
[1852]517Continually appeal to monotonicity (\lstinline'fn_graph_included').\\
518Use unification hints to simplify stepping back.
[1825]519
520\end{frame}
521
522\frame{
523\frametitle{Cminor to RTLabs: cost labels}
524
[1852]525Two cost label properties are the same, the third will require more
[1830]526work:
527\begin{enumerate}
528\item cost label at head of function
529\item cost label after branching instructions
530\item cost labels at the head of each loop / \lstinline'goto' destination
531\end{enumerate}
532
533No simple notion of the head of a loop or \lstinline'goto' any more.
534
[1836]535\medskip
[1852]536Instead: will prove in \alert{Cminor} that after following a finite
[1830]537number of instructions we reach either
538\begin{itemize}
539\item a cost label, or
540\item the end of the function
541\end{itemize}
542
[1825]543}
544
[1830]545\section{Structured traces}
[1825]546\frame{
547\frametitle{RTLabs structured traces}
548
[1830]549Front-end only uses flat traces consisting of single steps.
550
[1836]551\bigskip
[1830]552The back-end will need the function call structure and the labelling
553properties in order to show that the generated costs are correct.
554
555\begin{itemize}
556\item Traces are structured in sections from cost label to cost label,
557\item the entire execution of function calls nested as a single `step',
[1836]558\item a coinductive definition presents non-terminating traces, using the
[1830]559  inductive definition for all terminating function calls
560\end{itemize}
561
[1836]562\bigskip
563RTLabs chosen because it is the first languages where statements:
564\begin{itemize}
565\item take one step each (modulo function calls)
566\item have individual `addresses'
567\end{itemize}
568}
569
570\frame{
571\frametitle{RTLabs structured traces}
[1852]572Have already established the existence of such traces
[1830]573\begin{itemize}
574\item termination decided classically
[1852]575\item syntactic labelling properties used to build semantic structure
[1836]576\item show stack preservation to ensure that function calls \texttt{return}
577  to the correct location
[1830]578\item tricky to establish guardedness of definitions
579\end{itemize}
[1836]580
581\bigskip
[1845]582Next, prove that flattening these traces yields the original.
[1825]583}
584
585\frame{
586\frametitle{Conclusion}
587
[1852]588Syntax, semantics and translations of prototype now implemented in Matita.
[1830]589
[1836]590\bigskip
[1852]591Have defined and established
[1825]592\begin{itemize}
[1830]593\item invariants regarding variables, typing and program structure
594\item a rich form of execution trace to pass to the back-end
[1825]595\end{itemize}
596
[1836]597\medskip
[1852]598Work in progress:
[1830]599\begin{itemize}
600\item showing functional correctness of the front-end
[1852]601\item proving that cost labelling is appropriate, and preserved
[1830]602\end{itemize}
[1825]603
[1852]604Finally, \alert{end-to-end} functional and cost correctness results.
605
[1825]606}
607
608\end{document}
Note: See TracBrowser for help on using the repository browser.