source: Deliverables/D1.2/Presentations/WP3.tex @ 1845

Last change on this file since 1845 was 1845, checked in by campbell, 8 years ago

Minor WP3 revisions prior to more major stuff.

File size: 18.1 KB
RevLine 
[1830]1\documentclass[nopanel]{beamer}
[1845]2\usepackage{graphicx,color}
[1825]3
4\usetheme{Frankfurt}
5\logo{\includegraphics[height=1.0cm]{fetopen.png}}
6
7\usepackage[english]{babel}
8%\usepackage{inputenc}
9
10
11%\documentclass[nopanel,ucs]{beamer}
12\usepackage[utf8x]{inputenc}
13%\usepackage[T1,OT1]{fontenc}
14%\usepackage{pifont}
15%\usepackage{aeguill}
16% using aeguill forces us to T1 everywhere, resulting in horribleness
17%  \def\selectguillfont{\fontencoding{T1}\fontfamily{cmtt}\selectfont}%
18%\def\guillemotleft{{\selectguillfont\symbol{19}}}%
19%\def\guillemotright{{\selectguillfont\symbol{20}}}%
20%\SetUnicodeOption{postscript}
21%\usepackage{hyperref}
22\usepackage{latexsym}
23\usepackage{listings}
24
25\def\cerco{CerCo}
26\def\ocaml{OCaml}
27
28\newcommand{\red}[1]{\textcolor{red}{#1}}
29\newcommand{\blue}[1]{\textcolor{blue}{#1}}
30\definecolor{darkgreen}{rgb}{0,0.4,0}
31\newcommand{\green}[1]{\textcolor{darkgreen}{#1}}
32\newcommand{\gray}[1]{\textcolor{gray}{#1}}
33
34%\setbeamertemplate{footline}[page number]
35%\setbeamertemplate{sidebar right}{}
36
37\lstdefinelanguage{coq}
38  {keywords={Definition,Lemma,Theorem,Remark,Qed,Save,Inductive,Record},
39   morekeywords={[2]if,then,else},
40  }
41
42\lstdefinelanguage{matita}
43  {keywords={definition,lemma,theorem,remark,inductive,record,qed,let,rec,match,with,Type,and,on,return},
44   morekeywords={[2]whd,normalize,elim,cases,destruct},
45   mathescape=true,
46   morecomment=[n]{(*}{*)},
47  }
48
49\lstset{language=matita,basicstyle=\small\tt,columns=flexible,breaklines=false,
50        keywordstyle=\color{red}\bfseries,
51        keywordstyle=[2]\color{blue},
52        commentstyle=\color{green},
53        stringstyle=\color{blue},
54        showspaces=false,showstringspaces=false}
55
56\lstset{extendedchars=false}
57\lstset{inputencoding=utf8x}
58\DeclareUnicodeCharacter{8797}{:=}
59\DeclareUnicodeCharacter{10746}{++}
60\DeclareUnicodeCharacter{9001}{\ensuremath{\langle}}
61\DeclareUnicodeCharacter{9002}{\ensuremath{\rangle}}
62
63
64\begin{document}
65
66\title{CerCo Work Package 3\\
67Verified Compiler --- Front-end}
68\author{Brian Campbell\\ \scriptsize\smallskip
69\href{mailto:Brian.Campbell@ed.ac.uk}{\texttt{Brian.Campbell@ed.ac.uk}}}
70\institute{Laboratory for Foundations of Computer Science\\
71The University of Edinburgh}
72\date{March 16, 2011}
73\frame{\titlepage}
74
75
76\frame{
77\frametitle{Introduction}
78
79An interleaved presentation of work on:
80\begin{description}
81\item[T3.2] The encoding of the front-end of the \cerco{} compiler in CIC
82\item[T3.3] Defining the executable semantics of the front-end's intermediate
83  language
84\item[T3.4] Correctness proofs
85\end{description}
86
87\bigskip
88Deliverables D3.2 and D3.3 have been produced.
89
90}
91
92
93\frame{
94\frametitle{Outline}
95
96\begin{itemize}
97\item Common definitions for everything
98\item The front end:
99\end{itemize}
100
101\begin{tabbing}
102\quad \= $\downarrow$ \quad \= \kill
103\gray{\textsf{C} (unformalized)}\\
104\> \gray{$\downarrow$} \> \gray{CIL parser (unformalized \ocaml)}\\
105\textsf{Clight}\\
106\> $\downarrow$ \> cast removal\\
[1836]107\> \gray{$\downarrow$} \> \gray{add runtime functions}\\
108\> $\downarrow$ \> cost labelling\\
[1825]109\> $\downarrow$ \> stack variable allocation and control structure
110 simplification\\
111\textsf{Cminor}\\
112\> $\downarrow$ \> generate global variable initialisation code\\
113\> $\downarrow$ \> transform to RTL graph\\
114\textsf{RTLabs}\\
115\> \,\gray{\vdots} \> \gray{start of target specific backend}
116\end{tabbing}
117
118\begin{itemize}
[1836]119\item Structured traces
[1825]120\end{itemize}
121
122}
123
[1830]124\section{Common}
[1825]125
126\begin{frame}[fragile]
127\frametitle{Common: identifiers}
128
129Variable and function names, cost labels, registers, CFG labels, \dots
130
131\begin{itemize}
132\item Represented by arbitrarily large binary numbers and trees
133\begin{itemize}
134\item D3.3 described a `lazy failure' approach reusing existing structures
135\item Impractical for adding invariants (types may depend on success of
136  name generation)
137\item Use this CompCert-like system instead
138\end{itemize}
139\item Tags for some type safety:
140\begin{lstlisting}[language=matita]
141inductive identifier (tag:String) : Type[0] ≝
142  an_identifier : Word $\rightarrow$ identifier tag.
143\end{lstlisting}
144\end{itemize}
145
146\end{frame}
147
148\frame{
149\frametitle{Other common definitions}
150
151\begin{itemize}
[1836]152\item Memory, global environments from D3.1
[1825]153\item Bit vector based arithmetic from D4.1
154  \begin{itemize}
155  \item added extra operations, increased efficiency
156  \end{itemize}
157\item Common record type for small-step executable semantics
158  \begin{itemize}
159  \item used for animation
160  \item intended to be used when defining simulations
161  \end{itemize}
162\end{itemize}
163
164\bigskip
165
166\bigskip
167\textsf{Cminor} and \textsf{RTLabs} share operations on values.
168
169\begin{itemize}
170\item one syntax, one semantics
171\item straightforward
172\item no overloading (unlike \textsf{Clight})
173\end{itemize}
174
175}
176
[1830]177\section{Clight}
[1825]178\frame{
179\frametitle{Clight: syntax and semantics}
180
181Largely unchanged from D3.1, except:
182
183\medskip
184\begin{itemize}
185\item use precise bit vectors instead of integer and range proof
186\item adapting to changes in common definitions
187\item added function to produce fresh name generator
188\begin{itemize}
189\item used for adding temporary variables
190\item works by finding largest existing identifier
191\item have shown necessary freshness proof
192\end{itemize}
193\end{itemize}
194}
195
196\begin{frame}[fragile]
197\frametitle{Clight: cast simplification}
198
199\begin{itemize}
200\item C insists on arithmetic promotion
201\item CIL-based \ocaml{} parser adds suitable casts
202\item bad for our target (32-bit ops instead of 8-bit)
203\end{itemize}
204
205Prototype recognises fixed patterns:
206\[ (t)((t_1)e_1\ op\ (t_2)e_2) \]
207and replaces when types are right.  Have done some preliminary proofs that this
208works.
209\begin{itemize}
210\item Deep pattern matching is awkward in Matita
211\item Misses (e.g.) \lstinline[language=C]'(char)((int)a+(int)b+(int)c)'
212\end{itemize}
213
214\red{Instead}: recursive `coerce to desired type' approach.\\
[1845]215% (Doesn't get some things,
216% e.g., \lstinline'==')
[1825]217\end{frame}
218
219\frame{
[1836]220\frametitle{Clight: cost labelling}
[1825]221
222Adds cost labels to Clight program.
223
224\begin{itemize}
225\item Implemented as a simple recursive function, like prototype
226\item uses common identifiers definition to produce fresh cost labels
227\end{itemize}
228
229The cost labelling is sound and precise, so we will be able to prove
230some syntactic properties required later:
231\begin{enumerate}
232\item every function starts with a cost label
233\item every branching instruction is followed by a cost label
234\item the head of every loop (including \lstinline'goto's) is a cost label
235\end{enumerate}
236
237}
238
[1830]239\section{Cminor}
[1825]240\begin{frame}[fragile]
241\frametitle{Cminor syntax and semantics}
242
243Similar language to CompCert's but developed from prototype.
244
245\begin{itemize}
246\item Some type enforcement for expressions:
247\end{itemize}
248
[1836]249\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt]
[1825]250inductive expr : typ $\rightarrow$ Type[0] ≝
251| Id : $\forall$t. ident $\rightarrow$ expr t
252| Cst : $\forall$t. constant $\rightarrow$ expr t
[1836]253| Op1 : $\forall$t,t'. unary_operation t t' $\rightarrow$ expr t $\rightarrow$ expr t'
[1825]254| Op2 : $\forall$t1,t2,t'. binary_operation $\rightarrow$ expr t1 $\rightarrow$ expr t2 $\rightarrow$ expr t'
255| Mem : $\forall$t,r. memory_chunk $\rightarrow$ expr (ASTptr r) $\rightarrow$ expr t
256| Cond : $\forall$sz,sg,t. expr (ASTint sz sg) $\rightarrow$ expr t $\rightarrow$ expr t $\rightarrow$ expr t
257| Ecost : $\forall$t. costlabel $\rightarrow$ expr t $\rightarrow$ expr t.
258\end{lstlisting}
259\begin{itemize}
260\item enforces some checking during production
261\item directly forces checking that temporaries are fresh
262\item gets rid of some corner cases in translation to RTLabs
263\end{itemize}
264
265Semantics unexciting small-step function.
266\end{frame}
267
268\begin{frame}
269\frametitle{Adding invariants}
270
271Lots of sources of failure in front-end due to badly formed programs:
272\begin{itemize}
273\item missing variables, graph nodes, \dots
274\item badly structured programs
275\end{itemize}
276
277\bigskip
278Have been refining the intermediate languages to rule these out:
279\begin{enumerate}
280\item \gray{Indexing Cminor syntax by nesting depth}
281\item adding type constraints
282\item adding invariants asserting presence of variables, etc.
283\end{enumerate}
284
285\end{frame}
286
287\begin{frame}[fragile]
288\frametitle{Invariants for identifiers}
289
290Invariants change between language syntax/semantics and compilation stages:
291
292\begin{description}
293\item[syntax] constraints between parts of function body and other information
294about the function
295\item[compilation] constraints between statements and expressions and data structures in the translation
296\end{description}
297
298When using or creating function records we prove that we can switch between
299them:
[1836]300\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt]
[1825]301lemma populates_env : $\forall$l,e,u,l',e',u'.
[1836]302  distinct_env ?? l $\rightarrow$                    (* distinct names in l *)
303  populate_env e u l = $\langle$l',e',u'$\rangle$ $\rightarrow$ (* build register mapping *)
304  $\forall$i,t. Exists ? ($\lambda$x.x = $\langle$i,t$\rangle$) l $\rightarrow$ (* Anything in the environment... *)
305      Env_has e' i t.
306      (* maps to something of the correct type *)
[1825]307\end{lstlisting}
308
309\end{frame}
310
311\begin{frame}[fragile]
312\frametitle{Invariants in Cminor}
313
314Embedded invariant in the function record:
315\begin{lstlisting}[language=matita,basicstyle=\scriptsize\tt]
316record internal_function : Type[0] ≝
317{ f_return     : option typ
318; f_params     : list (ident $\times$ typ)
319; f_vars       : list (ident $\times$ typ)
320; f_stacksize : nat
321; f_body       : stmt
322; f_inv        :
323  stmt_P ($\lambda$s.
324    stmt_vars   ($\lambda$i,t.Exists ? ($\lambda$x. x = $\langle$i,t$\rangle$) (f_params @ f_vars)) s $\wedge$
325    stmt_labels ($\lambda$l.Exists ? ($\lambda$l'.l' = l) (labels_of f_body))     s)
326  f_body
327}.
328\end{lstlisting}
329\begin{enumerate}
330\item All variables are in the parameters or locals lists\\
[1836]331\quad --- with the correct type
[1825]332\item All \lstinline'goto' labels mentioned are defined in the body
333\end{enumerate}
334
[1836]335How is \lstinline'stmt_P' defined?
[1825]336\end{frame}
337
338\begin{frame}[fragile]
339\frametitle{Invariants in Cminor}
340
[1836]341\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt]
[1825]342let rec stmt_P (P:stmt $\rightarrow$ Prop) (s:stmt) on s : Prop ≝
343match s with
344[ St_seq s1 s2 $\Rightarrow$ P s $\wedge$ stmt_P P s1 $\wedge$ stmt_P P s2
345| St_ifthenelse _ _ _ s1 s2 $\Rightarrow$ P s $\wedge$ stmt_P P s1 $\wedge$ stmt_P P s2
346| St_loop s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
347| St_block s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
348| St_label _ s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
349| St_cost _ s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
350| _ $\Rightarrow$ P s
351].
352\end{lstlisting}
353
354Higher order predicate
355\begin{itemize}
[1836]356\item When we pattern match on the statement the predicate can unfold
357\item Easy to discharge proof obligations for substatements
[1825]358\item similar definition for expressions
359\end{itemize}
360
361\end{frame}
362
[1830]363
364\section{Clight to Cminor}
365
[1825]366\begin{frame}
367\frametitle{Clight to Cminor}
368
369Two main jobs:
370\begin{enumerate}
371\item make memory allocation of variables explicit
372\item use simpler control structures
373\end{enumerate}
374Again, based on prototype rather than CompCert.
375
376\bigskip
377Added type checking:
378\begin{itemize}
379\item satisfies the restrictions for Cminor expressions
380\item could separate out, have a `nice Clight' language
381\end{itemize}
[1836]382Similarly, check variable environments are sane, check all \lstinline'goto'
383labels are translated.
[1825]384\end{frame}
385
386\begin{frame}
387\frametitle{Clight to Cminor proofs}
388
389Beyond the invariants already shown, we need to prove:
390\begin{enumerate}
391\item a simulation using
392  \begin{itemize}
393  \item memory injection (similar to CompCert)
394  \item relation based around source and target's statements and continuations
395  \end{itemize}
396\item syntactic cost labelling properties are preserved
397  \begin{itemize}
398  \item structural induction on function body
399  \end{itemize}
400\end{enumerate}
401\end{frame}
402
[1830]403\section{Initialisation}
[1825]404\begin{frame}
405\frametitle{Cminor: initialization}
406
407Replace initialization data by code in the initial function.
408
409\begin{itemize}
410\item straightforward to define
411\item instantiate a slightly different version of the semantics for it
412\begin{itemize}
413\item Can't accidentally forget the pass
414\end{itemize}
415\end{itemize}
416
417\bigskip
418Correctness should follow because the state after the initialisation will
419be the same as the initial state of the original.
420\end{frame}
421
[1830]422\section{RTLabs}
[1825]423\begin{frame}[fragile]
424\frametitle{RTLabs: syntax and semantics}
425
426Register Transfer Language with front-end operations.
427
428Control flow graph implemented by generic identifiers map:
429
[1836]430\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt]
[1825]431definition label ≝ identifier LabelTag.
432definition graph : Type[0] $\rightarrow$ Type[0] ≝ identifier_map LabelTag.
433
434inductive statement : Type[0] ≝
435| St_skip : label $\rightarrow$ statement
436| St_cost : costlabel $\rightarrow$ label $\rightarrow$ statement
437| St_const : register $\rightarrow$ constant $\rightarrow$ label $\rightarrow$ statement
438| St_op1 : $\forall$t,t'. unary_operation t' t $\rightarrow$ register
439   $\rightarrow$ register $\rightarrow$ label $\rightarrow$ statement
440...
441\end{lstlisting}
442\begin{itemize}
443\item Shares basic operations (including semantics) with Cminor.
444\item Tags prevent confusion of labels (graph vs.~goto vs.~cost).
445\end{itemize}
446
447Semantics are straightforward interpretation of statements.
448\end{frame}
449
450\begin{frame}[fragile]
451\frametitle{RTLabs: syntax and semantics}
452
453\begin{lstlisting}[language=matita]
454record internal_function : Type[0] ≝
455{ f_labgen    : universe LabelTag
456; f_reggen    : universe RegisterTag
457...
458; f_graph     : graph statement
459; f_closed    : graph_closed f_graph
460; f_typed     : graph_typed (f_locals @ f_params) f_graph
461...
462\end{lstlisting}
463
464Enforce that
465\begin{itemize}
466\item every statement's successor labels are present in the CFG
467\item every statement should be well-typed (for limited type system)
468\end{itemize}
469
470\end{frame}
471
[1830]472\section{Cminor to RTLabs}
[1825]473\begin{frame}
474\frametitle{Cminor to RTLabs}
475
476Break down statements and expressions into RTL graph.
477
478\begin{itemize}
479\item Incrementally build function backwards
480\item all state is in function record, like prototype
481\end{itemize}
482
483\bigskip
484May investigate whether a state monad makes invariant management easier.
485
486\bigskip
487Showing graph closure requires
488\begin{itemize}
489\item monotonicity of graph construction
490\item eventual insertion of all \lstinline'goto' destinations
491\end{itemize}
492
493Carry these along in the partially built function record.
494
495\end{frame}
496
497% \begin{frame}[fragile]
498% \frametitle{Cminor nesting depth}
499
500% Want to prevent
501% \begin{lstlisting}[language=C]
502% void f() {
503%   block { loop { exit 5; } }
504% }
505% \end{lstlisting}
506
507% Index statements by block depth
508% \begin{lstlisting}[language=matita]
509% inductive stmt : $\forall$blockdepth:nat. Type[0] ≝
510% | St_skip : $\forall$n. stmt n
511% ...
512% | St_block : $\forall$n. stmt (S n) $\rightarrow$ stmt n
513% | St_exit : $\forall$n. Fin n $\rightarrow$ stmt n
514% ...
515% \end{lstlisting}
516
517% \begin{itemize}
518% \item In semantics index continuations too
519% \item Use \lstinline[language=matita]'Fin n' to make \lstinline'k_exit'
520% a total function
521% \end{itemize}
522
523% (May be able to remove entirely if we redo \lstinline[language=C]{switch}?)
524% \end{frame}
525
526\begin{frame}[fragile]
527\frametitle{Establishing RTLabs invariants}
528
529Use dependent pairs to show invariant along with results.
530
[1836]531\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt,escapechar=\%]
[1825]532let rec add_expr (le:label_env) (env:env) (ty:typ) (e:expr ty)
533   (Env:expr_vars ty e (present ?? env))
534   (dst:register)
535   (f:partial_fn le)
536 on e: $\Sigma$f':partial_fn le. fn_graph_included le f f' ≝
537
538match e return
539   $\lambda$ty,e.expr_vars ty e (present ?? env) $\rightarrow$
540         $\Sigma$f':partial_fn le. fn_graph_included le f f' with
541[ ...
542| Cst _ c $\Rightarrow$ $\lambda$_. %\guillemotleft%add_fresh_to_graph ? (St_const dst c) f ?, ?%\guillemotright%
543...
544\end{lstlisting}
545
546Continually appeal to monotonicity (\lstinline'fn_graph_included'),
547use unification hints to simplify stepping back.
548
549\end{frame}
550
551\frame{
552\frametitle{Cminor to RTLabs: cost labels}
553
[1836]554Two of the cost label properties are easy to deal with, the third will require more
[1830]555work:
556\begin{enumerate}
557\item cost label at head of function
558\item cost label after branching instructions
559\item cost labels at the head of each loop / \lstinline'goto' destination
560\end{enumerate}
561
562No simple notion of the head of a loop or \lstinline'goto' any more.
563
[1836]564\medskip
[1830]565Instead will prove in \alert{Cminor} that after following a finite
566number of instructions we reach either
567\begin{itemize}
568\item a cost label, or
569\item the end of the function
570\end{itemize}
571
[1825]572}
573
[1830]574\section{Structured traces}
[1825]575\frame{
576\frametitle{RTLabs structured traces}
577
[1830]578Front-end only uses flat traces consisting of single steps.
579
[1836]580\bigskip
[1830]581The back-end will need the function call structure and the labelling
582properties in order to show that the generated costs are correct.
583
584\begin{itemize}
585\item Traces are structured in sections from cost label to cost label,
586\item the entire execution of function calls nested as a single `step',
[1836]587\item a coinductive definition presents non-terminating traces, using the
[1830]588  inductive definition for all terminating function calls
589\end{itemize}
590
[1836]591\bigskip
592RTLabs chosen because it is the first languages where statements:
593\begin{itemize}
594\item take one step each (modulo function calls)
595\item have individual `addresses'
596\end{itemize}
597}
598
599\frame{
600\frametitle{RTLabs structured traces}
[1830]601Have already established the existence of these traces
602\begin{itemize}
603\item termination decided classically
[1836]604\item syntactic labelling properties used to build the semantic structure
605\item show stack preservation to ensure that function calls \texttt{return}
606  to the correct location
[1830]607\item tricky to establish guardedness of definitions
608\end{itemize}
[1836]609
610\bigskip
[1845]611Next, prove that flattening these traces yields the original.
[1825]612}
613
614\frame{
615\frametitle{Conclusion}
616
[1830]617The syntax, semantics and translations of the prototype compiler have
618been implemented in Matita.
619
[1836]620\bigskip
[1830]621We have already defined and established
[1825]622\begin{itemize}
[1830]623\item invariants regarding variables, typing and program structure
624\item a rich form of execution trace to pass to the back-end
[1825]625\end{itemize}
626
[1836]627\medskip
[1830]628We have plans for
629\begin{itemize}
630\item showing functional correctness of the front-end
631\item proving that the cost labelling is appropriate, and is preserved
632\item using the above in the whole compiler functional and cost correctness results.
633\end{itemize}
[1825]634
635}
636
637\end{document}
Note: See TracBrowser for help on using the repository browser.