source: Deliverables/D1.2/Presentations/WP3.tex @ 1836

Last change on this file since 1836 was 1836, checked in by campbell, 9 years ago

Revise WP3 presentation.

File size: 19.7 KB
Line 
1\documentclass[nopanel]{beamer}
2
3\usetheme{Frankfurt}
4\logo{\includegraphics[height=1.0cm]{fetopen.png}}
5
6\usepackage[english]{babel}
7%\usepackage{inputenc}
8
9
10%\documentclass[nopanel,ucs]{beamer}
11\usepackage[utf8x]{inputenc}
12%\usepackage[T1,OT1]{fontenc}
13%\usepackage{pifont}
14%\usepackage{aeguill}
15% using aeguill forces us to T1 everywhere, resulting in horribleness
16%  \def\selectguillfont{\fontencoding{T1}\fontfamily{cmtt}\selectfont}%
17%\def\guillemotleft{{\selectguillfont\symbol{19}}}%
18%\def\guillemotright{{\selectguillfont\symbol{20}}}%
19%\SetUnicodeOption{postscript}
20%\usepackage{hyperref}
21\usepackage{graphicx,color}
22\usepackage{latexsym}
23\usepackage{listings}
24
25\def\cerco{CerCo}
26\def\ocaml{OCaml}
27
28\newcommand{\red}[1]{\textcolor{red}{#1}}
29\newcommand{\blue}[1]{\textcolor{blue}{#1}}
30\definecolor{darkgreen}{rgb}{0,0.4,0}
31\newcommand{\green}[1]{\textcolor{darkgreen}{#1}}
32\newcommand{\gray}[1]{\textcolor{gray}{#1}}
33
34%\setbeamertemplate{footline}[page number]
35%\setbeamertemplate{sidebar right}{}
36
37\lstdefinelanguage{coq}
38  {keywords={Definition,Lemma,Theorem,Remark,Qed,Save,Inductive,Record},
39   morekeywords={[2]if,then,else},
40  }
41
42\lstdefinelanguage{matita}
43  {keywords={definition,lemma,theorem,remark,inductive,record,qed,let,rec,match,with,Type,and,on,return},
44   morekeywords={[2]whd,normalize,elim,cases,destruct},
45   mathescape=true,
46   morecomment=[n]{(*}{*)},
47  }
48
49\lstset{language=matita,basicstyle=\small\tt,columns=flexible,breaklines=false,
50        keywordstyle=\color{red}\bfseries,
51        keywordstyle=[2]\color{blue},
52        commentstyle=\color{green},
53        stringstyle=\color{blue},
54        showspaces=false,showstringspaces=false}
55
56\lstset{extendedchars=false}
57\lstset{inputencoding=utf8x}
58\DeclareUnicodeCharacter{8797}{:=}
59\DeclareUnicodeCharacter{10746}{++}
60\DeclareUnicodeCharacter{9001}{\ensuremath{\langle}}
61\DeclareUnicodeCharacter{9002}{\ensuremath{\rangle}}
62
63
64\begin{document}
65
66\title{CerCo Work Package 3\\
67Verified Compiler --- Front-end}
68\author{Brian Campbell\\ \scriptsize\smallskip
69\href{mailto:Brian.Campbell@ed.ac.uk}{\texttt{Brian.Campbell@ed.ac.uk}}}
70\institute{Laboratory for Foundations of Computer Science\\
71The University of Edinburgh}
72\date{March 16, 2011}
73\frame{\titlepage}
74
75
76\frame{
77\frametitle{Introduction}
78
79An interleaved presentation of work on:
80\begin{description}
81\item[T3.2] The encoding of the front-end of the \cerco{} compiler in CIC
82\item[T3.3] Defining the executable semantics of the front-end's intermediate
83  language
84\item[T3.4] Correctness proofs
85\end{description}
86
87\bigskip
88Deliverables D3.2 and D3.3 have been produced.
89
90}
91
92
93\frame{
94\frametitle{Outline}
95
96\begin{itemize}
97\item Common definitions for everything
98\item The front end:
99\end{itemize}
100
101\begin{tabbing}
102\quad \= $\downarrow$ \quad \= \kill
103\gray{\textsf{C} (unformalized)}\\
104\> \gray{$\downarrow$} \> \gray{CIL parser (unformalized \ocaml)}\\
105\textsf{Clight}\\
106\> $\downarrow$ \> cast removal\\
107\> \gray{$\downarrow$} \> \gray{add runtime functions}\\
108\> $\downarrow$ \> cost labelling\\
109\> $\downarrow$ \> stack variable allocation and control structure
110 simplification\\
111\textsf{Cminor}\\
112\> $\downarrow$ \> generate global variable initialisation code\\
113\> $\downarrow$ \> transform to RTL graph\\
114\textsf{RTLabs}\\
115\> \,\gray{\vdots} \> \gray{start of target specific backend}
116\end{tabbing}
117
118\begin{itemize}
119\item Structured traces
120\end{itemize}
121
122}
123
124\section{Common}
125
126\begin{frame}[fragile]
127\frametitle{Common: identifiers}
128
129Variable and function names, cost labels, registers, CFG labels, \dots
130
131\begin{itemize}
132\item Represented by arbitrarily large binary numbers and trees
133\begin{itemize}
134\item D3.3 described a `lazy failure' approach reusing existing structures
135\item Impractical for adding invariants (types may depend on success of
136  name generation)
137\item Use this CompCert-like system instead
138\end{itemize}
139\item Tags for some type safety:
140\begin{lstlisting}[language=matita]
141inductive identifier (tag:String) : Type[0] ≝
142  an_identifier : Word $\rightarrow$ identifier tag.
143\end{lstlisting}
144\end{itemize}
145
146\end{frame}
147
148\frame{
149\frametitle{Other common definitions}
150
151\begin{itemize}
152\item Memory, global environments from D3.1
153\item Bit vector based arithmetic from D4.1
154  \begin{itemize}
155  \item added extra operations, increased efficiency
156  \end{itemize}
157\item Common record type for small-step executable semantics
158  \begin{itemize}
159  \item used for animation
160  \item intended to be used when defining simulations
161  \end{itemize}
162\end{itemize}
163
164\bigskip
165
166\bigskip
167\textsf{Cminor} and \textsf{RTLabs} share operations on values.
168
169\begin{itemize}
170\item one syntax, one semantics
171\item straightforward
172\item no overloading (unlike \textsf{Clight})
173\end{itemize}
174
175}
176
177\section{Clight}
178\frame{
179\frametitle{Clight: syntax and semantics}
180
181Largely unchanged from D3.1, except:
182
183\medskip
184\begin{itemize}
185\item use precise bit vectors instead of integer and range proof
186\item adapting to changes in common definitions
187\item added function to produce fresh name generator
188\begin{itemize}
189\item used for adding temporary variables
190\item works by finding largest existing identifier
191\item have shown necessary freshness proof
192\end{itemize}
193\end{itemize}
194}
195
196\begin{frame}[fragile]
197\frametitle{Clight: cast simplification}
198
199\begin{itemize}
200\item C insists on arithmetic promotion
201\item CIL-based \ocaml{} parser adds suitable casts
202\item bad for our target (32-bit ops instead of 8-bit)
203\end{itemize}
204
205Prototype recognises fixed patterns:
206\[ (t)((t_1)e_1\ op\ (t_2)e_2) \]
207and replaces when types are right.  Have done some preliminary proofs that this
208works.
209\begin{itemize}
210\item Deep pattern matching is awkward in Matita
211\item Misses (e.g.) \lstinline[language=C]'(char)((int)a+(int)b+(int)c)'
212\end{itemize}
213
214\red{Instead}: recursive `coerce to desired type' approach.\\
215 (Doesn't get some things,
216 e.g., \lstinline'==')
217% TODO: would like to separate, show example properly, note proofs
218\end{frame}
219
220\frame{
221\frametitle{Clight: cost labelling}
222
223Adds cost labels to Clight program.
224
225\begin{itemize}
226\item Implemented as a simple recursive function, like prototype
227\item uses common identifiers definition to produce fresh cost labels
228\end{itemize}
229
230The cost labelling is sound and precise, so we will be able to prove
231some syntactic properties required later:
232\begin{enumerate}
233\item every function starts with a cost label
234\item every branching instruction is followed by a cost label
235\item the head of every loop (including \lstinline'goto's) is a cost label
236\end{enumerate}
237
238}
239
240% \frame{
241% \frametitle{Clight: adding runtime functions}
242
243% Replaces ops not supported by target with function calls.
244
245% \begin{itemize}
246% \item Add functions as Clight ASTs (rather than prototype's reparsing approach)
247% \item substituting function calls is horrible
248% \begin{itemize}
249% \item Clight does not permit calls in expressions
250% \item Need to add statements in front of the target expression
251% \item Messy, duplication
252% \item Hard: just noticed that even the prototype gets it wrong
253% \end{itemize}
254% \item Implemented, but\dots
255% \end{itemize}
256
257% \textsc{Proposal}: shift to later in the compilation process
258% \begin{itemize}
259% \item easy substitution
260% \item adjust semantics to emit extra cost labels
261% \item get to use lower level features
262% \end{itemize}
263% }
264\section{Cminor}
265\begin{frame}[fragile]
266\frametitle{Cminor syntax and semantics}
267
268Similar language to CompCert's but developed from prototype.
269
270\begin{itemize}
271\item Some type enforcement for expressions:
272\end{itemize}
273
274% TODO: update!
275\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt]
276inductive expr : typ $\rightarrow$ Type[0] ≝
277| Id : $\forall$t. ident $\rightarrow$ expr t
278| Cst : $\forall$t. constant $\rightarrow$ expr t
279| Op1 : $\forall$t,t'. unary_operation t t' $\rightarrow$ expr t $\rightarrow$ expr t'
280| Op2 : $\forall$t1,t2,t'. binary_operation $\rightarrow$ expr t1 $\rightarrow$ expr t2 $\rightarrow$ expr t'
281| Mem : $\forall$t,r. memory_chunk $\rightarrow$ expr (ASTptr r) $\rightarrow$ expr t
282| Cond : $\forall$sz,sg,t. expr (ASTint sz sg) $\rightarrow$ expr t $\rightarrow$ expr t $\rightarrow$ expr t
283| Ecost : $\forall$t. costlabel $\rightarrow$ expr t $\rightarrow$ expr t.
284\end{lstlisting}
285\begin{itemize}
286\item enforces some checking during production
287\item directly forces checking that temporaries are fresh
288\item gets rid of some corner cases in translation to RTLabs
289\end{itemize}
290
291Semantics unexciting small-step function.
292\end{frame}
293
294\begin{frame}
295\frametitle{Adding invariants}
296
297Lots of sources of failure in front-end due to badly formed programs:
298\begin{itemize}
299\item missing variables, graph nodes, \dots
300\item badly structured programs
301\end{itemize}
302
303\bigskip
304Have been refining the intermediate languages to rule these out:
305\begin{enumerate}
306\item \gray{Indexing Cminor syntax by nesting depth}
307\item adding type constraints
308\item adding invariants asserting presence of variables, etc.
309\end{enumerate}
310
311\end{frame}
312
313\begin{frame}[fragile]
314\frametitle{Invariants for identifiers}
315
316Invariants change between language syntax/semantics and compilation stages:
317
318\begin{description}
319\item[syntax] constraints between parts of function body and other information
320about the function
321\item[compilation] constraints between statements and expressions and data structures in the translation
322\end{description}
323
324When using or creating function records we prove that we can switch between
325them:
326\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt]
327lemma populates_env : $\forall$l,e,u,l',e',u'.
328  distinct_env ?? l $\rightarrow$                    (* distinct names in l *)
329  populate_env e u l = $\langle$l',e',u'$\rangle$ $\rightarrow$ (* build register mapping *)
330  $\forall$i,t. Exists ? ($\lambda$x.x = $\langle$i,t$\rangle$) l $\rightarrow$ (* Anything in the environment... *)
331      Env_has e' i t.
332      (* maps to something of the correct type *)
333\end{lstlisting}
334
335\end{frame}
336
337\begin{frame}[fragile]
338\frametitle{Invariants in Cminor}
339
340Embedded invariant in the function record:
341\begin{lstlisting}[language=matita,basicstyle=\scriptsize\tt]
342record internal_function : Type[0] ≝
343{ f_return     : option typ
344; f_params     : list (ident $\times$ typ)
345; f_vars       : list (ident $\times$ typ)
346; f_stacksize : nat
347; f_body       : stmt
348; f_inv        :
349  stmt_P ($\lambda$s.
350    stmt_vars   ($\lambda$i,t.Exists ? ($\lambda$x. x = $\langle$i,t$\rangle$) (f_params @ f_vars)) s $\wedge$
351    stmt_labels ($\lambda$l.Exists ? ($\lambda$l'.l' = l) (labels_of f_body))     s)
352  f_body
353}.
354\end{lstlisting}
355\begin{enumerate}
356\item All variables are in the parameters or locals lists\\
357\quad --- with the correct type
358\item All \lstinline'goto' labels mentioned are defined in the body
359\end{enumerate}
360
361How is \lstinline'stmt_P' defined?
362\end{frame}
363
364\begin{frame}[fragile]
365\frametitle{Invariants in Cminor}
366
367\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt]
368let rec stmt_P (P:stmt $\rightarrow$ Prop) (s:stmt) on s : Prop ≝
369match s with
370[ St_seq s1 s2 $\Rightarrow$ P s $\wedge$ stmt_P P s1 $\wedge$ stmt_P P s2
371| St_ifthenelse _ _ _ s1 s2 $\Rightarrow$ P s $\wedge$ stmt_P P s1 $\wedge$ stmt_P P s2
372| St_loop s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
373| St_block s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
374| St_label _ s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
375| St_cost _ s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
376| _ $\Rightarrow$ P s
377].
378\end{lstlisting}
379
380Higher order predicate
381\begin{itemize}
382\item When we pattern match on the statement the predicate can unfold
383\item Easy to discharge proof obligations for substatements
384\item similar definition for expressions
385\end{itemize}
386
387\end{frame}
388
389
390\section{Clight to Cminor}
391
392\begin{frame}
393\frametitle{Clight to Cminor}
394
395Two main jobs:
396\begin{enumerate}
397\item make memory allocation of variables explicit
398\item use simpler control structures
399\end{enumerate}
400Again, based on prototype rather than CompCert.
401
402\bigskip
403Added type checking:
404\begin{itemize}
405\item satisfies the restrictions for Cminor expressions
406\item could separate out, have a `nice Clight' language
407\end{itemize}
408Similarly, check variable environments are sane, check all \lstinline'goto'
409labels are translated.
410\end{frame}
411
412\begin{frame}
413\frametitle{Clight to Cminor proofs}
414
415Beyond the invariants already shown, we need to prove:
416\begin{enumerate}
417\item a simulation using
418  \begin{itemize}
419  \item memory injection (similar to CompCert)
420  \item relation based around source and target's statements and continuations
421  \end{itemize}
422\item syntactic cost labelling properties are preserved
423  \begin{itemize}
424  \item structural induction on function body
425  \end{itemize}
426\end{enumerate}
427\end{frame}
428
429\section{Initialisation}
430\begin{frame}
431\frametitle{Cminor: initialization}
432
433Replace initialization data by code in the initial function.
434
435\begin{itemize}
436\item straightforward to define
437\item instantiate a slightly different version of the semantics for it
438\begin{itemize}
439\item Can't accidentally forget the pass
440\end{itemize}
441\end{itemize}
442
443\bigskip
444Correctness should follow because the state after the initialisation will
445be the same as the initial state of the original.
446\end{frame}
447
448\section{RTLabs}
449\begin{frame}[fragile]
450\frametitle{RTLabs: syntax and semantics}
451
452Register Transfer Language with front-end operations.
453
454Control flow graph implemented by generic identifiers map:
455
456\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt]
457definition label ≝ identifier LabelTag.
458definition graph : Type[0] $\rightarrow$ Type[0] ≝ identifier_map LabelTag.
459
460inductive statement : Type[0] ≝
461| St_skip : label $\rightarrow$ statement
462| St_cost : costlabel $\rightarrow$ label $\rightarrow$ statement
463| St_const : register $\rightarrow$ constant $\rightarrow$ label $\rightarrow$ statement
464| St_op1 : $\forall$t,t'. unary_operation t' t $\rightarrow$ register
465   $\rightarrow$ register $\rightarrow$ label $\rightarrow$ statement
466...
467\end{lstlisting}
468\begin{itemize}
469\item Shares basic operations (including semantics) with Cminor.
470\item Tags prevent confusion of labels (graph vs.~goto vs.~cost).
471\end{itemize}
472
473Semantics are straightforward interpretation of statements.
474\end{frame}
475
476\begin{frame}[fragile]
477\frametitle{RTLabs: syntax and semantics}
478
479\begin{lstlisting}[language=matita]
480record internal_function : Type[0] ≝
481{ f_labgen    : universe LabelTag
482; f_reggen    : universe RegisterTag
483...
484; f_graph     : graph statement
485; f_closed    : graph_closed f_graph
486; f_typed     : graph_typed (f_locals @ f_params) f_graph
487...
488\end{lstlisting}
489
490Enforce that
491\begin{itemize}
492\item every statement's successor labels are present in the CFG
493\item every statement should be well-typed (for limited type system)
494\end{itemize}
495
496\end{frame}
497
498\section{Cminor to RTLabs}
499\begin{frame}
500\frametitle{Cminor to RTLabs}
501
502Break down statements and expressions into RTL graph.
503
504\begin{itemize}
505\item Incrementally build function backwards
506\item all state is in function record, like prototype
507\end{itemize}
508
509\bigskip
510May investigate whether a state monad makes invariant management easier.
511
512\bigskip
513Showing graph closure requires
514\begin{itemize}
515\item monotonicity of graph construction
516\item eventual insertion of all \lstinline'goto' destinations
517\end{itemize}
518
519Carry these along in the partially built function record.
520
521\end{frame}
522
523% \begin{frame}[fragile]
524% \frametitle{Cminor nesting depth}
525
526% Want to prevent
527% \begin{lstlisting}[language=C]
528% void f() {
529%   block { loop { exit 5; } }
530% }
531% \end{lstlisting}
532
533% Index statements by block depth
534% \begin{lstlisting}[language=matita]
535% inductive stmt : $\forall$blockdepth:nat. Type[0] ≝
536% | St_skip : $\forall$n. stmt n
537% ...
538% | St_block : $\forall$n. stmt (S n) $\rightarrow$ stmt n
539% | St_exit : $\forall$n. Fin n $\rightarrow$ stmt n
540% ...
541% \end{lstlisting}
542
543% \begin{itemize}
544% \item In semantics index continuations too
545% \item Use \lstinline[language=matita]'Fin n' to make \lstinline'k_exit'
546% a total function
547% \end{itemize}
548
549% (May be able to remove entirely if we redo \lstinline[language=C]{switch}?)
550% \end{frame}
551
552\begin{frame}[fragile]
553\frametitle{Establishing RTLabs invariants}
554
555Use dependent pairs to show invariant along with results.
556
557\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt,escapechar=\%]
558let rec add_expr (le:label_env) (env:env) (ty:typ) (e:expr ty)
559   (Env:expr_vars ty e (present ?? env))
560   (dst:register)
561   (f:partial_fn le)
562 on e: $\Sigma$f':partial_fn le. fn_graph_included le f f' ≝
563
564match e return
565   $\lambda$ty,e.expr_vars ty e (present ?? env) $\rightarrow$
566         $\Sigma$f':partial_fn le. fn_graph_included le f f' with
567[ ...
568| Cst _ c $\Rightarrow$ $\lambda$_. %\guillemotleft%add_fresh_to_graph ? (St_const dst c) f ?, ?%\guillemotright%
569...
570\end{lstlisting}
571
572Continually appeal to monotonicity (\lstinline'fn_graph_included'),
573use unification hints to simplify stepping back.
574
575\end{frame}
576
577\frame{
578\frametitle{Cminor to RTLabs: cost labels}
579
580Two of the cost label properties are easy to deal with, the third will require more
581work:
582\begin{enumerate}
583\item cost label at head of function
584\item cost label after branching instructions
585\item cost labels at the head of each loop / \lstinline'goto' destination
586\end{enumerate}
587
588No simple notion of the head of a loop or \lstinline'goto' any more.
589
590\medskip
591Instead will prove in \alert{Cminor} that after following a finite
592number of instructions we reach either
593\begin{itemize}
594\item a cost label, or
595\item the end of the function
596\end{itemize}
597
598}
599
600\section{Structured traces}
601\frame{
602\frametitle{RTLabs structured traces}
603
604Front-end only uses flat traces consisting of single steps.
605
606\bigskip
607The back-end will need the function call structure and the labelling
608properties in order to show that the generated costs are correct.
609
610\begin{itemize}
611\item Traces are structured in sections from cost label to cost label,
612\item the entire execution of function calls nested as a single `step',
613\item a coinductive definition presents non-terminating traces, using the
614  inductive definition for all terminating function calls
615\end{itemize}
616
617\bigskip
618RTLabs chosen because it is the first languages where statements:
619\begin{itemize}
620\item take one step each (modulo function calls)
621\item have individual `addresses'
622\end{itemize}
623}
624
625\frame{
626\frametitle{RTLabs structured traces}
627Have already established the existence of these traces
628\begin{itemize}
629\item termination decided classically
630\item syntactic labelling properties used to build the semantic structure
631\item show stack preservation to ensure that function calls \texttt{return}
632  to the correct location
633\item tricky to establish guardedness of definitions
634\end{itemize}
635
636\bigskip
637It remains to prove that flattening these traces yields the original.
638}
639
640% \frame{
641% \frametitle{Difficulties with invariants}
642
643% \begin{itemize}
644% \item Not clear if we can use Russell
645% \begin{itemize}
646% \item Want to use existing proofs rather than generated equalities
647% \item Some non-uniform treatment of cases
648% \end{itemize}
649% \item Stresses Matita somewhat (try forgetting to make something a dependent pair)
650% \item Lots of fiddly proof obligations that are simple, but a pain to deal with
651% \begin{itemize}
652% \item Appeal to monotonicity 3 times, then note that the label was added to the graph here
653% \end{itemize}
654% \item Already verbose, yet don't cover globals.
655% \end{itemize}
656
657% I'd like to see if better use of Matita's features can help with this stuff.
658
659% \bigskip
660% Open to suggestions for better methods.
661% }
662
663\frame{
664\frametitle{Conclusion}
665
666The syntax, semantics and translations of the prototype compiler have
667been implemented in Matita.
668
669\bigskip
670We have already defined and established
671\begin{itemize}
672\item invariants regarding variables, typing and program structure
673\item a rich form of execution trace to pass to the back-end
674\end{itemize}
675
676\medskip
677We have plans for
678\begin{itemize}
679\item showing functional correctness of the front-end
680\item proving that the cost labelling is appropriate, and is preserved
681\item using the above in the whole compiler functional and cost correctness results.
682\end{itemize}
683
684}
685
686\end{document}
Note: See TracBrowser for help on using the repository browser.