source: Deliverables/D1.2/Presentations/WP3.tex @ 1825

Last change on this file since 1825 was 1825, checked in by campbell, 7 years ago

WP3 draft slides.
Fix fetopen.png.

File size: 17.4 KB
Line 
1\documentclass{beamer}
2
3\usetheme{Frankfurt}
4\logo{\includegraphics[height=1.0cm]{fetopen.png}}
5
6\usepackage[english]{babel}
7%\usepackage{inputenc}
8
9
10%\documentclass[nopanel,ucs]{beamer}
11\usepackage[utf8x]{inputenc}
12%\usepackage[T1,OT1]{fontenc}
13%\usepackage{pifont}
14%\usepackage{aeguill}
15% using aeguill forces us to T1 everywhere, resulting in horribleness
16%  \def\selectguillfont{\fontencoding{T1}\fontfamily{cmtt}\selectfont}%
17%\def\guillemotleft{{\selectguillfont\symbol{19}}}%
18%\def\guillemotright{{\selectguillfont\symbol{20}}}%
19%\SetUnicodeOption{postscript}
20%\usepackage{hyperref}
21\usepackage{graphicx,color}
22\usepackage{latexsym}
23\usepackage{listings}
24
25\def\cerco{CerCo}
26\def\ocaml{OCaml}
27
28\newcommand{\red}[1]{\textcolor{red}{#1}}
29\newcommand{\blue}[1]{\textcolor{blue}{#1}}
30\definecolor{darkgreen}{rgb}{0,0.4,0}
31\newcommand{\green}[1]{\textcolor{darkgreen}{#1}}
32\newcommand{\gray}[1]{\textcolor{gray}{#1}}
33
34%\setbeamertemplate{footline}[page number]
35%\setbeamertemplate{sidebar right}{}
36
37\lstdefinelanguage{coq}
38  {keywords={Definition,Lemma,Theorem,Remark,Qed,Save,Inductive,Record},
39   morekeywords={[2]if,then,else},
40  }
41
42\lstdefinelanguage{matita}
43  {keywords={definition,lemma,theorem,remark,inductive,record,qed,let,rec,match,with,Type,and,on,return},
44   morekeywords={[2]whd,normalize,elim,cases,destruct},
45   mathescape=true,
46   morecomment=[n]{(*}{*)},
47  }
48
49\lstset{language=matita,basicstyle=\small\tt,columns=flexible,breaklines=false,
50        keywordstyle=\color{red}\bfseries,
51        keywordstyle=[2]\color{blue},
52        commentstyle=\color{green},
53        stringstyle=\color{blue},
54        showspaces=false,showstringspaces=false}
55
56\lstset{extendedchars=false}
57\lstset{inputencoding=utf8x}
58\DeclareUnicodeCharacter{8797}{:=}
59\DeclareUnicodeCharacter{10746}{++}
60\DeclareUnicodeCharacter{9001}{\ensuremath{\langle}}
61\DeclareUnicodeCharacter{9002}{\ensuremath{\rangle}}
62
63
64\begin{document}
65
66\title{CerCo Work Package 3\\
67Verified Compiler --- Front-end}
68\author{Brian Campbell\\ \scriptsize\smallskip
69\href{mailto:Brian.Campbell@ed.ac.uk}{\texttt{Brian.Campbell@ed.ac.uk}}}
70\institute{Laboratory for Foundations of Computer Science\\
71The University of Edinburgh}
72\date{March 16, 2011}
73\frame{\titlepage}
74
75
76\frame{
77\frametitle{Introduction}
78
79An interleaved presentation of work on:
80\begin{description}
81\item[T3.2] The encoding of the front-end of the \cerco{} compiler in CIC
82\item[T3.3] Defining the executable semantics of the front-end's intermediate
83  language
84\item[T3.4] Correctness proofs
85\end{description}
86
87\bigskip
88Deliverables D3.2 and D3.3 have been produced.
89
90}
91
92
93\frame{
94\frametitle{Outline}
95
96\begin{itemize}
97\item Common definitions for everything
98\item The front end:
99\end{itemize}
100
101\begin{tabbing}
102\quad \= $\downarrow$ \quad \= \kill
103\gray{\textsf{C} (unformalized)}\\
104\> \gray{$\downarrow$} \> \gray{CIL parser (unformalized \ocaml)}\\
105\textsf{Clight}\\
106\> $\downarrow$ \> cast removal\\
107\> $\downarrow$ \> add runtime functions\\
108\> $\downarrow$ \> labelling\\
109\> $\downarrow$ \> stack variable allocation and control structure
110 simplification\\
111\textsf{Cminor}\\
112\> $\downarrow$ \> generate global variable initialisation code\\
113\> $\downarrow$ \> transform to RTL graph\\
114\textsf{RTLabs}\\
115\> \,\gray{\vdots} \> \gray{start of target specific backend}
116\end{tabbing}
117
118\begin{itemize}
119\item Adding invariants
120\end{itemize}
121
122}
123
124
125\begin{frame}[fragile]
126\frametitle{Common: identifiers}
127
128Variable and function names, cost labels, registers, CFG labels, \dots
129
130\begin{itemize}
131\item Represented by arbitrarily large binary numbers and trees
132\begin{itemize}
133\item D3.3 described a `lazy failure' approach reusing existing structures
134\item Impractical for adding invariants (types may depend on success of
135  name generation)
136\item Use this CompCert-like system instead
137\end{itemize}
138\item Tags for some type safety:
139\begin{lstlisting}[language=matita]
140inductive identifier (tag:String) : Type[0] ≝
141  an_identifier : Word $\rightarrow$ identifier tag.
142\end{lstlisting}
143\end{itemize}
144
145\end{frame}
146
147\frame{
148\frametitle{Other common definitions}
149
150\begin{itemize}
151\item Memory, global environments from before.
152\item Bit vector based arithmetic from D4.1
153  \begin{itemize}
154  \item added extra operations, increased efficiency
155  \end{itemize}
156\item Common record type for small-step executable semantics
157  \begin{itemize}
158  \item used for animation
159  \item intended to be used when defining simulations
160  \end{itemize}
161\end{itemize}
162
163\bigskip
164\bigskip
165{\Large \blue{Front end operations}}
166
167\bigskip
168\textsf{Cminor} and \textsf{RTLabs} share operations on values.
169
170\begin{itemize}
171\item one syntax, one semantics
172\item straightforward
173\item no overloading (unlike \textsf{Clight})
174\end{itemize}
175
176}
177
178
179\frame{
180\frametitle{Clight: syntax and semantics}
181
182Largely unchanged from D3.1, except:
183
184\medskip
185\begin{itemize}
186\item use precise bit vectors instead of integer and range proof
187\item adapting to changes in common definitions
188\item added function to produce fresh name generator
189\begin{itemize}
190\item used for adding temporary variables
191\item works by finding largest existing identifier
192\item have shown necessary freshness proof
193\end{itemize}
194\end{itemize}
195}
196
197\begin{frame}[fragile]
198\frametitle{Clight: cast simplification}
199
200\begin{itemize}
201\item C insists on arithmetic promotion
202\item CIL-based \ocaml{} parser adds suitable casts
203\item bad for our target (32-bit ops instead of 8-bit)
204\end{itemize}
205
206Prototype recognises fixed patterns:
207\[ (t)((t_1)e_1\ op\ (t_2)e_2) \]
208and replaces when types are right.  Have done some preliminary proofs that this
209works.
210\begin{itemize}
211\item Deep pattern matching is awkward in Matita
212\item Misses (e.g.) \lstinline[language=C]'(char)((int)a+(int)b+(int)c)'
213\end{itemize}
214
215\red{Instead}: recursive `coerce to desired type' approach.\\
216 (Doesn't get some things,
217 e.g., \lstinline'==')
218% TODO: would like to separate, show example properly, note proofs
219\end{frame}
220
221\frame{
222\frametitle{Clight: labelling}
223
224Adds cost labels to Clight program.
225
226\begin{itemize}
227\item Implemented as a simple recursive function, like prototype
228\item uses common identifiers definition to produce fresh cost labels
229\end{itemize}
230
231The cost labelling is sound and precise, so we will be able to prove
232some syntactic properties required later:
233\begin{enumerate}
234\item every function starts with a cost label
235\item every branching instruction is followed by a cost label
236\item the head of every loop (including \lstinline'goto's) is a cost label
237\end{enumerate}
238
239}
240
241% \frame{
242% \frametitle{Clight: adding runtime functions}
243
244% Replaces ops not supported by target with function calls.
245
246% \begin{itemize}
247% \item Add functions as Clight ASTs (rather than prototype's reparsing approach)
248% \item substituting function calls is horrible
249% \begin{itemize}
250% \item Clight does not permit calls in expressions
251% \item Need to add statements in front of the target expression
252% \item Messy, duplication
253% \item Hard: just noticed that even the prototype gets it wrong
254% \end{itemize}
255% \item Implemented, but\dots
256% \end{itemize}
257
258% \textsc{Proposal}: shift to later in the compilation process
259% \begin{itemize}
260% \item easy substitution
261% \item adjust semantics to emit extra cost labels
262% \item get to use lower level features
263% \end{itemize}
264% }
265
266\begin{frame}[fragile]
267\frametitle{Cminor syntax and semantics}
268
269Similar language to CompCert's but developed from prototype.
270
271\begin{itemize}
272\item Some type enforcement for expressions:
273\end{itemize}
274
275% TODO: update!
276\begin{lstlisting}[language=matita]
277inductive expr : typ $\rightarrow$ Type[0] ≝
278| Id : $\forall$t. ident $\rightarrow$ expr t
279| Cst : $\forall$t. constant $\rightarrow$ expr t
280| Op1 : $\forall$t,t'. unary_operation $\rightarrow$ expr t $\rightarrow$ expr t'
281| Op2 : $\forall$t1,t2,t'. binary_operation $\rightarrow$ expr t1 $\rightarrow$ expr t2 $\rightarrow$ expr t'
282| Mem : $\forall$t,r. memory_chunk $\rightarrow$ expr (ASTptr r) $\rightarrow$ expr t
283| Cond : $\forall$sz,sg,t. expr (ASTint sz sg) $\rightarrow$ expr t $\rightarrow$ expr t $\rightarrow$ expr t
284| Ecost : $\forall$t. costlabel $\rightarrow$ expr t $\rightarrow$ expr t.
285\end{lstlisting}
286\begin{itemize}
287\item enforces some checking during production
288\item directly forces checking that temporaries are fresh
289\item gets rid of some corner cases in translation to RTLabs
290\end{itemize}
291
292Semantics unexciting small-step function.
293\end{frame}
294
295\begin{frame}
296\frametitle{Adding invariants}
297
298Lots of sources of failure in front-end due to badly formed programs:
299\begin{itemize}
300\item missing variables, graph nodes, \dots
301\item badly structured programs
302\end{itemize}
303
304\bigskip
305Have been refining the intermediate languages to rule these out:
306\begin{enumerate}
307\item \gray{Indexing Cminor syntax by nesting depth}
308\item adding type constraints
309\item adding invariants asserting presence of variables, etc.
310\end{enumerate}
311
312\end{frame}
313
314\begin{frame}[fragile]
315\frametitle{Invariants for identifiers}
316
317Invariants change between language syntax/semantics and compilation stages:
318
319\begin{description}
320\item[syntax] constraints between parts of function body and other information
321about the function
322\item[compilation] constraints between statements and expressions and data structures in the translation
323\end{description}
324
325When using or creating function records we prove that we can switch between
326them:
327\begin{lstlisting}[language=matita]
328lemma populates_env : $\forall$l,e,u,l',e',u'.
329  distinct_env ?? l $\rightarrow$ (* distinct names in l *)
330  populate_env e u l = $\langle$l',e',u'$\rangle$ $\rightarrow$
331                         (* build register mapping *)
332  $\forall$i,t. Exists ? ($\lambda$x.x = $\langle$i,t$\rangle$) l $\rightarrow$
333                        (* So anything in the environment... *)
334      Env_has e' i t.  (* maps to something of the correct type *)
335\end{lstlisting}
336
337\end{frame}
338
339\begin{frame}[fragile]
340\frametitle{Invariants in Cminor}
341
342Embedded invariant in the function record:
343\begin{lstlisting}[language=matita,basicstyle=\scriptsize\tt]
344record internal_function : Type[0] ≝
345{ f_return     : option typ
346; f_params     : list (ident $\times$ typ)
347; f_vars       : list (ident $\times$ typ)
348; f_stacksize : nat
349; f_body       : stmt
350; f_inv        :
351  stmt_P ($\lambda$s.
352    stmt_vars   ($\lambda$i,t.Exists ? ($\lambda$x. x = $\langle$i,t$\rangle$) (f_params @ f_vars)) s $\wedge$
353    stmt_labels ($\lambda$l.Exists ? ($\lambda$l'.l' = l) (labels_of f_body))     s)
354  f_body
355}.
356\end{lstlisting}
357\begin{enumerate}
358\item All variables are in the parameters or locals lists\\
359\quad with the correct type
360\item All \lstinline'goto' labels mentioned are defined in the body
361\end{enumerate}
362
363What is \lstinline'stmt_P'?
364\end{frame}
365
366\begin{frame}[fragile]
367\frametitle{Invariants in Cminor}
368
369\begin{lstlisting}[language=matita]
370let rec stmt_P (P:stmt $\rightarrow$ Prop) (s:stmt) on s : Prop ≝
371match s with
372[ St_seq s1 s2 $\Rightarrow$ P s $\wedge$ stmt_P P s1 $\wedge$ stmt_P P s2
373| St_ifthenelse _ _ _ s1 s2 $\Rightarrow$ P s $\wedge$ stmt_P P s1 $\wedge$ stmt_P P s2
374| St_loop s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
375| St_block s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
376| St_label _ s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
377| St_cost _ s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
378| _ $\Rightarrow$ P s
379].
380\end{lstlisting}
381
382Higher order predicate
383\begin{itemize}
384\item When we do pattern matching on associated statement get to unfold
385predicate
386\item Pretty easy to do proof obligations for substatements
387\item similar definition for expressions
388\item not necessary for RTLabs
389\end{itemize}
390
391\end{frame}
392
393\begin{frame}
394\frametitle{Clight to Cminor}
395
396Two main jobs:
397\begin{enumerate}
398\item make memory allocation of variables explicit
399\item use simpler control structures
400\end{enumerate}
401Again, based on prototype rather than CompCert.
402
403\bigskip
404Added type checking:
405\begin{itemize}
406\item satisfies the restrictions for Cminor expressions
407\item could separate out, have a `nice Clight' language
408\end{itemize}
409Similarly, check variable environments are sane.
410\end{frame}
411
412\begin{frame}
413\frametitle{Clight to Cminor proofs}
414
415Beyond the invariants already shown, we need to prove:
416\begin{enumerate}
417\item a simulation using
418  \begin{itemize}
419  \item memory injection (similar to CompCert)
420  \item relation based around source and target's statements and continuations
421  \end{itemize}
422\item syntactic cost labelling properties are preserved
423  \begin{itemize}
424  \item structural induction on function body
425  \end{itemize}
426\end{enumerate}
427\end{frame}
428
429\begin{frame}
430\frametitle{Cminor: initialization}
431
432Replace initialization data by code in the initial function.
433
434\begin{itemize}
435\item straightforward to define
436\item instantiate a slightly different version of the semantics for it
437\begin{itemize}
438\item Can't accidentally forget the pass
439\end{itemize}
440\end{itemize}
441
442\bigskip
443Correctness should follow because the state after the initialisation will
444be the same as the initial state of the original.
445\end{frame}
446
447\begin{frame}[fragile]
448\frametitle{RTLabs: syntax and semantics}
449
450Register Transfer Language with front-end operations.
451
452Control flow graph implemented by generic identifiers map:
453
454\begin{lstlisting}[language=matita]
455definition label ≝ identifier LabelTag.
456definition graph : Type[0] $\rightarrow$ Type[0] ≝ identifier_map LabelTag.
457
458inductive statement : Type[0] ≝
459| St_skip : label $\rightarrow$ statement
460| St_cost : costlabel $\rightarrow$ label $\rightarrow$ statement
461| St_const : register $\rightarrow$ constant $\rightarrow$ label $\rightarrow$ statement
462| St_op1 : $\forall$t,t'. unary_operation t' t $\rightarrow$ register
463   $\rightarrow$ register $\rightarrow$ label $\rightarrow$ statement
464...
465\end{lstlisting}
466
467\begin{itemize}
468\item Shares basic operations (including semantics) with Cminor.
469\item Tags prevent confusion of labels (graph vs.~goto vs.~cost).
470\end{itemize}
471
472Semantics are straightforward interpretation of statements.
473\end{frame}
474
475\begin{frame}[fragile]
476\frametitle{RTLabs: syntax and semantics}
477
478\begin{lstlisting}[language=matita]
479record internal_function : Type[0] ≝
480{ f_labgen    : universe LabelTag
481; f_reggen    : universe RegisterTag
482...
483; f_graph     : graph statement
484; f_closed    : graph_closed f_graph
485; f_typed     : graph_typed (f_locals @ f_params) f_graph
486...
487\end{lstlisting}
488
489Enforce that
490\begin{itemize}
491\item every statement's successor labels are present in the CFG
492\item every statement should be well-typed (for limited type system)
493\end{itemize}
494
495\end{frame}
496
497
498\begin{frame}
499\frametitle{Cminor to RTLabs}
500
501Break down statements and expressions into RTL graph.
502
503\begin{itemize}
504\item Incrementally build function backwards
505\item all state is in function record, like prototype
506\end{itemize}
507
508\bigskip
509May investigate whether a state monad makes invariant management easier.
510
511\bigskip
512Showing graph closure requires
513\begin{itemize}
514\item monotonicity of graph construction
515\item eventual insertion of all \lstinline'goto' destinations
516\end{itemize}
517
518Carry these along in the partially built function record.
519
520\end{frame}
521
522% \begin{frame}[fragile]
523% \frametitle{Cminor nesting depth}
524
525% Want to prevent
526% \begin{lstlisting}[language=C]
527% void f() {
528%   block { loop { exit 5; } }
529% }
530% \end{lstlisting}
531
532% Index statements by block depth
533% \begin{lstlisting}[language=matita]
534% inductive stmt : $\forall$blockdepth:nat. Type[0] ≝
535% | St_skip : $\forall$n. stmt n
536% ...
537% | St_block : $\forall$n. stmt (S n) $\rightarrow$ stmt n
538% | St_exit : $\forall$n. Fin n $\rightarrow$ stmt n
539% ...
540% \end{lstlisting}
541
542% \begin{itemize}
543% \item In semantics index continuations too
544% \item Use \lstinline[language=matita]'Fin n' to make \lstinline'k_exit'
545% a total function
546% \end{itemize}
547
548% (May be able to remove entirely if we redo \lstinline[language=C]{switch}?)
549% \end{frame}
550
551\begin{frame}[fragile]
552\frametitle{Establishing RTLabs invariants}
553
554Use dependent pairs to show invariant along with results.
555
556\begin{lstlisting}[language=matita,escapechar=\%]
557let rec add_expr (le:label_env) (env:env) (ty:typ) (e:expr ty)
558   (Env:expr_vars ty e (present ?? env))
559   (dst:register)
560   (f:partial_fn le)
561 on e: $\Sigma$f':partial_fn le. fn_graph_included le f f' ≝
562
563match e return
564   $\lambda$ty,e.expr_vars ty e (present ?? env) $\rightarrow$
565         $\Sigma$f':partial_fn le. fn_graph_included le f f' with
566[ ...
567| Cst _ c $\Rightarrow$ $\lambda$_. %\guillemotleft%add_fresh_to_graph ? (St_const dst c) f ?, ?%\guillemotright%
568...
569\end{lstlisting}
570
571Continually appeal to monotonicity (\lstinline'fn_graph_included'),
572use unification hints to simplify stepping back.
573
574\end{frame}
575
576% TODO: graph closed
577
578\frame{
579\frametitle{Cminor to RTLabs: cost labels}
580
581}
582
583\frame{
584\frametitle{RTLabs structured traces}
585
586}
587
588% \frame{
589% \frametitle{Difficulties with invariants}
590
591% \begin{itemize}
592% \item Not clear if we can use Russell
593% \begin{itemize}
594% \item Want to use existing proofs rather than generated equalities
595% \item Some non-uniform treatment of cases
596% \end{itemize}
597% \item Stresses Matita somewhat (try forgetting to make something a dependent pair)
598% \item Lots of fiddly proof obligations that are simple, but a pain to deal with
599% \begin{itemize}
600% \item Appeal to monotonicity 3 times, then note that the label was added to the graph here
601% \end{itemize}
602% \item Already verbose, yet don't cover globals.
603% \end{itemize}
604
605% I'd like to see if better use of Matita's features can help with this stuff.
606
607% \bigskip
608% Open to suggestions for better methods.
609% }
610
611\frame{
612\frametitle{Conclusion}
613
614Front-end in good shape:
615\begin{itemize}
616\item have executable semantics,
617\item have compilation stages.
618\end{itemize}
619
620\bigskip
621Refining some areas may be better in the long run.
622
623\bigskip
624Have a decent start on correctness-related activities.
625}
626
627\end{document}
Note: See TracBrowser for help on using the repository browser.