source: Deliverables/D1.2/Presentations/WP3.tex @ 1863

Last change on this file since 1863 was 1863, checked in by campbell, 8 years ago

Minor WP3 revisions.

File size: 17.8 KB
Line 
1\documentclass[nopanel]{beamer}
2\usepackage{graphicx,color}
3
4\usetheme{Frankfurt}
5\logo{\includegraphics[height=1.0cm]{fetopen.png}}
6
7\usepackage[english]{babel}
8%\usepackage{inputenc}
9
10
11%\documentclass[nopanel,ucs]{beamer}
12\usepackage[utf8x]{inputenc}
13%\usepackage[T1,OT1]{fontenc}
14%\usepackage{pifont}
15%\usepackage{aeguill}
16% using aeguill forces us to T1 everywhere, resulting in horribleness
17%  \def\selectguillfont{\fontencoding{T1}\fontfamily{cmtt}\selectfont}%
18%\def\guillemotleft{{\selectguillfont\symbol{19}}}%
19%\def\guillemotright{{\selectguillfont\symbol{20}}}%
20%\SetUnicodeOption{postscript}
21%\usepackage{hyperref}
22\usepackage{latexsym}
23\usepackage{listings}
24
25\def\cerco{CerCo}
26\def\ocaml{OCaml}
27
28\newcommand{\red}[1]{\textcolor{red}{#1}}
29\newcommand{\blue}[1]{\textcolor{blue}{#1}}
30\definecolor{darkgreen}{rgb}{0,0.4,0}
31\newcommand{\green}[1]{\textcolor{darkgreen}{#1}}
32\newcommand{\gray}[1]{\textcolor{gray}{#1}}
33
34%\setbeamertemplate{footline}[page number]
35%\setbeamertemplate{sidebar right}{}
36\setbeamertemplate{navigation symbols}{}
37
38\lstdefinelanguage{coq}
39  {keywords={Definition,Lemma,Theorem,Remark,Qed,Save,Inductive,Record},
40   morekeywords={[2]if,then,else},
41  }
42
43\lstdefinelanguage{matita}
44  {keywords={definition,lemma,theorem,remark,inductive,record,qed,let,rec,match,with,Type,and,on,return},
45   morekeywords={[2]whd,normalize,elim,cases,destruct},
46   mathescape=true,
47   morecomment=[n]{(*}{*)},
48  }
49
50\lstset{language=matita,basicstyle=\small\tt,columns=flexible,breaklines=false,
51        keywordstyle=\color{red}\bfseries,
52        keywordstyle=[2]\color{blue},
53        commentstyle=\color{darkgreen},
54        stringstyle=\color{blue},
55        showspaces=false,showstringspaces=false}
56
57\lstset{extendedchars=false}
58\lstset{inputencoding=utf8x}
59\DeclareUnicodeCharacter{8797}{:=}
60\DeclareUnicodeCharacter{10746}{++}
61\DeclareUnicodeCharacter{9001}{\ensuremath{\langle}}
62\DeclareUnicodeCharacter{9002}{\ensuremath{\rangle}}
63
64
65\begin{document}
66
67\title{CerCo Work Package 3\\
68Verified Compiler --- Front-end}
69\author{Brian Campbell\\ \scriptsize\smallskip
70\href{mailto:Brian.Campbell@ed.ac.uk}{\texttt{Brian.Campbell@ed.ac.uk}}}
71\institute{Laboratory for Foundations of Computer Science\\
72The University of Edinburgh}
73\date{March 16, 2011}
74\frame{\titlepage}
75
76
77\frame{
78\frametitle{Achievements in period 2}
79
80\begin{description}[T3.2]
81\item[T3.2] \alert{Completed}: Matita encoding of compiler front-end
82\item[T3.3] \alert{Completed}: Executable semantics for intermediate  languages
83\item[T3.4] \alert{In progress}: Correctness proofs
84\end{description}
85
86\bigskip
87Deliverables D3.2 and D3.3 submitted.
88
89\bigskip
90This talk is an interleaved presentation of these results.
91
92
93}
94
95
96\frame{
97\frametitle{Outline}
98
99\begin{itemize}
100\item Shared definitions
101%\item The front-end:
102%\end{itemize}
103%\vspace{-3ex}
104\item \begin{tabbing}
105\quad \= $\downarrow$ \quad \= \kill
106\gray{\textsf{C} (unformalized)}\\
107\> \gray{$\downarrow$} \> \gray{CIL parser (unformalized \ocaml)}\\
108\textsf{Clight}\\
109\> $\downarrow$ \> cast removal\\
110\> \gray{$\downarrow$} \> \gray{add runtime functions}\\
111\> $\downarrow$ \> cost labelling\\
112\> $\downarrow$ \> stack variable allocation and control structure
113 simplification\\
114\textsf{Cminor}\\
115\> $\downarrow$ \> generate global variable initialisation code\\
116\> $\downarrow$ \> transform to RTL graph\\
117\textsf{RTLabs}\\
118\> \,\gray{\vdots} \> \gray{start of target specific back-end}
119\end{tabbing}
120%\vspace{-3ex}
121%\begin{itemize}
122\item Structured traces
123\end{itemize}
124
125}
126
127\section{Shared}
128
129\begin{frame}[fragile]
130\frametitle{Shared definitions}
131
132Identifiers: variables, cost labels, CFG labels, \dots
133
134\begin{itemize}
135\item Represented by arbitrarily large binary numbers and trees
136\begin{itemize}
137\item D3.3 `lazy failure' approach reusing existing structures
138\item Invariant's types may depend on success of name generation
139%\item Use this CompCert-like system instead
140\end{itemize}
141\item Tags for some type safety:
142\begin{lstlisting}[language=matita]
143inductive identifier (tag:String) : Type[0] ≝
144  an_identifier : Word $\rightarrow$ identifier tag.
145\end{lstlisting}
146\end{itemize}
147
148Memory, global environments from D3.1\\
149Bit vector based arithmetic from D4.1
150  \begin{itemize}
151  \item added extra operations, increased efficiency
152  \end{itemize}
153%Common record type for small-step executable semantics
154%  \begin{itemize}
155%  \item used for animation
156%  \item intended to be used when defining simulations
157%  \end{itemize}
158
159
160
161\textsf{Cminor} and \textsf{RTLabs} share operations on values.
162
163
164\end{frame}
165
166\section{Clight}
167\frame{
168\frametitle{Clight: syntax and semantics}
169
170Modest evolution from D3.1:
171
172\medskip
173\begin{itemize}
174\item use precise bit vectors instead of integer and range proof
175\item adapting to changes in common definitions
176\item added function to produce fresh name generator
177\begin{itemize}
178\item used for adding temporary variables
179\item works by finding largest existing identifier
180\item have shown necessary freshness proof
181\end{itemize}
182\end{itemize}
183}
184
185\begin{frame}[fragile]
186\frametitle{Clight: cast simplification}
187
188C insists on arithmetic promotion
189\begin{itemize}
190\item CIL-based \ocaml{} parser adds suitable casts
191\item bad for our target (32-bit ops instead of 8-bit)
192\end{itemize}
193
194\medskip
195Prototype recognises fixed patterns to simplify:
196\[ (t)((t_1)e_1\ op\ (t_2)e_2) \]
197\vspace{-4ex}
198\begin{itemize}
199\item Deep pattern matching is awkward in Matita
200\item Misses (e.g.) \lstinline[language=C]'(char)((int)a+(int)b+(int)c)'
201\end{itemize}
202
203\medskip
204\red{Instead}: recursive `coerce to desired type' approach.
205
206Have done some preliminary proofs that this
207works.
208% (Doesn't get some things,
209% e.g., \lstinline'==')
210\end{frame}
211
212\frame{
213\frametitle{Clight: cost labelling}
214
215Adds cost labels to Clight program.
216
217\begin{itemize}
218\item a simple recursive function, like prototype
219\item uses common identifiers definition to produce fresh cost labels
220\end{itemize}
221
222The cost labelling is sound and precise, so we will be able to prove
223some syntactic properties required later:
224\begin{enumerate}
225\item every function starts with a cost label
226\item every branching instruction is followed by a cost label
227\item the head of every loop (including \lstinline'goto's) is a cost label
228\end{enumerate}
229
230}
231
232\section{Cminor}
233\begin{frame}[fragile]
234\frametitle{Cminor syntax and semantics}
235
236Similar to CompCert's --- but developed from prototype.
237
238\begin{itemize}
239\item Some type enforcement for expressions:
240\end{itemize}
241
242\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt]
243inductive expr : typ $\rightarrow$ Type[0] ≝
244| Id : $\forall$t. ident $\rightarrow$ expr t
245| Cst : $\forall$t. constant $\rightarrow$ expr t
246| Op1 : $\forall$t,t'. unary_operation t t' $\rightarrow$ expr t $\rightarrow$ expr t'
247| Op2 : $\forall$t1,t2,t'. binary_operation $\rightarrow$ expr t1 $\rightarrow$ expr t2 $\rightarrow$ expr t'
248| Mem : $\forall$t,r. memory_chunk $\rightarrow$ expr (ASTptr r) $\rightarrow$ expr t
249| Cond : $\forall$sz,sg,t. expr (ASTint sz sg) $\rightarrow$ expr t $\rightarrow$ expr t $\rightarrow$ expr t
250| Ecost : $\forall$t. costlabel $\rightarrow$ expr t $\rightarrow$ expr t.
251\end{lstlisting}
252\begin{itemize}
253%\item enforces some checking during production
254\item indirectly forces checking that temporaries are fresh
255\item[$\star$] reduces failure cases in translation to RTLabs
256\end{itemize}
257
258Semantics unexciting small-step function.
259\end{frame}
260
261\begin{frame}
262\frametitle{Adding invariants}
263
264Lots of sources of failure in front-end due to badly formed programs:
265\begin{itemize}
266\item missing variables, graph nodes, \dots
267\item badly structured programs
268\end{itemize}
269
270\bigskip
271Have been refining the intermediate languages to rule these out:
272\begin{enumerate}
273\item \gray{Indexing Cminor syntax by nesting depth}
274\item adding type constraints
275\item adding invariants asserting presence of variables, etc.
276\end{enumerate}
277
278\end{frame}
279
280\begin{frame}[fragile]
281\frametitle{Invariants for identifiers}
282
283
284Invariants change between languages and compilation:
285
286\begin{description}
287\item[syntax] constraints between parts of function body and other information
288about the function
289\item[compilation] constraints between statements and expressions and data structures in the translation
290\end{description}
291
292When using or creating function definitions have proved that we can switch between
293invariants:
294\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt]
295lemma populates_env : $\forall$l,e,u,l',e',u'.
296  distinct_env ?? l $\rightarrow$                    (* distinct names in l *)
297  populate_env e u l = $\langle$l',e',u'$\rangle$ $\rightarrow$ (* build register mapping *)
298  $\forall$i,t. Exists ? ($\lambda$x.x = $\langle$i,t$\rangle$) l $\rightarrow$ (* Anything in the environment... *)
299      Env_has e' i t.      (* maps to something of the correct type *)
300\end{lstlisting}
301
302\vfill
303\end{frame}
304
305\begin{frame}[fragile]
306\frametitle{Invariants in Cminor}
307
308Embedded invariant in the function record:
309\begin{lstlisting}[language=matita,basicstyle=\scriptsize\tt,morekeywords=f_inv]
310record internal_function : Type[0] ≝
311{ f_return     : option typ
312; f_params     : list (ident $\times$ typ)
313; f_vars       : list (ident $\times$ typ)
314; f_stacksize : nat
315; f_body       : stmt
316; f_inv        :
317  stmt_P ($\lambda$s.
318    stmt_vars   ($\lambda$i,t.Exists ? ($\lambda$x. x = $\langle$i,t$\rangle$) (f_params @ f_vars)) s $\wedge$
319    stmt_labels ($\lambda$l.Exists ? ($\lambda$l'.l' = l) (labels_of f_body))     s)
320  f_body
321}.
322\end{lstlisting}
323\begin{enumerate}
324\item All variables are in the parameters or locals lists\\
325\quad --- with the correct type
326\item All \lstinline'goto' labels mentioned are defined in the body
327\end{enumerate}
328
329\end{frame}
330
331
332\section{Clight to Cminor}
333
334\begin{frame}
335\frametitle{Clight to Cminor}
336
337Two main jobs:
338\begin{enumerate}
339\item make memory allocation of variables explicit
340\item use simpler control structures
341\end{enumerate}
342Again, based on prototype rather than CompCert.
343
344\bigskip
345Added type checking:
346\begin{itemize}
347\item satisfies the restrictions for Cminor expressions
348\item could separate out, have a `nice Clight' language
349\end{itemize}
350Similarly, code checks
351\begin{itemize}
352\item variable environments are well-formed
353\item all \lstinline'goto' labels are translated
354\end{itemize}
355\end{frame}
356
357\begin{frame}
358\frametitle{Clight to Cminor proofs}
359
360Beyond the invariants already shown, we will prove:
361
362\medskip
363\begin{enumerate}
364\item a simulation relation
365  \begin{itemize}
366  \item between statement and continuation pairs
367  \item using memory injection (similar to CompCert)
368  \end{itemize}
369\item syntactic cost labelling properties are preserved
370  \begin{itemize}
371  \item structural induction on function body
372  \end{itemize}
373\end{enumerate}
374\end{frame}
375
376\section{Initialisation}
377\begin{frame}
378\frametitle{Cminor: initialization}
379
380Replace initialization data by code in the initial function.
381
382\begin{itemize}
383\item straightforward to define
384\item instantiate a slightly different version of the semantics for it
385\begin{itemize}
386\item Can't accidentally forget the pass
387\end{itemize}
388\end{itemize}
389
390\bigskip
391Correctness should follow because the state after the initialisation will
392be the same as the initial state of the original.
393\end{frame}
394
395\section{RTLabs}
396\begin{frame}[fragile]
397\frametitle{RTLabs: syntax and semantics}
398
399Register Transfer Language with front-end operations.
400
401Control flow graph implemented by generic identifiers map:
402
403\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt]
404definition label := identifier LabelTag.
405definition graph : Type[0] $\rightarrow$ Type[0] := identifier_map LabelTag.
406
407inductive statement : Type[0] ≝
408| St_skip : label $\rightarrow$ statement
409| St_const : register $\rightarrow$ constant $\rightarrow$ label $\rightarrow$ statement
410| St_op1 : $\forall$t,t'. unary_operation t' t $\rightarrow$ register
411   $\rightarrow$ register $\rightarrow$ label $\rightarrow$ statement
412...
413\end{lstlisting}
414\vspace{-1.5ex}
415\begin{itemize}
416\item Shares basic operations (including semantics) with Cminor.
417\item Tags prevent confusion of labels (graph vs.~goto vs.~cost).
418\end{itemize}
419
420Semantics straightforward interpretation of statements.
421\end{frame}
422
423\begin{frame}[fragile]
424\frametitle{RTLabs: syntax and semantics}
425
426\begin{lstlisting}[language=matita]
427record internal_function : Type[0] ≝
428{ f_labgen    : universe LabelTag
429; f_reggen    : universe RegisterTag
430...
431; f_graph     : graph statement
432; f_closed    : graph_closed f_graph
433; f_typed     : graph_typed (f_locals @ f_params) f_graph
434...
435\end{lstlisting}
436
437Enforce that
438\begin{itemize}
439\item every statement's successor labels are present in the CFG
440\item every statement should be well-typed (for limited type system)
441\end{itemize}
442
443\end{frame}
444
445\section{Cminor to RTLabs}
446\begin{frame}
447\frametitle{Cminor to RTLabs}
448
449Break down statements and expressions into RTL graph.
450
451\begin{itemize}
452\item Incrementally build function backwards
453\item all state is in function record, like prototype
454\end{itemize}
455
456%\bigskip
457%May investigate whether a state monad makes invariant management easier.
458
459\bigskip
460Showing graph closure requires
461\begin{itemize}
462\item monotonicity of graph construction
463\item eventual insertion of all \lstinline'goto' destinations
464\end{itemize}
465
466Carry these along in the partially built function record.
467
468\end{frame}
469
470% \begin{frame}[fragile]
471% \frametitle{Cminor nesting depth}
472
473% Want to prevent
474% \begin{lstlisting}[language=C]
475% void f() {
476%   block { loop { exit 5; } }
477% }
478% \end{lstlisting}
479
480% Index statements by block depth
481% \begin{lstlisting}[language=matita]
482% inductive stmt : $\forall$blockdepth:nat. Type[0] ≝
483% | St_skip : $\forall$n. stmt n
484% ...
485% | St_block : $\forall$n. stmt (S n) $\rightarrow$ stmt n
486% | St_exit : $\forall$n. Fin n $\rightarrow$ stmt n
487% ...
488% \end{lstlisting}
489
490% \begin{itemize}
491% \item In semantics index continuations too
492% \item Use \lstinline[language=matita]'Fin n' to make \lstinline'k_exit'
493% a total function
494% \end{itemize}
495
496% (May be able to remove entirely if we redo \lstinline[language=C]{switch}?)
497% \end{frame}
498
499\begin{frame}[fragile]
500\frametitle{Establishing RTLabs invariants}
501
502Use dependent pairs to show invariant along with results.
503
504\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt,escapechar=\%]
505let rec add_expr ... (e:expr ty)
506   (Env:expr_vars ty e (present ?? env))   %\quad%  (* invariant *)
507   (f:partial_fn le)
508 on e: $\Sigma$f':partial_fn le. fn_graph_included le f f' ≝
509
510match e return
511   $\lambda$ty,e.expr_vars ty e (present ?? env) $\rightarrow$ ... with
512[ ...
513| Cst _ c $\Rightarrow$ $\lambda$_. %\guillemotleft%add_fresh_to_graph ? (St_const dst c) f ?, ?%\guillemotright%
514...
515\end{lstlisting}
516
517Continually appeal to monotonicity (\lstinline'fn_graph_included').\\
518Use unification hints to simplify stepping back.
519
520\end{frame}
521
522\frame{
523\frametitle{Cminor to RTLabs: cost labels}
524
525Two cost label properties are the same, the third will require more
526work:
527\begin{enumerate}
528\item cost label at head of function
529\item cost label after branching instructions
530\item cost labels at the head of each loop / \lstinline'goto' destination
531\end{enumerate}
532
533No simple notion of the head of a loop or \lstinline'goto' any more.
534
535\medskip
536Instead: will prove in \alert{Cminor} that after following a finite
537number of instructions we reach either
538\begin{itemize}
539\item a cost label, or
540\item the end of the function
541\end{itemize}
542
543}
544
545\section{Structured traces}
546\frame{
547\frametitle{RTLabs structured traces}
548
549Front-end only uses flat traces consisting of single steps.
550
551\bigskip
552The back-end will need the function call structure and the labelling
553properties in order to show that the generated costs are correct.
554
555\begin{itemize}
556\item Traces are structured in sections from cost label to cost label,
557\item the entire execution of function calls nested as a single `step',
558\item a coinductive definition presents non-terminating traces, using the
559  inductive definition for all terminating function calls
560\end{itemize}
561
562\bigskip
563RTLabs chosen because it is the first languages where statements:
564\begin{itemize}
565\item take one step each (modulo function calls)
566\item have individual `addresses'
567\end{itemize}
568}
569
570\frame{
571\frametitle{RTLabs structured traces}
572Have already established the existence of such traces
573\begin{itemize}
574\item termination decided classically
575\item syntactic labelling properties used to build semantic structure
576\item show stack preservation to ensure that function calls \texttt{return}
577  to the correct location
578\item tricky to establish guardedness of definitions
579\end{itemize}
580
581\bigskip
582Next, prove that flattening these traces yields the original.
583}
584
585\frame{
586\frametitle{Conclusion}
587
588Syntax, semantics and translations of prototype now implemented in Matita.
589
590\bigskip
591Have defined and established
592\begin{itemize}
593\item invariants regarding variables, typing and program structure
594\item a rich form of execution trace to pass to the back-end
595\end{itemize}
596
597\medskip
598Work in progress:
599\begin{itemize}
600\item showing functional correctness of the front-end
601\item proving that cost labelling is appropriate, and preserved
602\end{itemize}
603
604Finally, \alert{end-to-end} functional and cost correctness results.
605
606}
607
608\frame{
609\frametitle{Extra detail beyond here...}
610}
611
612\begin{frame}[fragile]
613\frametitle{Invariants in Cminor}
614
615\begin{lstlisting}[language=matita,basicstyle=\footnotesize\tt]
616let rec stmt_P (P:stmt $\rightarrow$ Prop) (s:stmt) on s : Prop ≝
617match s with
618[ St_seq s1 s2 $\Rightarrow$ P s $\wedge$ stmt_P P s1 $\wedge$ stmt_P P s2
619| St_ifthenelse _ _ _ s1 s2 $\Rightarrow$ P s $\wedge$ stmt_P P s1 $\wedge$ stmt_P P s2
620| St_loop s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
621| St_block s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
622| St_label _ s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
623| St_cost _ s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
624| _ $\Rightarrow$ P s
625].
626\end{lstlisting}
627
628Higher order predicate
629\begin{itemize}
630\item When we pattern match on the statement the predicate can unfold
631\item Easy to discharge proof obligations for substatements
632\item similar definition for expressions
633\end{itemize}
634
635\end{frame}
636\end{document}
Note: See TracBrowser for help on using the repository browser.