source: Deliverables/D1.2/Presentations/WP3.tex @ 1830

Last change on this file since 1830 was 1830, checked in by campbell, 7 years ago

Rest of WP3 presentation.

File size: 19.1 KB
Line 
1\documentclass[nopanel]{beamer}
2
3\usetheme{Frankfurt}
4\logo{\includegraphics[height=1.0cm]{fetopen.png}}
5
6\usepackage[english]{babel}
7%\usepackage{inputenc}
8
9
10%\documentclass[nopanel,ucs]{beamer}
11\usepackage[utf8x]{inputenc}
12%\usepackage[T1,OT1]{fontenc}
13%\usepackage{pifont}
14%\usepackage{aeguill}
15% using aeguill forces us to T1 everywhere, resulting in horribleness
16%  \def\selectguillfont{\fontencoding{T1}\fontfamily{cmtt}\selectfont}%
17%\def\guillemotleft{{\selectguillfont\symbol{19}}}%
18%\def\guillemotright{{\selectguillfont\symbol{20}}}%
19%\SetUnicodeOption{postscript}
20%\usepackage{hyperref}
21\usepackage{graphicx,color}
22\usepackage{latexsym}
23\usepackage{listings}
24
25\def\cerco{CerCo}
26\def\ocaml{OCaml}
27
28\newcommand{\red}[1]{\textcolor{red}{#1}}
29\newcommand{\blue}[1]{\textcolor{blue}{#1}}
30\definecolor{darkgreen}{rgb}{0,0.4,0}
31\newcommand{\green}[1]{\textcolor{darkgreen}{#1}}
32\newcommand{\gray}[1]{\textcolor{gray}{#1}}
33
34%\setbeamertemplate{footline}[page number]
35%\setbeamertemplate{sidebar right}{}
36
37\lstdefinelanguage{coq}
38  {keywords={Definition,Lemma,Theorem,Remark,Qed,Save,Inductive,Record},
39   morekeywords={[2]if,then,else},
40  }
41
42\lstdefinelanguage{matita}
43  {keywords={definition,lemma,theorem,remark,inductive,record,qed,let,rec,match,with,Type,and,on,return},
44   morekeywords={[2]whd,normalize,elim,cases,destruct},
45   mathescape=true,
46   morecomment=[n]{(*}{*)},
47  }
48
49\lstset{language=matita,basicstyle=\small\tt,columns=flexible,breaklines=false,
50        keywordstyle=\color{red}\bfseries,
51        keywordstyle=[2]\color{blue},
52        commentstyle=\color{green},
53        stringstyle=\color{blue},
54        showspaces=false,showstringspaces=false}
55
56\lstset{extendedchars=false}
57\lstset{inputencoding=utf8x}
58\DeclareUnicodeCharacter{8797}{:=}
59\DeclareUnicodeCharacter{10746}{++}
60\DeclareUnicodeCharacter{9001}{\ensuremath{\langle}}
61\DeclareUnicodeCharacter{9002}{\ensuremath{\rangle}}
62
63
64\begin{document}
65
66\title{CerCo Work Package 3\\
67Verified Compiler --- Front-end}
68\author{Brian Campbell\\ \scriptsize\smallskip
69\href{mailto:Brian.Campbell@ed.ac.uk}{\texttt{Brian.Campbell@ed.ac.uk}}}
70\institute{Laboratory for Foundations of Computer Science\\
71The University of Edinburgh}
72\date{March 16, 2011}
73\frame{\titlepage}
74
75
76\frame{
77\frametitle{Introduction}
78
79An interleaved presentation of work on:
80\begin{description}
81\item[T3.2] The encoding of the front-end of the \cerco{} compiler in CIC
82\item[T3.3] Defining the executable semantics of the front-end's intermediate
83  language
84\item[T3.4] Correctness proofs
85\end{description}
86
87\bigskip
88Deliverables D3.2 and D3.3 have been produced.
89
90}
91
92
93\frame{
94\frametitle{Outline}
95
96\begin{itemize}
97\item Common definitions for everything
98\item The front end:
99\end{itemize}
100
101\begin{tabbing}
102\quad \= $\downarrow$ \quad \= \kill
103\gray{\textsf{C} (unformalized)}\\
104\> \gray{$\downarrow$} \> \gray{CIL parser (unformalized \ocaml)}\\
105\textsf{Clight}\\
106\> $\downarrow$ \> cast removal\\
107\> $\downarrow$ \> add runtime functions\\
108\> $\downarrow$ \> labelling\\
109\> $\downarrow$ \> stack variable allocation and control structure
110 simplification\\
111\textsf{Cminor}\\
112\> $\downarrow$ \> generate global variable initialisation code\\
113\> $\downarrow$ \> transform to RTL graph\\
114\textsf{RTLabs}\\
115\> \,\gray{\vdots} \> \gray{start of target specific backend}
116\end{tabbing}
117
118\begin{itemize}
119\item Adding invariants
120\end{itemize}
121
122}
123
124\section{Common}
125
126\begin{frame}[fragile]
127\frametitle{Common: identifiers}
128
129Variable and function names, cost labels, registers, CFG labels, \dots
130
131\begin{itemize}
132\item Represented by arbitrarily large binary numbers and trees
133\begin{itemize}
134\item D3.3 described a `lazy failure' approach reusing existing structures
135\item Impractical for adding invariants (types may depend on success of
136  name generation)
137\item Use this CompCert-like system instead
138\end{itemize}
139\item Tags for some type safety:
140\begin{lstlisting}[language=matita]
141inductive identifier (tag:String) : Type[0] ≝
142  an_identifier : Word $\rightarrow$ identifier tag.
143\end{lstlisting}
144\end{itemize}
145
146\end{frame}
147
148\frame{
149\frametitle{Other common definitions}
150
151\begin{itemize}
152\item Memory, global environments from before.
153\item Bit vector based arithmetic from D4.1
154  \begin{itemize}
155  \item added extra operations, increased efficiency
156  \end{itemize}
157\item Common record type for small-step executable semantics
158  \begin{itemize}
159  \item used for animation
160  \item intended to be used when defining simulations
161  \end{itemize}
162\end{itemize}
163
164\bigskip
165\bigskip
166{\Large \blue{Front end operations}}
167
168\bigskip
169\textsf{Cminor} and \textsf{RTLabs} share operations on values.
170
171\begin{itemize}
172\item one syntax, one semantics
173\item straightforward
174\item no overloading (unlike \textsf{Clight})
175\end{itemize}
176
177}
178
179\section{Clight}
180\frame{
181\frametitle{Clight: syntax and semantics}
182
183Largely unchanged from D3.1, except:
184
185\medskip
186\begin{itemize}
187\item use precise bit vectors instead of integer and range proof
188\item adapting to changes in common definitions
189\item added function to produce fresh name generator
190\begin{itemize}
191\item used for adding temporary variables
192\item works by finding largest existing identifier
193\item have shown necessary freshness proof
194\end{itemize}
195\end{itemize}
196}
197
198\begin{frame}[fragile]
199\frametitle{Clight: cast simplification}
200
201\begin{itemize}
202\item C insists on arithmetic promotion
203\item CIL-based \ocaml{} parser adds suitable casts
204\item bad for our target (32-bit ops instead of 8-bit)
205\end{itemize}
206
207Prototype recognises fixed patterns:
208\[ (t)((t_1)e_1\ op\ (t_2)e_2) \]
209and replaces when types are right.  Have done some preliminary proofs that this
210works.
211\begin{itemize}
212\item Deep pattern matching is awkward in Matita
213\item Misses (e.g.) \lstinline[language=C]'(char)((int)a+(int)b+(int)c)'
214\end{itemize}
215
216\red{Instead}: recursive `coerce to desired type' approach.\\
217 (Doesn't get some things,
218 e.g., \lstinline'==')
219% TODO: would like to separate, show example properly, note proofs
220\end{frame}
221
222\frame{
223\frametitle{Clight: labelling}
224
225Adds cost labels to Clight program.
226
227\begin{itemize}
228\item Implemented as a simple recursive function, like prototype
229\item uses common identifiers definition to produce fresh cost labels
230\end{itemize}
231
232The cost labelling is sound and precise, so we will be able to prove
233some syntactic properties required later:
234\begin{enumerate}
235\item every function starts with a cost label
236\item every branching instruction is followed by a cost label
237\item the head of every loop (including \lstinline'goto's) is a cost label
238\end{enumerate}
239
240}
241
242% \frame{
243% \frametitle{Clight: adding runtime functions}
244
245% Replaces ops not supported by target with function calls.
246
247% \begin{itemize}
248% \item Add functions as Clight ASTs (rather than prototype's reparsing approach)
249% \item substituting function calls is horrible
250% \begin{itemize}
251% \item Clight does not permit calls in expressions
252% \item Need to add statements in front of the target expression
253% \item Messy, duplication
254% \item Hard: just noticed that even the prototype gets it wrong
255% \end{itemize}
256% \item Implemented, but\dots
257% \end{itemize}
258
259% \textsc{Proposal}: shift to later in the compilation process
260% \begin{itemize}
261% \item easy substitution
262% \item adjust semantics to emit extra cost labels
263% \item get to use lower level features
264% \end{itemize}
265% }
266\section{Cminor}
267\begin{frame}[fragile]
268\frametitle{Cminor syntax and semantics}
269
270Similar language to CompCert's but developed from prototype.
271
272\begin{itemize}
273\item Some type enforcement for expressions:
274\end{itemize}
275
276% TODO: update!
277\begin{lstlisting}[language=matita]
278inductive expr : typ $\rightarrow$ Type[0] ≝
279| Id : $\forall$t. ident $\rightarrow$ expr t
280| Cst : $\forall$t. constant $\rightarrow$ expr t
281| Op1 : $\forall$t,t'. unary_operation $\rightarrow$ expr t $\rightarrow$ expr t'
282| Op2 : $\forall$t1,t2,t'. binary_operation $\rightarrow$ expr t1 $\rightarrow$ expr t2 $\rightarrow$ expr t'
283| Mem : $\forall$t,r. memory_chunk $\rightarrow$ expr (ASTptr r) $\rightarrow$ expr t
284| Cond : $\forall$sz,sg,t. expr (ASTint sz sg) $\rightarrow$ expr t $\rightarrow$ expr t $\rightarrow$ expr t
285| Ecost : $\forall$t. costlabel $\rightarrow$ expr t $\rightarrow$ expr t.
286\end{lstlisting}
287\begin{itemize}
288\item enforces some checking during production
289\item directly forces checking that temporaries are fresh
290\item gets rid of some corner cases in translation to RTLabs
291\end{itemize}
292
293Semantics unexciting small-step function.
294\end{frame}
295
296\begin{frame}
297\frametitle{Adding invariants}
298
299Lots of sources of failure in front-end due to badly formed programs:
300\begin{itemize}
301\item missing variables, graph nodes, \dots
302\item badly structured programs
303\end{itemize}
304
305\bigskip
306Have been refining the intermediate languages to rule these out:
307\begin{enumerate}
308\item \gray{Indexing Cminor syntax by nesting depth}
309\item adding type constraints
310\item adding invariants asserting presence of variables, etc.
311\end{enumerate}
312
313\end{frame}
314
315\begin{frame}[fragile]
316\frametitle{Invariants for identifiers}
317
318Invariants change between language syntax/semantics and compilation stages:
319
320\begin{description}
321\item[syntax] constraints between parts of function body and other information
322about the function
323\item[compilation] constraints between statements and expressions and data structures in the translation
324\end{description}
325
326When using or creating function records we prove that we can switch between
327them:
328\begin{lstlisting}[language=matita]
329lemma populates_env : $\forall$l,e,u,l',e',u'.
330  distinct_env ?? l $\rightarrow$ (* distinct names in l *)
331  populate_env e u l = $\langle$l',e',u'$\rangle$ $\rightarrow$
332                         (* build register mapping *)
333  $\forall$i,t. Exists ? ($\lambda$x.x = $\langle$i,t$\rangle$) l $\rightarrow$
334                        (* So anything in the environment... *)
335      Env_has e' i t.  (* maps to something of the correct type *)
336\end{lstlisting}
337
338\end{frame}
339
340\begin{frame}[fragile]
341\frametitle{Invariants in Cminor}
342
343Embedded invariant in the function record:
344\begin{lstlisting}[language=matita,basicstyle=\scriptsize\tt]
345record internal_function : Type[0] ≝
346{ f_return     : option typ
347; f_params     : list (ident $\times$ typ)
348; f_vars       : list (ident $\times$ typ)
349; f_stacksize : nat
350; f_body       : stmt
351; f_inv        :
352  stmt_P ($\lambda$s.
353    stmt_vars   ($\lambda$i,t.Exists ? ($\lambda$x. x = $\langle$i,t$\rangle$) (f_params @ f_vars)) s $\wedge$
354    stmt_labels ($\lambda$l.Exists ? ($\lambda$l'.l' = l) (labels_of f_body))     s)
355  f_body
356}.
357\end{lstlisting}
358\begin{enumerate}
359\item All variables are in the parameters or locals lists\\
360\quad with the correct type
361\item All \lstinline'goto' labels mentioned are defined in the body
362\end{enumerate}
363
364What is \lstinline'stmt_P'?
365\end{frame}
366
367\begin{frame}[fragile]
368\frametitle{Invariants in Cminor}
369
370\begin{lstlisting}[language=matita]
371let rec stmt_P (P:stmt $\rightarrow$ Prop) (s:stmt) on s : Prop ≝
372match s with
373[ St_seq s1 s2 $\Rightarrow$ P s $\wedge$ stmt_P P s1 $\wedge$ stmt_P P s2
374| St_ifthenelse _ _ _ s1 s2 $\Rightarrow$ P s $\wedge$ stmt_P P s1 $\wedge$ stmt_P P s2
375| St_loop s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
376| St_block s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
377| St_label _ s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
378| St_cost _ s' $\Rightarrow$ P s $\wedge$ stmt_P P s'
379| _ $\Rightarrow$ P s
380].
381\end{lstlisting}
382
383Higher order predicate
384\begin{itemize}
385\item When we do pattern matching on associated statement get to unfold
386predicate
387\item Pretty easy to do proof obligations for substatements
388\item similar definition for expressions
389\item not necessary for RTLabs
390\end{itemize}
391
392\end{frame}
393
394
395\section{Clight to Cminor}
396
397\begin{frame}
398\frametitle{Clight to Cminor}
399
400Two main jobs:
401\begin{enumerate}
402\item make memory allocation of variables explicit
403\item use simpler control structures
404\end{enumerate}
405Again, based on prototype rather than CompCert.
406
407\bigskip
408Added type checking:
409\begin{itemize}
410\item satisfies the restrictions for Cminor expressions
411\item could separate out, have a `nice Clight' language
412\end{itemize}
413Similarly, check variable environments are sane.
414\end{frame}
415
416\begin{frame}
417\frametitle{Clight to Cminor proofs}
418
419Beyond the invariants already shown, we need to prove:
420\begin{enumerate}
421\item a simulation using
422  \begin{itemize}
423  \item memory injection (similar to CompCert)
424  \item relation based around source and target's statements and continuations
425  \end{itemize}
426\item syntactic cost labelling properties are preserved
427  \begin{itemize}
428  \item structural induction on function body
429  \end{itemize}
430\end{enumerate}
431\end{frame}
432
433\section{Initialisation}
434\begin{frame}
435\frametitle{Cminor: initialization}
436
437Replace initialization data by code in the initial function.
438
439\begin{itemize}
440\item straightforward to define
441\item instantiate a slightly different version of the semantics for it
442\begin{itemize}
443\item Can't accidentally forget the pass
444\end{itemize}
445\end{itemize}
446
447\bigskip
448Correctness should follow because the state after the initialisation will
449be the same as the initial state of the original.
450\end{frame}
451
452\section{RTLabs}
453\begin{frame}[fragile]
454\frametitle{RTLabs: syntax and semantics}
455
456Register Transfer Language with front-end operations.
457
458Control flow graph implemented by generic identifiers map:
459
460\begin{lstlisting}[language=matita]
461definition label ≝ identifier LabelTag.
462definition graph : Type[0] $\rightarrow$ Type[0] ≝ identifier_map LabelTag.
463
464inductive statement : Type[0] ≝
465| St_skip : label $\rightarrow$ statement
466| St_cost : costlabel $\rightarrow$ label $\rightarrow$ statement
467| St_const : register $\rightarrow$ constant $\rightarrow$ label $\rightarrow$ statement
468| St_op1 : $\forall$t,t'. unary_operation t' t $\rightarrow$ register
469   $\rightarrow$ register $\rightarrow$ label $\rightarrow$ statement
470...
471\end{lstlisting}
472
473\begin{itemize}
474\item Shares basic operations (including semantics) with Cminor.
475\item Tags prevent confusion of labels (graph vs.~goto vs.~cost).
476\end{itemize}
477
478Semantics are straightforward interpretation of statements.
479\end{frame}
480
481\begin{frame}[fragile]
482\frametitle{RTLabs: syntax and semantics}
483
484\begin{lstlisting}[language=matita]
485record internal_function : Type[0] ≝
486{ f_labgen    : universe LabelTag
487; f_reggen    : universe RegisterTag
488...
489; f_graph     : graph statement
490; f_closed    : graph_closed f_graph
491; f_typed     : graph_typed (f_locals @ f_params) f_graph
492...
493\end{lstlisting}
494
495Enforce that
496\begin{itemize}
497\item every statement's successor labels are present in the CFG
498\item every statement should be well-typed (for limited type system)
499\end{itemize}
500
501\end{frame}
502
503\section{Cminor to RTLabs}
504\begin{frame}
505\frametitle{Cminor to RTLabs}
506
507Break down statements and expressions into RTL graph.
508
509\begin{itemize}
510\item Incrementally build function backwards
511\item all state is in function record, like prototype
512\end{itemize}
513
514\bigskip
515May investigate whether a state monad makes invariant management easier.
516
517\bigskip
518Showing graph closure requires
519\begin{itemize}
520\item monotonicity of graph construction
521\item eventual insertion of all \lstinline'goto' destinations
522\end{itemize}
523
524Carry these along in the partially built function record.
525
526\end{frame}
527
528% \begin{frame}[fragile]
529% \frametitle{Cminor nesting depth}
530
531% Want to prevent
532% \begin{lstlisting}[language=C]
533% void f() {
534%   block { loop { exit 5; } }
535% }
536% \end{lstlisting}
537
538% Index statements by block depth
539% \begin{lstlisting}[language=matita]
540% inductive stmt : $\forall$blockdepth:nat. Type[0] ≝
541% | St_skip : $\forall$n. stmt n
542% ...
543% | St_block : $\forall$n. stmt (S n) $\rightarrow$ stmt n
544% | St_exit : $\forall$n. Fin n $\rightarrow$ stmt n
545% ...
546% \end{lstlisting}
547
548% \begin{itemize}
549% \item In semantics index continuations too
550% \item Use \lstinline[language=matita]'Fin n' to make \lstinline'k_exit'
551% a total function
552% \end{itemize}
553
554% (May be able to remove entirely if we redo \lstinline[language=C]{switch}?)
555% \end{frame}
556
557\begin{frame}[fragile]
558\frametitle{Establishing RTLabs invariants}
559
560Use dependent pairs to show invariant along with results.
561
562\begin{lstlisting}[language=matita,escapechar=\%]
563let rec add_expr (le:label_env) (env:env) (ty:typ) (e:expr ty)
564   (Env:expr_vars ty e (present ?? env))
565   (dst:register)
566   (f:partial_fn le)
567 on e: $\Sigma$f':partial_fn le. fn_graph_included le f f' ≝
568
569match e return
570   $\lambda$ty,e.expr_vars ty e (present ?? env) $\rightarrow$
571         $\Sigma$f':partial_fn le. fn_graph_included le f f' with
572[ ...
573| Cst _ c $\Rightarrow$ $\lambda$_. %\guillemotleft%add_fresh_to_graph ? (St_const dst c) f ?, ?%\guillemotright%
574...
575\end{lstlisting}
576
577Continually appeal to monotonicity (\lstinline'fn_graph_included'),
578use unification hints to simplify stepping back.
579
580\end{frame}
581
582\frame{
583\frametitle{Cminor to RTLabs: cost labels}
584
585Two of the cost label properties are easy to deal with, the third requires more
586work:
587\begin{enumerate}
588\item cost label at head of function
589\item cost label after branching instructions
590\item cost labels at the head of each loop / \lstinline'goto' destination
591\end{enumerate}
592
593No simple notion of the head of a loop or \lstinline'goto' any more.
594
595Instead will prove in \alert{Cminor} that after following a finite
596number of instructions we reach either
597\begin{itemize}
598\item a cost label, or
599\item the end of the function
600\end{itemize}
601
602}
603
604\section{Structured traces}
605\frame{
606\frametitle{RTLabs structured traces}
607
608Front-end only uses flat traces consisting of single steps.
609
610The back-end will need the function call structure and the labelling
611properties in order to show that the generated costs are correct.
612
613\begin{itemize}
614\item Traces are structured in sections from cost label to cost label,
615\item the entire execution of function calls nested as a single `step',
616\item A coinductive definition presents non-terminating traces, using the
617  inductive definition for all terminating function calls
618\end{itemize}
619
620Have already established the existence of these traces
621\begin{itemize}
622\item termination decided classically
623\item syntactic labelling properties used to build these semantic structure
624\item tricky to establish guardedness of definitions
625\end{itemize}
626}
627
628% \frame{
629% \frametitle{Difficulties with invariants}
630
631% \begin{itemize}
632% \item Not clear if we can use Russell
633% \begin{itemize}
634% \item Want to use existing proofs rather than generated equalities
635% \item Some non-uniform treatment of cases
636% \end{itemize}
637% \item Stresses Matita somewhat (try forgetting to make something a dependent pair)
638% \item Lots of fiddly proof obligations that are simple, but a pain to deal with
639% \begin{itemize}
640% \item Appeal to monotonicity 3 times, then note that the label was added to the graph here
641% \end{itemize}
642% \item Already verbose, yet don't cover globals.
643% \end{itemize}
644
645% I'd like to see if better use of Matita's features can help with this stuff.
646
647% \bigskip
648% Open to suggestions for better methods.
649% }
650
651\frame{
652\frametitle{Conclusion}
653
654The syntax, semantics and translations of the prototype compiler have
655been implemented in Matita.
656
657We have already defined and established
658\begin{itemize}
659\item invariants regarding variables, typing and program structure
660\item a rich form of execution trace to pass to the back-end
661\end{itemize}
662
663We have plans for
664\begin{itemize}
665\item showing functional correctness of the front-end
666\item proving that the cost labelling is appropriate, and is preserved
667\item using the above in the whole compiler functional and cost correctness results.
668\end{itemize}
669
670}
671
672\end{document}
Note: See TracBrowser for help on using the repository browser.