source: Papers/jar-cerco-2017/architecture.tex @ 3637

Last change on this file since 3637 was 3623, checked in by boender, 3 years ago

Added D2.1 as a base for the architecture parts

File size: 36.0 KB
Line 
1\newcommand{\Alt}{ \mid\!\!\mid  }
2
3% CerCo compiler architecture
4%   Description of languages
5%   Target hardware description
6
7\section{Compiler architecture}
8\label{sect.compiler.architecture}
9
10\subsection{The typical CerCo workflow}
11
12\begin{figure}[!t]
13\begin{tabular}{l@{\hspace{0.2cm}}|@{\hspace{0.2cm}}l}
14\begin{lstlisting}
15char a[] = {3, 2, 7, 14};
16char threshold = 4;
17
18int count(char *p, int len) {
19  char j;
20  int found = 0;
21  for (j=0; j < len; j++) {
22    if (*p <= threshold)
23      found++;
24    p++;
25    }
26  return found;
27}
28
29int main() {
30  return count(a,4);
31}
32\end{lstlisting}
33&
34%  $\vcenter{\includegraphics[width=7.5cm]{interaction_diagram.pdf}}$
35\begin{tikzpicture}[
36    baseline={([yshift=-.5ex]current bounding box)},
37    element/.style={draw, text width=1.6cm, on chain, text badly centered},
38    >=stealth
39    ]
40{[start chain=going below, node distance=.5cm]
41\node [element] (cerco) {CerCo\\compiler};
42\node [element] (cost)  {CerCo\\cost plugin};
43{[densely dashed]
44\node [element] (ded)   {Deductive\\platform};
45\node [element] (check) {Proof\\checker};
46}
47}
48\coordinate [left=3.5cm of cerco] (left);
49{[every node/.style={above, text width=3.5cm, text badly centered,
50                     font=\scriptsize}]
51\draw [<-] ([yshift=-1ex]cerco.north west) coordinate (t) --
52    node {C source}
53    (t-|left);
54\draw [->] (cerco) -- (cost);
55\draw [->] ([yshift=1ex]cerco.south west) coordinate (t) --
56    node {C source+\color{red}{cost annotations}}
57    (t-|left) coordinate (cerco out);
58\draw [->] ([yshift=1ex]cost.south west) coordinate (t) --
59    node {C source+\color{red}{cost annotations}\\+\color{blue}{synthesized assertions}}
60    (t-|left) coordinate (out);
61{[densely dashed]
62\draw [<-] ([yshift=-1ex]ded.north west) coordinate (t) --
63    node {C source+\color{red}{cost annotations}\\+\color{blue}{complexity assertions}}
64    (t-|left) coordinate (ded in) .. controls +(-.5, 0) and +(-.5, 0) .. (out);
65\draw [->] ([yshift=1ex]ded.south west) coordinate (t) --
66    node {complexity obligations}
67    (t-|left) coordinate (out);
68\draw [<-] ([yshift=-1ex]check.north west) coordinate (t) --
69    node {complexity proof}
70    (t-|left) .. controls +(-.5, 0) and +(-.5, 0) .. (out);
71\draw [dash phase=2.5pt] (cerco out) .. controls +(-1, 0) and +(-1, 0) ..
72    (ded in);
73}}
74%% user:
75% head
76\draw (current bounding box.west) ++(-.2,.5)
77    circle (.2) ++(0,-.2) -- ++(0,-.1) coordinate (t);
78% arms
79\draw (t) -- +(-.3,-.2);
80\draw (t) -- +(.3,-.2);
81% body
82\draw (t) -- ++(0,-.4) coordinate (t);
83% legs
84\draw (t) -- +(-.2,-.6);
85\draw (t) -- +(.2,-.6);
86\end{tikzpicture}
87\end{tabular}
88\caption{On the left: C code to count the number of elements in an array
89 that are less than or equal to a given threshold. On the right: CerCo's interaction
90 diagram. Components provided by CerCo are drawn with a solid border.}
91\label{test1}
92\end{figure}
93We illustrate the workflow we envisage (on the right
94of~\autoref{test1}) on an example program (on the left
95of~\autoref{test1}).  The user writes the program and feeds it to the
96CerCo compiler, which outputs an instrumented version of the same
97program that updates global variables that record the elapsed
98execution time and the stack space usage.  The red lines in
99\autoref{itest1} introducing variables, functions and function calls
100starting with \lstinline'__cost' and \lstinline'__stack' are the instrumentation introduced by the
101compiler.  For example, the two calls at the start of
102\lstinline'count' say that 4 bytes of stack are required, and that it
103takes 111 cycles to reach the next cost annotation (in the loop body).
104The compiler measures these on the labelled object code that it generates.
105
106 The annotated program can then be enriched with complexity
107assertions in the style of Hoare logic, that are passed to a deductive
108platform (in our case Frama-C). We provide as a Frama-C cost plugin a
109simple automatic synthesiser for complexity assertions which can
110be overridden by the user to increase or decrease accuracy.  These are the blue
111comments starting with \lstinline'/*@' in \autoref{itest1}, written in
112Frama-C's specification language, ACSL.  From the
113assertions, a general purpose deductive platform produces proof
114obligations which in turn can be closed by automatic or interactive
115provers, ending in a proof certificate.
116
117% NB: if you try this example with the live CD you should increase the timeout
118
119Twelve proof obligations are generated from~\autoref{itest1} (to prove
120that the loop invariant holds after one execution if it holds before,
121to prove that the whole program execution takes at most 1358 cycles, and so on).  Note that the synthesised time bound for \lstinline'count',
122$178+214*(1+\text{\lstinline'len'})$ cycles, is parametric in the length of
123the array. The CVC3 prover
124closes all obligations within half a minute on routine commodity
125hardware.  A simpler non-parametric version can be solved in a few
126seconds.
127%
128\fvset{commandchars=\\\{\}}
129\lstset{morecomment=[s][\color{blue}]{/*@}{*/},
130        moredelim=[is][\color{blue}]{$}{$},
131        moredelim=[is][\color{red}]{|}{|},
132        lineskip=-2pt}
133\begin{figure}[!p]
134\begin{lstlisting}
135|int __cost = 33, __stack = 5, __stack_max = 5;|
136|void __cost_incr(int incr) { __cost += incr; }|
137|void __stack_incr(int incr) {
138  __stack += incr;
139  __stack_max = __stack_max < __stack ? __stack : __stack_max;
140}|
141
142char a[4] = {3, 2, 7, 14};  char threshold = 4;
143
144/*@ behavior stack_cost:
145      ensures __stack_max <= __max(\old(__stack_max), 4+\old(__stack));
146      ensures __stack == \old(__stack);
147    behavior time_cost:
148      ensures __cost <= \old(__cost)+(178+214*__max(1+\at(len,Pre), 0));
149*/
150int count(char *p, int len) {
151  char j;  int found = 0;
152  |__stack_incr(4);  __cost_incr(111);|
153  $__l: /* internal */$
154  /*@ for time_cost: loop invariant
155        __cost <= \at(__cost,__l)+
156                  214*(__max(\at((len-j)+1,__l), 0)-__max(1+(len-j), 0));
157      for stack_cost: loop invariant
158        __stack_max == \at(__stack_max,__l);
159      for stack_cost: loop invariant
160        __stack == \at(__stack,__l);
161      loop variant len-j;
162  */
163  for (j = 0; j < len; j++) {
164    |__cost_incr(78);|
165    if (*p <= threshold) { |__cost_incr(136);| found ++; }
166    else { |__cost_incr(114);| }
167    p ++;
168  }
169  |__cost_incr(67);  __stack_incr(-4);|
170  return found;
171}
172
173/*@ behavior stack_cost:
174      ensures __stack_max <= __max(\old(__stack_max), 6+\old(__stack));
175      ensures __stack == \old(__stack);
176    behavior time_cost:
177      ensures __cost <= \old(__cost)+1358;
178*/
179int main(void) {
180  int t;
181  |__stack_incr(2);  __cost_incr(110);|
182  t = count(a,4);
183  |__stack_incr(-2);|
184  return t;
185}
186\end{lstlisting}
187\caption{The instrumented version of the program in \autoref{test1},
188 with instrumentation added by the CerCo compiler in red and cost invariants
189 added by the CerCo Frama-C plugin in blue. The \lstinline'__cost',
190 \lstinline'__stack' and \lstinline'__stack_max' variables hold the elapsed time
191in clock cycles and the current and maximum stack usage. Their initial values
192hold the clock cycles spent in initialising the global data before calling
193\lstinline'main' and the space required by global data (and thus unavailable for
194the stack).
195}
196\label{itest1}
197\end{figure}
198
199\subsection{The compiler}
200
201This section gives an informal overview of the compiler, in particular it
202highlights the main features of the intermediate languages, the purpose of the
203compilation steps, and the optimisations.
204
205\subsubsection{Clight} 
206Clight is a large subset of the C language that we adopt as
207the source language of our compiler.
208It features most of the types and operators
209of C. It includes pointer arithmetic, pointers to
210functions, and \texttt{struct} and \texttt{union} types, as well as
211all C control structures. The main difference with the C
212language is that Clight expressions are side-effect free, which
213means that side-effect operators ($\texttt{=}$,$\texttt{+=}$,$\texttt{++}$,$\ldots$) and function calls
214within expressions are not supported.
215Given a C program, we rely on the CIL tool~\cite{cil02} to deal
216with the idiosyncrasy of  C concrete syntax and to produce an
217equivalent program in Clight abstract syntax.
218We refer to  the CompCert project \cite{compcert}
219for a formal definition of the
220Clight language. Here we just recall  in
221figure \ref{syntax-clight} its syntax which is
222classically structured in expressions, statements, functions,
223and whole programs. In order to limit the implementation effort,
224our current compiler for Clight does {\em not} cover the operators
225relating to the floating point type {\tt float}. So, in a nutshell,
226the fragment of C we have implemented is Clight without
227floating point.
228
229
230
231\begin{figure}
232  \label{syntax-clight}
233  \footnotesize{
234  \begin{tabular}{l l l l}
235    Expressions: & $a$ ::= & $id$               & variable identifier \\
236    & & $|$ $n$                                 & integer constant \\
237%    & & $|$ $f$                                & float constant \\
238    & & $|$ \texttt{sizeof}($\tau$)             & size of a type \\
239    & & $|$ $op_1$ $a$                          & unary arithmetic operation \\
240    & & $|$ $a$ $op_2$ $a$                      & binary arithmetic operation \\
241    & & $|$ $*a$                                & pointer dereferencing \\
242    & & $|$ $a.id$                              & field access \\
243    & & $|$ $\&a$                               & taking the address of \\
244    & & $|$ $(\tau)a$                           & type cast \\
245    & & $|$ $a ? a : a$                         & conditional expression \\[10pt]
246    Statements: & $s$ ::= & \texttt{skip}       & empty statement \\
247    & & $|$ $a=a$                               & assignment \\
248    & & $|$ $a=a(a^*)$                          & function call \\
249    & & $|$ $a(a^*)$                            & procedure call \\
250    & & $|$ $s;s$                               & sequence \\
251    & & $|$ \texttt{if} $a$ \texttt{then} $s$ \texttt{else} $s$ & conditional \\
252    & & $|$ \texttt{switch} $a$ $sw$            & multi-way branch \\
253    & & $|$ \texttt{while} $a$ \texttt{do} $s$  & ``while'' loop \\
254    & & $|$ \texttt{do} $s$ \texttt{while} $a$  & ``do'' loop \\
255    & & $|$ \texttt{for}($s$,$a$,$s$) $s$       & ``for'' loop\\
256    & & $|$ \texttt{break}                      & exit from current loop \\
257    & & $|$ \texttt{continue}                   & next iteration of the current loop \\
258    & & $|$ \texttt{return} $a^?$               & return from current function \\
259    & & $|$ \texttt{goto} $lbl$                 & branching \\
260    & & $|$ $lbl$ : $s$                         & labelled statement \\[10pt]
261    Switch cases: & $sw$ ::= & \texttt{default} : $s$           & default case \\
262    & & $|$ \texttt{case } $n$ : $s;sw$                         & labelled case \\[10pt]
263    Variable declarations: & $dcl$ ::= & $(\tau\quad id)^*$             & type and name\\[10pt]
264    Functions: & $Fd$ ::= & $\tau$ $id(dcl)\{dcl;s\}$   & internal function \\
265    & & $|$ \texttt{extern} $\tau$ $id(dcl)$                    & external function\\[10pt]
266    Programs: & $P$ ::= & $dcl;Fd^*;\texttt{main}=id$           & global variables, functions, entry point
267  \end{tabular}}
268  \caption{Syntax of the Clight language}
269\end{figure}
270
271
272\subsubsection{Cminor}
273
274Cminor is a simple, low-level imperative language, comparable to a
275stripped-down, typeless variant of C. Again we refer
276to the CompCert project for its formal definition
277and we just recall in figure \ref{syntax-cminor}
278its syntax which as for Clight is structured in
279expressions, statements, functions, and whole programs.
280
281\begin{figure}
282  \label{syntax-cminor}
283  \footnotesize{
284  \begin{tabular}{l l l l}
285    Signatures: & $sig$ ::= & \texttt{sig} $\vec{\texttt{int}}$ $(\texttt{int}|\texttt{void})$ & arguments and result \\[10pt]
286    Expressions: & $a$ ::= & $id$               & local variable \\
287     & & $|$ $n$                                & integer constant \\
288%     & & $|$ $f$                               & float constant \\
289     & & $|$ \texttt{addrsymbol}($id$)          & address of global symbol \\
290     & & $|$ \texttt{addrstack}($\delta$)       & address within stack data \\
291     & & $|$ $op_1$ $a$                         & unary arithmetic operation \\
292     & & $|$ $op_2$ $a$ $a$                     & binary arithmetic operation \\
293     & & $|$ $\kappa[a]$                        & memory read\\
294     & & $|$ $a?a:a$                    & conditional expression \\[10pt]
295    Statements: & $s$ ::= & \texttt{skip}       & empty statement \\
296    & & $|$ $id=a$                              & assignment \\
297    & & $|$ $\kappa[a]=a$                       & memory write \\
298    & & $|$ $id^?=a(\vec{a}):sig$               & function call \\
299    & & $|$ \texttt{tailcall} $a(\vec{a}):sig$  & function tail call \\
300    & & $|$ \texttt{return}$(a^?)$              & function return \\
301    & & $|$ $s;s$                               & sequence \\
302    & & $|$ \texttt{if} $a$ \texttt{then} $s$ \texttt{else} $s$         & conditional  \\
303    & & $|$ \texttt{loop} $s$                   & infinite loop \\
304    & & $|$ \texttt{block} $s$          & block delimiting \texttt{exit} constructs \\
305    & & $|$ \texttt{exit} $n$                   & terminate the $(n+1)^{th}$ enclosing block \\
306    & & $|$ \texttt{switch} $a$ $tbl$           & multi-way test and exit\\
307    & & $|$ $lbl:s$                             & labelled statement \\
308    & & $|$ \texttt{goto} $lbl$                 & jump to a label\\[10pt]
309    Switch tables: & $tbl$ ::= & \texttt{default:exit}($n$) & \\
310    & & $|$ \texttt{case} $i$: \texttt{exit}($n$);$tbl$ & \\[10pt]
311    Functions: & $Fd$ ::= & \texttt{internal} $sig$ $\vec{id}$ $\vec{id}$ $n$ $s$ & internal function: signature, parameters, \\
312    & & & local variables, stack size and body \\
313    & & $|$ \texttt{external} $id$ $sig$ & external function \\[10pt]
314    Programs: & $P$ ::= & \texttt{prog} $(id=data)^*$ $(id=Fd)^*$ $id$   & global variables, functions and entry point
315  \end{tabular}}
316  \caption{Syntax of the Cminor language}
317\end{figure}
318
319\paragraph{Translation of Clight to Cminor.}
320As in Cminor stack operations are made explicit, one has to know
321which variables are stored in the stack. This
322information is produced by a static analysis that determines
323the variables whose address may be `taken'.
324Also space is reserved for local arrays and structures. In a
325second step, the proper compilation is performed: it consists mainly
326in translating Clight control structures to the basic
327ones available in Cminor.
328
329\subsubsection{RTLAbs}
330
331RTLAbs is the last architecture independent language in the
332compilation process.  It is a rather straightforward {\em abstraction} of
333the {\em architecture-dependent} RTL intermediate language
334available in the CompCert project and it is intended to factorize
335some work common to the various target assembly languages
336(e.g. optimizations) and thus to make retargeting of the compiler a
337simpler matter.
338
339We stress that in RTLAbs the structure of Cminor expressions
340is lost and that this may have a negative impact on the following
341instruction selection step.
342Still, the subtleties of instruction selection seem rather orthogonal
343to our goals and we deem the possibility of retargeting
344easily the compiler more important than the efficiency of the generated code.
345
346
347
348\paragraph{Syntax.}
349In RTLAbs, programs are represented as \emph{control flow
350  graphs} (CFGs for short). We associate with the nodes of the graphs
351instructions reflecting the Cminor commands. As usual, commands that change the control
352flow of the program (e.g. loops, conditionals) are translated by inserting
353suitable branching instructions in the CFG.
354The syntax of the language is depicted in table
355\ref{RTLAbs:syntax}. Local variables are now represented by \emph{pseudo
356  registers} that are available in unbounded number. The grammar rule $\textit{op}$ that is
357not detailed in table \ref{RTLAbs:syntax} defines usual arithmetic and boolean
358operations (\texttt{+}, \texttt{xor}, \texttt{$\le$}, etc.) as well as constants
359and conversions between sized integers.
360
361\begin{table}
362{\footnotesize
363\[
364\begin{array}{lllllll}
365\textit{return\_type} & ::= & \textsf{int} \Alt \textsf{void} & \qquad
366\textit{signature} & ::= & (\textsf{int} \rightarrow)^*\ \textit{return\_type}\\
367\end{array}
368\]
369
370\[
371\begin{array}{lllllll}
372\textit{memq} & ::= & \textsf{int8s} \Alt \textsf{int8u} \Alt \textsf{int16s} \Alt \textsf{int16u}
373                 \Alt \textsf{int32} & \qquad
374\textit{fun\_ref} & ::= & \textit{fun\_name} \Alt \textit{psd\_reg}
375\end{array}
376\]
377
378\[
379\begin{array}{llll}
380\textit{instruction} & ::= & \Alt \textsf{skip} \rightarrow \textit{node} &
381                        \quad \mbox{(no instruction)}\\
382                &     & \Alt \textit{psd\_reg} := \textit{op}(\textit{psd\_reg}^*)
383                        \rightarrow \textit{node} & \quad \mbox{(operation)}\\
384                &     & \Alt \textit{psd\_reg} := \textsf{\&}\textit{var\_name}
385                        \rightarrow \textit{node} &
386                        \quad \mbox{(address of a global)}\\
387                &     & \Alt \textit{psd\_reg} := \textsf{\&locals}[n]
388                        \rightarrow \textit{node} &
389                        \quad \mbox{(address of a local)}\\
390                &     & \Alt \textit{psd\_reg} := \textit{fun\_name}
391                        \rightarrow \textit{node} &
392                        \quad \mbox{(address of a function)}\\
393                &     & \Alt \textit{psd\_reg} :=
394                        \textit{memq}(\textit{psd\_reg}[\textit{psd\_reg}])
395                        \rightarrow \textit{node} & \quad \mbox{(memory load)}\\
396                &     & \Alt \textit{memq}(\textit{psd\_reg}[\textit{psd\_reg}]) :=
397                        \textit{psd\_reg}
398                        \rightarrow \textit{node} & \quad \mbox{(memory store)}\\
399                &     & \Alt \textit{psd\_reg} := \textit{fun\_ref}({\it psd\_reg^*}) :
400                        \textit{signature} \rightarrow \textit{node} &
401                        \quad \mbox{(function call)}\\
402                &     & \Alt \textit{fun\_ref}({\it psd\_reg^*}) : \textit{signature}
403                        & \quad \mbox{(function tail call)}\\
404                &     & \Alt \textsf{test}\ \textit{op}(\textit{psd\_reg}^*) \rightarrow
405                        \textit{node}, \textit{node} & \quad \mbox{(branch)}\\
406                &     & \Alt \textsf{return}\ \textit{psd\_reg}? & \quad \mbox{(return)}
407\end{array}
408\]
409
410\[
411\begin{array}{lll}
412\textit{fun\_def} & ::= & \textit{fun\_name}(\textit{psd\_reg}^*): \textit{signature}\\
413             &     & \textsf{result:} \textit{psd\_reg}?\\
414             &     & \textsf{locals:} \textit{psd\_reg}^*\\
415             &     & \textsf{stack:} n\\
416             &     & \textsf{entry:} \textit{node}\\
417             &     & \textsf{exit:} \textit{node}\\
418             &     & (\textit{node:} \textit{instruction})^*
419\end{array}
420\]
421
422\[
423\begin{array}{lllllll}
424\textit{init\_datum} & ::= & \textsf{reserve}(n) \Alt \textsf{int8}(n) \Alt \textsf{int16}(n)
425                       \Alt \textsf{int32}(n) & \qquad
426\textit{init\_data} & ::= & \textit{init\_datum}^+
427\end{array}
428\]
429
430\[
431\begin{array}{lllllll}
432\textit{global\_decl} & ::= & \textsf{var}\ \textit{var\_name} \textit{\{init\_data\}} & \qquad
433\textit{fun\_decl} & ::= & \textsf{extern}\ \textit{fun\_name} \textit{(signature)} \Alt
434                      \textit{fun\_def}
435\end{array}
436\]
437
438\[
439\begin{array}{lll}
440\textit{program} & ::= & \textit{global\_decl}^*\\
441            &     & \textit{fun\_decl}^*
442\end{array}
443\]}
444\caption{Syntax of the RTLAbs language}\label{RTLAbs:syntax}
445\end{table}
446
447\paragraph{Translation of Cminor to RTLAbs.}
448Translating Cminor programs to RTLAbs programs mainly consists in
449transforming Cminor commands in CFGs. Most commands are sequential and have a
450rather straightforward linear translation. A conditional is translated in a
451branch instruction; a loop is translated using a back edge in the CFG.
452
453
454
455\subsubsection{RTL}
456
457As in RTLAbs, the structure of RTL programs is based on CFGs. RTL is
458the first architecture-dependant intermediate language of our compiler
459which, in its current version, targets the MCS-51 assembly language.
460
461\paragraph{Syntax.}
462RTL is very close to RTLAbs. It is based on CFGs and explicits the MCS-51
463instructions corresponding to the RTLAbs instructions. Type information
464disappears: everything is represented using 32 bits integers. Moreover, each
465global of the program is associated to an offset. The syntax of the language can
466be found in table \ref{RTL:syntax}. The grammar rules $\textit{unop}$, $\textit{binop}$,
467$\textit{uncon}$, and $\textit{bincon}$, respectively, represent the sets of unary
468operations, binary operations, unary conditions and binary conditions of the
469MCS-51 language.
470
471\begin{table}
472{\footnotesize
473\[
474\begin{array}{lllllll}
475\textit{size} & ::= & \textsf{Byte} \Alt \textsf{HalfWord} \Alt \textsf{Word}  & \qquad
476\textit{fun\_ref} & ::= & \textit{fun\_name} \Alt \textit{psd\_reg}
477\end{array}
478\]
479
480\[
481\begin{array}{llll}
482\textit{instruction} & ::= & \Alt \textsf{skip} \rightarrow \textit{node} &
483                        \quad \mbox{(no instruction)}\\
484                &     & \Alt \textit{psd\_reg} := n
485                        \rightarrow \textit{node} & \quad \mbox{(constant)}\\
486                &     & \Alt \textit{psd\_reg} := \textit{unop}(\textit{psd\_reg})
487                        \rightarrow \textit{node} & \quad \mbox{(unary operation)}\\
488                &     & \Alt \textit{psd\_reg} :=
489                        \textit{binop}(\textit{psd\_reg},\textit{psd\_reg})
490                        \rightarrow \textit{node} & \quad
491                        \mbox{(binary operation)}\\
492                &     & \Alt \textit{psd\_reg} := \textsf{\&globals}[n]
493                        \rightarrow \textit{node} &
494                        \quad \mbox{(address of a global)}\\
495                &     & \Alt \textit{psd\_reg} := \textsf{\&locals}[n]
496                        \rightarrow \textit{node} &
497                        \quad \mbox{(address of a local)}\\
498                &     & \Alt \textit{psd\_reg} := \textit{fun\_name}
499                        \rightarrow \textit{node} &
500                        \quad \mbox{(address of a function)}\\
501                &     & \Alt \textit{psd\_reg} :=
502                        \textit{size}(\textit{psd\_reg}[n])
503                        \rightarrow \textit{node} & \quad \mbox{(memory load)}\\
504                &     & \Alt \textit{size}(\textit{psd\_reg}[n]) :=
505                        \textit{psd\_reg}
506                        \rightarrow \textit{node} & \quad \mbox{(memory store)}\\
507                &     & \Alt \textit{psd\_reg} := \textit{fun\_ref}({\it psd\_reg^*})
508                        \rightarrow \textit{node} &
509                        \quad \mbox{(function call)}\\
510                &     & \Alt \textit{fun\_ref}({\it psd\_reg^*})
511                        & \quad \mbox{(function tail call)}\\
512                &     & \Alt \textsf{test}\ \textit{uncon}(\textit{psd\_reg}) \rightarrow
513                        \textit{node}, \textit{node} & \quad
514                        \mbox{(branch unary condition)}\\
515                &     & \Alt \textsf{test}\ \textit{bincon}(\textit{psd\_reg},\textit{psd\_reg})
516                        \rightarrow \textit{node}, \textit{node} & \quad
517                        \mbox{(branch binary condition)}\\
518                &     & \Alt \textsf{return}\ \textit{psd\_reg}? & \quad
519                        \mbox{(return)}
520\end{array}
521\]
522
523\[
524\begin{array}{lllllll}
525\textit{fun\_def} & ::= & \textit{fun\_name}(\textit{psd\_reg}^*) & \qquad
526\textit{program} & ::= & \textsf{globals}: n\\
527             &     & \textsf{result:} \textit{psd\_reg}? & \qquad
528            &     & \textit{fun\_def}^*\\
529             &     & \textsf{locals:} \textit{psd\_reg}^*\\
530             &     & \textsf{stack:} n\\
531             &     & \textsf{entry:} \textit{node}\\
532             &     & \textsf{exit:} \textit{node}\\
533             &     & (\textit{node:} \textit{instruction})^*
534\end{array}
535\]}
536\caption{Syntax of the RTL language}\label{RTL:syntax}
537\end{table}
538
539\paragraph{Translation of RTLAbs to RTL.}
540This translation is mostly straightforward. A RTLAbs instruction is often
541directly translated to a corresponding MCS-51 instruction. There are a few
542exceptions: some RTLAbs instructions are expanded in two or more MCS-51
543instructions. When the translation of a RTLAbs instruction requires more than
544a few simple MCS-51 instruction, it is translated into a call to a function
545defined in the preamble of the compilation result.
546
547\subsubsection{ERTL}
548
549As in RTL, the structure of ERTL programs is based on CFGs. ERTL
550explicits the calling conventions of the MCS-51 assembly language.
551
552\paragraph{Syntax.}
553The syntax of the language is given in table \ref{ERTL:syntax}. The main
554difference between RTL and ERTL is the use of hardware registers.
555Parameters are passed in specific hardware registers; if there are too many
556parameters, the remaining are stored in the stack. Other conventionally specific
557hardware registers are used: a register that holds the result of a function, a
558register that holds the base address of the globals, a register that holds the
559address of the top of the stack, and some registers that need to be saved when
560entering a function and whose values are restored when leaving a
561function. Following these conventions, function calls do not list their
562parameters anymore; they only mention their number. Two new instructions appear
563to allocate and deallocate on the stack some space needed by a function to
564execute. Along with these two instructions come two instructions to fetch or
565assign a value in the parameter sections of the stack; these instructions cannot
566yet be translated using regular load and store instructions because we do not
567know the final size of the stack area of each function. At last, the return
568instruction has a boolean argument that tells whether the result of the function
569may later be used or not (this is exploited for optimizations).
570
571\begin{table}
572{\footnotesize
573\[
574\begin{array}{lllllll}
575\textit{size} & ::= & \textsf{Byte} \Alt \textsf{HalfWord} \Alt \textsf{Word}  & \qquad
576\textit{fun\_ref} & ::= & \textit{fun\_name} \Alt \textit{psd\_reg}
577\end{array}
578\]
579
580\[
581\begin{array}{llll}
582\textit{instruction} & ::= & \Alt \textsf{skip} \rightarrow \textit{node} &
583                        \quad \mbox{(no instruction)}\\
584                &     & \Alt \textsf{NewFrame} \rightarrow \textit{node} &
585                        \quad \mbox{(frame creation)}\\
586                &     & \Alt \textsf{DelFrame} \rightarrow \textit{node} &
587                        \quad \mbox{(frame deletion)}\\
588                &     & \Alt \textit{psd\_reg} := \textsf{stack}[\textit{slot}, n]
589                        \rightarrow \textit{node} &
590                        \quad \mbox{(stack load)}\\
591                &     & \Alt \textsf{stack}[\textit{slot}, n] := \textit{psd\_reg}
592                        \rightarrow \textit{node} &
593                        \quad \mbox{(stack store)}\\
594                &     & \Alt \textit{hdw\_reg} := \textit{psd\_reg}
595                        \rightarrow \textit{node} &
596                        \quad \mbox{(pseudo to hardware)}\\
597                &     & \Alt \textit{psd\_reg} := \textit{hdw\_reg}
598                        \rightarrow \textit{node} &
599                        \quad \mbox{(hardware to pseudo)}\\
600                &     & \Alt \textit{psd\_reg} := n
601                        \rightarrow \textit{node} & \quad \mbox{(constant)}\\
602                &     & \Alt \textit{psd\_reg} := \textit{unop}(\textit{psd\_reg})
603                        \rightarrow \textit{node} & \quad \mbox{(unary operation)}\\
604                &     & \Alt \textit{psd\_reg} :=
605                        \textit{binop}(\textit{psd\_reg},\textit{psd\_reg})
606                        \rightarrow \textit{node} & \quad
607                        \mbox{(binary operation)}\\
608                &     & \Alt \textit{psd\_reg} := \textit{fun\_name}
609                        \rightarrow \textit{node} &
610                        \quad \mbox{(address of a function)}\\
611                &     & \Alt \textit{psd\_reg} :=
612                        \textit{size}(\textit{psd\_reg}[n])
613                        \rightarrow \textit{node} & \quad \mbox{(memory load)}\\
614                &     & \Alt \textit{size}(\textit{psd\_reg}[n]) :=
615                        \textit{psd\_reg}
616                        \rightarrow \textit{node} & \quad \mbox{(memory store)}\\
617                &     & \Alt \textit{fun\_ref}(n) \rightarrow \textit{node} &
618                        \quad \mbox{(function call)}\\
619                &     & \Alt \textit{fun\_ref}(n)
620                        & \quad \mbox{(function tail call)}\\
621                &     & \Alt \textsf{test}\ \textit{uncon}(\textit{psd\_reg}) \rightarrow
622                        \textit{node}, \textit{node} & \quad
623                        \mbox{(branch unary condition)}\\
624                &     & \Alt \textsf{test}\ \textit{bincon}(\textit{psd\_reg},\textit{psd\_reg})
625                        \rightarrow \textit{node}, \textit{node} & \quad
626                        \mbox{(branch binary condition)}\\
627                &     & \Alt \textsf{return}\ b & \quad \mbox{(return)}
628\end{array}
629\]
630
631\[
632\begin{array}{lllllll}
633\textit{fun\_def} & ::= & \textit{fun\_name}(n) & \qquad
634\textit{program} & ::= & \textsf{globals}: n\\
635             &     & \textsf{locals:} \textit{psd\_reg}^* & \qquad
636            &     & \textit{fun\_def}^*\\
637             &     & \textsf{stack:} n\\
638             &     & \textsf{entry:} \textit{node}\\
639             &     & (\textit{node:} \textit{instruction})^*
640\end{array}
641\]}
642\caption{Syntax of the ERTL language}\label{ERTL:syntax}
643\end{table}
644
645\paragraph{Translation of RTL to ERTL.}
646The work consists in expliciting the conventions previously mentioned. These
647conventions appear when entering, calling and leaving a function, and when
648referencing a global variable or the address of a local variable.
649
650\paragraph{Optimizations.}
651A \emph{liveness analysis} is performed on ERTL to replace unused
652instructions by a $\textsf{skip}$. An instruction is tagged as unused when it
653performs an assignment on a register that will not be read afterwards. Also, the
654result of the liveness analysis is exploited by a \emph{register allocation}
655algorithm whose result is to efficiently associate a physical location (a
656hardware register or an address in the stack) to each pseudo register of the
657program.
658
659\subsubsection{LTL}
660
661As in ERTL, the structure of LTL programs is based on CFGs. Pseudo
662registers are not used anymore; instead, they are replaced by physical locations
663(a hardware register or an address in the stack).
664
665\paragraph{Syntax.}
666Except for a few exceptions, the instructions of the language are those of
667ERTL with hardware registers replacing pseudo registers. Calling and
668returning conventions were explicited in ERTL; thus, function calls and
669returns do not need parameters in LTL. The syntax is defined in table
670\ref{LTL:syntax}.
671
672\begin{table}
673{\footnotesize
674\[
675\begin{array}{lllllll}
676\textit{size} & ::= & \textsf{Byte} \Alt \textsf{HalfWord} \Alt \textsf{Word}  & \qquad
677\textit{fun\_ref} & ::= & \textit{fun\_name} \Alt \textit{hdw\_reg}
678\end{array}
679\]
680
681\[
682\begin{array}{llll}
683\textit{instruction} & ::= & \Alt \textsf{skip} \rightarrow \textit{node} &
684                        \quad \mbox{(no instruction)}\\
685                &     & \Alt \textsf{NewFrame} \rightarrow \textit{node} &
686                        \quad \mbox{(frame creation)}\\
687                &     & \Alt \textsf{DelFrame} \rightarrow \textit{node} &
688                        \quad \mbox{(frame deletion)}\\
689                &     & \Alt \textit{hdw\_reg} := n
690                        \rightarrow \textit{node} & \quad \mbox{(constant)}\\
691                &     & \Alt \textit{hdw\_reg} := \textit{unop}(\textit{hdw\_reg})
692                        \rightarrow \textit{node} & \quad \mbox{(unary operation)}\\
693                &     & \Alt \textit{hdw\_reg} :=
694                        \textit{binop}(\textit{hdw\_reg},\textit{hdw\_reg})
695                        \rightarrow \textit{node} & \quad
696                        \mbox{(binary operation)}\\
697                &     & \Alt \textit{hdw\_reg} := \textit{fun\_name}
698                        \rightarrow \textit{node} &
699                        \quad \mbox{(address of a function)}\\
700                &     & \Alt \textit{hdw\_reg} := \textit{size}(\textit{hdw\_reg}[n])
701                        \rightarrow \textit{node} & \quad \mbox{(memory load)}\\
702                &     & \Alt \textit{size}(\textit{hdw\_reg}[n]) := \textit{hdw\_reg}
703                        \rightarrow \textit{node} & \quad \mbox{(memory store)}\\
704                &     & \Alt \textit{fun\_ref}() \rightarrow \textit{node} &
705                        \quad \mbox{(function call)}\\
706                &     & \Alt \textit{fun\_ref}()
707                        & \quad \mbox{(function tail call)}\\
708                &     & \Alt \textsf{test}\ \textit{uncon}(\textit{hdw\_reg}) \rightarrow
709                        \textit{node}, \textit{node} & \quad
710                        \mbox{(branch unary condition)}\\
711                &     & \Alt \textsf{test}\ \textit{bincon}(\textit{hdw\_reg},\textit{hdw\_reg})
712                        \rightarrow \textit{node}, \textit{node} & \quad
713                        \mbox{(branch binary condition)}\\
714                &     & \Alt \textsf{return} & \quad \mbox{(return)}
715\end{array}
716\]
717
718\[
719\begin{array}{lllllll}
720\textit{fun\_def} & ::= & \textit{fun\_name}(n) & \qquad
721\textit{program} & ::= & \textsf{globals}: n\\
722             &     & \textsf{locals:} n & \qquad
723            &     & \textit{fun\_def}^*\\
724             &     & \textsf{stack:} n\\
725             &     & \textsf{entry:} \textit{node}\\
726             &     & (\textit{node:} \textit{instruction})^*
727\end{array}
728\]}
729\caption{Syntax of the LTL language}\label{LTL:syntax}
730\end{table}
731
732\paragraph{Translation of ERTL to LTL.} The translation relies on the
733results of the liveness analysis and of the register allocation. Unused
734instructions are eliminated and each pseudo register is replaced by a physical
735location. In LTL, the size of the stack frame of a function is known;
736instructions intended to load or store values in the stack are translated
737using regular load and store instructions.
738
739\paragraph{Optimizations.} A \emph{graph compression} algorithm removes empty
740instructions generated by previous compilation passes and by the liveness
741analysis.
742
743\subsubsection{LIN}
744
745In LIN, the structure of a program is no longer based on CFGs. Every function
746is represented as a sequence of instructions.
747
748\paragraph{Syntax.}
749The instructions of LIN are very close to those of LTL. \emph{Program
750  labels}, \emph{gotos} and branch instructions handle the changes in the
751control flow. The syntax of LIN programs is shown in table \ref{LIN:syntax}.
752
753\begin{table}
754{\footnotesize
755\[
756\begin{array}{lllllll}
757\textit{size} & ::= & \textsf{Byte} \Alt \textsf{HalfWord} \Alt \textsf{Word}  & \qquad
758\textit{fun\_ref} & ::= & \textit{fun\_name} \Alt \textit{hdw\_reg}
759\end{array}
760\]
761
762\[
763\begin{array}{llll}
764\textit{instruction} & ::= & \Alt \textsf{NewFrame} &
765                        \quad \mbox{(frame creation)}\\
766                &     & \Alt \textsf{DelFrame} &
767                        \quad \mbox{(frame deletion)}\\
768                &     & \Alt \textit{hdw\_reg} := n & \quad \mbox{(constant)}\\
769                &     & \Alt \textit{hdw\_reg} := \textit{unop}(\textit{hdw\_reg})
770                        & \quad \mbox{(unary operation)}\\
771                &     & \Alt \textit{hdw\_reg} :=
772                        \textit{binop}(\textit{hdw\_reg},\textit{hdw\_reg})
773                        & \quad \mbox{(binary operation)}\\
774                &     & \Alt \textit{hdw\_reg} := \textit{fun\_name}
775                        & \quad \mbox{(address of a function)}\\
776                &     & \Alt \textit{hdw\_reg} := \textit{size}(\textit{hdw\_reg}[n])
777                        & \quad \mbox{(memory load)}\\
778                &     & \Alt \textit{size}(\textit{hdw\_reg}[n]) := \textit{hdw\_reg}
779                        & \quad \mbox{(memory store)}\\
780                &     & \Alt \textsf{call}\ \textit{fun\_ref}
781                        & \quad \mbox{(function call)}\\
782                &     & \Alt \textsf{tailcall}\ \textit{fun\_ref}
783                        & \quad \mbox{(function tail call)}\\
784                &     & \Alt \textit{uncon}(\textit{hdw\_reg}) \rightarrow
785                        \textit{node} & \quad
786                        \mbox{(branch unary condition)}\\
787                &     & \Alt \textit{bincon}(\textit{hdw\_reg},\textit{hdw\_reg})
788                        \rightarrow \textit{node} & \quad
789                        \mbox{(branch binary condition)}\\
790                &     & \Alt \textit{mips\_label:} & \quad \mbox{(MCS-51 label)}\\
791                &     & \Alt \textsf{goto}\ \textit{mips\_label} & \quad \mbox{(goto)}\\
792                &     & \Alt \textsf{return} & \quad \mbox{(return)}
793\end{array}
794\]
795
796\[
797\begin{array}{lllllll}
798\textit{fun\_def} & ::= & \textit{fun\_name}(n) & \qquad
799\textit{program} & ::= & \textsf{globals}: n\\
800             &     & \textsf{locals:} n & \qquad
801            &     & \textit{fun\_def}^*\\
802             &     & \textit{instruction}^*
803\end{array}
804\]}
805\caption{Syntax of the LIN language}\label{LIN:syntax}
806\end{table}
807
808\paragraph{Translation of LTL to LIN.}
809This translation amounts to transform in an efficient way the graph structure of
810functions into a linear structure of sequential instructions.
811
812\subsubsection{ASM}
Note: See TracBrowser for help on using the repository browser.