source: Deliverables/D1.3/Presentations/WP4-claudio.tex @ 3534

Last change on this file since 3534 was 3296, checked in by sacerdot, 8 years ago

...

File size: 15.4 KB
Line 
1\documentclass{beamer}
2
3\usetheme{Frankfurt}
4\logo{\includegraphics[height=1.0cm]{fetopen.png}}
5
6\usepackage{wasysym}
7\usepackage{amsfonts}
8\usepackage{amsmath}
9\usepackage{eurosym}
10\usepackage{amssymb}
11\usepackage[english]{babel}
12\usepackage{color}
13\usepackage[utf8x]{inputenc}
14\usepackage{listings}
15\usepackage{stmaryrd}
16\usepackage{fancyvrb}
17\usepackage{ulem}
18\usepackage{bbm}
19% \usepackage{microtype}
20
21\author{Claudio Sacerdoti Coen}
22\title{Dependent Labelling for\\
23       Pipelines and Caches}
24\date{May 16, 2013}
25
26\setlength{\parskip}{1em}
27
28\AtBeginSection[]
29{
30  \begin{frame}<beamer>
31    \frametitle{Outline}
32    \tableofcontents[currentsection]
33  \end{frame}
34}
35
36\begin{document}
37
38\begin{frame}
39\maketitle
40\end{frame}
41
42\begin{frame}
43  \frametitle{Outline}
44  \tableofcontents
45\end{frame}
46
47\section{Problem statement}
48
49\begin{frame}
50\frametitle{Problem Statement}
51
52The basic labelling approach statically assigns a constant cost to every
53labelled basic block but \\~\\
54
55\alert{the execution cost on modern hardware is parametric on
56the internal state of hardware components, which is history dependent}
57
58\end{frame}
59
60\begin{frame}
61\frametitle{Towards a solution}
62
63The dependent labelling approach statically assigns to every labelled basic
64block a cost \alert{parametric} on the state \\~\\
65
66and it lifts the cost from the object to the source code \\~\\
67
68Problems:
69\begin{enumerate}
70 \item What cost models can be lifted? How?\\
71       (the internal hardware state is not visible in the source code)
72 \item Can costs associated to pipelines/caches be lifted?
73 \item Can we reason on the source code on the lifted cost models?
74\end{enumerate}
75\end{frame}
76
77\begin{frame}
78\frametitle{Plan of the talk}
79\begin{enumerate}
80 \item We provide a generic, abstract description of modern hardware
81 \item We introduce restrictions on the cost models and a notion of view
82       as an approximation of states
83 \item We show how to instrument the source code with low level state variables
84       (for views) and update functions (over views)
85 \item We argue that simple pipelines can be analyzed in this way
86 \item We describe the difficulties with caches and hint at research directions
87       to also include them
88\end{enumerate}
89\end{frame}
90
91\section{A generic abstract hardware model}
92
93\begin{frame}
94 \frametitle{Hardware state}
95
96 \begin{block}{Hardware state}
97  Hardware state = $\Sigma \times \Delta$\\~\\
98  $\Sigma$ = data states (registers, PC, memory, $\ldots$)\\~\\
99  $\Delta$ = internal state of pipelines, caches, $\ldots$\\~\\
100 \end{block}
101
102 $\Delta$ does not affect the functional behaviour.
103\end{frame}
104
105\begin{frame}
106 \frametitle{Decoded instructions}
107 \begin{block}{Decoded instruction}
108 Decoded instruction = $\mathcal{I} \times \Gamma$ \\~\\
109
110 $\mathcal{I}$ = instructions (opcodes) \\~\\
111
112 $\Gamma$ = operands after fetching and decoding
113 \end{block}
114
115 \bigskip
116
117 \begin{block}{Fetching}
118 $fetch: \Sigma \to \mathcal{I} \times \Gamma$
119 \end{block}
120
121 Not the actual fetching that uses $\Delta$ too.
122\end{frame}
123
124\begin{frame}
125 \frametitle{Abstract semantics}
126 \begin{block}{Semantics}
127  \begin{tabular}{ll}
128  $\longrightarrow : \Sigma \to \Sigma$ & data state transition \\~\\
129  $\Longrightarrow : \Sigma \times \Delta \to \Delta$ & internal state transition \\~\\
130  $K: \mathcal{I} \times \Gamma \times \Delta \to \mathbb{N}$ & cost function
131  \end{tabular}
132 \end{block}
133
134 \begin{block}{Execution history and path}
135  Execution history: $\sigma_0 \longrightarrow \sigma_1 \longrightarrow \sigma_2 \ldots$ \\~\\
136
137  Execution path: the stream of program counters only
138 \end{block}
139
140 The execution path is what the labelling approach\\ captures.
141\end{frame}
142
143\section{Classification of cost models}
144
145\begin{frame}
146 \frametitle{Approximated and operand insensitive models}
147 
148 \begin{block}{Exact vs approximated models}
149   $K$ is \emph{exact} if the cost is real,
150   it is \emph{approximated} if it returns an upper bound.
151 \end{block}
152
153 \begin{block}{Operand insensitive models}
154  $K: \mathcal{I} \times \mbox{\alert{\sout{~$\Gamma$~}}} \times \Delta \to \mathbb{N}$
155 \end{block}
156
157 Most modern architectures are either operand insensitive or have
158 approximated operand insensitive models with modest jitter.
159 WCET tools use these models.\\~\\
160
161 Operand sensitivity is problematic for the labelling\\
162 approach (how to map $\Gamma$ to the source world?).
163\end{frame}
164
165\begin{frame}
166 \frametitle{Views and cost dependent on views only}
167
168 \begin{block}{Views}
169  $(\mathcal{V},|.|)$ where\\
170  $\mathcal{V}$ is a finite non empty set\\
171  $|.|:\Delta \to \mathcal{V}$ is forgetful
172 \end{block}
173
174 \begin{block}{Cost functions dependent on a view only}
175 $K': \mathcal{I} \times \alert{\mathcal{V}} \to \mathbb{N}$ \\~\\
176 $K(I,\delta) = K'(I,|\delta|)$
177 \end{block}
178
179 With an abuse of terminology, we will identify $K$ with $K'$.
180\end{frame}
181
182\begin{frame}
183 \frametitle{Execution history dependent views}
184
185 \begin{block}{Execution history dependent views}
186 $(\mathcal{V},|.|)$ is execution history dependent with lookahead $n$ if
187 there exists
188 $\hookrightarrow : PC^n \times \mathcal{V} \to \mathcal{V}$
189 such that for all $(\sigma,\delta)$ and $pc_1,\ldots,pc_n$ such that every
190$pc_i$ is the program counter of $\sigma_i$ defined by $\sigma \longrightarrow^i \sigma_i$, we have $(\sigma,\delta) \Longrightarrow \delta'$ iff
191$((pc_1,\ldots,pc_n),|\delta|) \hookrightarrow |\delta'|$.
192 \end{block}
193
194 Notes:
195 \begin{enumerate}
196  \item The evolution of the views can be predicted from the execution paths
197        only, that are the ones we track in the labelling approach
198  \item The lookahead is \alert{in the future}
199        (e.g. for pipelines or for determining the outcome of a conditional
200         branch)
201  \item Can be lifted compositionally to
202        $EP \times \mathcal{V} \to \mathcal{V}$\\
203        (compositional = associativity of lifting)
204 \end{enumerate}
205\end{frame}
206
207\begin{frame}
208 \frametitle{Data independent cost models}
209 \begin{block}{Data independent cost models}
210  $K$ is \emph{data independent} if it is dependent on a view that is
211  execution path dependent.
212 \end{block}
213\end{frame}
214
215\section{A data independent cost function for simple pipelines}
216
217\begin{frame}
218 \frametitle{A simple pipeline model}
219
220 Assumptions:
221
222 \begin{itemize}
223  \item Pipeline with $n$ stages without branch prediction and hazards
224  \item The actual value of the operands has no influence on stalls nor on
225        the execution cost. The type of operands, however, can.
226 \end{itemize}
227
228 For example, reading the value 4 from a register may stall a pipeline
229\end{frame}
230
231\begin{frame}
232 \frametitle{States and a view on them}
233
234 \begin{block}{Pipeline states}
235   $\Delta = (\mathcal{I} \times \Gamma \cup \mathbbm{1})^n$\\
236   (an $n$-uple of decoded instructions or bubbles)
237 \end{block}
238
239 \vspace{-0.5cm}Not the real state.\\~\\
240
241 \begin{block}{A data-independent view}
242  $(\{0,1\}^n,|.|)$ where\\
243  $|(x_1,\ldots,x_n)| = (y_1,\ldots,y_n)$ and
244  $y_i$ is 1 iff $x_i$ is a bubble.
245 \end{block}
246
247 \vspace{-0.5cm} A view is representable by a single integer.
248
249 \begin{block}{Cost model and update function}
250 $K: PC \times \{0,1\}^n \to \mathbb{N}$\\
251 $\hookrightarrow : PC^n \times \{0,1\}^n \to \{0,1\}^n$
252 \end{block}
253
254 \vspace{-0.5cm}$n$ look-aheads are required because of prefetching
255\end{frame}
256
257\section{Dependent labelling for data independent cost models}
258
259\begin{frame}
260 \frametitle{Assumptions}
261
262 \begin{enumerate}
263  \item The cost model is data independent. It is defined by
264        $(\hookrightarrow,K)$ over the view $(\mathcal{V},|.|)$
265  \item Every function call in $C$ is immediately followed by a
266        label
267 \end{enumerate}
268
269 Notes:
270 \begin{itemize}
271  \item Condition 2 can easily be enforced
272  \item It puts more burden on the management of cost annotations and
273        requires proofs that every call is terminating
274  \item The labelling approach under 2 only requires associativity of
275        cost composition; without 2 it also requires commutativity
276        (which fails when the state is history dependent)
277  \item Structured traces still necessary (can be simplified)
278 \end{itemize}
279\end{frame}
280
281\begin{frame}
282 \frametitle{Static analysis}
283 We statically compute:
284 \begin{block}{Transition function over labels}
285   $\hookrightarrow : \mathcal{L} \times \mathcal{L} \times \mathcal{V} \to \mathcal{V}$\\
286   $(L,L',v) \hookrightarrow v'$ iff $((pc_0,\ldots,pc_n,pc'_0,\ldots,pc'_m),v) \hookrightarrow v'$ where $(pc_0,\ldots,pc_n)$ are the program counters of the block labelled by $L$ and $(pc'_0,\ldots,pc'_m)$ are those of the block labelled
287with $L'$.\\
288  \end{block}
289   \vspace{-0.5cm}
290   \alert{The lookahead is lifted from PCs to the next label}\\
291   (Skipping some problems/solutions here)
292  \begin{block}{Cost function over labels}
293        $K: \mathcal{V} \times \mathcal{L} \to \mathbb{N}$\\
294        $K(v_0,L) = K(pc_0,v_0) + K(pc_1,v_1) + \ldots + K(pc_n,v_n)$\\
295        where for every $i < n$,
296        $((pc_i,\ldots,pc_{i+l}),v_i) \hookrightarrow v_{k+1}$\\
297        where $l$ is
298        the lookahead and $pc_0,\ldots,pc_n$ the PCs of the instructions in
299        the basic block
300  \end{block}
301\end{frame}
302
303\begin{frame}
304 \frametitle{Static analysis and the preservation of the cost model}
305 The static analysis should be performed \alert{for every initial view}. \\
306 E.g. for pipelines, it requires $2^n$ code traversals.\\~\\
307
308 The cost of the static analysis remains linear in the program size.\\~\\
309
310 Both the cost and the update functions are
311 \alert{compositionally lifted to work over labelled traces}
312 (requires proof of associativity).\\~\\
313
314 \alert{The proof of correctness of the static analysis is preserved.}\\~\\
315
316 \alert{The proof of preservation of labelled traces is preserved.}\\~\\
317
318 Corollary: the cost computed for a labelled trace on the\\
319 labelled source code is the real cost of the object code.
320\end{frame}
321
322\begin{frame}
323 \frametitle{Code instrumentation}
324 Problem:
325 \vspace{-0,25cm}
326 \begin{center}\alert{the current view is not visible in the source code,
327  but is\\ necessary to express costs and infer cost invariants}\end{center}
328
329 \vspace{-0,25cm}
330 Solution:
331  \begin{itemize}
332   \item The current view is exposed in the code via a global variable
333         \texttt{\_\_view}
334   \item The $K$ function is exposed in the code and used to update the
335         global \texttt{\_\_cost} variable
336   \item A global variable \texttt{\_\_label} holds the last visited label
337   \item The $\hookrightarrow$ transition function is exposed in the source\\
338         code and used with \texttt\{\_\_label\} to update the
339         \texttt{\_\_view}
340  \end{itemize}
341\end{frame}
342
343\begin{frame}[fragile]
344\begin{tabular}{p{3cm}p{5cm}}
345\begin{tiny}
346\begin{verbatim}
347
348
349
350
351
352
353
354
355int fact(int n)
356{
357  int i;
358  int res;
359
360  res = 1;
361  for (i = 1; i <= n; i = i + 1) {
362
363    res = res * i;
364  }
365
366  return res;
367}
368
369int main(void)
370{
371  int t;
372
373  t = fact(10);
374
375  return t;
376}
377
378\end{verbatim}
379\end{tiny}
380&
381\end{tabular}
382\end{frame}
383
384\begin{frame}[fragile]
385\begin{tabular}{p{3cm}p{5cm}}
386\begin{tiny}
387\begin{verbatim}
388
389
390
391
392
393
394
395
396int fact(int n)
397{
398  int i;
399  int res;
400
401  res = 1;
402  for (i = 1; i <= n; i = i + 1) {
403
404    res = res * i;
405  }
406
407  return res;
408}
409
410int main(void)
411{
412  int t;
413
414  t = fact(10);
415
416  return t;
417}
418
419\end{verbatim}
420\end{tiny}
421&
422\begin{tiny}
423\begin{verbatim}
424int __next(int label1, int label2, int view) {
425       if (label1 == 0)                             return 0;
426  else if (label1 == 0 && label2 == 1)              return 1;
427  else if (label1 == 1 && label2 == 2)              return 2;
428  else if (label1 == 2 && label2 == 2 && view == 2) return 3;
429  else if (label1 == 2 && label2 == 2 && view == 3) return 2;
430  else if (label1 == 2 && label2 == 3 && view == 2) return 1;
431  else if (label1 == 2 && label2 == 3 && view == 3) return 0;
432  else if (label1 == 3 && label2 == 4 && view == 0) return 0;
433  else if (label1 == 3 && label2 == 4 && view == 1) return 0;
434}
435
436
437
438
439
440
441int __K(int view, int label) {
442       if (view == 0 && label == 0) return 3;
443  else if (view == 1 && label == 1) return 14;
444  else if (view == 2 && label == 2) return 35;
445  else if (view == 3 && label == 2) return 26;
446  else if (view == 0 && label == 3) return 6;
447  else if (view == 1 && label == 3) return 8;
448  else if (view == 0 && label == 4) return 6;
449}
450
451\end{verbatim}
452\end{tiny}
453\end{tabular}
454\end{frame}
455
456
457\begin{frame}[fragile]
458\begin{tabular}{p{3cm}p{5cm}}
459\begin{tiny}
460\begin{verbatim}
461int __cost = 8;
462int __label = 0;
463int __view;
464
465void __cost_incr(int incr) {
466  __cost = __cost + incr;
467}
468
469int fact(int n)
470{
471  int i;
472  int res;
473  __view = __next(__label,1,__view); __cost_incr(_K(__view,1)); __label = 1;
474  res = 1;
475  for (i = 1; i <= n; i = i + 1) {
476     __view = __next(__label,2,__view); __cost_incr(__K(__view,2)); __label = 2;
477    res = res * i;
478  }
479  __view = __next(__label,3,__view); __cost_incr(K(__view,3)); __label = 3;
480  return res;
481}
482
483int main(void)
484{
485  int t;
486  __view = __next(__label,0,__view); __cost_incr(__K(__view,0)); __label = 0;
487  t = fact(10);
488  __view = __next(__label,4,__view); __cost_incr(__K(__view,4)); __label = 4;
489  return t;
490}
491
492\end{verbatim}
493\end{tiny}
494&
495\end{tabular}
496\end{frame}
497
498\begin{frame}
499 \frametitle{Open Questions and problems}
500 \begin{enumerate}
501  \item How does the invariant generators work on the instrumentation?
502  \item Are automatic provers still performant enough?
503  \item Recover all techniques used in WCET for graceful degradation
504        (e.g. full unrolling, de-parameterization of blocks with log jitter,
505         etc.)
506  \item How to reduce the size of the cost and update functions
507        (e.g. by transfering source code control flow analysis to the object
508        code to determine the reachable views or by performing abstract
509        interpretation over the object code)
510 \end{enumerate}
511
512 Requires full implementation and testing.
513\end{frame}
514
515\section{The problem with caches}
516
517\begin{frame}
518 \frametitle{Dealing with caches}
519 Major problem:
520 \begin{center}
521  \alert{Caches do not have a data independent cost model}
522 \end{center}
523
524 PROARTIS:
525 \begin{center}
526  \alert{Hardware producers should provide probabilist caches that\\
527         admit a data independent \emph{probabilist} cost model}
528 \end{center}
529
530 Basic idea: the placement and replacement algorithms are based on a
531 uniform distribution. Lower performances in the average case (but not
532 much worse), higher predictability.
533\end{frame}
534
535\begin{frame}
536 \frametitle{Static Probabilistic Time Analysis}
537
538 Basic idea (from PROARTIS and alts.):
539 \begin{enumerate}
540  \item Integer costs are replaced by integral random variables
541  \item Addition is replaced by convolution (which is associative)
542  \item Static probabilistic cache analysis for basic blocks is explained in
543        PROARTIS papers\\
544        (but some problems are not spotted and solved\ldots)
545 \end{enumerate}
546
547 \alert{No changes are necessary to the (dependent) labelling approach!}
548\end{frame}
549
550\begin{frame}
551 \frametitle{Open Questions and problems}
552 \begin{enumerate}
553  \item How can we reason symbolically on convolutions?\\
554        (computing them takes exponential time/space; how does PROARTIS
555        planned to solve the issue?)
556  \item The cardinality of the set of cache states is too large:\\
557        how to induce views dinamically?\\
558        (e.g. by transfering source code control flow analysis to the object
559        code to determine the reachable views or by performing abstract
560        interpretation over the object code)
561  \item Is it possible to handle a larger set of cost models to do
562        non probabilistic case anlysis with the labelling approach?\\
563        (same techniques as above)
564 \end{enumerate}
565
566 Requires further theoretical investigations before\\
567 implementation and testing.
568\end{frame}
569
570\end{document}
Note: See TracBrowser for help on using the repository browser.