Changeset 819


Ignore:
Timestamp:
May 19, 2011, 4:45:58 PM (8 years ago)
Author:
mulligan
Message:

Final changes. Under 8 pages.

Location:
Deliverables/D4.1/ITP-Paper
Files:
2 added
2 edited

Legend:

Unmodified
Added
Removed
  • Deliverables/D4.1/ITP-Paper/itp-2011.bib

    r817 r819  
    5858  author = {Jun Yan and Wei Zhang},
    5959  title = {{WCET} Analysis for Multi-Core Processors with Shared {L2} Instruction Caches},
    60   booktitle = {Proceedings of the $\mathrm{14^{th}}$ {IEEE} Symposium on Real-time and Embedded Technology and Applications},
     60  booktitle = {$\mathrm{14^{th}}$ {IEEE} Symposium on Real-time and Embedded Technology and Applications},
    6161  pages = {80--89},
    6262  year = {2008}
     
    6767  author = {Robert Atkey},
    6868  title = {{CoqJVM}: An executable specification of the {Java Virtual Machine} using dependent types},
    69   booktitle = {Proceedings of the Conference of the {TYPES} Project},
     69  booktitle = {Conference of the {TYPES} Project},
    7070  pages = {18--32},
    7171  year = {2007}
     
    7676  title = {Designing a {CPU} model: from a pseudo-formal document to fast code},
    7777  author = {Fr\'ed\'eric Blanqui and Claude Helmstetter and Vania Joloboff and Jean-Fran\c{c}ois Monin and Xiaomu Shi},
    78   booktitle = {Proceedings of the $\mathrm{3^{rd}}$ Workshop on Rapid Simulation and Performance Evaluation: Methods and Tools},
     78  booktitle = {$\mathrm{3^{rd}}$ Workshop on Rapid Simulation and Performance Evaluation: Methods and Tools},
    7979  year = {2010}
    8080}
     
    8484  author = {Sandrine Blazy and Zaynah Dargaye and Xavier Leroy},
    8585  title = {Formal Verification of a {C} Compiler Front-End},
    86   booktitle = {Proceedings of the International Symposium on Formal Methods},
     86  booktitle = {International Symposium on Formal Methods},
    8787  pages = {460--475},
    8888  year = {2006}
     
    9393  author = {Adam Chlipala},
    9494  title = {A verified compiler for an impure functional language},
    95   booktitle = {Proceedings of the $\mathrm{37^{th}}$ Annual Symposium on Principles of Programming Languages},
     95  booktitle = {$\mathrm{37^{th}}$ Annual Symposium on Principles of Programming Languages},
    9696  pages = {93--106},
    9797  year = {2010}
     
    102102  author = {Anthony Fox and Magnus O. Myreen},
    103103  title = {A Trustworthy Monadic Formalization of the {ARMv7} Instruction Set Architecture},
    104   booktitle = {Proceedings of the $\mathrm{1^{st}}$ International Conference on Interactive Theorem Proving},
     104  booktitle = {$\mathrm{1^{st}}$ International Conference on Interactive Theorem Proving},
    105105  pages = {243--258},
    106106  year = {2010}
     
    119119  author = {Daan Leijen and Erik Meijer},
    120120  title = {Domain specific embedded compilers},
    121   booktitle = {Proceedings of the $\mathrm{2^{nd}}$ Conference on Domain Specific Languages},
     121  booktitle = {$\mathrm{2^{nd}}$ Conference on Domain Specific Languages},
    122122  pages = {109--122},
    123123  year = {1999}
     
    128128  author = {Susmit Sarkar and Peter Sewell and Francesco Zappa Nardelli and Scott Owens and Tom Ridge and Thomas Braibant and Magnus O. Myreen and Jade Alglave},
    129129  title = {The semantics of {x86-CC} multiprocessor machine code},
    130   booktitle = {Proceedings of the $\mathrm{36^{th}}$ Annual Symposium on Principles of Programming Languages},
     130  booktitle = {$\mathrm{36^{th}}$ Annual Symposium on Principles of Programming Languages},
    131131  pages = {379--391},
    132132  year = {2009}
     
    146146  author = {Matthieu Sozeau},
    147147  title = {Subset coercions in {Coq}},
    148   booktitle = {Proceedings of the Conference of the {TYPES} Project},
     148  booktitle = {Conference of the {TYPES} Project},
    149149  pages = {237--252},
    150150  year = {2006}
     
    155155  author = {Hongwei Xi and Chiyan Chen and Gang Chen},
    156156  title = {Guarded recursive datatype constructors},
    157   booktitle = {Proceedings of the $\mathrm{30^{th}}$ Annual Symposium on Principles of Programming Languages},
     157  booktitle = {$\mathrm{30^{th}}$ Annual Symposium on Principles of Programming Languages},
    158158  pages = {224--235},
    159159  year = {2003}
     
    169169
    170170@misc
    171 { cerco-code:2011,
    172   title = {{CerCo Deliverable D4.1}: Matita and O'Caml code},
    173   howpublished = {\url{http://cerco.cs.unibo.it/raw-attachment/wiki/WikiStart/D4_1_Code.tar.gz}},
     171{ cerco-report-code:2011,
     172  title = {{CerCo Deliverable D4.1}: executable formal semantics of machine code},
     173  howpublished = {\url{http://cerco.cs.unibo.it/raw-attachment/wiki/WikiStart/D4_1.pdf} and \url{http://cerco.cs.unibo.it/raw-attachment/wiki/WikiStart/D4_1_Code.tar.gz}},
    174174  year = {2011},
    175   key = {{CerCo-code}}
    176 }
    177 
    178 @misc
    179 { cerco-report:2011,
    180   title = {{CerCo Deliverable D4.1}:: executable formal semantics of machine code},
    181   howpublished = {\url{http://cerco.cs.unibo.it/raw-attachment/wiki/WikiStart/D4_1.pdf}},
    182   year = {2011},
    183   key = {{CerCo-report}}
     175  key = {{CerCo-report-code}}
    184176}
    185177
     
    210202@misc
    211203{ sdcc:2010,
    212   title = {Small Device {C} Compiler {(SDCC)} 3.0.0},
     204  title = {Small Device {C} Compiler 3.0.0},
    213205  howpublished = {\url{http://sdcc.sourceforge.net/}},
    214206  year = {2010},
  • Deliverables/D4.1/ITP-Paper/itp-2011.tex

    r817 r819  
    1 \documentclass[10pt, final, conference]{IEEEtran}
     1\documentclass[10pt, final, conference, letter]{IEEEtran}
    22
    33\usepackage{amsfonts}
     
    6969number: 243881}
    7070
    71 \bibliographystyle{plain}
     71\bibliographystyle{IEEEtran}
    7272
    7373\begin{document}
     
    8282However, the Matita emulator is intended to be used as a target for a certified, complexity preserving C compiler, as part of the EU-funded CerCo project.
    8383As a result, not all features of the MCS-51 are formalised in the Matita emulator.
    84 
    85 %The formalisation proceeded in two stages, first implementing an O'Caml prototype, for quickly `ironing out' bugs, and then porting the O'Caml emulator to Matita.
    86 %Though mostly straight-forward, this porting presented multiple problems.
    87 %Of particular interest is how the unorthoganality of the MSC-51's instruction set is handled.
    88 %In O'Caml, this was handled with polymorphic variants.
    89 %In Matita, we achieved the same effect with a non-standard use of dependent types.
    90 
    9184Both the O'Caml and Matita emulators are `executable'.
    9285Assembly programs may be animated within Matita, producing a trace of instructions executed.
     
    107100The majority of programs are written in high level languages and then compiled into low level ones.
    108101Specifications are therefore also given at a high level and correctness can be proved by reasoning on the program's source code.
    109 The code that is actually run, however, is not the high level source code that we reason on, but low level code generated by the compiler.
     102The code that is actually run is not the high level source code that we reason on, but low level code generated by the compiler.
    110103A few questions now arise:
    111104\begin{itemize*}
     
    117110To what extent can you trust your compiler in preserving those properties?
    118111\end{itemize*}
    119 These questions, and others like them, motivate a current `hot topic' in computer science research: \emph{compiler verification} (for instance~\cite{chlipala:verified:2010,leroy:formal:2009}, and many others).
     112These and other questions motivate a current `hot topic' in computer science research: \emph{compiler verification} (e.g.~\cite{chlipala:verified:2010,leroy:formal:2009}, and so on).
    120113So far, the field has only been focused on the first and last questions.
    121114Much attention has been placed on verifying compiler correctness with respect to extensional properties of programs.
     
    139132A user can certify that `deadlines' are met whilst wringing as many clock cycles from the processor---using a cost model that does not over-estimate---as possible.
    140133
    141 We also see our approach as being relevant to compiler verification (and construction).
    142 \emph{An optimisation specified only extensionally is only half specified}.
    143 Though the optimisation may preserve the denotational semantics of a program, there is no guarantee that intensional properties of the program improve.
     134Our approach is also relevant to compiler verification and construction.
     135\emph{An optimisation specified only extensionally is only half specified}; the optimisation may preserve the denotational semantics of a program, but there is no guarantee that intensional properties of the program improve.
    144136
    145137Another potential application is toward completeness and correctness of the compilation process in the presence of space constraints.
    146138A compiler could reject a source program targetting an embedded system when the size of the compiled code exceeds the available ROM size.
    147139Preservation of a program's semantics may only be required for those programs that do not exhaust the stack or heap.
    148 The statement of completeness of the compiler must therefore take into account a realistic cost model.
     140The statement of completeness of the compiler must take into account a realistic cost model.
    149141
    150142With the CerCo methodology, we assume we can assign to object code exact and realistic costs for sequential blocks of instructions.
     
    270262Bit memory, followed by a small amount of stack space, resides in the memory space immediately following the register banks.
    271263What remains of IRAM may be treated as general purpose memory.
    272 A schematic view of IRAM layout is also provided in Figure~\ref{fig.memory.layout}.
     264A schematic view of IRAM is also provided in Figure~\ref{fig.memory.layout}.
    273265
    274266External RAM (XRAM), limited to a maximum size of 64 kilobytes, is optional, and may be provided on or off chip, depending on the vendor.
     
    284276Two 8-bit general purpose accumulators, A and B, are provided.
    285277
    286 Communication with the device is handled by an inbuilt UART serial port and controller.
     278Communication with the device is handled by a UART serial port and controller.
    287279This can operate in numerous modes.
    288280Serial baud rate is determined by one of two 16-bit timers included with the 8051, which can be set to multiple modes of operation.
     
    335327
    336328We box Matita code to distinguish it from O'Caml code.
    337 In Matita, `\texttt{?}' or `\texttt{$\ldots$}' denote arguments to be inferred automatically.
    338 
    339 A full account of the formalisation can be found in~\cite{cerco-report:2011}.
    340 All source code is available from the CerCo project website~\cite{cerco-code:2011}.
     329In Matita `\texttt{$?$}' and  `\texttt{$\ldots$}' are arguments to be inferred automatically.
     330
     331A full account of the formalisation can be found in~\cite{cerco-report-code:2011}.
     332All source code is available from the CerCo project website~\cite{cerco-report-code:2011}.
    341333
    342334%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     
    385377| VCons: $\forall$n: nat. A $\rightarrow$ Vector A n $\rightarrow$ Vector A (S n).
    386378\end{lstlisting}
    387 We define \texttt{BitVector} as a specialization of \texttt{Vector} to \texttt{bool}.
     379\texttt{BitVector} is a specialization of \texttt{Vector} to \texttt{bool}.
    388380We may use Matita's type system to provide precise typings for functions that are polymorphic in the size without code duplication:
    389381\begin{lstlisting}[frame=single]
    390382let rec split (A: Type[0]) (m,n: nat) on m:
    391    Vector A (m + n) $\rightarrow$ (Vector A m)$\times$(Vector A n) := ...
     383  Vector A (m+n) $\rightarrow$ (Vector A m)$\times$(Vector A n) := ...
    392384\end{lstlisting}
    393385
     
    406398Paths are represented by bitvectors (already used in the implementation for addresses and registers) of length $h$:
    407399\begin{lstlisting}[frame=single]
    408 inductive BitVectorTrie (A: Type[0]): nat $\rightarrow$ Type[0] ≝
     400inductive BitVectorTrie(A: Type[0]):nat $\rightarrow$ Type[0] :=
    409401  Leaf: A $\rightarrow$ BitVectorTrie A 0
    410402| Node: ∀n. BitVectorTrie A n $\rightarrow$ BitVectorTrie A n $\rightarrow$
     
    416408Performing a lookup in memory is now straight-forward.
    417409The only subtlety over normal trie lookup is how we handle \texttt{Stub}.
    418 We traverse a path, and upon encountering \texttt{Stub}, we return a default value\footnote{All manufacturer data sheets that we consulted were silent on the subject of what should be returned if we attempt to access uninitialized memory.  We defaulted to simply returning zero, though our \texttt{lookup} function is parametric in this choice.  We do not believe that this is an outrageous decision, as SDCC for instance generates code which first `zeroes out' all memory in a preamble before executing the program proper.  This is in line with the C standard, which guarantees that all global variables will be zero initialized piecewise.}.
    419 
    420 \texttt{BitVectorTrie} and \texttt{Vector} datastructures, and related functions, can be used in the formalising other microprocessors.
     410We traverse a path, and upon encountering \texttt{Stub}, we return a default value.
     411All manufacturer data sheets that we consulted were silent on the subject of what should be returned if we attempt to access uninitialized memory.
     412We defaulted to simply returning zero, though our \texttt{lookup} function is parametric in this choice.
     413We believe that this is reasonable, as SDCC for instance generates code which first `zeroes' memory in a preamble before executing the program proper.
     414This is in line with the C standard, which guarantees that all global variables will be zero initialized piecewise.
     415
     416\texttt{BitVectorTrie} and \texttt{Vector}, and related functions, can be used in the formalisation of other microprocessors.
    421417
    422418%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     
    456452\label{subsect.anatomy.matita.emulator}
    457453
    458 The internal state of the Matita emulator is represented as a record:
    459 \begin{lstlisting}[frame=single]
    460 record Status: Type[0] :=
    461 {
     454The Matita emulator's internal state is a record:
     455\begin{lstlisting}[frame=single]
     456record Status: Type[0] := {
    462457  code_memory: BitVectorTrie Byte 16;
    463458  low_internal_ram: BitVectorTrie Byte 7;
     
    466461  program_counter: Word;
    467462  special_function_registers_8051: Vector Byte 19;
    468   ...
    469 }.
     463  ... }.
    470464\end{lstlisting}
    471465This record encapsulates the current memory contents, the program counter, the state of the current SFRs, and so on.
     
    510504A callback function was also accepted as an argument, which `witnessed' the execution as it happened.
    511505Due to Matita's termination requirement, \texttt{execute} cannot execute a program indefinitely.
    512 An alternative approach would be to produce an infinite stream of statuses representing an execution trace.
    513 Matita supports infinite streams through co-inductive types.
     506An alternative would be to produce an infinite stream of statuses representing an execution trace using Matita's co-inductive types.
    514507
    515508%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     
    519512\label{subsect.instruction.set.unorthogonality}
    520513
    521 A peculiarity of the MCS-51 is its unorthogonal instruction set.
    522 For instance, the \texttt{MOV} instruction can be invoked using one of 16 combinations of addressing modes out of a possible 361.
     514A peculiarity of the MCS-51 is its unorthogonal instruction set; \texttt{MOV} can be invoked using one of 16 combinations of addressing modes out of a total of 361, for instance.
    523515
    524516% Show example of pattern matching with polymorphic variants
     
    551543  [ `U1 of 'a | ... | `U6 of 'f ]
    552544\end{lstlisting}
    553 For our purposes, the types \texttt{union2}, \texttt{union3} and \texttt{union6} sufficed.
     545The types \texttt{union2}, \texttt{union3} and \texttt{union6} sufficed.
    554546
    555547This polymorphic variant machinery worked well: it introduced a certain level of type safety (for instance, the type of \texttt{MOV} above guarantees it cannot be invoked with arguments in the \texttt{carry} and \texttt{data16} addressing modes, respectively), and also allowed us to pattern match against instructions, when necessary.
    556548However, this polymorphic variant machinery is \emph{not} present in Matita.
    557549We needed some way to produce the same effect, which Matita supported.
    558 For this task, we used dependent types.
    559 
    560 We first provided an inductive data type representing all possible addressing modes, a type that functions will pattern match against:
     550We used dependent types.
     551
     552We provided an inductive data type representing all addressing modes, a type that functions will pattern match against:
    561553\begin{lstlisting}[frame=single]
    562554inductive addressing_mode: Type[0] :=
     
    576568The \texttt{is\_a} function checks if an \texttt{addressing\_mode} matches an \texttt{addressing\_mode\_tag}:
    577569\begin{lstlisting}[frame=single]
    578 let rec is_a
    579   (d: addressing_mode_tag)
    580   (A: addressing_mode) on d :=
     570definition is_a :=
     571  $\lambda$d: addressing_mode_tag. $\lambda$A: addressing_mode.
    581572match d with
    582573[ direct $\Rightarrow$
     
    586577...
    587578\end{lstlisting}
    588 The \texttt{is\_in} function checks if an \texttt{addressing\_mode} matches a set of tags represented as a vector. It simply extends the \texttt{is\_a} function in the obvious manner.
    589 
    590 A \texttt{subaddressing\_mode} is an \emph{ad hoc} non-empty $\Sigma$-type of \texttt{addressing\_mode}s constrained to be in a set of tags:
     579The \texttt{is\_in} function checks if an \texttt{addressing\_mode} matches a set of tags represented as a vector.
     580It simply extends the \texttt{is\_a} function in the obvious manner.
     581
     582A \texttt{subaddressing\_mode} is an \emph{ad hoc} non-empty $\Sigma$-type of \texttt{addressing\_mode}s in a set of tags:
    591583\begin{lstlisting}[frame=single]
    592584record subaddressing_mode
     
    596588  subaddressing_modeel :> addressing_mode;
    597589  subaddressing_modein:
    598     bool_to_Prop (is_in ? l subaddressing_modeel)
     590    bool_to_Prop (is_in $\ldots$ l subaddressing_modeel)
    599591}.
    600592\end{lstlisting}
     
    612604% One of these coercions opens up a proof obligation which needs discussing
    613605% Have lemmas proving that if an element is a member of a sub, then it is a member of a superlist, and so on
    614 The final component is a pair of type coercions from \texttt{addressing\_mode} to \texttt{subaddressing\_mode} and from \texttt{subaddressing\_mode} to \texttt{Type$\lbrack0\rbrack$}, respectively.
    615 The first is a forgetful coercion, while the second opens a proof obligation wherein we must prove that the provided value is in the admissible set.
    616 These coercions were first introduced by PVS to implement subset types~\cite{shankar:principles:1999}, and later in Coq as part of Russell~\cite{sozeau:subset:2006}.
    617 In Matita all coercions can open proof obligations.
     606Finally, type coercions from \texttt{addressing\_mode} to \texttt{subaddressing\_mode} and from \texttt{Vector addressing\_mode\_tag} to \texttt{Type$\lbrack0\rbrack$}, are required.
     607The first opens a proof obligation wherein we must prove that the provided value is in the admissible set, simulating PVS subset types~\cite{shankar:principles:1999}.
     608%PVS introduced subset types, and these later featured in Coq as part of Russell~\cite{sozeau:subset:2006}.
     609%All coercions in Matita can open proof obligations.
    618610
    619611Proof obligations require us to state and prove a few auxilliary lemmas related to the transitivity of subtyping.
     
    643635
    644636We tried other dependently and non-dependently typed solutions before settling on this approach.
    645 As we need a large number of different combinations of addressing modes to describe the whole instruction set, it is infeasible to declare a datatype for each one of these combinations.
     637As we need a large number of different combinations of addressing modes to describe the instruction set, it is infeasible to declare a datatype for each one of these combinations.
    646638The current solution is closest to the corresponding O'Caml code, to the point that the translation from O'Caml to Matita is almost syntactical.
    647 We would like to investigate the possibility of changing the code extraction procedure of Matita so that it recognises this programming pattern and outputs O'Caml code using polymorphic variants.
     639% We would like to investigate the possibility of changing the code extraction procedure of Matita so that it recognises this programming pattern and outputs O'Caml code using polymorphic variants.
    648640
    649641% Talk about extraction to O'Caml code, which hopefully will allow us to extract back to using polymorphic variants, or when extracting vectors we could extract using phantom types
     
    664656This field is only logical, since it does not represent any quantity stored in the physical processor, and is used to keep track of the current `processor time'.
    665657Before every execution step, \texttt{clock} is incremented by the number of processor cycles that the instruction just fetched will take to execute.
    666 The emulator then executes the instruction, followed by the code implementing the timers and I/O\footnote{Though it isn't fully specified by the manufacturer's data sheets if I/O is handled at the beginning or the end of each cycle.}.
    667 In order to model I/O, we also store in \texttt{status} a \emph{continuation} which is a description of the behaviour of the environment:
     658The emulator executes the instruction then the code implementing timers and I/O (it isn't specified by the data sheets if I/O is handled at the beginning or the end of each cycle.)
     659To model I/O, we store in \texttt{status} a \emph{continuation} which is a description of the behaviour of the environment:
    668660\begin{lstlisting}
    669661type line =
     
    671663| `SerialBuff of
    672664   [ `Eight of byte
    673    | `Nine of BitVectors.bit * byte ]
    674 ]
     665   | `Nine of BitVectors.bit * byte ]  ]
    675666type continuation =
    676667[`In of time * line *
     
    678669[`Out of (time -> line -> time * continuation)]
    679670\end{lstlisting}
    680 At each moment, the second projection of the continuation $k$ describes how the environment will react to an output event performed in the future by the processor.
     671The second projection of the continuation $k$ describes how the environment will react to an output event performed in the future by the processor.
    681672Suppose $\pi_2(k)(\tau,o) = \langle \tau',k' \rangle$.
    682673If the emulator at time $\tau$ starts an asynchronous output $o$ either on the P1 or P3 output lines, or on the UART, then the environment will receive the output at time $\tau'$.
     
    690681We leave the computation of the delay time to the environment.
    691682
    692 We use only the P1 and P3 lines despite the MCS-51 having 4 output lines, P0--P3.
     683We use only the P1 and P3 lines despite the MCS-51 having~4 output lines, P0--P3.
    693684This is because P0 and P2 become inoperable if XRAM is present (we assume it is).
    694685
     
    706697As mentioned in Subsection~\ref{subsect.labels.pseudoinstructions} we introduced a notion of \emph{cost label}.
    707698Cost labels are inserted by the prototype C compiler at specific locations in the object code.
    708 Roughly, for those familiar with control flow graphs, they are inserted at the start of every basic block.
    709 
    710 Cost labels are used to calculate a precise costing for a program by marking the location of basic blocks.
     699
     700Cost labels are used to calculate a precise costing for a program by marking the start of basic blocks.
    711701During the assembly phase, where labels and pseudoinstructions are eliminated, a map is generated associating cost labels with memory locations.
    712702This map is later used in a separate analysis which computes the cost of a program by traversing through a program, fetching one instruction at a time, and computing the cost of blocks.
     
    742732%\end{figure}
    743733
    744 We spent considerable effort attempting to ensure that what we have formalised is an accurate model of the MCS-51 microprocessor.
     734We attempted to ensure that what we have formalised is an accurate model of the MCS-51 microprocessor.
    745735
    746736We made use of multiple data sheets, each from a different manufacturer.
     
    757747
    758748We further used MCU 8051 IDE as a reference, which allows a user to step through an assembly program one instruction at a time.
    759 Using these execution traces, we were able to step through a compiled program in MCU 8051 IDE and compare the resulting execution trace with the trace produced by our emulator.
     749With these execution traces, we could step through a compiled program in MCU 8051 IDE and compare the resulting execution trace with the trace produced by our emulator.
    760750
    761751We partially validated the assembler by proving in Matita that on all defined opcodes the \texttt{assembly\_1} and \texttt{fetch} functions are inverse.
    762752
    763 The Matita formalisation was largely copied from the O'Caml source code, apart from the changes already mentioned.
    764 However, as the Matita emulator is executable, we could perform further validation by comparing the trace of a program's execution in the Matita emulator with the trace of the same program in the O'Caml emulator.
     753The Matita formalisation was largely copied from the O'Caml source code apart from the changes already mentioned.
     754As the Matita emulator is executable we could perform further validation by comparing the trace of a program's execution in the Matita and O'Caml emulators.
    765755
    766756%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
     
    777767Work closely related to our own can be found in~\cite{fox:trustworthy:2010}.
    778768Here, the authors describe the formalisation, in HOL4, of the ARMv7 instruction set architecture.
    779 They further point to an excellent list of references to related work in the literature for the interested reader.
     769They further point to an excellent list of references to related work in the literature.
    780770This formalisation also considers the machine code level, opposed to their formalisation, which only considering an abstract assembly language.
    781 In particular, instruction decoding is explicitly modeled inside HOL4's logic.
     771Instruction decoding is explicitly modeled inside HOL4's logic.
    782772We go further in also providing an assembly language, complete with assembler, to translate instructions and pseudoinstruction into machine code.
    783773
     
    799789The CLI Stack consists of the design and verification of a whole chain of artifacts including a 32-bit microprocessor, the Piton assembler and two compilers for high-level languages.
    800790Like CerCo, the CLI Stack compilers gave the cost of high-level instructions in processor cycles.
    801 However, unlike CerCo, both the CLI Stack high-level languages ($\mu$Gypsy and Nqthm Pure Lisp) and FM9001 microprocessor were not commercial products, but `artificial' designs created for the purpose of verification (see~\cite{moore:grand:2005}).
     791However, unlike CerCo, both the CLI Stack high-level languages ($\mu$Gypsy and Nqthm Lisp) and FM9001 microprocessor were not commercial products, but designs created for the purpose of verification (see~\cite{moore:grand:2005}).
    802792
    803793The CompCert C compiler is extracted to O'Caml using Coq's code extraction facility.
     
    824814
    825815Finally~\cite{sarkar:semantics:2009} provides an executable semantics for x86-CC multiprocessor machine code.
    826 This machine code exhibits a high degree of non-uniformity similar to the MCS-51.
    827 However, only a small subset of the instruction set is considered, and they over-approximate the possibilities of unorthogonality of the instruction set, largely dodging the problems we had to face.
     816This machine code exhibits a degree of non-uniformity similar to the MCS-51.
     817Only a small subset of the instruction set is considered, and they over-approximate the possibilities of unorthogonality of the instruction set, largely dodging the problems we had to face.
    828818
    829819Further, it seems that the definition of the decode function is potentially error prone.
    830 A small domain specific language of patterns is formalised in HOL4.
    831 This is similar to the specification language of the x86 instruction set found in manufacturer's data sheets.
     820A domain specific language of patterns is formalised in HOL4, similar to the specification language of the x86 instruction set found in manufacturer's data sheets.
    832821A decode function is implemented by copying lines from data sheets into the proof script, which are then partially evaluated to obtain a compiler.
    833822We are currently considering implementing a similar domain specific language in Matita.
     
    842831Our cost model assigns costs to blocks of instructions by tracing the way that blocks are compiled, and by computing exact costs on generated machine language.
    843832To perform this accurately, we have provided an executable semantics for the MCS-51 family of processors.
    844 The formalisation was done twice, first in O'Caml and then in Matita, and captures the exact timings of the processor (according to a Siemen's data sheet).
    845 Moreover, the O'Caml formalisation also considers timers and I/O.
    846 Adding support for I/O and timers in Matita is on-going work that will not present any major problem, and was delayed only because the addition is not immediately useful for the formalisation of the CerCo compiler.
     833An O'Caml and Matita formalisation was provided, and both capture the exact timings of the MCS-51 (according to a Siemen's data sheet).
     834The O'Caml formalisation also considers timers and I/O.
     835Adding support for I/O and timers in Matita is on-going work that will not present any problem, and was delayed only because the addition is not immediately useful for the formalisation of the CerCo compiler.
    847836
    848837The formalisation is done at machine level and not at assembly level; we also formalise fetching and decoding.
     
    854843
    855844Our main difficulty in formalising the MCS-51 was the unorthogonality of its memory model and instruction set.
    856 These problems are easily handled in O'Caml by using advanced language features like polymorphic variants and phantom types, simulating Generalized Abstract Data Types~\cite{xi:guarded:2003}.
    857 Importantly, we discovered the best manner of using dependent types to recover the same flexibility, to reduce spurious partiality, and to grant invariants that will be later useful in other formalisations in the CerCo project.
     845These problems are handled in O'Caml by using language features like polymorphic variants and phantom types, simulating Generalized Abstract Data Types~\cite{xi:guarded:2003}.
     846Importantly, we searched for a manner of using dependent types to recover the same flexibility, reduce spurious partiality, and grant invariants that will be later useful in other formalisations in CerCo.
    858847
    859848The formalisation has been partially verified by computing execution traces on selected programs and comparing them with an existing emulator.
     
    862851Finally, we are aware of having over-specified the processor in several places, by fixing a behaviour hopefully consistent with the real machine, where manufacturer data sheets are ambiguous or under-specified.
    863852
    864 \bibliography{itp-2011.bib}
     853\bibliography{IEEEabrv,itp-2011}
    865854
    866855\end{document}
Note: See TracChangeset for help on using the changeset viewer.