Changeset 3477 for Papers


Ignore:
Timestamp:
Sep 22, 2014, 3:50:30 PM (5 years ago)
Author:
mulligan
Message:

changes to intro, in progress...

Location:
Papers/sttt
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • Papers/sttt/biblio.bib

    r3470 r3477  
     1@article
     2{ asperti:user:2007,
     3  author = {Andrea Asperti and Claudio {Sacerdoti Coen} and Enrico Tassi and Stefano Zacchiroli},
     4  title = {User interaction with the {Matita} proof assistant},
     5  journal = {Automated Reasoning},
     6  pages = {109--139},
     7  volume = {39},
     8  issue = {2},
     9  year = {2007}
     10}
     11
     12@article
     13{ bate:wcet:2011,
     14  author = {Iain Bate and Usman Khan},
     15  title = {{WCET} analysis of modern processors using multi-criteria optimisation},
     16  journal = {Empirical Software Engineering},
     17  pages = {5--28},
     18  volume = {16},
     19  issue = {1},
     20  year = {2011}
     21}
     22
     23@article
     24{ klein:machine:2006,
     25  author = {Gerwin Klein and Tobias Nipkow},
     26  title = {A machine-checked model for a {Java-like} language, virtual machine and compiler},
     27  journal = {{ACM} Transactions on Programming Languages and Systems},
     28  volume = {28},
     29  number = {4},
     30  pages = {619--695},
     31  year = {2006}
     32}
     33
     34@article
     35{ klein:sel4:2010,
     36  author = {Gerwin Klein and June Andronick and Kevin Elphinstone and Gernot Heiser and David Cock and Philip Derrin and Dhammika Elkaduwe and Kai Engelhardt and Rafal Kolanski and Michael Norrish and Thomas Sewell and Harvey Tuch and Simon Winwood},
     37  title = {{seL4}: Formal verification of an operating system kernel},
     38  journal = {Communications of the {ACM}},
     39  issue = {6},
     40  volume = {53},
     41  pages = {107--115},
     42  year = {2010}
     43}
     44
     45@article
     46{ leroy:formal:2009,
     47  author = {Xavier Leroy},
     48  title = {Formal verification of a realistic compiler},
     49  journal = {Communications of the {ACM}},
     50  volume = {52},
     51  number = {7},
     52  pages = {107--115},
     53  year = {2009}
     54}
     55
     56@article
     57{ leroy:formally:2009,
     58  author = {Xavier Leroy},
     59  title = {A formally verified compiler back-end},
     60  journal = {Automated Reasoning},
     61  volume = {43},
     62  number = {4},
     63  pages = {363--446},
     64  year = {2009}
     65}
     66
     67@book
     68{ moore:piton:1996,
     69  author = {J Strother Moore},
     70  title = {Piton: A mechanically verified assembly language},
     71  series = {Automated Reasoning Series},
     72  volume = {3},
     73  isbn = {978-0-7923-3920-5},
     74  publisher = {Springer},
     75  year = {1996}
     76}
     77
     78@inproceedings
     79{ atkey:coqjvm:2007,
     80  author = {Robert Atkey},
     81  title = {{CoqJVM}: An executable specification of the {Java Virtual Machine} using dependent types},
     82  booktitle = {Types},
     83  pages = {18--32},
     84  year = {2007}
     85}
     86
     87@inproceedings
     88{ blazy:formal:2006,
     89  author = {Sandrine Blazy and Zaynah Dargaye and Xavier Leroy},
     90  title = {Formal Verification of a {C} Compiler Front-End},
     91  booktitle = {{FM}},
     92  pages = {460--475},
     93  year = {2006}
     94}
     95
     96@inproceedings
     97{ fox:trustworthy:2010,
     98  author = {Anthony Fox and Magnus O. Myreen},
     99  title = {A Trustworthy Monadic Formalization of the {ARMv7} Instruction Set Architecture},
     100  booktitle = {{ITP}},
     101  pages = {243--258},
     102  year = {2010}
     103}
     104
     105@inproceedings
     106{ sevcik:relaxed-memory:2011,
     107  author = {Jaroslav \v{S}ev\v{c}\'ik and Viktor Vafeiadis and  Francesco {Zappa Nardelli} and Suresh Jagannathan and Peter Sewell},
     108  title = {Relaxed-Memory Concurrency and Verified Compilation},
     109  booktitle = {{POPL}},
     110  year = {2011}
     111}
     112
     113@inproceedings
     114{ tuch:types:2007,
     115  author = {Harvey Tuch and Gerwin Klein and Michael Norrish},
     116  title = {Types, Bytes, and Separation Logic},
     117  booktitle = {{POPL}},
     118  pages = {97--108},
     119  year =  {2007}
     120}
     121
     122
     123@inproceedings
     124{ klein:sel4:2009,
     125  author = {Gerwin Klein and June Andronick and Kevin Elphinstone and Gernot Heiser and David Cock and Philip Derrin and Dhammika Elkaduwe and Kai Engelhardt and Rafal Kolanski and Michael Norrish and Thomas Sewell and Harvey Tuch and Simon Winwood},
     126  title = {{seL4}: Formal verification of an operating system kernel},
     127  booktitle = {{SOSP}},
     128  year = {2009}
     129}
     130
     131@inproceedings
     132{ mulligan:executable:2011,
     133  author = {Dominic P. Mulligan and Claudio {Sacerdoti Coen}},
     134  title = {An executable formal semantics of the {MCS-51} microprocessor in {Matita}},
     135  booktitle = {{FMCAD}},
     136  year = {2011},
     137  note = {Submitted}
     138}
     139
     140@inproceedings
     141{ sarkar:semantics:2009,
     142  author = {Susmit Sarkar and Peter Sewell and Francesco Zappa Nardelli and Scott Owens and Tom Ridge and Thomas Braibant and Magnus O. Myreen and Jade Alglave},
     143  title = {The semantics of {x86-CC} multiprocessor machine code},
     144  booktitle = {{POPL}},
     145  pages = {379--391},
     146  year = {2009}
     147}
     148
     149@inproceedings
     150{ sozeau:subset:2006,
     151  author = {Matthieu Sozeau},
     152  title = {Subset Coercions in {Coq}},
     153  booktitle = {Types},
     154  pages = {237--252},
     155  year = {2006}
     156}
     157
     158@inproceedings
     159{ yan:wcet:2008,
     160  author = {Jun Yan and Wei Zhang},
     161  title = {{WCET} Analysis for Multi-Core Processors with Shared {L2} Instruction Caches},
     162  booktitle = {{RTAS}},
     163  pages = {80--89},
     164  year = {2008}
     165}
     166
     167@misc
     168{ boender:correctness:2012,
     169  author = {Jaap Boender and Claudio {Sacerdoti Coen}},
     170  title = {On the correctness of a branch displacement algorithm},
     171  howpublished = {\url{http://arxiv.org/abs/1209.5920}},
     172  year = {2012}
     173}
     174
     175@misc
     176{ cerco:2011,
     177  title = {The {CerCo} {FET-Open} project},
     178  howpublished = {\url{http://cerco.cs.unibo.it/}},
     179  year = {2011},
     180  key = {CerCo:2011}
     181}
     182
     183@misc
     184{ cerco-report-code:2011,
     185  title = {{CerCo Deliverable D2.2}: prototype cost-preserving {C} compiler},
     186  howpublished = {\url{http://cerco.cs.unibo.it/raw-attachment/wiki/WikiStart/D2_2.pdf} and \url{http://cerco.cs.unibo.it/raw-attachment/wiki/WikiStart/D2_2_Code.tar.gz}},
     187  year = {2011},
     188  key = {CerCo-2.2-Report-Code:2011}
     189}
     190
     191@misc
     192{ compcert:2011,
     193  title = {The {CompCert} project},
     194  howpublished = {\url{http://compcert.inria.fr/}},
     195  year = {2011},
     196  key = {CompCert:2011}
     197}
     198
     199@misc
     200{ hyde:branch:2006,
     201  title = {Branch displacement optimisation},
     202  howpublished = {\url{http://groups.google.com/group/alt.lang.asm/msg/d31192d442accad3}},
     203  year = {2006},
     204  key = {Hyde:Branch:2006}
     205}
     206
     207@misc
     208{ moore:grand:2005,
     209  author = {J Strother Moore},
     210  title = {A Grand Challenge Proposal for Formal Methods},
     211  year = {2005}
     212}
     213
     214@misc
     215{ sdcc:2011,
     216  title = {Small Device {C} Compiler 3.0.0},
     217  howpublished = {\url{http://sdcc.sourceforge.net/}},
     218  year = {2011},
     219  key = {sdcc:2010}
     220}
     221
     222@misc
     223{ sel4:2011,
     224  title = {The {l4.verified} project},
     225  howpublished = {\url{http://ertos.nicta.com.au/research/l4.verified/}},
     226  year = {2011}
     227}
     228
     229@misc
     230{ siemens:2011,
     231  title = {{Siemens Semiconductor Group} 8051 derivative instruction set},
     232  howpublished = {\url{http://www.win.tue.nl/~aeb/comp/8051/instruction-set.pdf}},
     233  year = {2011},
     234  key = {siemens:2011}
     235}
     236
     237@techreport
     238{ amadio:certifying:2010,
     239  author = {Roberto M. Amadio and Nicolas Ayache and Yann R\'{e}gis-Gianas and Ronan Saillard},
     240  title = {Cerifying cost annotations in compilers},
     241  institution = {Universit\'{e} Paris Diderot {(Paris 7)}, Laboratoire {PPS}},
     242  year = {2010}
     243}
     244
     245@techreport
     246{ klein:machine:2010,
     247  author = {Gerwin Klein and Tobias Nipkow},
     248  title = {A machine-checked model for a {Java-like} language, virtual machine and compiler},
     249  institution = {National {ICT} Australia},
     250  number = {0400001T.1},
     251  year = {2010}
     252
     253}
     254
     255
    1256@article{Szymanski1978,
    2257 author = {Szymanski, Thomas G.},
  • Papers/sttt/main.tex

    r3476 r3477  
    1 \documentclass[twocolumn,draft]{svjour3}
     1%\documentclass[twocolumn,draft]{svjour3}
     2\documentclass[a4paper]{article}
    23\usepackage{algpseudocode}
    34%\usepackage{algorithmicx}
     
    78\usepackage[british]{babel}
    89\usepackage{caption}
    9 \usepackage{hyperref}
     10\usepackage[colorlinks]{hyperref}
    1011\usepackage[utf8]{inputenc}
    1112\usepackage{listings}
     13\usepackage{microtype}
    1214\usepackage{subcaption}
    1315
     
    2123\thanks{Research supported by the CerCo project, within the Future and Emerging Technologies (FET) programme of the Seventh Framework Programme for Research of the European Commission, under FET-Open grant number 243881}}
    2224\author{Jaap Boender \and Dominic P. Mulligan \and Claudio Sacerdoti Coen}
    23 \institute{Department of Computer Science, University of Middlesex \and Computer Laboratory, University of Cambridge \and Dipartimento di Informatica, University of Bologna}
     25%\institute{Department of Computer Science, University of Middlesex \and Computer Laboratory, University of Cambridge \and Dipartimento di Informatica, University of Bologna}
    2426
    2527\maketitle
    2628
    2729\begin{abstract}
    28 Optimising assemblers face the `branch displacement' or `jump encoding' problem, i.e. how best to choose between concrete machine code jump instructions --- typically of differing instruction and offset sizes --- when expanding pseudo-instructions.
    29 Ideally, an optimising assembler would choose the set of jump expansions that minimises the size of the resulting machine code program, a task that is provably \textsc{np}-hard.
    30 
    31 As part of CerCo (`Certified Complexity') --- an \textsc{eu}-funded project to develop a verified concrete complexity preserving compiler for a large subset of the C programming language --- we have implemented and proved correct an optimising assembler within the Matita interactive theorem prover.
    32 Our assembler targets the instruction set of a typical micro-controller, the Intel \textsc{mcs}-51 series.
    33 As is common in embedded systems development, this micro-controller has a paucity of available code memory and therefore we face an additional pressure in reducing the size of any assembled machine code program.
    34 Out of necessity, then, our assembler implements an algorithm for solving the branch displacement problem, and we must prove that this algorithm is correct.
    35 
    36 However, the efficient expansion of pseudoinstructions, namely jumps, into machine instructions is complex.
    37 We therefore isolate the decision making over how jumps should be expanded from the expansion process itself as much as possible using `policies', making the proof of correctness for the assembler more straightforward.
     30Optimising assemblers face the `branch displacement' or `jump encoding' problem, that is: how best to choose between concrete machine code jump instructions---typically of differing instruction and offset sizes---when expanding pseudo-instructions.
     31Ideally, on space constrained hardware, an optimising assembler would choose the set of pseudo-instruction expansions that minimises the size of the resulting machine code program, a task that is provably \textsc{np}-hard.
     32
     33As part of CerCo (`Certified Complexity')---an \textsc{eu}-funded project to develop a verified concrete complexity preserving compiler for a large subset of the C programming language---we have implemented and proved correct an optimising assembler within the Matita interactive theorem prover.
     34Our assembler targets the instruction set of a typical micro-controller, the Intel \textsc{mcs-51} series.
     35As is common with embedded systems development this micro-controller has a paucity of available code memory and therefore we face an additional pressure in reducing the size of any assembled machine code program.
     36Out of necessity, then, our assembler implements an algorithm for solving the branch displacement problem, and we have proved that this algorithm is correct.
     37
     38However, this efficient expansion of jump pseudoinstructions into machine code equivalents is complex, and therefore could unneccessarily complicate the proof of correctness for the rest of the assembler.
     39We therefore isolate the decision making over how jumps should be expanded from the expansion process itself as much as possible using `policies', making the rest of the proof of correctness more straightforward.
    3840Our proof strategy contains a tracking facility for `good addresses' and only programs that use good addresses have their semantics preserved under assembly, as we observe that it is impossible for an assembler to preserve the semantics of every assembly program.
    3941Our strategy offers increased flexibility over the traditional approach to proving the correctness of assemblers, wherein addresses in assembly are kept opaque and immutable.
    40 In particular, we may experiment with allowing the benign manipulation of addresses.
     42In particular, our approach permits experimentation with the benign manipulation of addresses.
    4143
    4244We discuss wider issues associated with a proof of correctness for an assembler, detail our algorithm solving the `branch displacement' problem, and discuss our proof of correctness, employing `policies', for the assembler.
    4345
    44 \keywords{Formal verification, interactive theorem proving, assembler, branch displacement optimisation, CerCo (`Certified Complexity'), MCS-51 microcontroller, Matita proof assistant}
     46%\keywords{Formal verification, interactive theorem proving, assembler, branch displacement optimisation, CerCo (`Certified Complexity'), MCS-51 microcontroller, Matita proof assistant}
    4547\end{abstract}
    4648
    4749\section{Introduction}
    4850
    49 The problem of branch displacement optimisation, also known as jump encoding, is
    50 a well-known problem in assembler design~\cite{Hyde2006}. Its origin lies in the
    51 fact that in many architecture sets, the encoding (and therefore size) of some
    52 instructions depends on the distance to their operand (the instruction 'span').
    53 The branch displacement optimisation problem consists of encoding these
    54 span-dependent instructions in such a way that the resulting program is as
    55 small as possible.
    56 
    57 This problem is the subject of the present paper. After introducing the problem
    58 in more detail, we will discuss the solutions used by other compilers, present
    59 the algorithm we use in the CerCo assembler, and discuss its verification,
    60 that is the proofs of termination and correctness using the Matita proof
    61 assistant~\cite{Asperti2007}.
    62 
    63 Formulating the final statement of correctness and finding the loop invariants
    64 have been non-trivial tasks and are, indeed, the main contribution of this
    65 paper. It has required considerable care and fine-tuning to formulate not
    66 only the minimal statement required for the ulterior proof of correctness of
    67 the assembler, but also the minimal set of invariants needed for the proof
    68 of correctness of the algorithm.
    69 
    70 The research presented in this paper has been executed within the CerCo project
    71 which aims at formally verifying a C compiler with cost annotations. The
    72 target architecture for this project is the MCS-51, whose instruction set
    73 contains span-dependent instructions. Furthermore, its maximum addressable
    74 memory size is very small (64 Kb), which makes it important to generate
    75 programs that are as small as possible. With this optimisation, however, comes increased complexity and hence increased possibility for error. We must make sure that the branch instructions are encoded correctly, otherwise the assembled program will behave unpredictably.
    76 
    77 All Matita files related to this development can be found on the CerCo
    78 website, \url{http://cerco.cs.unibo.it}. The specific part that contains the
    79 branch displacement algorithm is in the {\tt ASM} subdirectory, in the files
    80 {\tt PolicyFront.ma}, {\tt PolicyStep.ma} and {\tt Policy.ma}.
     51We consider the formalisation of an assembler for the Intel MCS-51 8-bit microprocessor in the Matita proof assistant~\cite{asperti:user:2007}.
     52This formalisation forms a major component of the EU-funded CerCo (`Certified Complexity') project~\cite{cerco:2011}, concerning the construction and formalisation of a concrete complexity preserving compiler for a large subset of the C programming language.
     53
     54The \textsc{mcs-51}, commonly called the 8051, is an 8-bit Harvard architecture \textsc{cisc} instruction set micro-controller dating from the early 1980s, and was originally manufactured by Intel.
     55An extended variant, the \textsc{mcs-52} or 8052, was subsequently released adding extra \textsc{ram} and \textsc{rom} above and beyond that offered by the original \textsc{mcs-51}, and an extra timer.
     56Derivatives are still widely manufactured by a number of semiconductor foundries, with the processor being used especially in embedded systems.
     57
     58The MCS-51 has a relative paucity of features compared to its more modern brethren, with a lack of any caching or pipelining features meaning that timing of execution is predictable, making the MCS-51 very attractive for CerCo's ends.
     59However, the MCS-51's paucity of features---though an advantage in many respects---also quickly becomes a hindrance, as the MCS-51 features a relatively minuscule series of memory spaces by modern standards.
     60As a result our C compiler, to be able to successfully compile realistic programs for embedded devices, ought to produce `tight' machine code.
     61
     62To do this, we must solve the `branch displacement' problem---deciding how best to expand pseudojumps to labels in assembly language to machine code jumps.
     63This problem, also known as `jump encoding', is
     64a well-known problem amongst assembler implementors~\cite{Hyde2006} and arises when pseudojumps can be expanded
     65in different ways to real machine instructions, but the different expansions
     66are not equivalent (e.g. jumps that differ in the distance that they `span', or instructions that differ in the number of clock cycles needed to completely execute them) and are not always
     67correct (e.g. correctness is only up to global constraints over the compiled
     68code). For instance, some jump instructions (short jumps) are very small
     69and can be executed quickly, but they can only reach destinations within a
     70certain distance from the current instruction. When the destinations are
     71too far away, larger and slower long jumps must be used. The use of a long jump may
     72augment the distance between another pseudojump and its target, forcing
     73another long jump use, in a cascade. The job of the optimising
     74compiler (assembler) is to individually expand every pseudo-instruction in such a way
     75that all global constraints are satisfied and that the compiled program
     76is minimal in size and faster in concrete time complexity.
     77This problem is known to be computationally hard for most CISC architectures (\textsc{np}-hard, see~\cite{hyde:branch:2006}).
     78
     79To simplify the CerCo C compiler we have chosen to implement an optimising assembler whose input language the compiler will target.
     80Labels, conditional jumps to labels, a program preamble containing global data and a \texttt{MOV} instruction for moving this global data into the MCS-51's one 16-bit register all feature in our assembly language.
     81We further simplify by ignoring linking, assuming that all our assembly programs are pre-linked.
     82
     83The requirements of the CerCo project add yet more complications to our proof of correctness, as we must also address a cost model.
     84CerCo imposes a cost model on C programs or, more specifically, on simple blocks of instructions.
     85This cost model is induced by the compilation process itself, and its non-compositional nature allows us to assign different costs to identical C statements depending on how they are compiled.
     86In short, we aim to obtain a very precise costing for a program by embracing the compilation process, not ignoring it.
     87At the assembler level, this is reflected by our need to induce a cost
     88model on the assembly code as a function of the assembly program and the
     89strategy used to solve the branch displacement problem. In particular, our
     90optimising assembler should also return a map that assigns a cost (in clock
     91cycles) to every instruction in the source program. We expect the induced cost
     92to be preserved by the assembler: we will prove that the compiled code
     93tightly simulates the source code by taking exactly the predicted amount of
     94time.
     95Note that the temporal `tightness' of the simulation is a fundamental prerequisite
     96of the correctness of the simulation, as some functions of the MCS-51---timers and \textsc{i/o}---depend on the microprocessor's clock.
     97If the pseudo- and concrete clock differ the result of an \textsc{i/o} operation may not be preserved.
     98
     99
     100
     101
     102
     103
     104
     105
     106
     107
     108
     109
     110
     111
     112
     113
     114
     115
     116
     117
     118
     119% This problem is the subject of the present paper. After introducing the problem
     120% in more detail, we will discuss the solutions used by other compilers, present
     121% the algorithm we use in the CerCo assembler, and discuss its verification,
     122% that is the proofs of termination and correctness using the Matita proof
     123% assistant~\cite{Asperti2007}.
     124
     125% Formulating the final statement of correctness and finding the loop invariants
     126% have been non-trivial tasks and are, indeed, the main contribution of this
     127% paper. It has required considerable care and fine-tuning to formulate not
     128% only the minimal statement required for the ulterior proof of correctness of
     129% the assembler, but also the minimal set of invariants needed for the proof
     130% of correctness of the algorithm.
     131
     132% The research presented in this paper has been executed within the CerCo project
     133% which aims at formally verifying a C compiler with cost annotations. The
     134% target architecture for this project is the MCS-51, whose instruction set
     135% contains span-dependent instructions. Furthermore, its maximum addressable
     136% memory size is very small (64 Kb), which makes it important to generate
     137% programs that are as small as possible. With this optimisation, however, comes increased complexity and hence increased possibility for error. We must make sure that the branch instructions are encoded correctly, otherwise the assembled program will behave unpredictably.
     138
     139% All Matita files related to this development can be found on the CerCo
     140% website, \url{http://cerco.cs.unibo.it}. The specific part that contains the
     141% branch displacement algorithm is in the {\tt ASM} subdirectory, in the files
     142% {\tt PolicyFront.ma}, {\tt PolicyStep.ma} and {\tt Policy.ma}.
    81143
    82144\section{The branch displacement optimisation problem}
     
    839901 
    840902
    841 
     903\bibliographystyle{alpha}
    842904\bibliography{biblio}
    843 \bibliographystyle{spbasic}
     905%\bibliographystyle{spbasic}
    844906
    845907\end{document}
Note: See TracChangeset for help on using the changeset viewer.