 Timestamp:
 Sep 22, 2014, 3:50:30 PM (5 years ago)
 Location:
 Papers/sttt
 Files:

 2 edited
Legend:
 Unmodified
 Added
 Removed

Papers/sttt/biblio.bib
r3470 r3477 1 @article 2 { asperti:user:2007, 3 author = {Andrea Asperti and Claudio {Sacerdoti Coen} and Enrico Tassi and Stefano Zacchiroli}, 4 title = {User interaction with the {Matita} proof assistant}, 5 journal = {Automated Reasoning}, 6 pages = {109139}, 7 volume = {39}, 8 issue = {2}, 9 year = {2007} 10 } 11 12 @article 13 { bate:wcet:2011, 14 author = {Iain Bate and Usman Khan}, 15 title = {{WCET} analysis of modern processors using multicriteria optimisation}, 16 journal = {Empirical Software Engineering}, 17 pages = {528}, 18 volume = {16}, 19 issue = {1}, 20 year = {2011} 21 } 22 23 @article 24 { klein:machine:2006, 25 author = {Gerwin Klein and Tobias Nipkow}, 26 title = {A machinechecked model for a {Javalike} language, virtual machine and compiler}, 27 journal = {{ACM} Transactions on Programming Languages and Systems}, 28 volume = {28}, 29 number = {4}, 30 pages = {619695}, 31 year = {2006} 32 } 33 34 @article 35 { klein:sel4:2010, 36 author = {Gerwin Klein and June Andronick and Kevin Elphinstone and Gernot Heiser and David Cock and Philip Derrin and Dhammika Elkaduwe and Kai Engelhardt and Rafal Kolanski and Michael Norrish and Thomas Sewell and Harvey Tuch and Simon Winwood}, 37 title = {{seL4}: Formal verification of an operating system kernel}, 38 journal = {Communications of the {ACM}}, 39 issue = {6}, 40 volume = {53}, 41 pages = {107115}, 42 year = {2010} 43 } 44 45 @article 46 { leroy:formal:2009, 47 author = {Xavier Leroy}, 48 title = {Formal verification of a realistic compiler}, 49 journal = {Communications of the {ACM}}, 50 volume = {52}, 51 number = {7}, 52 pages = {107115}, 53 year = {2009} 54 } 55 56 @article 57 { leroy:formally:2009, 58 author = {Xavier Leroy}, 59 title = {A formally verified compiler backend}, 60 journal = {Automated Reasoning}, 61 volume = {43}, 62 number = {4}, 63 pages = {363446}, 64 year = {2009} 65 } 66 67 @book 68 { moore:piton:1996, 69 author = {J Strother Moore}, 70 title = {Piton: A mechanically verified assembly language}, 71 series = {Automated Reasoning Series}, 72 volume = {3}, 73 isbn = {9780792339205}, 74 publisher = {Springer}, 75 year = {1996} 76 } 77 78 @inproceedings 79 { atkey:coqjvm:2007, 80 author = {Robert Atkey}, 81 title = {{CoqJVM}: An executable specification of the {Java Virtual Machine} using dependent types}, 82 booktitle = {Types}, 83 pages = {1832}, 84 year = {2007} 85 } 86 87 @inproceedings 88 { blazy:formal:2006, 89 author = {Sandrine Blazy and Zaynah Dargaye and Xavier Leroy}, 90 title = {Formal Verification of a {C} Compiler FrontEnd}, 91 booktitle = {{FM}}, 92 pages = {460475}, 93 year = {2006} 94 } 95 96 @inproceedings 97 { fox:trustworthy:2010, 98 author = {Anthony Fox and Magnus O. Myreen}, 99 title = {A Trustworthy Monadic Formalization of the {ARMv7} Instruction Set Architecture}, 100 booktitle = {{ITP}}, 101 pages = {243258}, 102 year = {2010} 103 } 104 105 @inproceedings 106 { sevcik:relaxedmemory:2011, 107 author = {Jaroslav \v{S}ev\v{c}\'ik and Viktor Vafeiadis and Francesco {Zappa Nardelli} and Suresh Jagannathan and Peter Sewell}, 108 title = {RelaxedMemory Concurrency and Verified Compilation}, 109 booktitle = {{POPL}}, 110 year = {2011} 111 } 112 113 @inproceedings 114 { tuch:types:2007, 115 author = {Harvey Tuch and Gerwin Klein and Michael Norrish}, 116 title = {Types, Bytes, and Separation Logic}, 117 booktitle = {{POPL}}, 118 pages = {97108}, 119 year = {2007} 120 } 121 122 123 @inproceedings 124 { klein:sel4:2009, 125 author = {Gerwin Klein and June Andronick and Kevin Elphinstone and Gernot Heiser and David Cock and Philip Derrin and Dhammika Elkaduwe and Kai Engelhardt and Rafal Kolanski and Michael Norrish and Thomas Sewell and Harvey Tuch and Simon Winwood}, 126 title = {{seL4}: Formal verification of an operating system kernel}, 127 booktitle = {{SOSP}}, 128 year = {2009} 129 } 130 131 @inproceedings 132 { mulligan:executable:2011, 133 author = {Dominic P. Mulligan and Claudio {Sacerdoti Coen}}, 134 title = {An executable formal semantics of the {MCS51} microprocessor in {Matita}}, 135 booktitle = {{FMCAD}}, 136 year = {2011}, 137 note = {Submitted} 138 } 139 140 @inproceedings 141 { sarkar:semantics:2009, 142 author = {Susmit Sarkar and Peter Sewell and Francesco Zappa Nardelli and Scott Owens and Tom Ridge and Thomas Braibant and Magnus O. Myreen and Jade Alglave}, 143 title = {The semantics of {x86CC} multiprocessor machine code}, 144 booktitle = {{POPL}}, 145 pages = {379391}, 146 year = {2009} 147 } 148 149 @inproceedings 150 { sozeau:subset:2006, 151 author = {Matthieu Sozeau}, 152 title = {Subset Coercions in {Coq}}, 153 booktitle = {Types}, 154 pages = {237252}, 155 year = {2006} 156 } 157 158 @inproceedings 159 { yan:wcet:2008, 160 author = {Jun Yan and Wei Zhang}, 161 title = {{WCET} Analysis for MultiCore Processors with Shared {L2} Instruction Caches}, 162 booktitle = {{RTAS}}, 163 pages = {8089}, 164 year = {2008} 165 } 166 167 @misc 168 { boender:correctness:2012, 169 author = {Jaap Boender and Claudio {Sacerdoti Coen}}, 170 title = {On the correctness of a branch displacement algorithm}, 171 howpublished = {\url{http://arxiv.org/abs/1209.5920}}, 172 year = {2012} 173 } 174 175 @misc 176 { cerco:2011, 177 title = {The {CerCo} {FETOpen} project}, 178 howpublished = {\url{http://cerco.cs.unibo.it/}}, 179 year = {2011}, 180 key = {CerCo:2011} 181 } 182 183 @misc 184 { cercoreportcode:2011, 185 title = {{CerCo Deliverable D2.2}: prototype costpreserving {C} compiler}, 186 howpublished = {\url{http://cerco.cs.unibo.it/rawattachment/wiki/WikiStart/D2_2.pdf} and \url{http://cerco.cs.unibo.it/rawattachment/wiki/WikiStart/D2_2_Code.tar.gz}}, 187 year = {2011}, 188 key = {CerCo2.2ReportCode:2011} 189 } 190 191 @misc 192 { compcert:2011, 193 title = {The {CompCert} project}, 194 howpublished = {\url{http://compcert.inria.fr/}}, 195 year = {2011}, 196 key = {CompCert:2011} 197 } 198 199 @misc 200 { hyde:branch:2006, 201 title = {Branch displacement optimisation}, 202 howpublished = {\url{http://groups.google.com/group/alt.lang.asm/msg/d31192d442accad3}}, 203 year = {2006}, 204 key = {Hyde:Branch:2006} 205 } 206 207 @misc 208 { moore:grand:2005, 209 author = {J Strother Moore}, 210 title = {A Grand Challenge Proposal for Formal Methods}, 211 year = {2005} 212 } 213 214 @misc 215 { sdcc:2011, 216 title = {Small Device {C} Compiler 3.0.0}, 217 howpublished = {\url{http://sdcc.sourceforge.net/}}, 218 year = {2011}, 219 key = {sdcc:2010} 220 } 221 222 @misc 223 { sel4:2011, 224 title = {The {l4.verified} project}, 225 howpublished = {\url{http://ertos.nicta.com.au/research/l4.verified/}}, 226 year = {2011} 227 } 228 229 @misc 230 { siemens:2011, 231 title = {{Siemens Semiconductor Group} 8051 derivative instruction set}, 232 howpublished = {\url{http://www.win.tue.nl/~aeb/comp/8051/instructionset.pdf}}, 233 year = {2011}, 234 key = {siemens:2011} 235 } 236 237 @techreport 238 { amadio:certifying:2010, 239 author = {Roberto M. Amadio and Nicolas Ayache and Yann R\'{e}gisGianas and Ronan Saillard}, 240 title = {Cerifying cost annotations in compilers}, 241 institution = {Universit\'{e} Paris Diderot {(Paris 7)}, Laboratoire {PPS}}, 242 year = {2010} 243 } 244 245 @techreport 246 { klein:machine:2010, 247 author = {Gerwin Klein and Tobias Nipkow}, 248 title = {A machinechecked model for a {Javalike} language, virtual machine and compiler}, 249 institution = {National {ICT} Australia}, 250 number = {0400001T.1}, 251 year = {2010} 252 253 } 254 255 1 256 @article{Szymanski1978, 2 257 author = {Szymanski, Thomas G.}, 
Papers/sttt/main.tex
r3476 r3477 1 \documentclass[twocolumn,draft]{svjour3} 1 %\documentclass[twocolumn,draft]{svjour3} 2 \documentclass[a4paper]{article} 2 3 \usepackage{algpseudocode} 3 4 %\usepackage{algorithmicx} … … 7 8 \usepackage[british]{babel} 8 9 \usepackage{caption} 9 \usepackage {hyperref}10 \usepackage[colorlinks]{hyperref} 10 11 \usepackage[utf8]{inputenc} 11 12 \usepackage{listings} 13 \usepackage{microtype} 12 14 \usepackage{subcaption} 13 15 … … 21 23 \thanks{Research supported by the CerCo project, within the Future and Emerging Technologies (FET) programme of the Seventh Framework Programme for Research of the European Commission, under FETOpen grant number 243881}} 22 24 \author{Jaap Boender \and Dominic P. Mulligan \and Claudio Sacerdoti Coen} 23 \institute{Department of Computer Science, University of Middlesex \and Computer Laboratory, University of Cambridge \and Dipartimento di Informatica, University of Bologna}25 %\institute{Department of Computer Science, University of Middlesex \and Computer Laboratory, University of Cambridge \and Dipartimento di Informatica, University of Bologna} 24 26 25 27 \maketitle 26 28 27 29 \begin{abstract} 28 Optimising assemblers face the `branch displacement' or `jump encoding' problem, i.e. how best to choose between concrete machine code jump instructions  typically of differing instruction and offset sizes when expanding pseudoinstructions.29 Ideally, an optimising assembler would choose the set of jumpexpansions that minimises the size of the resulting machine code program, a task that is provably \textsc{np}hard.30 31 As part of CerCo (`Certified Complexity')  an \textsc{eu}funded project to develop a verified concrete complexity preserving compiler for a large subset of the C programming language we have implemented and proved correct an optimising assembler within the Matita interactive theorem prover.32 Our assembler targets the instruction set of a typical microcontroller, the Intel \textsc{mcs }51series.33 As is common in embedded systems development,this microcontroller has a paucity of available code memory and therefore we face an additional pressure in reducing the size of any assembled machine code program.34 Out of necessity, then, our assembler implements an algorithm for solving the branch displacement problem, and we must provethat this algorithm is correct.35 36 However, th e efficient expansion of pseudoinstructions, namely jumps, into machine instructions is complex.37 We therefore isolate the decision making over how jumps should be expanded from the expansion process itself as much as possible using `policies', making the proof of correctness for the assemblermore straightforward.30 Optimising assemblers face the `branch displacement' or `jump encoding' problem, that is: how best to choose between concrete machine code jump instructionstypically of differing instruction and offset sizeswhen expanding pseudoinstructions. 31 Ideally, on space constrained hardware, an optimising assembler would choose the set of pseudoinstruction expansions that minimises the size of the resulting machine code program, a task that is provably \textsc{np}hard. 32 33 As part of CerCo (`Certified Complexity')an \textsc{eu}funded project to develop a verified concrete complexity preserving compiler for a large subset of the C programming languagewe have implemented and proved correct an optimising assembler within the Matita interactive theorem prover. 34 Our assembler targets the instruction set of a typical microcontroller, the Intel \textsc{mcs51} series. 35 As is common with embedded systems development this microcontroller has a paucity of available code memory and therefore we face an additional pressure in reducing the size of any assembled machine code program. 36 Out of necessity, then, our assembler implements an algorithm for solving the branch displacement problem, and we have proved that this algorithm is correct. 37 38 However, this efficient expansion of jump pseudoinstructions into machine code equivalents is complex, and therefore could unneccessarily complicate the proof of correctness for the rest of the assembler. 39 We therefore isolate the decision making over how jumps should be expanded from the expansion process itself as much as possible using `policies', making the rest of the proof of correctness more straightforward. 38 40 Our proof strategy contains a tracking facility for `good addresses' and only programs that use good addresses have their semantics preserved under assembly, as we observe that it is impossible for an assembler to preserve the semantics of every assembly program. 39 41 Our strategy offers increased flexibility over the traditional approach to proving the correctness of assemblers, wherein addresses in assembly are kept opaque and immutable. 40 In particular, we may experiment with allowingthe benign manipulation of addresses.42 In particular, our approach permits experimentation with the benign manipulation of addresses. 41 43 42 44 We discuss wider issues associated with a proof of correctness for an assembler, detail our algorithm solving the `branch displacement' problem, and discuss our proof of correctness, employing `policies', for the assembler. 43 45 44 \keywords{Formal verification, interactive theorem proving, assembler, branch displacement optimisation, CerCo (`Certified Complexity'), MCS51 microcontroller, Matita proof assistant}46 %\keywords{Formal verification, interactive theorem proving, assembler, branch displacement optimisation, CerCo (`Certified Complexity'), MCS51 microcontroller, Matita proof assistant} 45 47 \end{abstract} 46 48 47 49 \section{Introduction} 48 50 49 The problem of branch displacement optimisation, also known as jump encoding, is 50 a wellknown problem in assembler design~\cite{Hyde2006}. Its origin lies in the 51 fact that in many architecture sets, the encoding (and therefore size) of some 52 instructions depends on the distance to their operand (the instruction 'span'). 53 The branch displacement optimisation problem consists of encoding these 54 spandependent instructions in such a way that the resulting program is as 55 small as possible. 56 57 This problem is the subject of the present paper. After introducing the problem 58 in more detail, we will discuss the solutions used by other compilers, present 59 the algorithm we use in the CerCo assembler, and discuss its verification, 60 that is the proofs of termination and correctness using the Matita proof 61 assistant~\cite{Asperti2007}. 62 63 Formulating the final statement of correctness and finding the loop invariants 64 have been nontrivial tasks and are, indeed, the main contribution of this 65 paper. It has required considerable care and finetuning to formulate not 66 only the minimal statement required for the ulterior proof of correctness of 67 the assembler, but also the minimal set of invariants needed for the proof 68 of correctness of the algorithm. 69 70 The research presented in this paper has been executed within the CerCo project 71 which aims at formally verifying a C compiler with cost annotations. The 72 target architecture for this project is the MCS51, whose instruction set 73 contains spandependent instructions. Furthermore, its maximum addressable 74 memory size is very small (64 Kb), which makes it important to generate 75 programs that are as small as possible. With this optimisation, however, comes increased complexity and hence increased possibility for error. We must make sure that the branch instructions are encoded correctly, otherwise the assembled program will behave unpredictably. 76 77 All Matita files related to this development can be found on the CerCo 78 website, \url{http://cerco.cs.unibo.it}. The specific part that contains the 79 branch displacement algorithm is in the {\tt ASM} subdirectory, in the files 80 {\tt PolicyFront.ma}, {\tt PolicyStep.ma} and {\tt Policy.ma}. 51 We consider the formalisation of an assembler for the Intel MCS51 8bit microprocessor in the Matita proof assistant~\cite{asperti:user:2007}. 52 This formalisation forms a major component of the EUfunded CerCo (`Certified Complexity') project~\cite{cerco:2011}, concerning the construction and formalisation of a concrete complexity preserving compiler for a large subset of the C programming language. 53 54 The \textsc{mcs51}, commonly called the 8051, is an 8bit Harvard architecture \textsc{cisc} instruction set microcontroller dating from the early 1980s, and was originally manufactured by Intel. 55 An extended variant, the \textsc{mcs52} or 8052, was subsequently released adding extra \textsc{ram} and \textsc{rom} above and beyond that offered by the original \textsc{mcs51}, and an extra timer. 56 Derivatives are still widely manufactured by a number of semiconductor foundries, with the processor being used especially in embedded systems. 57 58 The MCS51 has a relative paucity of features compared to its more modern brethren, with a lack of any caching or pipelining features meaning that timing of execution is predictable, making the MCS51 very attractive for CerCo's ends. 59 However, the MCS51's paucity of featuresthough an advantage in many respectsalso quickly becomes a hindrance, as the MCS51 features a relatively minuscule series of memory spaces by modern standards. 60 As a result our C compiler, to be able to successfully compile realistic programs for embedded devices, ought to produce `tight' machine code. 61 62 To do this, we must solve the `branch displacement' problemdeciding how best to expand pseudojumps to labels in assembly language to machine code jumps. 63 This problem, also known as `jump encoding', is 64 a wellknown problem amongst assembler implementors~\cite{Hyde2006} and arises when pseudojumps can be expanded 65 in different ways to real machine instructions, but the different expansions 66 are not equivalent (e.g. jumps that differ in the distance that they `span', or instructions that differ in the number of clock cycles needed to completely execute them) and are not always 67 correct (e.g. correctness is only up to global constraints over the compiled 68 code). For instance, some jump instructions (short jumps) are very small 69 and can be executed quickly, but they can only reach destinations within a 70 certain distance from the current instruction. When the destinations are 71 too far away, larger and slower long jumps must be used. The use of a long jump may 72 augment the distance between another pseudojump and its target, forcing 73 another long jump use, in a cascade. The job of the optimising 74 compiler (assembler) is to individually expand every pseudoinstruction in such a way 75 that all global constraints are satisfied and that the compiled program 76 is minimal in size and faster in concrete time complexity. 77 This problem is known to be computationally hard for most CISC architectures (\textsc{np}hard, see~\cite{hyde:branch:2006}). 78 79 To simplify the CerCo C compiler we have chosen to implement an optimising assembler whose input language the compiler will target. 80 Labels, conditional jumps to labels, a program preamble containing global data and a \texttt{MOV} instruction for moving this global data into the MCS51's one 16bit register all feature in our assembly language. 81 We further simplify by ignoring linking, assuming that all our assembly programs are prelinked. 82 83 The requirements of the CerCo project add yet more complications to our proof of correctness, as we must also address a cost model. 84 CerCo imposes a cost model on C programs or, more specifically, on simple blocks of instructions. 85 This cost model is induced by the compilation process itself, and its noncompositional nature allows us to assign different costs to identical C statements depending on how they are compiled. 86 In short, we aim to obtain a very precise costing for a program by embracing the compilation process, not ignoring it. 87 At the assembler level, this is reflected by our need to induce a cost 88 model on the assembly code as a function of the assembly program and the 89 strategy used to solve the branch displacement problem. In particular, our 90 optimising assembler should also return a map that assigns a cost (in clock 91 cycles) to every instruction in the source program. We expect the induced cost 92 to be preserved by the assembler: we will prove that the compiled code 93 tightly simulates the source code by taking exactly the predicted amount of 94 time. 95 Note that the temporal `tightness' of the simulation is a fundamental prerequisite 96 of the correctness of the simulation, as some functions of the MCS51timers and \textsc{i/o}depend on the microprocessor's clock. 97 If the pseudo and concrete clock differ the result of an \textsc{i/o} operation may not be preserved. 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 % This problem is the subject of the present paper. After introducing the problem 120 % in more detail, we will discuss the solutions used by other compilers, present 121 % the algorithm we use in the CerCo assembler, and discuss its verification, 122 % that is the proofs of termination and correctness using the Matita proof 123 % assistant~\cite{Asperti2007}. 124 125 % Formulating the final statement of correctness and finding the loop invariants 126 % have been nontrivial tasks and are, indeed, the main contribution of this 127 % paper. It has required considerable care and finetuning to formulate not 128 % only the minimal statement required for the ulterior proof of correctness of 129 % the assembler, but also the minimal set of invariants needed for the proof 130 % of correctness of the algorithm. 131 132 % The research presented in this paper has been executed within the CerCo project 133 % which aims at formally verifying a C compiler with cost annotations. The 134 % target architecture for this project is the MCS51, whose instruction set 135 % contains spandependent instructions. Furthermore, its maximum addressable 136 % memory size is very small (64 Kb), which makes it important to generate 137 % programs that are as small as possible. With this optimisation, however, comes increased complexity and hence increased possibility for error. We must make sure that the branch instructions are encoded correctly, otherwise the assembled program will behave unpredictably. 138 139 % All Matita files related to this development can be found on the CerCo 140 % website, \url{http://cerco.cs.unibo.it}. The specific part that contains the 141 % branch displacement algorithm is in the {\tt ASM} subdirectory, in the files 142 % {\tt PolicyFront.ma}, {\tt PolicyStep.ma} and {\tt Policy.ma}. 81 143 82 144 \section{The branch displacement optimisation problem} … … 839 901 840 902 841 903 \bibliographystyle{alpha} 842 904 \bibliography{biblio} 843 \bibliographystyle{spbasic}905 %\bibliographystyle{spbasic} 844 906 845 907 \end{document}
Note: See TracChangeset
for help on using the changeset viewer.