\documentclass{beamer} \usetheme{Frankfurt} \logo{\includegraphics[height=1.0cm]{fetopen.png}} \usepackage{amsfonts} \usepackage{amsmath} \usepackage{amssymb} \usepackage[english]{babel} \usepackage{color} \usepackage[utf8x]{inputenc} \usepackage{listings} \usepackage{stmaryrd} % \usepackage{microtype} \author{Dominic P. Mulligan and Claudio Sacerdoti Coen} \title{CerCo Work Package 4.1} \date{March 11, 2011} \setlength{\parskip}{1em} \lstdefinelanguage{matita-ocaml} {keywords={definition,coercion,lemma,theorem,remark,inductive,record,qed,let,in,rec,match,return,with,Type,try,on,to}, morekeywords={[2]whd,normalize,elim,cases,destruct}, morekeywords={[3]type,of,val,assert,let,function}, mathescape=true, } \lstset{language=matita-ocaml,basicstyle=\small\tt,columns=flexible,breaklines=false, keywordstyle=\color{red}\bfseries, keywordstyle=[2]\color{blue}, keywordstyle=[3]\color{blue}\bfseries, commentstyle=\color{green}, stringstyle=\color{blue}, showspaces=false,showstringspaces=false} \begin{document} \begin{frame} \maketitle \end{frame} \begin{frame} \frametitle{Work Package 4.1} Work Package 4.1 entitled Executable Formal Semantics of Machine Code': \begin{quote} Formal definition of the semantics of the target language. The semantics will be given in a functional (and hence executable) form, useful for testing, validation and project assessment. \end{quote} Manpower: Dominic Mulligan (609 hours) and Claudio Sacerdoti Coen (16 hours). \end{frame} \begin{frame} \frametitle{The MCS-51 microprocessor I} \begin{itemize} \item Commonly called the 8051 (has an immediate successor in the 8052). \item Popular 8-bit microprocessor from the late 1970s. \item Widely used and manufactured. \item Relatively simple microprocessor (especially suited for CerCo). \item Can accurately predict timing information in cycles. \end{itemize} \end{frame} \begin{frame} \frametitle{The MCS-51 microprocessor II} \begin{itemize} \item No exhaustive introduction. Reveal enough to understand what problems we faced. \item Byzantine memory model. \item Overlap, have different sizes, may not be present depending on model, addressed in multiple ways, addressed with different sized pointers. \item Non-orthogonal instruction set. \texttt{MOV} takes 16 combinations of addressing mode (possible 300+)s. \end{itemize} \end{frame} \begin{frame} \frametitle{The MCS-51 microprocessor III} \begin{itemize} \item Three unconditional jumps: \texttt{AJMP}, \texttt{SJMP} and \texttt{LJMP} (first one rarely used). \item Differ in the size of permissible offset and the size in bytes of instruction. \item Also has various timers, UART I/O and interrupts. 8052 adds additional timers. \item Interrupts are simple. Flags can be used to manually handle errors. \end{itemize} \end{frame} \begin{frame} \frametitle{Development strategy} \begin{itemize} \item Have two emulators. \item First in O'Caml. Iron out' issues. \item I/O for debugging purposes. \item Then we moved to Matita. Lexically similar to O'Caml: code copy-pasted. \end{itemize} \end{frame} \begin{frame} \frametitle{Dealing with the jumps} \begin{itemize} \item Unconditional jumps: offset computed ahead of time. \item Problems: separate compilation, prototype C compiler. \item Add pseudoinstructions and assemble away'. \item Introduce labels and cost labels (for cost traces). \item Introduce pseudoinstructions. \texttt{Jump}: unconditional jump to labels. \item Other pseudoinstructions introduced: \texttt{Mov} moves (16 bit) data stored at a label (global vars). Conditional jumps to labels. \item Assembly language similar to that of SDCC. \end{itemize} \end{frame} \begin{frame}[fragile] \frametitle{Polymorphic variants and phantom types} Instruction set is highly non-orthogonal. Polymorphic variants and phantom types used to capture this non-orthogonality. For instance: \begin{small} \begin{lstlisting} type direct = [ DIRECT of byte ] type indirect = [ INDIRECT of bit ] ... type 'addr preinstruction = [ ADD of acc * [ reg | direct | indirect | data ] ... | MOV of (acc * [ reg | direct | indirect | data ], [ reg | indirect ] * [ acc | direct | data ], direct * [ acc | reg | direct | indirect | data ], ...) ... \end{lstlisting} \end{small} \end{frame} \begin{frame}[fragile] \frametitle{Use of dependent types I} Worked well in O'Caml, Matita does not have polymorphic variants. We use dependent types. Introduce a type for addressing modes: \begin{small} \begin{lstlisting} inductive addressing_mode: Type[0] := DIRECT: Byte $\rightarrow$ addressing_mode ... \end{lstlisting} \end{small} Another type \texttt{addressing\_mode\_tag} of tags'. Constructors are in correspondence with those of \texttt{addressing\_mode}. \end{frame} \begin{frame}[fragile] \frametitle{Use of dependent types II} Use vectors of \texttt{addressing\_mode\_tag}s in type signatures for instructions. For instance: \begin{small} \begin{lstlisting} inductive preinstruction (A: Type[0]): Type[0] := ADD: $\llbracket$ acc_a $\rrbracket$ $\rightarrow$ $\llbracket$ register; direct; indirect; data $\rrbracket$ $\rightarrow$ preinstruction A ... \end{lstlisting} \end{small} We need: an \emph{ad hoc} $\Sigma$ type and two coercions. One coercion opens up a proof obligation when it is used. Requires some lemmas. Lemmas and automation close proof obligations generated (300+ in typechecking interpreter function). \end{frame} \begin{frame} \frametitle{Overlapping memory spaces and addressing modes} \begin{itemize} \item Memory spaces overlap, can be addressed with different modes and pointers. \item Status' record models memory as disjoint spaces. \item Tries with holes' datastructure (dependent types). \item Complications with overlapping handled by \texttt{set\_arg\_XX} and \texttt{get\_arg\_XX} for \texttt{XX} = 1, 8, 16. \item Make use of dependent type trick. \end{itemize} \end{frame} \begin{frame} \frametitle{Validation} We worked hard to make sure we implemented a MCS-51 emulator: \begin{itemize} \item Multiple data sheets from different manufacturers (errors found!) \item Output to Intel HEX. Loading into third party tools. \item O'Caml trace files with processor status after every execution step. Every opcode tested. \item Matita formalisation is also executable. \item Traces can be compared with O'Caml. \end{itemize} \end{frame} \begin{frame} \frametitle{What is implemented} \begin{itemize} \item In O'Caml: emulator proper, associated assembler, supprting debugging code (Intel HEX), (untested) code for timers, interrupts and I/O. \item In Matita: emulator proper and associated assembler. \item Yet to port the timers and I/O, preferring to focus on the core emulator. \end{itemize} \end{frame} \begin{frame} \frametitle{Demo} \end{frame} \end{document}