formal description of factor started

2005-03-12 02:27:47 +00:00 · 2005-03-12 02:27:47 +00:00 · 544b2fa434
parent 2b956e1ff4
commit 544b2fa434
2 changed files with 388 additions and 84 deletions
--- a/doc/formal.txt
+++ b/doc/formal.txt
@ -1,84 +0,0 @@
-Formal model of a mini-Factor.
------------------------------
-
-* Objects
-
-OBJ is by definition the set of all possible objects.
-
-OBJ contains objects of three distinct types:
-
- f
-
- [[ a b ]] where a and b are objects. Abbreviation: [ a b c ... ] for
-  nested conses terminating with f.
-
- primitive words:
-  call ?
-  cons car cdr
-  drop dup swap >r r>
-  datastack set-datastack callstack set-callstack
-
-* Arrows
-
-ARROW is by definition the set of recursive (effectively computable)
-functions from OBJ to OBJ.
-
-* The e function
-
-Now consider a map e: OBJ * OBJ * OBJ --> OBJ.
-
-The e function takes three objects (or really, a list of three elements,
-which is itself an object) and is defined recursively as follows:
-
-(Here, uppercase words are variables, lowercase are symbols.)
-
-** Base case; no more code to evaluate, return resulting datastack:
-
-e[DS f f] = DS
-
-** Recursive cases:
-
-e[DS < Q CS > f] = e[DS CS Q]
-e[DS CS < X CF >] = e[< X DS > CS CF]
-
-** Primitive words:
-
-e[< Q DS > CS < call CF >] = e[DS < CF CS > Q]
-e[< F < T < f DS > > > CS < ? CF >] = e[< F DS > CS CF]
-e[< F < T < X DS > > > CS < ? CF >] = e[< T DS > CS CF]
-
-e[< A D DS > CS < cons CF >] = e[< < A D > DS > CS CF]
-e[< < A D > DS > CS < car CF >] = e[< A DS > CS CF]
-e[< < A D > DS > CS < cdr CF >] = e[< D DS > CS CF]
-
-e[< X DS > CS < drop CF >] = e[DS CS CF]
-e[< X DS > CS < dup CF >] = e[< X < X DS > > CS CF]
-e[< X < Y DS > > CS < swap CF >] = e[< Y < X DS > > CS CF]
-e[< X DS > CS < >r CF >] = e[DS < X CS > CF]
-e[DS < X CS > < r> CF >] = e[< X DS > CS CF]
-
-e[DS CS < datastack CF >] = e[< DS DS > CS CF]
-e[< L DS > CS < set-datastack CF >] = e[L CS CF]
-e[DS CS < callstack CF >] = e[< CS DS > CS CF]
-e[< L DS > CS < set-callstack CF >] = e[DS L CF]
-
-Define
-
-eval Q = e[f,f,Q]
-
-* A fixed-point combinator
-
-[ dup cons over call ] dup cons over call
-
-* Turing-completeness
-
-First, I claim that e is recursive; so e is an element of ARROW.
-
-Furthermore, for any element g in ARROW, there exists an element G in
-OBJ such that g(x) = eval < x G >. This means that the language executed
-by e is turing-complete, and that ARROW can be identified with a subset
-of OBJ.
-
-Furthermore, there exists an element E in OBJ such that
-e(x) = eval < x E >; so we have proven that there exists a Factor
-interpreter written in Factor.
--- a/doc/theory.tex
+++ b/doc/theory.tex
@ -0,0 +1,388 @@
+\documentclass{amsart}
+\usepackage{times}
+\usepackage{tabularx}
+\usepackage{mathpartir}
+
+\theoremstyle{plain}
+\newtheorem{theorem}{Theorem}[section]  
+\newtheorem{lemma}[theorem]{Lemma} 
+\newtheorem{proposition}[theorem]{Proposition}
+\newtheorem{conjecture}[theorem]{Conjecture}
+\newtheorem{corollary}[theorem]{Corollary}
+
+\theoremstyle{definition}
+\newtheorem{algorithm}[theorem]{Algorithm}
+\newtheorem{definition}[theorem]{Definition}
+\newtheorem{remark}[theorem]{Remark}
+\newtheorem{example}[theorem]{Example}
+
+\newcommand{\pcall  }{\mbox{\texttt{call}}}
+\newcommand{\pchoose}{\mbox{\texttt{?}}   }
+\newcommand{\pdrop  }{\mbox{\texttt{drop}}}
+\newcommand{\pdup  }{\mbox{\texttt{dup}}}
+\newcommand{\pswap  }{\mbox{\texttt{swap}}}
+\newcommand{\ptor  }{\mbox{\texttt{>r}}}
+\newcommand{\pfromr  }{\mbox{\texttt{r>}}}
+\newcommand{\pcar  }{\mbox{\texttt{car}}}
+\newcommand{\pcdr  }{\mbox{\texttt{cdr}}}
+\newcommand{\pcons  }{\mbox{\texttt{cdr}}}
+\newcommand{\peq  }{\mbox{\texttt{=}}}
+
+\def\leng{\mathop{\rm length}}
+
+\begin{document}
+
+\title{Factor Theory}
+\author{Slava Pestov}
+
+\begin{abstract}
+
+In this article, I will attempt to give formal semantics to several aspects of
+the Factor programming language. This is neither a reference or an introduction; for such
+information, consult \texttt{http://factor.sf.net}.
+
+\end{abstract}
+
+\maketitle
+
+\tableofcontents{}
+
+\section{Transition semantics}
+
+First, I present formal semantics for a subset of the Factor language.
+
+\begin{definition}
+The set of objects, denoted $OBJ$, is the smallest set satisfying:
+
+\begin{enumerate}
+
+\item The empty list $\bot \in OBJ$.
+\item If $P$ is a primitive word, then $P \in OBJ$, where the primitive words are
+\texttt{call}, \texttt{?},
+\texttt{car}, \texttt{cdr}, \texttt{cons},
+\texttt{drop}, \texttt{dup}, \texttt{swap}, \texttt{>r}, \texttt{r>}, and
+\texttt{=}.
+\item If $a, b \in OBJ$, the cons cell $(a::b) \in OBJ$.
+\end{enumerate}
+
+Note that Factor source code syntax for cons cells is \texttt{[[ a b ]]}, and the
+empty list is denoted \texttt{f}. However, I will use the above notation since it
+looks better in typeset output.
+
+\end{definition}
+
+In practice, Factor also allows a number of other data types, such as
+numbers, strings, vectors, and so on. They will not be needed for now. We are
+interested in a number of subsets of $OBJ$, the first being literal values.
+
+\begin{definition}
+The set $LIT\subset OBJ$ consists of  $OBJ-PRIM$, where $PRIM$ is the set of primitive words:
+\texttt{call}, \texttt{?},
+\texttt{car}, \texttt{cdr}, \texttt{cons},
+\texttt{drop}, \texttt{dup}, \texttt{swap}, \texttt{>r}, \texttt{r>}, and
+\texttt{=}.
+\end{definition}
+
+\begin{definition}
+The set of program quotations, $QUOT \subset OBJ$, is the smallest set satisfying:
+
+\begin{enumerate}
+
+\item The empty list $\mbox{\texttt{f}} \in QUOT$.
+\item If $a \in OBJ$, $b \in QUOT$, $(a::b) \in QUOT$.
+
+\end{enumerate}
+
+I will write $(a,b,c)$ in place of $(a::(b::(c::\bot)))$. Note the Factor source code
+syntax is \texttt{[ a b c ]}.
+
+It is convenient to define $\leng(\bot) := 0$, and $\leng(a::d):=1+\leng(d)$. Also, I will write $a\circ b$ for the concatenation of $a,b \in QUOT$.
+
+\end{definition}
+
+\begin{definition}
+The set of stacks, $STACK \subset QUOT$, is the smallest set satisfying:
+
+\begin{enumerate}
+
+\item The empty list $\mbox{\texttt{f}} \in STACK$.
+\item If $a \in QUOT$, $b \in STACK$, $(a::b) \in STACK$.
+
+\end{enumerate}
+
+\end{definition}
+
+\begin{definition}
+
+An \emph{interpreter} is a 3-tuple $(\delta, \rho, \chi)$, where $\delta, \rho \in STACK$,
+and $\chi \in QUOT$. The set of interpreters is denoted $INT$, and can be regarded as
+a subset of $OBJ$.
+
+I call $\delta$ the \emph{data stack}; it holds values currently
+being operated on by the running program. The stack $\rho$ is called the \emph{return stack} and retains interpreter state
+between recursive calls. The quotation $\chi$ is called \emph{call frame}, and stores the
+currently executing program fragment. Basically, the transition relation looks at the
+call frame; if the first element is a literal, it is pushed on the stack, if the first
+element is a primitive word, it is executed, and if the call frame is the empty list,
+a new call frame is popped from the return stack.
+
+\begin{figure}
+\caption{\label{trans}Transition semantics for the Factor interpreter.}
+
+\begin{tabularx}{12cm}{lc}
+\hline
+
+\{Reload\}&
+$$
+\inferrule 
+   {\chi = \bot \\ \rho = (\chi\prime::\rho\prime)}
+   {(\delta,\rho,\chi)\rhd(\delta,\rho\prime,\chi\prime)}
+$$
+\\[4mm]
+
+\{Literal\}&
+$$
+\inferrule
+   {\chi = (\lambda :: \chi\prime) \\ \lambda \in LIT}
+   {(\delta,\rho,\chi)\rhd((\lambda :: \delta),\rho,\chi\prime)}
+$$
+\\[4mm]
+
+\{\pcall\}&
+$$
+\inferrule
+   {\delta = (q :: \delta\prime) \\ \chi = (\pcall :: \chi\prime)}
+   {(\delta,\rho,\chi)\rhd(\delta\prime,(\chi::\rho),\chi\prime)}
+$$
+\\[4mm]
+
+\{\pchoose{}a\}&
+$$
+\inferrule
+   {\delta = (\tau,\phi,\beta :: \delta\prime) \\ \beta = \bot \\ \chi = (\pchoose :: \chi\prime)}
+   {(\delta,\rho,\chi)\rhd((\phi :: \delta\prime),\rho,\chi\prime)}
+$$
+\\[4mm]
+
+\{\pchoose{}b\}&
+$$
+\inferrule
+   {\delta = (\tau,\phi,\beta :: \delta\prime) \\ \beta \ne \bot \\ \chi = (\pchoose :: \chi\prime)}
+   {(\delta,\rho,\chi)\rhd((\tau :: \delta\prime),\rho,\chi\prime)}
+$$
+\\[4mm]
+
+\{\pdrop\}&
+$$
+\inferrule
+   {\delta = (o :: \delta\prime) \\ \chi = (\pdrop :: \chi\prime)}
+   {(\delta,\rho,\chi)\rhd(\delta\prime,\rho,\chi\prime)}
+$$
+\\[4mm]
+
+\{\pdup\}&
+$$
+\inferrule
+   {\delta = (o :: \delta\prime) \\ \chi = (\pdup :: \chi\prime)}
+   {(\delta,\rho,\chi)\rhd((o o :: \delta\prime),\rho,\chi\prime)}
+$$
+\\[4mm]
+
+\{\ptor\}&
+$$
+\inferrule
+   {\delta = (o :: \delta\prime) \\ \chi = (\ptor :: \chi\prime)}
+   {(\delta,\rho,\chi)\rhd(\delta\prime,(o :: \rho),\chi\prime)}
+$$
+\\[4mm]
+
+\{\pfromr\}&
+$$
+\inferrule
+   {\rho = (o :: \rho\prime) \\ \chi = (\pfromr :: \chi\prime)}
+   {(\delta,\rho,\chi)\rhd((o :: \delta),\rho,\chi\prime)}
+$$
+\\[4mm]
+
+\{\pcar\}&
+$$
+\inferrule
+   {\rho = ((a :: d) :: \rho\prime) \\ \chi = (\pcar :: \chi\prime)}
+   {(\delta,\rho,\chi)\rhd((a :: \delta),\rho,\chi\prime)}
+$$
+\\[4mm]
+
+\{\pcdr\}&
+$$
+\inferrule
+   {\rho = ((a :: d) :: \rho\prime) \\ \chi = (\pcdr :: \chi\prime)}
+   {(\delta,\rho,\chi)\rhd((d :: \delta),\rho,\chi\prime)}
+$$
+\\[4mm]
+
+\{\pcons\}&
+$$
+\inferrule
+   {\rho = (a,d :: \rho\prime) \\ \chi = (\pcons :: \chi\prime)}
+   {(\delta,\rho,\chi)\rhd(((a :: d) :: \delta),\rho,\chi\prime)}
+$$
+\\[4mm]
+
+\{\peq{}a\}&
+$$
+\inferrule
+   {\rho = (x,y :: \rho\prime) \\ x = y \\ \chi = (\peq :: \chi\prime)}
+   {(\delta,\rho,\chi)\rhd((\bot :: \delta),\rho,\chi\prime)}
+$$
+\\[4mm]
+
+\{\peq{}b\}&
+$$
+\inferrule
+   {\rho = (x,y :: \rho\prime) \\ x \ne y \\ \chi = (\peq :: \chi\prime)}
+   {(\delta,\rho,\chi)\rhd(((\bot) :: \delta),\rho,\chi\prime)}
+$$
+\\
+
+\hline
+\end{tabularx}
+
+\end{figure}
+
+Now, I am ready to present transition semantics. 
+Define a relation $\rhd$ by the deduction rules of figure \ref{trans}. By inspection, it is obvious that $(x\rhd y \wedge x \rhd z) \Rightarrow y=z$, so we are able to make the following definition.
+
+\begin{definition}
+Define a functional $\mathcal{F}: (INT \rightarrow INT) \rightarrow (INT \rightarrow INT)$:
+
+$$
+\mathcal{F}(f)(i) = 
+\begin{cases}
+f(j)&\text{if $\exists j / i\rhd j$}\\
+i&\text{else}
+\end{cases}
+$$
+
+\end{definition}
+
+Now, let $F: INT \rightarrow INT$ be the least fixed point of $\mathcal{F}$. $F$ is the \emph{Factor evaluator function}. It takes an interpreter and maps it to another
+interpreter. If the resulting interpreter's call frame is $\bot$, evaluation completed
+successfully; otherwise, at some stage of the computation, there was no applicable
+inference rule (for example, a \texttt{drop} was attempted with an empty stack), in which case it is an error condition. In the case that the final call frame is non-empty, we abuse notation and say that $F(\delta,\rho,\chi)=\bot$.
+
+Note that $F$ is a partial function, and it might not terminate; in this case, lets say $F(i) = \bot$.
+
+\end{definition}
+
+Now, I will give the first lemma. Intuitively, it states that if a quotation consumes $n$ elements from the stack, it does not ``see'' anything below. This forms the basis for the work in the next section.
+
+\begin{lemma}\label{dipping}
+Let $q\in QUOT$ be such that there exists $\delta\in STACK$ with $F(\delta,\bot,q)=(\delta\prime,\bot,\bot)$. Then for any $\delta_0\in STACK$,
+$F(\delta\circ\delta_0,\bot,q)=(\delta\prime\circ\delta_0,\bot,\bot)$
+\end{lemma}
+
+\begin{proof}
+Since $F(\delta,\bot,q)\ne\bot$ by assumption, each step of its transition chain has the
+property that the data stack is either $\bot$ and no attempt is made to access its top
+element, or the data stack is not $\bot$. Now notice that the same transition chain will
+occur with $F(\delta\circ \delta_0,\bot,q)$; the only difference being that when the data stack would become $\bot$ in the former, it would become $\delta_0$ in the latter. But since no attempt is made to access the top stack element, both transition chains are equivalent up to the appending of $\delta_0$. Therefore  $F(\delta\circ \delta_0,\bot,q)=(\delta\prime\circ \delta_0,\bot,\bot)$.
+
+\end{proof}
+
+\section{Stack effects}
+
+Now, I will study the effect of evaluation on the data stack in detail. The primary motivation, and
+eventual goal, is to be able to statically deduce the number of input and output parameters
+of a quotation on the stack.
+
+\begin{definition} Let $q \in QUOT$ be a program fragment. The \emph{stack effect} of $q$, denoted $[q]$, is $(m,n)\in \mathbb{Z}^+\times\mathbb{Z}^+$ iff the following conditions hold:
+
+\begin{enumerate}
+
+\item If $\leng(\delta)=m$, $(F(\delta,\bot,q)=(\delta\prime,\bot,\bot)) \Rightarrow (\leng(\delta\prime)=n)$.
+
+\item If $l<m$, there exists a data stack $\delta$ with $\leng(\delta)=l$ such that $F(\delta,\bot,q)=\bot$.
+
+\end{enumerate}
+
+Note that if for all data stacks $\delta$, we have $F(\delta,\bot,q)=\bot$, the stack effect can be stated to be any pair $(m,n)$. This does not cause a problem in practice.
+
+\end{definition}
+
+The following result is intuitively obvious; the stack effect of a literal object is to increase the number of elements on the stack by one.
+
+\begin{lemma}
+For $o \in LIT$, $[o]=(0,1)$. 
+\end{lemma}
+
+\begin{proof}
+By the transition rule \{Literal\}, we have:
+
+$$F(\bot,\bot,(o))=((o),\bot,\bot)$$
+
+By definition if the length function, it follows that:
+
+$$\leng((o))=1+\leng(\bot)=1$$
+
+Since 0 is the smallest possible choice of $n$, the second condition on the definition of a
+stack effect is automatically satisfied.
+\end{proof}
+
+\begin{definition} The \emph{composition of stack effects} $(a,b), (c,d) \in \mathbb{Z}^+\times\mathbb{Z}^+$ is defined and denoted as follows:
+
+$$(a,b) \otimes (c,d) = (a + \max(0,c-b),d + \max(0,b-c))$$
+
+\end{definition}
+
+The reason we define the above operation becomes clear with the following theorem.
+It actually does represent composition of stack effects.
+
+\begin{theorem}\label{iso}
+For $p,q \in QUOT$, $[p\circ q]=[p]\otimes [q]$.
+\end{theorem}
+
+\begin{proof}
+Let $[p]=(a,b)$, $[q]=(c,d)$.
+If for all $\delta$, $F(\delta,\bot,p\circ q)=\bot$, then the stack effect of $p\circ q$ is vacuously $(a,b)\otimes(c,d)$. So assume that there exists a stack $\delta$ such that $F(\delta,\bot,p\circ q)=(\delta\prime,\bot,\bot)$ for some $\delta\prime$.
+
+Now, consider two cases.
+
+\begin{enumerate}
+\item $c<b$. Let $\delta$ be such that $\leng(\delta)=a$ and $F(\delta,\bot,p)=(\delta\prime,\bot,\bot)$. Then $\leng(\delta\prime)=b$.
+By assumption, $F(\delta\prime,\bot,q)\ne\bot$. So by \ref{dipping},
+$F(\delta\prime,\bot,q)=(\delta\prime\prime,\bot,\bot)$ where $\leng(\delta\prime\prime)=\leng(\delta\prime)+b-c$.
+
+\item $c\geq b$. p takes a, leaves b, q takes c-b more then and leaves c-b+d by \ref{dipping}.
+\end{enumerate}
+
+In both cases, we have that $[p\circ q]=(a + \max(0,c-b),d + \max(0,b-c))$.
+\end{proof}
+
+\begin{lemma}
+Stack effect composition has an equal left and right identity, and is associative:
+
+\begin{enumerate}
+\item For all $(a,b) \in \mathbb{Z}^+\times\mathbb{Z}^+$, $(a,b) \otimes (0,0) = (0,0) \otimes (a,b) = (a,b)$.
+\item For all $x,y,z \in \mathbb{Z}^+\times\mathbb{Z}^+$, $x\otimes (y\otimes z) = (x\otimes y) \otimes z$.
+\end{enumerate}
+
+\end{lemma}
+
+\begin{proof}
+The first part follows by routine calculation.
+
+$$(a,b) \otimes (0,0) = (a+\max(0,0-b),0+\max(0,b-0)) = (a,b)$$
+$$(0,0) \otimes (a,b) = (0+\max(0,a-0),b+\max(0,0-a)) = (a,b)$$
+
+The second follows from the associativity of the concatenation operator $\circ$. Given stack effects $x,y,z$, let $X,Y,Z \in QUOT$ be quotations whose stack effects are $x,y,z$, respectively. Then by the previous theorem we have:
+
+$$[X\circ Y\circ Z] = [X] \otimes [Y\circ Z] = [X] \otimes ([Y] \otimes [Z])$$
+$$[X\circ Y\circ Z] = [X\circ Y] \otimes [Z] = ([X] \otimes [Y]) \otimes [Z]$$
+
+Or indeed,
+
+$$x\otimes(y\otimes z) = (x\otimes y)\otimes z$$
+
+\end{proof}
+
+\end{document}