\documentclass{amsart}
\usepackage{times}
\usepackage{tabularx}
\usepackage{mathpartir}

\theoremstyle{plain}
\newtheorem{theorem}{Theorem}[section]  
\newtheorem{lemma}[theorem]{Lemma} 
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{conjecture}[theorem]{Conjecture}
\newtheorem{corollary}[theorem]{Corollary}

\theoremstyle{definition}
\newtheorem{algorithm}[theorem]{Algorithm}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{example}[theorem]{Example}

\newcommand{\pcall  }{\mbox{\texttt{call}}}
\newcommand{\pchoose}{\mbox{\texttt{?}}   }
\newcommand{\pdrop  }{\mbox{\texttt{drop}}}
\newcommand{\pdup  }{\mbox{\texttt{dup}}}
\newcommand{\pswap  }{\mbox{\texttt{swap}}}
\newcommand{\ptor  }{\mbox{\texttt{>r}}}
\newcommand{\pfromr  }{\mbox{\texttt{r>}}}
\newcommand{\pcar  }{\mbox{\texttt{car}}}
\newcommand{\pcdr  }{\mbox{\texttt{cdr}}}
\newcommand{\pcons  }{\mbox{\texttt{cdr}}}
\newcommand{\peq  }{\mbox{\texttt{=}}}

\def\leng{\mathop{\rm length}}

\begin{document}

\title{Factor Theory}
\author{Slava Pestov}

\begin{abstract}

In this article, I will attempt to give formal semantics to several aspects of
the Factor programming language. This is neither a reference or an introduction; for such
information, consult \texttt{http://factor.sf.net}.

\end{abstract}

\maketitle

\tableofcontents{}

\section{Transition semantics}

First, I present formal semantics for a subset of the Factor language.

\begin{definition}
The set of objects, denoted $OBJ$, is the smallest set satisfying:

\begin{enumerate}

\item The empty list $\bot \in OBJ$.
\item If $P$ is a primitive word, then $P \in OBJ$, where the primitive words are
\texttt{call}, \texttt{?},
\texttt{car}, \texttt{cdr}, \texttt{cons},
\texttt{drop}, \texttt{dup}, \texttt{swap}, \texttt{>r}, \texttt{r>}, and
\texttt{=}.
\item If $a, b \in OBJ$, the cons cell $(a::b) \in OBJ$.
\end{enumerate}

Note that Factor source code syntax for cons cells is \texttt{[[ a b ]]}, and the
empty list is denoted \texttt{f}. However, I will use the above notation since it
looks better in typeset output.

\end{definition}

In practice, Factor also allows a number of other data types, such as
numbers, strings, vectors, and so on. They will not be needed for now. We are
interested in a number of subsets of $OBJ$, the first being literal values.

\begin{definition}
The set $LIT\subset OBJ$ consists of  $OBJ-PRIM$, where $PRIM$ is the set of primitive words:
\texttt{call}, \texttt{?},
\texttt{car}, \texttt{cdr}, \texttt{cons},
\texttt{drop}, \texttt{dup}, \texttt{swap}, \texttt{>r}, \texttt{r>}, and
\texttt{=}.
\end{definition}

\begin{definition}
The set of program quotations, $QUOT \subset OBJ$, is the smallest set satisfying:

\begin{enumerate}

\item The empty list $\bot \in QUOT$.
\item If $a \in OBJ$, $b \in QUOT$, $(a::b) \in QUOT$.

\end{enumerate}

I will write $(a,b,c)$ in place of $(a::(b::(c::\bot)))$. Note the Factor source code
syntax is \texttt{[ a b c ]}.

It is convenient to define $\leng(\bot) := 0$, and $\leng(a::d):=1+\leng(d)$. Also, I will write $a\circ b$ for the concatenation of $a,b \in QUOT$.

\end{definition}

\begin{definition}
The set of stacks, $STACK \subset QUOT$, is the smallest set satisfying:

\begin{enumerate}

\item The empty list $\bot \in STACK$.
\item If $a \in QUOT$, $b \in STACK$, $(a::b) \in STACK$.

\end{enumerate}

\end{definition}

\begin{definition}

An \emph{interpreter} is a 3-tuple $(\delta, \rho, \chi)$, where $\delta, \rho \in STACK$,
and $\chi \in QUOT$. The set of interpreters is denoted $INT$, and can be regarded as
a subset of $OBJ$.

I call $\delta$ the \emph{data stack}; it holds values currently
being operated on by the running program. The stack $\rho$ is called the \emph{return stack} and retains interpreter state
between recursive calls. The quotation $\chi$ is called \emph{call frame}, and stores the
currently executing program fragment. Basically, the transition relation looks at the
call frame; if the first element is a literal, it is pushed on the stack, if the first
element is a primitive word, it is executed, and if the call frame is the empty list,
a new call frame is popped from the return stack.

\begin{figure}
\caption{\label{trans}Transition semantics for the Factor interpreter.}

\begin{tabularx}{12cm}{lc}
\hline

\{Reload\}&
$$
\inferrule 
   {\chi = \bot \\ \rho = (\chi\prime::\rho\prime)}
   {(\delta,\rho,\chi)\rhd(\delta,\rho\prime,\chi\prime)}
$$
\\[4mm]

\{Literal\}&
$$
\inferrule
   {\chi = (\lambda :: \chi\prime) \\ \lambda \in LIT}
   {(\delta,\rho,\chi)\rhd((\lambda :: \delta),\rho,\chi\prime)}
$$
\\[4mm]

\{\pcall\}&
$$
\inferrule
   {\delta = (q :: \delta\prime) \\ \chi = (\pcall :: \chi\prime)}
   {(\delta,\rho,\chi)\rhd(\delta\prime,(\chi::\rho),\chi\prime)}
$$
\\[4mm]

\{\pchoose{}a\}&
$$
\inferrule
   {\delta = (\tau,\phi,\beta :: \delta\prime) \\ \beta = \bot \\ \chi = (\pchoose :: \chi\prime)}
   {(\delta,\rho,\chi)\rhd((\phi :: \delta\prime),\rho,\chi\prime)}
$$
\\[4mm]

\{\pchoose{}b\}&
$$
\inferrule
   {\delta = (\tau,\phi,\beta :: \delta\prime) \\ \beta \ne \bot \\ \chi = (\pchoose :: \chi\prime)}
   {(\delta,\rho,\chi)\rhd((\tau :: \delta\prime),\rho,\chi\prime)}
$$
\\[4mm]

\{\pdrop\}&
$$
\inferrule
   {\delta = (o :: \delta\prime) \\ \chi = (\pdrop :: \chi\prime)}
   {(\delta,\rho,\chi)\rhd(\delta\prime,\rho,\chi\prime)}
$$
\\[4mm]

\{\pdup\}&
$$
\inferrule
   {\delta = (o :: \delta\prime) \\ \chi = (\pdup :: \chi\prime)}
   {(\delta,\rho,\chi)\rhd((o o :: \delta\prime),\rho,\chi\prime)}
$$
\\[4mm]

\{\ptor\}&
$$
\inferrule
   {\delta = (o :: \delta\prime) \\ \chi = (\ptor :: \chi\prime)}
   {(\delta,\rho,\chi)\rhd(\delta\prime,(o :: \rho),\chi\prime)}
$$
\\[4mm]

\{\pfromr\}&
$$
\inferrule
   {\rho = (o :: \rho\prime) \\ \chi = (\pfromr :: \chi\prime)}
   {(\delta,\rho,\chi)\rhd((o :: \delta),\rho,\chi\prime)}
$$
\\[4mm]

\{\pcar\}&
$$
\inferrule
   {\rho = ((a :: d) :: \rho\prime) \\ \chi = (\pcar :: \chi\prime)}
   {(\delta,\rho,\chi)\rhd((a :: \delta),\rho,\chi\prime)}
$$
\\[4mm]

\{\pcdr\}&
$$
\inferrule
   {\rho = ((a :: d) :: \rho\prime) \\ \chi = (\pcdr :: \chi\prime)}
   {(\delta,\rho,\chi)\rhd((d :: \delta),\rho,\chi\prime)}
$$
\\[4mm]

\{\pcons\}&
$$
\inferrule
   {\rho = (a,d :: \rho\prime) \\ \chi = (\pcons :: \chi\prime)}
   {(\delta,\rho,\chi)\rhd(((a :: d) :: \delta),\rho,\chi\prime)}
$$
\\[4mm]

\{\peq{}a\}&
$$
\inferrule
   {\rho = (x,y :: \rho\prime) \\ x = y \\ \chi = (\peq :: \chi\prime)}
   {(\delta,\rho,\chi)\rhd((\bot :: \delta),\rho,\chi\prime)}
$$
\\[4mm]

\{\peq{}b\}&
$$
\inferrule
   {\rho = (x,y :: \rho\prime) \\ x \ne y \\ \chi = (\peq :: \chi\prime)}
   {(\delta,\rho,\chi)\rhd(((\bot) :: \delta),\rho,\chi\prime)}
$$
\\

\hline
\end{tabularx}

\end{figure}

Now, I am ready to present transition semantics. 
Define a relation $\rhd$ by the deduction rules of figure \ref{trans}. By inspection, it is obvious that $(x\rhd y \wedge x \rhd z) \Rightarrow y=z$, so we are able to make the following definition.

\begin{definition}
Define a functional $\mathcal{F}: (INT \rightarrow INT) \rightarrow (INT \rightarrow INT)$:

$$
\mathcal{F}(f)(i) = 
\begin{cases}
f(j)&\text{if $\exists j / i\rhd j$}\\
i&\text{else}
\end{cases}
$$

\end{definition}

Now, let $F: INT \rightarrow INT$ be the least fixed point of $\mathcal{F}$. $F$ is the \emph{Factor evaluator function}. It takes an interpreter and maps it to another
interpreter. If the resulting interpreter's call frame is $\bot$, evaluation completed
successfully; otherwise, at some stage of the computation, there was no applicable
inference rule (for example, a \texttt{drop} was attempted with an empty stack), in which case it is an error condition. In the case that the final call frame is non-empty, we abuse notation and say that $F(\delta,\rho,\chi)=\bot$.

Note that $F$ is a partial function, and it might not terminate; in this case, lets say $F(i) = \bot$.

\end{definition}

Now, I will give the first lemma. Intuitively, it states that if a quotation consumes $n$ elements from the stack, it does not ``see'' anything below. This forms the basis for the work in the next section.

\begin{lemma}\label{dipping}
Let $q\in QUOT$ be such that there exists $\delta\in STACK$ with $F(\delta,\bot,q)=(\delta\prime,\bot,\bot)$. Then for any $\delta_0\in STACK$,
$F(\delta\circ\delta_0,\bot,q)=(\delta\prime\circ\delta_0,\bot,\bot)$
\end{lemma}

\begin{proof}
Since $F(\delta,\bot,q)\ne\bot$ by assumption, each step of its transition chain has the
property that the data stack is either $\bot$ and no attempt is made to access its top
element, or the data stack is not $\bot$. Now notice that the same transition chain will
occur with $F(\delta\circ \delta_0,\bot,q)$; the only difference being that when the data stack would become $\bot$ in the former, it would become $\delta_0$ in the latter. But since no attempt is made to access the top stack element, both transition chains are equivalent up to the appending of $\delta_0$. Therefore  $F(\delta\circ \delta_0,\bot,q)=(\delta\prime\circ \delta_0,\bot,\bot)$.

\end{proof}

\section{Stack effects}

Now, I will study the effect of evaluation on the data stack in detail. The primary motivation, and
eventual goal, is to be able to statically deduce the number of input and output parameters
of a quotation on the stack.

\begin{definition} Let $q \in QUOT$ be a program fragment. The \emph{stack effect} of $q$, denoted $[q]$, is $(m,n)\in \mathbb{Z}^+\times\mathbb{Z}^+$ iff the following conditions hold:

\begin{enumerate}

\item If $\leng(\delta)=m$, $(F(\delta,\bot,q)=(\delta\prime,\bot,\bot)) \Rightarrow (\leng(\delta\prime)=n)$.

\item If $l<m$, there exists a data stack $\delta$ with $\leng(\delta)=l$ such that $F(\delta,\bot,q)=\bot$.

\end{enumerate}

Note that if for all data stacks $\delta$, we have $F(\delta,\bot,q)=\bot$, the stack effect can be stated to be any pair $(m,n)$. This does not cause a problem in practice.

\end{definition}

The following result is intuitively obvious; the stack effect of a literal object is to increase the number of elements on the stack by one.

\begin{lemma}
For $o \in LIT$, $[o]=(0,1)$. 
\end{lemma}

\begin{proof}
By the transition rule \{Literal\}, we have:

$$F(\bot,\bot,(o))=((o),\bot,\bot)$$

By definition if the length function, it follows that:

$$\leng((o))=1+\leng(\bot)=1$$

Since 0 is the smallest possible choice of $n$, the second condition on the definition of a
stack effect is automatically satisfied.
\end{proof}

\begin{definition} The \emph{composition of stack effects} $(a,b), (c,d) \in \mathbb{Z}^+\times\mathbb{Z}^+$ is defined and denoted as follows:

$$(a,b) \otimes (c,d) = (a + \max(0,c-b),d + \max(0,b-c))$$

\end{definition}

The reason we define the above operation becomes clear with the following theorem.
It actually does represent composition of stack effects.

\begin{theorem}\label{iso}
For $p,q \in QUOT$, $[p\circ q]=[p]\otimes [q]$.
\end{theorem}

\begin{proof}
Let $[p]=(a,b)$, $[q]=(c,d)$.
If for all $\delta$, $F(\delta,\bot,p\circ q)=\bot$, then the stack effect of $p\circ q$ is vacuously $(a,b)\otimes(c,d)$. So assume that there exists a stack $\delta$ such that $F(\delta,\bot,p\circ q)=(\delta\prime,\bot,\bot)$ for some $\delta\prime$.

Now, consider two cases.

\begin{enumerate}
\item $c<b$. Let $\delta$ be such that $\leng(\delta)=a$ and $F(\delta,\bot,p)=(\delta\prime,\bot,\bot)$. Then $\leng(\delta\prime)=b$.
By assumption, $F(\delta\prime,\bot,q)\ne\bot$. So by \ref{dipping},
$F(\delta\prime,\bot,q)=(\delta\prime\prime,\bot,\bot)$ where $\leng(\delta\prime\prime)=\leng(\delta\prime)+b-c$.

\item $c\geq b$. p takes a, leaves b, q takes c-b more then and leaves c-b+d by \ref{dipping}.
\end{enumerate}

In both cases, we have that $[p\circ q]=(a + \max(0,c-b),d + \max(0,b-c))$.
\end{proof}

\begin{lemma}
Stack effect composition has an equal left and right identity, and is associative:

\begin{enumerate}
\item For all $(a,b) \in \mathbb{Z}^+\times\mathbb{Z}^+$, $(a,b) \otimes (0,0) = (0,0) \otimes (a,b) = (a,b)$.
\item For all $x,y,z \in \mathbb{Z}^+\times\mathbb{Z}^+$, $x\otimes (y\otimes z) = (x\otimes y) \otimes z$.
\end{enumerate}

\end{lemma}

\begin{proof}
The first part follows by routine calculation.

$$(a,b) \otimes (0,0) = (a+\max(0,0-b),0+\max(0,b-0)) = (a,b)$$
$$(0,0) \otimes (a,b) = (0+\max(0,a-0),b+\max(0,0-a)) = (a,b)$$

The second follows from the associativity of the concatenation operator $\circ$. Given stack effects $x,y,z$, let $X,Y,Z \in QUOT$ be quotations whose stack effects are $x,y,z$, respectively. Then by the previous theorem we have:

$$[X\circ Y\circ Z] = [X] \otimes [Y\circ Z] = [X] \otimes ([Y] \otimes [Z])$$
$$[X\circ Y\circ Z] = [X\circ Y] \otimes [Z] = ([X] \otimes [Y]) \otimes [Z]$$

Or indeed,

$$x\otimes(y\otimes z) = (x\otimes y)\otimes z$$

\end{proof}

\end{document}