%^TEX
\documentstyle[glammar]{article}
\topmargin      0 mm
\headheight     0 mm
\headsep        0 mm
\textheight     240 mm
\footskip       7 mm
\footheight     11 mm

\oddsidemargin  0 mm
\evensidemargin 0 mm
\textwidth      159.2 mm
\author{Eric Voss}
\title{Description of builtins in Glammar}
\makeindex
\begin{document}
\maketitle
%

%TEX
\noindent
This section contains the description of  the current set of builtins.  
Some builtins may not be redefined.
The comment statement \verb+#plain+ indicates this for hyperrules.
None of the metabuiltins can be redefined.
In addition, you may not even use some builtins.
This is the case when its name ends with an underscore.
Other properties of a builtin are implicit in the form of its definition.
These properties are:
\begin{enumerate}
\item
deterministic builtins have only one production:
{\small
\begin{glammar}
\L{\LB{        \K{builtin}: x.}}
\L{\LB{}}
\end{glammar}}

\item
      nondeterministic builtins that can be forced
             to behave deterministically (-d option) have two productions:
{\small
\begin{glammar}
\L{\LB{          \K{builtin }: x;}}
\L{\LB{          builtin : x.}}
\L{\LB{}}
\end{glammar}}


\item
nondeterministic builtins have three productions:
{\small
\begin{glammar}
\L{\LB{          \Proc{1}\K{builtin }\index{builtin 1}: x;}}
\L{\LB{          builtin : x;}}
\L{\LB{          builtin : x.}}
\L{\LB{}}
\L{\LB{}}
\L{\LB{}}
\end{glammar}}
\item
 builtins that always succeed have no members in the right hand side:
{\small
\begin{glammar}
\L{\LB{          \Proc{1}\K{builtin }\index{builtin 1}: .}}
\L{\LB{}}
\L{\LB{}}
\end{glammar}}

\item
builtins that do not produce empty have a terminal ({\tt "a"} by default) in the right hand side:
{\small
\begin{glammar}
\L{\LB{          \Proc{1}\K{builtin }\index{builtin 1}: \T{}"a"\TE{}.}}
\L{\LB{}}
\end{glammar}}


\item
builtins that use an inherited affix for affix directed parsing
             have a single affix terminal on this position:
{\small
\begin{glammar}
\L{\LB{          \Proc{1}\K{builtin }\index{builtin 1}\D{}(\>\S{}\"x\"\SE{})\DE{} : x.}}
\L{\LB{}}
\end{glammar}}


\item
builtins that do not use an inherited affix for affix directed parsing
             have a single affix variable on this position:
{\small
\begin{glammar}
\L{\LB{         \Proc{1}\K{builtin }\index{builtin 1}\D{}(\>x)\DE{} : x.}}
\L{\LB{}}
\end{glammar}}
\end{enumerate}
%
empty ::.
nlcr :: "\n".
quote :: "\"".
back slash :: "\\".
%TEX
\noindent These are some metanotions which you can use to recognize frequently used special symbols.
\label{empty.}
\label{nlcr.}
\label{quote.}
\label{backslash.}
%
generation time :: "Fri Sep 22 11:53:37 1989".
generation date :: "Sep 22, 1989".
%TEX
\noindent The time and date the grammar was generated (\g was invoked).
This metarule is set when you compile the grammar.  
The definitions below show their form only.
\label{generationtime.}
\label{generationdate.}
%
%TEX
\noindent  All special symbols can be denoted by giving the 
octal ascii representation escaped by a back slash.
To remain compatible with earlier implementations some special symbols
are predefined. 
%
#plain
end of sentence= ?"\000".
%TEX
\noindent Tests whether the end of the input has been reached.
\label{endofsentence.}
%
nlcr= "\n".
quote = "\"".
back slash = "\\".
%TEX
\noindent Recognize a newline, quote or back slash respectively.
%

%TEX
\noindent This is a list of predicates which have been implemented in very early
versions also.
%

#plain
equal (>"x",>"x")= "a".
#plain
not equal (>"x",>"y")= "a".
%TEX
\noindent Recognizes the empty string (`succeeds') if the values of {\tt x} and {\tt y} are
(not) equal.
\label{equal.}
\label{notequal.}
%

compare (>x,>y,tail x>, tail y>)= .
%TEX
\noindent Let K be the longest common prefix of (the values of)  {\tt x} and  {\tt y}.
Then   {\tt tail x} and  {\tt tail y} 
will be assigned values such that   {\tt x}  = K +  {\tt tail x} and  {\tt y} = K +  {\tt tail y}.
\label{compare.}
%

charnum (pos>)= .
settabstop (>tabstop)= .
linenum (line>)= .
length (>in,length>)= .
%TEX
\noindent The affix {\tt pos} in {\tt charnum} is given the number of characters, in decimal, already recognized in the input.
The affix {\tt tabstop} in {\tt settabstop} specifies the value (in decimal) of the variable {\tt tabstop}.
This value is used in {\tt charnum}.
The default value for {\tt tabstop} is {\tt 8}.
The affix {\tt line} in {\tt linenum} is given the value (in decimal) of the current line number.
The affix {\tt length} in {\tt length} is given the value (in decimal) of the number of characters 
of affix {\tt in}.
\label{charnum.}
\label{settabstop.}
\label{linenum.}
\label{length.}
%

some name (name>)= .
%TEX
\noindent The affix name is given a value $L_X$, where $X$ is some large number in hexadecimal notation.
Each number can occur only once in one parse.
\label{somename.}
%

%TEX
\noindent GLAMMAR allows the manipulation of certain ordered sequences of pairs of values 
for historic reasons called {\em lists} but {\em tables} would be better instead.
%
add to (>table, >key,>value)= ;
add to (>table, >key,>value)= .
%TEX
\noindent Each table is recorded under a name which is an arbitrary affix value 
(it may be empty, contain newlines, be obtained from {\tt some name} etc.).
However, it may not be composite
(i.e., {\small  add to ($>$x*y, $>$k, $>$v)} is not allowed).
Initially every possible affix value is the name of an empty table.
The builtin {\tt add to} puts the pair (K,P) in front of the table
named L, where L,K and P are the values of {\tt table, key} and {\tt value}
respectively.
Upon backtracking the pair is 
deleted from the table, provided that it is not forced to behave deterministically.
\label{addto.}
%

value of (>"table", >"key",value>)= "a".
%TEX
\noindent If (K,P) is the first pair, if any, in table L such that L
and K are the values of {\tt table} and {\tt key} respectively
(which must both be known), then the value of {\tt value} is P; otherwise it
is the empty string.
\label{valueof.}
%

select (>"table", key>,value>)=  "a";
select (>"table", key>,value>)=  "a";
select (>"table", key>,value>)=  "a".
%TEX
\noindent The values of key and value are K and P respectively, where (K,P)
is any pair in the table L, where L is the value of table (which must be known).
Note that {\tt select} succeeds once for each pair.
\label{select.}
%
delete (>"table", key>,value>)=  "a";
delete (>"table", key>,value>)=  "a";
delete (>"table", key>,value>)=  "a".
%TEX
\label{delete.}
\noindent {\tt delete} is quite a  strange predicate.
Basically it deletes all pairs in the table, one at a time.
When a pair is deleted,  upon backtracking the  deleted item is 
not restored. 
Instead one more element is deleted,
until the table is empty.
%


contents of (>"table", >key val sep,>key key sep,contents>)=  "a".
%TEX
\label{contentsof.}
\noindent {\tt contents of} finds the contents of a table in one go,
with the pairs separated with {\tt key key sep} and
the two items of a pair separated by {\tt key val sep}.
%

size (>"table", size>)=  .
%TEX
\label{size.}
\noindent Affix {\tt size} is given the number of pairs of {\tt table}, in decimal notation.
%
lookup (>"table", >"key",value>)=  "a".
%TEX
\noindent {\tt lookup} finds the first pair in the table with matching key.
It fails if there is no such pair.
\label{lookup.}
%

includes (>"table", >"key")=  "a".
excludes (>"table", >"key")=  "a".
%TEX
\noindent The above predicates succeed if there is a (no) pair (K,P) in table L where
L and K  are the values of {\tt table} and {\tt key} respectively.
\label{includes.}
\label{excludes.}
%

#plain
pair (cell>, >key,>value)=  .
#plain
unpair (>"cell", key>,value>)=  "a".
%TEX
\label{pair.}
\label{unpair.}
\noindent In addition to the table (tables) processing builtins there are three builtins
which work on named tuples.
Each tuple has a unique name.
You can always work around  using the predicates {\tt pair} and {\tt unpair}.
Both are  generated by eliminating  the composition operator.
For example, the two following rules are equivalent:
{\small
\begin{glammar}
\L{\LB{}}
\L{\LB{    \Proc{2}\K{b }\index{b 2}\D{}(\>L1)\DE{}:}}
\L{\LB{         unpair \D{}(\>L1,a\>,L2\>)\DE{},}}
\L{\LB{         unpair \D{}(\>L2,b\>,c\>)\DE{}.}}
\L{\LB{}}
\L{\LB{    \Proc{6}\K{b }\index{b 6}\D{}( \>a*b*c)\DE{}:.}}
\end{glammar}}
If the left position is applying instead of defining {\tt unpair} must be replaced with  {\tt pair}.
%

repair (>"cell", >key,>value)= "a";
repair (>"cell", >key,>value)= "a".
%TEX 
\noindent {\tt repair} updates the earlier created cell (with {\tt pair}) with new values.
Upon backtracking this is undone unless deterministic behavior is forced.
{\tt repair} fails if there is no tuple {\tt (key,value)} with name {\tt cell}.
\label{repair.}
%

#plain
where (>x,x>)=.
%TEX
\noindent Because the composition operator is not associative, brackets are needed
to denote certain structures.
Glammar does not allow you to write brackets in affix expressions.
For example  {\tt >a*(x*y)*c} is not allowed.
You can however make such structures using where.
For example :
{\small
\begin{glammar}
\L{\LB{   \Proc{2}\K{t }\index{t 2}\D{}(\>a*b*c)\DE{}:}}
\L{\LB{       my own where \D{}(\>b,x*y\>)\DE{}.}}
\L{\LB{}}
\L{\LB{   \Proc{5}\K{my own where }\index{my own where 5}\D{}(\>x,x\>)\DE{}:.}}
\end{glammar}}
After the composition operator is removed :
{\small
\begin{glammar}
\L{\LB{    \Proc{2}\K{t }\index{t 2}\D{}(\>L1)\DE{}:}}
\L{\LB{        unpair \D{}(\>L1,a\>,L2\>)\DE{},}}
\L{\LB{        unpair \D{}(\>L2,b\>,c\>)\DE{},}}
\L{\LB{        my own where \D{}(\>b,L3\>)\DE{},}}
\L{\LB{        unpair \D{}(\>L3,x\>,y\>)\DE{}.}}
\L{\LB{}}
\L{\LB{    \Proc{8}\K{my own where }\index{my own where 8}\D{}(\>x,x\>)\DE{}:.}}
\end{glammar}}
\noindent It is obvious that {\tt my own where} has become superfluous if we substitute
{\tt b} for {\tt L3}.
This is the reason why there is a predefined built-in {\tt where}.
The compiler tries to remove {\tt where} if it is possible by renaming affixes.
So had {\tt where} been used instead of {\tt my own where}, the result after transformation
would have been :
{\small
\begin{glammar}
\L{\LB{\Proc{1}\K{t }\index{t 1}\D{}(\>L1)\DE{}:}}
\L{\LB{    unpair \D{}(\>L1,a\>,L2\>)\DE{},}}
\L{\LB{    unpair \D{}(\>L2,b\>,c\>)\DE{},}}
\L{\LB{    unpair \D{}(\>b,x\>,y\>)\DE{}.}}
\L{\LB{}}
\end{glammar}}
\label{where.}
%

lexical less (>"x",>"y")= "a".
lexical more (>"x",>"y")= "a".
lexical less equal (>"x",>"y")= "a".
lexical more equal (>"x",>"y")= "a".
%TEX
\noindent These builtins compare affixes according to their lexical ordering.
\label{lexicalless.}
\label{lexicalmore.}
\label{lexicallessequal.}
\label{lexicalmoreequal.}
%

compute (>expr, result>)= .
%TEX
\noindent If  {\tt expr} is a string representing a simple expression (like  {\tt 2*(3+1)})
 {\tt result} will be assigned the value of this expression in decimal ( {\tt 8}).
The expression may only contain the one-character operators $! < > = + - * /$ and
signed integer notation.
The maximum integer value is inherited from the C value for this.  
Booleans are implemented as $\{0,X\}$,  where $0$ denotes {\tt false}, anything 
else (preferably $1$) is taken to be {\tt true}. 
Anything not an operator or integral number (spaces included) terminates 
the expression.
A not well-formed expression however gives an error message. 
For arbitrary precision arithmetic you can set up a connection to the unix
program {\tt dc}.
\label{compute.}
%
realtoint (>number, result>)= .
%TEX 
\noindent Converts a decimal real in scientific notation to the nearest decimal integer not greater than real.
\label{realtoint.}
%
decimal to unary (>number, result>)= .
%TEX 
\noindent Converts a decimal integer value to unary with base notation {\tt 1}.
\label{decimaltounary.}
%

identifier (x+y>)= !AB..Za..z! (x>), !A..Za..z1..0'_! * (y>).
letter digit sequence (x+y>)= !AB..Za..z! (x>), !A..Za..z1..0! * (y>).
%TEX 
\noindent These scan  an identifier as specified, but 
a little more efficiently.
\label{identifier.}
\label{letterdigitsequence.}
%

upper case (>str, result>)= .
%TEX 
\noindent {\tt upper case} transforms a value with mixed case letters to an upper case value.
\label{uppercase.}
%

lower case (>str, result>)= .
%TEX 
\noindent {\tt lower case} transforms a value with mixed case letters to a lower case value.
\label{lowercase.}
%

keyword (>"upper case word")=  "a".
%TEX 
\noindent Matches upper case word to the lower or mixed case input.
So keyword\verb+(>"IF")+ scans {\tt if}, {\tt IF}, {\tt If}, and {\tt iF}.
\label{keyword.}
%


layout = !\n\t\f !*(ignored>). 
%TEX 
\noindent You can use this rule to skip spaces, tabs,  carriage returns, newlines, vertical tabs, and form feeds.
\label{layout.}
%

type out (>affix)=.
%TEX 
\noindent Write the value of affix to stderr.
\label{typeout.}
%

type in (affix>)=.
%TEX
\noindent Read affix from stdin.
\label{typein.}
%

fail = not equal (>empty,>empty).
%TEX
\noindent {\tt fail} just always fails.
\label{fail.}
%

#plain
cut= .
%TEX
\noindent This predicate forces the next productions --- if any --- to fail.
This member may also be written as {\tt ->}, and can be removed
by adding complementary members to all other productions.
\label{cut.}
%

eval affix (>a,evaluated a>)= .
%TEX
\noindent This predicate reduces the evaluate count at 
run time (see -r option) for deterministic grammars.
Nondeterministic grammars overwrite the result of an affix evaluation 
and restore the original value upon backtracking.
Since deterministic grammars do not need to backtrack the original value 
cannot be restored that easily. 
Instead, the result of the affix evaluation is lost which means 
that the same affix must be evaluated over and over again.
When you have interest in making your compiler as efficient as possible
and you know that some affixes are tested frequently you can first 
evaluate this affix and use the result in the rest of the computation.
\label{evalaffix.}
%

set line file (>line,>file)= ;
set line file (>line,>file)= .
get line file (line>,file>)= ;
get line file (line>,file>)= .
%TEX
\label{setlinefile.}
\label{getlinefile.}
\noindent Line and file control.
In order to produce correct messages when the input was
the output of a preprocessor there is a predicate which
sets the current file name and line number.
%

get next option (option>)=.
%TEX
\noindent This predicate gets the next option (the first one will be the command name) 
from the command line that invoked the program. 
It fails if all options have been processed.
\label{getnextoption.}
%

get env (>"name", val>)=.
%TEX
\noindent {\tt get env} searches the value of the environment variable. 
It fails if the environment variable is not defined. 
\label{getenv.}
%

error message (val>)=.
%TEX
\noindent {\tt error message} generates an error message based upon the 
most advanced input pointer.
It suppresses output generation,
and the exit status will be {\tt 1}.
It can also be used to implement error recovery.
\label{errormessage.}
%
set exit code=.
%TEX
\label{setexitcode.}
\noindent This predicate sets the exit code to {\tt 2},
but output generation is not suppressed.
%

assign (>x,>y,old x>)=;
assign (>x,>y,old x>)=.
%TEX
\noindent {\tt assign} is a very tricky predicate and therefore its use is not recommended.
Basically it saves you from adding an affix to each rule if
this affix changes in rare cases only.
If this is the case you can let a metanotion take another value,
although this sometimes gives surprising results.
It  is for example in many cases necessary to copy the metanotion
(when putting it in tables etc.),
because of the fact that on an affix expression
containing  the  metanotion,
an assignation of that metanotion
has effect on previous evaluations of that affix expression
(consistent substitution is not guaranteed.)
\label{assign.}
%


get from unix (>cmd, cmd stdout>)=.
%TEX 
\noindent The most simple and reliable way to get some information from unix
is to give a command,  wait for the command to exit and store
the result it produced.
Examples of such unix utility programs are {\tt date, cat, ls} etc.
They all do not need any input.
\label{getfromunix.}
%

set up connection to unix (>cmd,>channel)=.
talk to unix (>channel,>to the programs stdin,from the programs stdout>)=.
%TEX
\noindent These two builtins are only available on a unix system.
They can be used to set up and maintain a connection 
to unix.
Only 32 such connections can be made at a time.
When this limit is exceeded {\tt set up connection to unix} fails.

Useful programs to connect with are for instance {\tt dc} for
arbitrary precision arithmetic, or perhaps a program 
written by yourself.
Also the lexicon interface is implemented with connections.
 
As an example we will maintain a connection to {\tt dc}.
First we set up the connection.
{\small
\begin{glammar}
\L{\LB{ \Proc{1}\K{s}\index{s1}:}}
\L{\LB{ set up connection to unix \D{}(\>\S{}\"dc \"\SE{},\>DC CON)\DE{}.}}
\end{glammar}}

Here DC CON is a metadefined affix specifying the channel number.
This channel is assigned to the metarule by the set up builtin;
initially it may be defined as empty (e.g.\ {\tt DC CON::.}).
 
Next we can give input to this program's {\tt stdin} and get the answer from
the program's {\tt stdout}. For example   
{\small
\begin{glammar}
\L{\LB{  \Proc{2}\K{x}\index{x2}:}}
\L{\LB{    talk to unix \D{}(\>DC CON , \>\S{}\"1 2 +ps.\!n\"\SE{},\S{}\"3\"\SE{}\>)\DE{}.}}
\end{glammar}}
will succeed.
Because the builtin  {\tt talk to unix} first supplies input to the unix-program 
and then waits for the answer communication problems can occur.
To solve this problem a very simple (not very reliable)  strategy is used :
As soon as a control character is read ({\tt iscntrl}) the answer reading is stopped
and the result so far is  made the total result. Caveat:
the next time the builtin is called there might be some output left from previous
 calls.  Also one has to take care that there always will be an answer. 
For instance in the above example the newline in the input given to 
{\tt talk to unix} is essential because {\tt dc} waits for it  before generating
any output, so both processes would have been locked forever (``deadlock'')
if this newline was omitted.
\label{setupconnectiontounix.}
\label{talktounix.}
%


%TEX 
\noindent The following builtins are for internal usage and should therefore not be
used in your own grammar.
It is not possible to redefine these builtins and when its name is followed 
by an underscore it is also not possible to use the builtin  yourself.  
\label{resetinputptr.} 
\label{setinputptrto.}
\label{nestarset.}
\label{nestaralt.}
\label{metaterminal.}
\label{intersect.}
\label{explintersect.}
\label{transformlattice.}
\label{tltraditional.}
\label{transformlatticeterm.}
\label{tltraditionalterm.}
\label{initmint.}
\label{initmeta.}
\label{evalmeta.}
\label{getip.}
\label{restoreip.}
\label{falseip.}
\label{skip.}
%

reset inputptr_ (>saved ip)= .
set inputptr to_ (>x,saved ip>)= .
nestarset (a>)= ;
nestarset (a>)= ;
nestarset (a>)= .
nestaralt_ (a>,>saved ip)= .
metaterminal (>"m")=  "a".
intersect_ (>a,>b)= "a".
explintersect_ (>a,>b)= "a".
transform lattice_ ( >a,>b,c>)= .
tl traditional_ ( >a,>b,c>)= ;
tl traditional_ ( >a,>b,c>)= ;
tl traditional_ ( >a,>b,c>)= .
transform latticeterm_ ( >a,>b,c>)= .
tl traditionalterm_ ( >a,>b,c>)= ;
tl traditionalterm_ ( >a,>b,c>)= ;
tl traditionalterm_ ( >a,>b,c>)= .
init mint_ ( >a,>b)= .
initmeta ( >a,>b)= .
evalmeta ( >a)= .
get ip_ ( ip>)= "a".
restore ip_ ( >ip)= "a".
false ip_ ( >ip)= "a".
skip_= ;
skip_= ;
skip_= .


%TEX
\noindent The following builtins implement multiple precision integer
arithmetic.
These are implemented using the unix standard package {\tt mp}.
Some implementations of unix do however not support 
this package.
More information can be found in the manual page for {\tt mp}.
\label{equalto.}
\label{lessthan.}
\label{morethan.}
%
equal to ::"0".
less than ::"-1".
more than ::"1".

%TEX
\label{intplusintisint.}
\label{intsubintisint.}
\label{inttimesintisint.}
\label{intdividedbyintisintwithremainderint.}
\label{intisint.}
\label{intraisedbyintmodulointisint.}
\label{intisthegcdofintandint.}
\label{intisthesquarerootofintwithremainderint.}
\label{intisdec.}
\label{decisint.}
\label{freeint.}
\label{decrementsmallint.}
\label{incrementsmallint.}
\label{smalldecisint.}
\label{intisnegative.}
\label{negateint.}
\label{intispositive.}
\label{intiszero.}
%
int (>a) plus int (>b) is int (c>)=.
int (>a) sub int (>b) is int (c>)=.
int (>a) times int (>b) is int (c>)=.
int (>a) divided by int (>b) is int (q>) with remainder int (r>)=.
int (>a) is (c>) int (>b)=.
int (>a) raised by int (>b) modulo int (>d) is int (r>)=.
int (c>) is the gcd of int (>a) and int (>b)=.
int (s>) is the square root of int (>a) with remainder int (r>)=.
int (>a) is dec (dec a>)=.
dec (>a) is int (int a>)=.
free int (>d)=.
decrement small int (>d)=.
increment small int (>d)=.
small dec (>d) is int (i>)=.
int (>a) is negative=.
negate int (>a) =.
int (>a) is positive=.
int (>a) is zero=.

%$TEX
\input{\jobname.ind}
\end{document}
%
