This is the grammar on slides 28-37:
%startsymbol Session EOF
// Defines start symbol Session with EOF as terminator.
// %nodefaults
// Symbols and their attributes:
%symbol{ } EOF BAD
%symbol{ std::string } SCANERROR IDENT
%symbol SEMICOLON ASSIGN COMMA
%symbol{ double } DOUBLE
%symbol PLUS TIMES MINUS DIVIDES MODULO
%symbol FACTORIAL
%symbol LPAR RPAR
%symbol{ double } E F G H
// Arguments carries the evaluated arguments of a function call.
// The element type has to be spelled out: a bare std::vector names a
// template, not a type, and the rules build the list from double values.
%symbol{ std::vector< double > } Arguments
%symbol Session Command
// No attribute type means void.
%symbol COMMENT WHITESPACE EMPTY
// These symbols are used internally in the tokenizer.
// One still has to declare them, so that they will
// be included in the symbol class.
// memory: the variable store; the rules assign to it and look identifiers up in it.
%parameter{ varstore< double > } memory
// errorlog: messages for computation errors (division by zero, undefined
// variable, unrecognized function); printed and cleared once per command.
%parameter{ std::vector< std::string > } errorlog
// tok: the tokenizer that %source reads symbols from.
%parameter{ tokenizer } tok
// Declares additional parameters to the parser.
// They are reference parameters to the parser,
// and they can be used in action code.
// I see no point in allowing local variables
// in the parser, so maphoon doesn't allow this.
// If you want to read input from a file, or from
// somewhere else, you have to include it here.
// This goes into the beginning of symbol.h :
// NOTE(review): the header name was missing here; <vector> is assumed
// because the Arguments attribute is a std::vector -- confirm.
%symbolcode_h{ #include <vector> }
%parsercode_h{ #include "varstore.h" }
%parsercode_h{ #include "tokenizer.h" }
// NOTE(review): the header name was missing here; <cmath> is assumed
// because the action code calls floor, sin, cos and pow -- confirm.
%parsercode_h{ #include <cmath> }
%symbolcode_cpp {
// Prints a vector-of-double attribute to out by handing the whole
// range to print_range, with '{' and '}' as the two delimiter characters.
void print_attr( const std::vector< double > & vect, std::ostream& out )
{
   print_range( vect. cbegin( ), vect. cend( ), '{', '}', out );
}
}
%parsercode_cpp {
// Factorial of n, computed in double precision; fact(0) is 1.
// The factors are multiplied in descending order, starting with n.
double fact( unsigned int n )
{
   double product = 1.0;
   for( unsigned int factor = n; factor != 0; -- factor )
      product *= factor;
   return product;
}
// Prints the logged computation errors (not syntax errors) to out:
// a header line "Errors:" followed by one line per message, each
// preceded by a single space.
//   errors  the messages to print, in order
//   out     the stream that receives the header and the messages
void printerrors( const std::vector< std::string > & errors,
                  std::ostream& out )
{
   // The header goes to the same stream as the messages; it used to be
   // written to std::cout regardless of out.
   out << "Errors:\n";
   for( const auto& err : errors )
      out << " " << err << "\n";
}
}
// Namespaces of symbol, tokenizer and parser would be declared here;
// this grammar declares none.
// One should probably put them in the same namespace.
%source{ tok. read( ); }
// Source from which the parser obtains its symbols.
// It must compile in a context of the form  s = tok. read( );
%rules
// A session is a possibly empty sequence of commands.
// The rule is left-recursive; the second alternative is the empty one.
Session => Session Command
|
;
// A command is an expression or an assignment, terminated by SEMICOLON,
// or a recovery point after a syntax error.
// If computation errors were logged in errorlog, they are printed instead
// of the result (or instead of doing the assignment), and the log is cleared.
Command => E:e SEMICOLON
{
if( errorlog. size( ))
{
printerrors( errorlog, std::cout );
errorlog. clear( );
}
else
{
// NOTE(review): debug is not declared in this grammar; presumably a flag
// provided by the generated parser -- confirm.
if( debug )
std::cout << "\n";
std::cout << "---> " << e << "\n";
}
}
| IDENT:id ASSIGN E:e SEMICOLON
{
// The assignment is carried out only when no computation error was logged.
if( errorlog. empty( ))
{
std::cout << " assigning: " << id << " := " << e << "\n";
memory. assign( id, e );
}
else
{
printerrors( errorlog, std::cout );
errorlog. clear( );
}
}
| _recover_ SEMICOLON
{
// Reached through _recover_ (presumably Maphoon's error recovery symbol --
// confirm). Prints the logged messages itself, without the leading space
// that printerrors puts before each message, and clears the log.
std::cout << "recovered from syntax error\n\n";
std::cout << "Errors:\n";
for( const auto& err : errorlog )
std::cout << err << "\n";
errorlog. clear( );
}
;
// Sums and differences. The recursion is on the left operand, so a chain
// of PLUS and MINUS groups to the left; the operands are F.
E => E:e PLUS F:f { return e + f; }
| E:e MINUS F:f { return e - f; }
| F : f { return f; }
;
// Products, quotients and remainders; left-recursive like E, with G as operand.
// A zero divisor is logged in errorlog and replaced by 1.0, so that the
// action can still return a value.
F => F:f TIMES G:g { return f * g; }
| F:f DIVIDES G:g
{
if( g == 0.0 )
{
errorlog. push_back( "division by zero" );
g = 1.0; // invent a value.
}
return f / g;
}
| F:f MODULO G:g // here you can put a comment
{
if( g == 0.0 )
{
errorlog. push_back( "division by zero" );
g = 1.0;
}
// Remainder based on floor( f / g ), i.e. f minus the largest multiple
// of g obtained by rounding the quotient down.
return f - g * floor( f / g );
}
| G : g /* here can also be comment */ { return g; }
;
// Unary minus and unary plus. Their operand is again a G, so the signs
// can be repeated; without a sign, G is just an H.
G => MINUS G : g { return -g; }
| PLUS G : g { return g; }
| H : h { return h; }
;
// Innermost level: postfix factorial, parenthesized expression, variable,
// number, and calls of the functions sin, cos and pow.
// Computation errors are logged in errorlog and an invented value is returned.
H => H:h FACTORIAL
   {
      // A small tolerance is added before rounding down, so that a value
      // slightly below an integer still counts as that integer.
      double rounded = floor( h + 0.0001 );

      // Converting a negative or NaN double to unsigned int is undefined,
      // so such operands are logged instead of converted.
      if( !( rounded >= 0.0 ))
      {
         errorlog. push_back( "invalid argument of factorial" );
         return 0.0;   // An arbitrary value.
      }

      // 171! is already beyond the range of double, so fact returns
      // infinity from 171 on. Clamping keeps the conversion in range
      // and the loop short, without changing the result.
      if( rounded > 171.0 )
         rounded = 171.0;

      return fact( static_cast< unsigned int > ( rounded ));
   }
 | LPAR E:e RPAR { return e; }
 | IDENT: id
   {
      if( memory. contains(id))
         return *memory. lookup(id);
      else
      {
         errorlog. push_back( std::string( "variable " ) + id +
                              " is undefined " );
         return 0.0;   // An arbitrary value.
      }
   }
 | DOUBLE : d { return d; }
 | IDENT:id LPAR Arguments:args RPAR
   {
      // Both the name and the number of arguments have to match;
      // everything else is reported as an unrecognized function.
      if( id == "sin" && args. size( ) == 1 )
         return sin( args[0] );
      if( id == "cos" && args. size( ) == 1 )
         return cos( args[0] );
      if( id == "pow" && args. size( ) == 2 )
      {
         return pow( args[0], args[1] );
      }
      errorlog. push_back( std::string( "unrecognized function " ) + id );
      return 0.0;
   }
 ;
// One or more expressions separated by COMMA. The first expression starts
// a one-element list; each further expression is appended to the list
// built so far.
Arguments => E:e { return { e }; }
| Arguments:a COMMA E:e { a. push_back(e); return a; }
;
// Each line pairs a pattern of symbols with a text; the texts read like
// descriptions of what was expected at that point.
// NOTE(review): the exact meaning of the patterns (in particular '*' and '1')
// is defined by Maphoon and cannot be seen here -- confirm against its manual.
%errors
LPAR * => "a )";
IDENT LPAR 1 => "a function argument";
( TIMES | DIVIDES | MODULO ) => "factor";
( PLUS | MINUS ) => "summand";
The Prolog grammar:
// Start symbol is Start, with EOF as terminator.
%startsymbol Start EOF
%symbol Start
// Term and OneTerm carry the term that was built.
%symbol { term } Term OneTerm
%symbol ERROR
%symbol COMMENT WHITESPACE
%symbol EOF
// Identifiers carry their string. In the rules, the GLUED variants are the
// ones that are followed by LPAR and an argument list.
%symbol { std::string } IDENTIFIER GLUEDIDENTIFIER
%symbol { std::string } QUOTEDIDENTIFIER GLUEDQUOTEDIDENTIFIER
%symbol LPAR RPAR
%symbol LSQBRACKET RSQBRACKET
%symbol { std::string } VARIABLE
%symbol { double } DOUBLE
%symbol { bigint } INTEGER
%symbol { std::vector< term > } MaybeTerms SomeTerms
%symbol { term } ListEnd
// Prefix, Infix and Postfix carry the operator definition obtained from synt.
%symbol { opdef } Prefix
%symbol { opdef } Infix
%symbol { opdef } Postfix
%symbol COMMA BAR TERMINATOR
// NOTE(review): %reductionseq presumably fixes an order in which competing
// reductions are attempted -- confirm against the Maphoon manual.
%reductionseq Prefix Term
%reductionseq Infix Postfix
// %usererror
// Means that the user prefers to define their own error.
%symbolcode_h{ #include "term.h" }
%symbolcode_h{ #include "syntax.h" }
%symbolcode_h{ #include "listconstr.h" }
%symbolcode_cpp
{
// Prints a vector-of-term attribute to out by handing the whole range
// to print_range, with '{' and '}' as the two delimiter characters.
void
print_attr( const std::vector< prolog::term > & vect, std::ostream& out )
{
   print_range( vect. cbegin( ), vect. cend( ), '{', '}', out );
}
// Prints a term attribute by streaming trm into out with operator<<.
void
print_attr( const prolog::term& trm, std::ostream& out )
{
out << trm;
}
}
%parsercode_h{ #include "tokenizer.h" }
%parsercode_h{ #include "../calculator/varstore.h" }
%parsercode_cpp{
namespace
{
// True when sym is an IDENTIFIER whose string has a prefix definition
// in synt. A trace line is written to std::cout on every call.
bool canbeprefix( const syntax& synt, const symbol& sym )
{
   std::cout << "can be prefix " << sym << "\n";
   return sym. type == sym_IDENTIFIER &&
          synt. hasprefixdef( sym. get< std::string > ( ));
}
// True when sym is an IDENTIFIER whose string has an infix definition
// in synt. A trace line is written to std::cout on every call.
bool canbeinfix( const syntax& synt, const symbol& sym )
{
   std::cout << "can be infix " << sym << "\n";
   return sym. type == sym_IDENTIFIER &&
          synt. hasinfixdef( sym. get< std::string > ( ));
}
// True when sym is an IDENTIFIER whose string has a postfix definition
// in synt. A trace line is written to std::cout on every call.
bool canbepostfix( const syntax& synt, const symbol& sym )
{
   std::cout << "can be postfix " << sym << "\n";
   return sym. type == sym_IDENTIFIER &&
          synt. haspostfixdef( sym. get< std::string > ( ));
}
// True when the type of sym is one of the symbol types listed below:
// the four identifier kinds, LPAR, LSQBRACKET, VARIABLE, INTEGER or DOUBLE.
// A trace line is written to std::cout on every call.
bool canstartterm( const symbol& sym )
{
   std::cout << "can start term " << sym << "\n";
   switch( sym. type )
   {
   case sym_IDENTIFIER:
   case sym_GLUEDIDENTIFIER:
   case sym_QUOTEDIDENTIFIER:
   case sym_GLUEDQUOTEDIDENTIFIER:
   case sym_LPAR:
   case sym_LSQBRACKET:
   case sym_VARIABLE:
   case sym_INTEGER:
   case sym_DOUBLE:
      return true;
   default:
      return false;
   }
}
// Decides between the operator op and the lookahead symbol sym.
// Returns 1 (reduce) when sym is not an IDENTIFIER, or when its string has
// neither a postfix nor an infix definition in synt. Otherwise op is
// compared with that definition (postfix is tried first) through
// op. decide: an outcome of -1 gives 1, an outcome of 1 gives 0, and
// any other outcome gives -1.
// NOTE(review): 0 presumably means "do not reduce" and -1 "undecided" --
// confirm against the callers.
// A trace line is written to std::cout on every call.
short int canreduce( const syntax& synt,
                     const opdef& op, const symbol& sym )
{
   std::cout << "deciding priorities between " << op << " and " << sym << "\n";

   // Translates the outcome of op. decide into the result of canreduce.
   const auto verdict = [] ( const auto& direction ) -> short int
   {
      if( direction == -1 ) return 1;
      if( direction == 1 ) return 0;
      return -1;
   };

   if( sym. type == sym_IDENTIFIER )
   {
      const std::string& name = sym. get< std::string > ( );

      // We do not really know what to do when there are
      // conflicting priorities. I suppose it should not happen.
      if( synt. haspostfixdef( name ))
      {
         auto other = synt. postfixdef( name );
         return verdict( op. decide( other ));
      }

      if( synt. hasinfixdef( name ))
      {
         auto other = synt. infixdef( name );
         return verdict( op. decide( other ));
      }
   }

   return 1;   // reduce.
}
}
}
// tok: the tokenizer that %source reads symbols from.
%parameter { tokenizer } tok
// vs: a variable store for terms; it is not used by the rules of this grammar.
%parameter { varstore< term > } vs
// synt: the operator definitions that the helper functions and the
// Prefix / Infix / Postfix rules consult.
%parameter { syntax } synt
// list: supplies the cons and nil used when building lists.
%parameter { listconstr } list
// Symbol and parser are both placed in namespace prolog.
%symbolspace prolog
%parserspace prolog
// Source of the symbols; the tokenizer is given synt on every call.
%source { tok. get( synt ); }
%rules
// The start symbol: a single term followed by TERMINATOR. There is no action.
Start => OneTerm:t TERMINATOR ;
// Wraps Term; the action sets timetosaygoodbye before returning the term.
// NOTE(review): timetosaygoodbye is not declared in this grammar; presumably a
// flag of the generated parser that makes it stop -- confirm.
OneTerm => Term:t { timetosaygoodbye = true; return t; } ;
// Terms: variables, numbers, atoms, function applications, operator
// applications, parenthesized terms and lists.
// Every action allocates the new node with new and returns the pointer.
// NOTE(review): presumably term is constructed from that pointer and takes
// ownership of it -- confirm in term.h.
Term =>
VARIABLE:v { return new variable(v); }
| DOUBLE : d { return new constant< double > (d); }
| INTEGER : i { return new constant< bigint > (i); }
// An identifier on its own becomes a function with arity 0.
| IDENTIFIER : id { return new functional( function( id, 0 )); }
| QUOTEDIDENTIFIER : id { return new functional( function( id, 0 )); }
// Function application: the arity is the number of arguments. It is read
// before args is moved into the new node.
| GLUEDIDENTIFIER : id LPAR MaybeTerms : args RPAR
{ size_t ar = args. size( );
return new functional( function( id, ar ), std::move( args ));
}
| GLUEDQUOTEDIDENTIFIER : id LPAR MaybeTerms : args RPAR
{ size_t ar = args. size( );
return new functional( function( id, ar ), std::move( args ));
}
// Prefix and infix applications consult canreduce with the lookahead
// in their %requires condition.
| Prefix:op Term:t
%requires
{ return canreduce( synt, op, lookahead. value( )); }
%reduces
{ return new functional( function( op. str, 1 ), { t } ); }
| Term:t1 Infix:op Term:t2
%requires
{ return canreduce( synt, op, lookahead. value( )); }
%reduces
{ return new functional( function( op. str, 2 ), { t1, t2 } ); }
| Term:t Postfix:op
%reduces
{ return new functional( function( op. str, 1 ), { t } ); }
| LPAR Term:t RPAR { return t; }
// A list is built from the back: starting with the list end, every
// element is put in front of the result with list. cons.
| LSQBRACKET MaybeTerms :args ListEnd :end RSQBRACKET
{ auto res = end;
size_t i = args. size( );
while( i -- )
res = new functional( list. cons, { args[i], res } );
return res;
}
;
// A possibly empty list of terms: either nothing, or SomeTerms.
MaybeTerms =>
   // The attribute of MaybeTerms is std::vector< term >; the element
   // type has to be named in order to construct the empty vector.
   { return std::vector< term > ( ); /* empty rhs. */ }
 | SomeTerms : terms { return terms; }
 ;
// One or more terms separated by COMMA, collected in a vector.
// Both alternatives carry the same %requires condition: the lookahead must
// be neither a possible infix nor a possible postfix operator.
SomeTerms
=> Term : t
%requires
{ return !canbeinfix( synt, lookahead. value( )) &&
!canbepostfix( synt, lookahead. value( ));
}
%reduces
{ auto res = std::vector< term > ( ); res. push_back(t); return res; }
| SomeTerms : some COMMA Term : onemore
%requires
{ return !canbeinfix( synt, lookahead. value( )) &&
!canbepostfix( synt, lookahead. value( ));
}
%reduces
{ some. push_back( onemore ); return some; }
;
// The end of a list: either nothing, which yields a node built from
// list. nil, or BAR followed by a term, which yields that term.
ListEnd =>
{ return new functional( list. nil ); }
|
BAR Term:t { return t; }
;
// An IDENTIFIER used as prefix operator. The %requires condition holds when
// synt has a prefix definition for it and the lookahead can start a term;
// the attribute is that prefix definition.
Prefix => IDENTIFIER : id
%requires
{ return synt. hasprefixdef( id ) && canstartterm( lookahead. value( )); }
%reduces
{ return synt. prefixdef(id); }
;
// An IDENTIFIER used as infix operator. The %requires condition holds when
// synt has an infix definition for it and the lookahead can start a term;
// the attribute is that infix definition.
Infix => IDENTIFIER : id
%requires
{ return synt. hasinfixdef(id) && canstartterm( lookahead. value( )); }
%reduces
{ return synt. infixdef(id); }
;
// An IDENTIFIER used as postfix operator. The %requires condition holds when
// synt has a postfix definition for it; unlike Prefix and Infix, the
// lookahead is not inspected. The attribute is that postfix definition.
Postfix => IDENTIFIER : id
%requires
{ return synt. haspostfixdef(id); }
%reduces
{ return synt. postfixdef(id); }
;
%end