Presentation at C++ Now, May 2022.

Presentation at C++ Now

Slides of the presentation.
The automaton that recognizes general identifiers and the keyword 'while', and the code with which it was generated.
The automata on slide 20: NDFA, DFA, minimized DFA, DFA as C++ code. The example is based on the calculator example from the main page.

This is the grammar on slides 28-37:

 
%startsymbol Session EOF 
   // Defines start symbol Session with EOF as terminator. 

// %nodefaults

// Symbols and their attributes:

%symbol{       	     }             EOF BAD
%symbol{ std::string }             SCANERROR IDENT 
%symbol                            SEMICOLON ASSIGN COMMA 
%symbol{ double }                  DOUBLE
%symbol                            PLUS TIMES MINUS DIVIDES MODULO
%symbol                            FACTORIAL
%symbol                            LPAR RPAR 
%symbol{ double }                  E F G H 
%symbol{ std::vector< double > }   Arguments  
   // The element type has to be double: Arguments collects values
   // of E, and print_attr is defined for std::vector< double >.
%symbol                            Session Command 
   // No attribute type means void. 
%symbol                            COMMENT WHITESPACE EMPTY
   // These symbols are used internally in the tokenizer.
   // One still has to declare them, so that they will
   // be included in the symbol class.  

// Extra parser parameters: the store of variable values, the log of
// computation errors, and the tokenizer that delivers the input symbols.
%parameter{ varstore< double > }           memory 
%parameter{ std::vector< std::string > }   errorlog 
%parameter{ tokenizer }                    tok 
   // Declares additional parameters to the parser.
   // They are reference parameters to the parser, 
   // and they can be used in action code. 
   // I see no point in allowing local variables
   // in the parser, so maphoon doesn't allow this.
   // If you want to read input from a file, or from
   // somewhere, you have to include it here.

// This goes into the beginning of symbol.h :

%symbolcode_h{ #include <vector> } 
   // std::vector is the attribute type of Arguments.

%parsercode_h{ #include "varstore.h" }
%parsercode_h{ #include "tokenizer.h" }
%parsercode_h{ #include <cmath> }
   // floor, sin, cos and pow are called in the action code.

%symbolcode_cpp {

   // Prints an attribute of type std::vector< double > by handing its
   // element range to print_range, together with '{' and '}'
   // (presumably the opening and closing delimiters).
   void print_attr( const std::vector< double > & vect, std::ostream& out )
   {
      print_range( vect. cbegin( ), vect. cend( ), '{', '}', out );
   }
}

  
%parsercode_cpp {
   // Returns n! as a double; fact(0) is 1.0.
   // The factors are multiplied in descending order n, n-1, ..., 1.
   double fact( unsigned int n )
   {
      double product = 1.0;
      for( unsigned int k = n; k != 0; -- k )
         product *= k;
      return product;
   }

   // Not syntax errors, only computation errors:

   // Writes the header line "Errors:" to out, followed by every message
   // in errors, one per line and indented by three spaces.
   // The header goes to out as well (not to std::cout), so that the
   // complete report ends up in the stream chosen by the caller.
   void printerrors( const std::vector< std::string > & errors,
                     std::ostream& out )
   {
      out << "Errors:\n";
      for( const auto& err : errors )
         out << "   " << err << "\n";
   }
}
   
// Namespaces of symbol, tokenizer and parser.
// One should probably put them in the same namespace.


%source{ tok. read( ); } 
   // Source from where the symbols come. 
   // It must compile in a context of form s = tok. read( ); 
   // tok is the tokenizer that is declared as %parameter.

%rules

// A session is a possibly empty sequence of commands.
// The second alternative has an empty right hand side.
Session => Session Command 
|
;

// A command is an expression that is evaluated and printed, an
// assignment to a variable, or the point of recovery after a syntax
// error. Every alternative ends with SEMICOLON.
Command => E:e SEMICOLON
{
   // Computation errors that were logged while evaluating e are
   // reported instead of the value.
   if( errorlog. size( ))
   {
      printerrors( errorlog, std::cout );  
      errorlog. clear( ); 
   }
   else
   {
      // NOTE(review): debug is not declared in this grammar; presumably
      // it is provided by the generated parser. Confirm.
      if( debug ) 
         std::cout << "\n";

      std::cout << "---> " << e << "\n"; 
   }
}

| IDENT:id ASSIGN E:e SEMICOLON

{
   // The assignment is carried out only when no computation error
   // was logged while evaluating e.
   if( errorlog. empty( )) 
   {
      std::cout << " assigning: " << id << " := " << e << "\n";
      memory. assign( id, e ); 
   }
   else
   {
      printerrors( errorlog, std::cout );  
      errorlog. clear( ); 
   }
}

|  _recover_ SEMICOLON

{
   // NOTE(review): _recover_ is presumably Maphoon's error recovery
   // symbol; confirm. The log is printed here without the three space
   // indentation that printerrors uses.
   std::cout << "recovered from syntax error\n\n";
   std::cout << "Errors:\n";
   for( const auto& err : errorlog )
      std::cout << err << "\n";
   errorlog. clear( );
}
;

// Sums and differences. The rule is left recursive, so that
// a - b - c is evaluated as ( a - b ) - c.
E  => E:e PLUS F:f   { return e + f; }
    | E:e MINUS F:f  { return e - f; }
    | F : f          { return f; }
    ;

// Products, quotients and remainders. The rule is left recursive.
// A zero divisor is logged in errorlog and replaced by 1.0, so that
// evaluation can continue and the error is reported by Command.
F  => F:f TIMES G:g { return f * g; }
    | F:f DIVIDES G:g 
{
   if( g == 0.0 ) 
   {
      errorlog. push_back( "division by zero" ); 
         g = 1.0;   // invent a value. 
   }
   return f / g;  
}

| F:f MODULO G:g    // here you can put a comment 

{
   if( g == 0.0 )
   {
      errorlog. push_back( "division by zero" );
      g = 1.0;
   }
    
   // Remainder with respect to the quotient rounded down by floor.
   return f - g * floor( f / g ); 
}

| G : g   /* here can also be a comment */ { return g; }
;

// Unary minus and unary plus. The rule is right recursive, so that
// signs can be stacked, as in - - x.
G => MINUS G : g    { return -g; }
  | PLUS G : g      { return g; } 
  | H : h           { return h; }
  ;

// Postfix factorial, parenthesized expressions, variables, numbers,
// and calls of the built-in functions sin, cos and pow.
// Computation errors are logged in errorlog and an arbitrary value is
// returned, so that parsing continues and Command can report them.
H  => H:h FACTORIAL
{
   // Adding 0.0001 before rounding down accepts operands that are
   // slightly below an integer.
   double rounded = floor( h + 0.0001 );

   // Converting a negative or NaN double to unsigned int is undefined
   // behavior, so that such operands are reported as errors instead.
   // The negated comparison is also true for NaN.
   if( !( rounded >= 0.0 ))
   {
      errorlog. push_back( "factorial operand is negative or not a number" );
      return 0.0;   // An arbitrary value.
   }

   // Assuming IEEE doubles, 171! already overflows to infinity, so that
   // clamping does not change the result. It avoids converting a value
   // that does not fit in unsigned int, and it bounds the loop in fact.
   if( rounded > 171.0 )
      rounded = 171.0;

   return fact( static_cast< unsigned int > ( rounded ));
}
 
| LPAR E:e RPAR  { return e; }
| IDENT: id 
{
   if( memory. contains(id))
      return *memory. lookup(id);
   else
   {
      errorlog. push_back( std::string( "variable " ) + id + 
                           " is undefined " );
      return 0.0;  // An arbitrary value. 
   }
}

| DOUBLE : d   { return d; } 

| IDENT:id LPAR Arguments:args RPAR 
{ 
   // A function is recognized only when both its name and its
   // number of arguments match.
   if( id == "sin" && args. size( ) == 1 ) 
      return sin( args[0] ); 

   if( id == "cos" && args. size( ) == 1 )
      return cos( args[0] );

   if( id == "pow" && args. size( ) == 2 )
   {
      return pow( args[0], args[1] );
   }

   errorlog. push_back( std::string( "unrecognized function " ) + id );
   return 0.0;
}
  
;

// A non-empty list of expressions separated by COMMA.
// The values are collected in the order in which they were written.
Arguments => E:e          {  return { e };   }
| Arguments:a COMMA E:e   { a. push_back(e); return a; }
;


// NOTE(review): each line below maps a pattern of symbols to a text;
// presumably the text names what was expected when a syntax error is
// reported in a context that matches the pattern. Confirm against the
// Maphoon documentation.
%errors
   LPAR * => "a )";
   IDENT LPAR 1 => "a function argument";
   ( TIMES | DIVIDES | MODULO ) => "factor"; 
   ( PLUS | MINUS ) => "summand";

The Prolog grammar:

 
%startsymbol Start EOF 

// Symbols and their attribute types. A symbol without attribute
// type carries no attribute.

%symbol Start 
%symbol { term } Term OneTerm 

%symbol ERROR 
%symbol COMMENT WHITESPACE   
%symbol EOF 
   // The GLUED variants occur in the rules directly before LPAR,
   // where they form a functional term with arguments.
%symbol { std::string } IDENTIFIER GLUEDIDENTIFIER
%symbol { std::string } QUOTEDIDENTIFIER GLUEDQUOTEDIDENTIFIER
%symbol LPAR RPAR
%symbol LSQBRACKET RSQBRACKET
%symbol { std::string } VARIABLE 
%symbol { double } DOUBLE 
%symbol { bigint } INTEGER 
%symbol { std::vector< term > } MaybeTerms SomeTerms
%symbol { term } ListEnd

   // Operators carry their definition, which is looked up in synt
   // when an IDENTIFIER is reduced into Prefix, Infix or Postfix.
%symbol { opdef } Prefix 
%symbol { opdef } Infix 
%symbol { opdef } Postfix

%symbol COMMA BAR TERMINATOR

%reductionseq Prefix Term  
%reductionseq Infix Postfix 

// %usererror    
   // Means that the user prefers to define their own error. 

// These go into the header of the symbol class:

%symbolcode_h{ #include "term.h" }
%symbolcode_h{ #include "syntax.h" }
%symbolcode_h{ #include "listconstr.h" }

%symbolcode_cpp
{ 
   // Prints an attribute of type std::vector< prolog::term > by handing
   // its element range to print_range, together with '{' and '}'
   // (presumably the opening and closing delimiters).
   void 
   print_attr( const std::vector< prolog::term > & vect, std::ostream& out ) 
   {
      print_range( vect. cbegin( ), vect. cend( ), '{', '}', out );
   }

   // Prints an attribute of type prolog::term by streaming it
   // into out with operator << .
   void 
   print_attr( const prolog::term& trm, std::ostream& out )
   { 
      out << trm; 
   } 
}

%parsercode_h{ #include "tokenizer.h" }
%parsercode_h{ #include "../calculator/varstore.h" }

%parsercode_cpp{ 

namespace
{

   // True if sym is an IDENTIFIER whose string has a prefix operator
   // definition in synt. Writes a trace line to std::cout first.
   bool canbeprefix( const syntax& synt, const symbol& sym )
   {
      std::cout << "can be prefix " << sym << "\n";
      return sym. type == sym_IDENTIFIER &&
             synt. hasprefixdef( sym. get< std::string > ( ));
   }

   // True if sym is an IDENTIFIER whose string has an infix operator
   // definition in synt. Writes a trace line to std::cout first.
   bool canbeinfix( const syntax& synt, const symbol& sym )
   {
      std::cout << "can be infix " << sym << "\n";
      return sym. type == sym_IDENTIFIER &&
             synt. hasinfixdef( sym. get< std::string > ( ));
   }

   // True if sym is an IDENTIFIER whose string has a postfix operator
   // definition in synt. Writes a trace line to std::cout first.
   bool canbepostfix( const syntax& synt, const symbol& sym )
   {
      std::cout << "can be postfix " << sym << "\n";
      return sym. type == sym_IDENTIFIER &&
             synt. haspostfixdef( sym. get< std::string > ( ));
   }

   // True if the type of sym is one of the symbol types with which
   // a right hand side of Term that does not start with a nonterminal
   // begins. Writes a trace line to std::cout first.
   bool canstartterm( const symbol& sym )
   {
      std::cout << "can start term " << sym << "\n";

      const auto is = [ &sym ]( auto tp ) { return sym.type == tp; };

      return is( sym_IDENTIFIER ) ||
             is( sym_GLUEDIDENTIFIER ) ||
             is( sym_QUOTEDIDENTIFIER ) ||
             is( sym_GLUEDQUOTEDIDENTIFIER ) ||
             is( sym_LPAR ) ||
             is( sym_LSQBRACKET ) ||
             is( sym_VARIABLE ) ||
             is( sym_INTEGER ) ||
             is( sym_DOUBLE );
   }

   // Compares the operator op, which is about to be reduced, with the
   // lookahead sym. Returns 1 (reduce) when sym is not an IDENTIFIER,
   // or when its string has neither a postfix nor an infix definition
   // in synt. Otherwise the result follows from op. decide( ) applied
   // to that definition: -1 yields 1, 1 yields 0, anything else
   // yields -1. A postfix definition is preferred over an infix one.
   // Writes a trace line to std::cout first.
   //
   // We do not really know what to do when there are
   // conflicting priorities. I suppose it should not happen.
   short int canreduce( const syntax& synt,
                        const opdef& op, const symbol& sym )
   {
      std::cout << "deciding priorities between " << op << " and " << sym << "\n";

      // Translates the outcome of op. decide( ) for a competing operator.
      const auto verdict = [ &op ]( auto other ) -> short int
      {
         const auto dir = op. decide( other );
         if( dir == -1 ) return 1;
         if( dir == 1 ) return 0;
         return -1;
      };

      if( sym. type == sym_IDENTIFIER )
      {
         const std::string& name = sym. get< std::string > ( );

         if( synt. haspostfixdef( name ))
            return verdict( synt. postfixdef( name ));

         if( synt. hasinfixdef( name ))
            return verdict( synt. infixdef( name ));
      }

      return 1;   // reduce.
   }
}
}



// Parameters of the parser, which can be used in action code:
// tok delivers the symbols, synt holds the operator definitions,
// and list provides the constructors cons and nil for lists.
%parameter { tokenizer } tok 
%parameter { varstore< term > } vs
%parameter { syntax } synt 
%parameter { listconstr } list

// Symbol and parser are both put in namespace prolog.
%symbolspace prolog
%parserspace prolog

// The tokenizer receives synt when it reads the next symbol.
%source { tok. get( synt ); }

%rules

// The input is a single term followed by TERMINATOR.
Start => OneTerm:t TERMINATOR ;

// NOTE(review): timetosaygoodbye is not declared in this grammar;
// presumably it is a variable of the generated parser that makes it
// stop after this reduction. Confirm.
OneTerm => Term:t  { timetosaygoodbye = true; return t; } ;

// Terms: variables, numbers, atoms, functional terms with arguments,
// operator applications, parenthesized terms and lists.
// NOTE(review): the actions return pointers obtained from new where
// the attribute type is term; presumably term can be constructed from
// such a pointer and takes care of it. Confirm in term.h.
Term => 
   VARIABLE:v { return new variable(v); } 

|  DOUBLE : d { return new constant< double > (d); } 

|  INTEGER : i { return new constant< bigint > (i); } 

|  IDENTIFIER : id { return new functional( function( id, 0 )); } 

|  QUOTEDIDENTIFIER : id { return new functional( function( id, 0 )); } 

|  GLUEDIDENTIFIER : id LPAR MaybeTerms : args RPAR 
      { // The arity is read before args is moved from.
        size_t ar = args. size( );
        return new functional( function( id, ar ), std::move( args ));
      }

|  GLUEDQUOTEDIDENTIFIER : id LPAR MaybeTerms : args RPAR
      { // The arity is read before args is moved from.
        size_t ar = args. size( );
        return new functional( function( id, ar ), std::move( args ));
      }

|  Prefix:op Term:t
%requires
   // The priorities of op and of the lookahead decide.
   { return canreduce( synt, op, lookahead. value( )); }
%reduces 
   { return new functional( function( op. str, 1 ), { t } ); } 

|  Term:t1 Infix:op Term:t2 
%requires
   // The priorities of op and of the lookahead decide.
   { return canreduce( synt, op, lookahead. value( )); }
%reduces 
   { return new functional( function( op. str, 2 ), { t1, t2 } ); }    

|  Term:t Postfix:op 
%reduces
   { return new functional( function( op. str, 1 ), { t } ); }

|  LPAR Term:t RPAR { return t; } 

|  LSQBRACKET MaybeTerms :args ListEnd :end RSQBRACKET
   { // The list is built from the back: starting with the list end,
     // the elements are prepended with list. cons in reverse order.
     auto res = end;
     size_t i = args. size( );
     while( i -- )
        res = new functional( list. cons, { args[i], res } );
     return res;    
   }
;

// A possibly empty list of terms separated by COMMA.
// The element type is written out, because it cannot be deduced
// from an empty argument list, and because it has to agree with
// the attribute type std::vector< term > of MaybeTerms.
MaybeTerms => 
   { return std::vector< term > ( );         /* empty rhs. */  }
   | SomeTerms : terms { return terms; } 
   ;

// A non-empty list of terms separated by COMMA.
// A term is added to the list only when the lookahead cannot be an
// infix or a postfix operator, because in that case the term could
// still be extended by that operator.
SomeTerms 
=> Term : t 
%requires 
   { return !canbeinfix( synt, lookahead. value( )) &&
            !canbepostfix( synt, lookahead. value( )); 
   }
%reduces
   { auto res = std::vector< term > ( ); res. push_back(t); return res; } 


| SomeTerms : some  COMMA  Term : onemore
%requires
   { return !canbeinfix( synt, lookahead. value( )) &&
            !canbepostfix( synt, lookahead. value( )); 
   }
%reduces 
   { some. push_back( onemore ); return some; } 
;

// The end of a list: without BAR it is the constant list. nil,
// with BAR it is the term that follows BAR.
ListEnd => 
   { return new functional( list. nil ); } 
|
   BAR Term:t { return t; } 
;

// An identifier is a prefix operator only if synt has a prefix
// definition for it, and the lookahead can start a term.
Prefix => IDENTIFIER : id 
%requires
   { return synt. hasprefixdef( id ) && canstartterm( lookahead. value( )); }
%reduces
   { return synt. prefixdef(id); }  
;

// An identifier is an infix operator only if synt has an infix
// definition for it, and the lookahead can start a term.
Infix => IDENTIFIER : id  
%requires 
   { return synt. hasinfixdef(id) && canstartterm( lookahead. value( )); }
%reduces 
   { return synt. infixdef(id); }
;

// An identifier is a postfix operator if synt has a postfix
// definition for it. The lookahead is not inspected here.
Postfix => IDENTIFIER : id
%requires 
   { return synt. haspostfixdef(id); }
%reduces  
   { return synt. postfixdef(id); }  
;

%end