2008-11-17 18:29:00 -05:00
// ----------------------------------------------------------------------------
2008-03-05 09:52:00 -05:00
/ /
// Copyright (c) Microsoft Corporation. All rights reserved.
/ /
2008-11-17 18:29:00 -05:00
// ----------------------------------------------------------------------------
2008-03-05 09:52:00 -05:00
using System ;
using System.Collections ;
using System.Text.RegularExpressions ;
using Microsoft.Contracts ;
namespace Microsoft.Singularity.Applications
{
2008-11-17 18:29:00 -05:00
///
// This is a general lr parser driver.
// Parses the programming by first lexing it into a number
// of tokens for a token stack. Then parser then pops tokens off
// this stack, consults the parse tables and push the corresponding
// nonterminals and states on the operand stack. This is the same
// algorithm from the dragon book with some tweaks to support optional
// tokens (i.e. newlines).
//
2008-03-05 09:52:00 -05:00
abstract class Parser
{
private const int END_INPUT_MARKER_ID = 2 ;
protected abstract Action [ , ] ! ActionTable { get ; }
protected abstract State [ , ] ! GotoTable { get ; }
protected abstract Production [ ] ! ProductionTable { get ; }
protected abstract TokenType [ ] ! TokenList { get ; }
public class ParseException : Exception
{
public ParseException ( Token ! token , string ! error )
: base ( "Parse error on line " + token . lineNumber + ", char " + token . charIndex + ": " + error ) { }
}
public Object Parse ( string ! input )
{
Stack leftStack = new Stack ( ) ;
leftStack . Push ( new State ( 0 ) ) ;
ArrayList list = Lex ( ref input ) ;
list . Reverse ( ) ; //could return a stack out of lex instead
Stack rightStack = new Stack ( list ) ;
while ( rightStack . Count ! = 0 ) {
State st = ( State ! ) leftStack . Pop ( ) ;
Token tok = ( Token ! ) rightStack . Pop ( ) ;
Action action = ActionTable [ st . id , tok . id ] ;
while ( tok . optional & & action = = null & & rightStack . Count ! = 0 ) {
tok = ( Token ! ) rightStack . Pop ( ) ;
action = ActionTable [ st . id , tok . id ] ;
}
if ( action = = null ) {
if ( tok . id = = END_INPUT_MARKER_ID ) {
throw new ParseException ( tok , " unexpected EOF" ) ;
}
throw new ParseException ( tok , "'" + tok . value + "' unexpected input" ) ;
}
if ( action . type = = ActionType . SHIFT ) {
leftStack . Push ( st ) ;
leftStack . Push ( tok ) ;
leftStack . Push ( new State ( action . stateOrProduction ) ) ;
2008-11-17 18:29:00 -05:00
}
else if ( action . type = = ActionType . REDUCE ) {
2008-03-05 09:52:00 -05:00
Production production = ( ! ) ProductionTable [ action . stateOrProduction ] ;
Object value = production . Reduction ( leftStack ) ;
StackElement topLeft = ( StackElement ) leftStack . Pop ( ) ;
State previousState ;
if ( topLeft is State ) {
previousState = ( State ) topLeft ;
2008-11-17 18:29:00 -05:00
}
else {
2008-03-05 09:52:00 -05:00
leftStack . Push ( topLeft ) ;
2008-11-17 18:29:00 -05:00
previousState = st ; // took an epsilon transition
2008-03-05 09:52:00 -05:00
}
State nextState = GotoTable [ previousState . id , production . NonterminalType ] ;
if ( nextState = = null ) throw new Exception ( "missing state" ) ;
leftStack . Push ( previousState ) ;
leftStack . Push ( new Nonterminal ( production . NonterminalType , value ) ) ;
leftStack . Push ( nextState ) ;
rightStack . Push ( tok ) ;
2008-11-17 18:29:00 -05:00
}
else if ( action . type = = ActionType . ACCEPT ) {
2008-03-05 09:52:00 -05:00
break ;
}
}
Nonterminal result = ( Nonterminal ! ) leftStack . Pop ( ) ;
return result . value ;
}
private static int CountOccurrences ( string ! input , char c , out int last )
{
int count = 0 ;
last = - 1 ;
for ( int i = 0 ; i < input . Length ; + + i ) {
if ( c = = input [ i ] ) {
+ + count ;
last = i ;
}
}
return count ;
}
private static bool IsNewLine ( char t ) { return t = = '\n' ; }
public ArrayList ! Lex ( ref string ! input )
{
ArrayList tokens = new ArrayList ( ) ;
int lineNumber = 1 ;
int charIndex = 1 ;
outer :
while ( input . Length ! = 0 ) {
foreach ( TokenType ! spec in TokenList ) {
Match ! match = ( ! ) spec . Regex . Match ( input ) ;
if ( match = = null | | ! match . Success ) continue ;
input = input . Remove ( 0 , ( ( ! ) match . Value ) . Length ) ;
if ( spec . Lexer ! = null ) {
Token token = new Token ( spec . Type , match . Value , lineNumber , charIndex ) ;
spec . Lexer ( token ) ;
if ( token . value ! = null )
tokens . Add ( token ) ;
2008-11-17 18:29:00 -05:00
}
else {
2008-03-05 09:52:00 -05:00
tokens . Add ( new Token ( spec . Type , match . Value , lineNumber , charIndex ) ) ;
}
int lastIndex ;
int occurrences = CountOccurrences ( match . Value , '\n' , out lastIndex ) ;
if ( occurrences > 0 ) {
lineNumber + = occurrences ;
charIndex = match . Value . Length - ( lastIndex + 1 ) ;
2008-11-17 18:29:00 -05:00
}
else {
2008-03-05 09:52:00 -05:00
charIndex + = match . Value . Length ;
}
goto outer ;
}
throw new Exception ( "unknown input: " + input ) ;
}
tokens . Add ( new Token ( END_INPUT_MARKER_ID , null , lineNumber , charIndex ) ) ;
return tokens ;
}
public delegate Object Reducer ( Stack ! stack ) ;
public delegate void Lexer ( Token ! tok ) ;
public class Production
{
public readonly int NonterminalType ;
public readonly Reducer Reduction ;
public Production ( int nonterminalType , [ Delayed ] Reducer reduction )
{
this . NonterminalType = nonterminalType ;
this . Reduction = reduction ;
}
}
public class StackElement
{
public readonly int id ;
public Object value ;
public StackElement ( int id ) { this . id = id ; }
2008-11-17 18:29:00 -05:00
#if false
// Error: Bartok can not handle complicated override pattern of System.Object.ToString. [Specifically, it was overridden via a MethodImpl or replaced via interface method reimplementation, effectively merging two virtual slots. Then the original slot was overridden by Microsoft.Singularity.Applications.Parser.Token.ToString, effectively splitting the slots again. Bartok currently can not analyze the split.]
2008-03-05 09:52:00 -05:00
public override string ! ToString ( )
{
return id . ToString ( ) ;
}
2008-11-17 18:29:00 -05:00
#endif
2008-03-05 09:52:00 -05:00
}
public class State : StackElement
{
public State ( int number ) : base ( number ) { }
}
public class Token : StackElement
{
public readonly int lineNumber ;
public readonly int charIndex ;
public bool optional = false ;
public Token ( int type , Object value , int lineNumber , int charIndex )
: base ( type )
{
this . value = value ;
this . lineNumber = lineNumber ;
this . charIndex = charIndex ;
this . optional = optional ;
}
2008-11-17 18:29:00 -05:00
#if false
// Error: Bartok can not handle complicated override pattern of System.Object.ToString. [Specifically, it was overridden via a MethodImpl or replaced via interface method reimplementation, effectively merging two virtual slots. Then the original slot was overridden by Microsoft.Singularity.Applications.Parser.Token.ToString, effectively splitting the slots again. Bartok currently can not analyze the split.]
2008-03-05 09:52:00 -05:00
public override string ! ToString ( )
{
return value + ":" + id ;
}
2008-11-17 18:29:00 -05:00
#endif
2008-03-05 09:52:00 -05:00
}
private class Nonterminal : StackElement
{
public Nonterminal ( int type , Object value ) : base ( type ) { this . value = value ; }
}
public enum ActionType { SHIFT , REDUCE , ACCEPT }
public class Action
{
public ActionType type ;
public int stateOrProduction ;
public Action ( ActionType type , int stateOrProduction )
{
this . type = type ; this . stateOrProduction = stateOrProduction ;
}
public override int GetHashCode ( )
{
return type . GetHashCode ( ) ^ stateOrProduction . GetHashCode ( ) ;
}
public override bool Equals ( object obj )
{
Action that = obj as Action ;
if ( that = = null ) return false ;
return this . type = = that . type & & this . stateOrProduction = = that . stateOrProduction ;
}
public override string ! ToString ( )
{
return type + " " + stateOrProduction ;
}
}
protected class TokenType
{
public readonly int Type ;
public readonly Regex ! Regex ;
public readonly Lexer Lexer ;
public TokenType ( int type , Regex regex , [ Delayed ] Lexer lexer )
{
this . Type = type ; this . Regex = ( ! ) regex ; this . Lexer = lexer ;
}
}
}
}