singrdk/base/Windows/spg/Parser.cs

232 lines
8.6 KiB
C#

///////////////////////////////////////////////////////////////////////////////
//
// Microsoft Research Singularity
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
///////////////////////////////////////////////////////////////////////////////
using System;
using System.Collections;
using System.Text.RegularExpressions;
namespace Microsoft.Singularity.Applications
{
/**
* This is a general LR parser driver.
* It parses the program by first lexing it into a sequence
* of tokens for a token stack. The parser then pops tokens off
* this stack, consults the parse tables and pushes the corresponding
* nonterminals and states onto the operand stack. This is the same
* algorithm as in the dragon book, with some tweaks to support optional
* tokens (i.e. newlines).
**/
abstract class Parser
{
// Token id of the end-of-input marker; Lex appends a token with this id after
// the last real token, and parse uses it to report "unexpected EOF".
private const int END_INPUT_MARKER_ID = 2;
// Parse actions, indexed as [state id, token id]. parse treats a null entry
// as a syntax error (after skipping optional tokens).
protected Action[,] actionTable = null;
// Goto table, indexed as [state id, nonterminal type]; consulted by parse
// after every reduction. A null entry makes parse throw "missing state".
protected State[,] gotoTable = null;
// Productions, indexed by the stateOrProduction value of a REDUCE action.
protected Production[] productionTable = null;
// Token specifications; Lex tries them in array order and takes the first
// one whose regex matches.
// NOTE(review): all four tables start out null and nothing in this class
// fills them in - presumably a derived parser does; confirm.
protected TokenType[] tokenList = null;
// Exception raised by parse for syntax errors. The message is prefixed with
// the line number and character index recorded on the offending token.
public class ParseException : Exception
{
    // token: the token at which the error was detected (its lineNumber and
    //        charIndex are embedded in the message).
    // error: description appended after the position prefix.
    public ParseException(Token token, String error)
        : base(Describe(token, error))
    {
    }

    // Builds "Parse error on line <line>, char <column>: <error>".
    private static String Describe(Token token, String error)
    {
        String position = "Parse error on line " + token.lineNumber + ", char " + token.charIndex;
        return position + ": " + error;
    }
}
// Parses input and returns the semantic value of the start symbol.
//
// The input is first tokenized with Lex. Tokens are then consumed from a
// "right" stack while the "left" stack holds the parse state, laid out as
// state, symbol, state, symbol, ..., state.
//
// input:   the complete text to parse.
// returns: the value of the Nonterminal found on top of the left stack once
//          parsing stops (normally because an ACCEPT action was reached).
// throws:  ParseException when the action table has no entry for the current
//          state and token; Exception("missing state") when the goto table
//          has no entry for the state exposed by a reduction.
public Object parse(String input)
{
    Stack leftStack = new Stack();
    leftStack.Push(new State(0));

    // Lex returns the tokens in source order; reversing the list before
    // building the stack puts the first token on top.
    ArrayList list = Lex(ref input);
    list.Reverse(); //could return a stack out of lex instead
    Stack rightStack = new Stack(list);

    while (rightStack.Count != 0) {
        // The current state is always on top of the left stack. It is popped
        // here and pushed back by the SHIFT/REDUCE branches as needed.
        State st = (State)leftStack.Pop();
        Token tok = (Token)rightStack.Pop();
        Action action = actionTable[st.id, tok.id];

        // Optional tokens that the grammar cannot use in this state are
        // silently dropped until a usable token (or the last token) is found.
        while (tok.optional && action == null && rightStack.Count != 0) {
            tok = (Token)rightStack.Pop();
            action = actionTable[st.id, tok.id];
        }

        if (action == null) {
            if (tok.id == END_INPUT_MARKER_ID) {
                throw new ParseException(tok, " unexpected EOF");
            }
            throw new ParseException(tok, tok.value + " unexpected input");
        }

        if (action.type == ActionType.SHIFT) {
            leftStack.Push(st);
            leftStack.Push(tok);
            leftStack.Push(new State(action.stateOrProduction));
        } else if (action.type == ActionType.REDUCE) {
            Production production = productionTable[action.stateOrProduction];

            // The reducer pops the production's right-hand side off the left
            // stack and returns the semantic value of the new nonterminal.
            Object value = production.reduction(leftStack);

            // If the reducer consumed symbols, a State is now exposed on top
            // and becomes the state to "goto" from. Otherwise the production
            // was empty and the state popped at the top of the loop is still
            // the current one. The stack may be completely empty here (empty
            // production reduced in the initial state), so peek instead of
            // popping unconditionally - Stack.Pop throws on an empty stack.
            State exposed = null;
            if (leftStack.Count != 0) {
                exposed = leftStack.Peek() as State;
            }
            State previousState;
            if (exposed != null) {
                leftStack.Pop();
                previousState = exposed;
            } else {
                previousState = st; /* took an epsilon transition */
            }

            State nextState = gotoTable[previousState.id, production.nonterminalType];
            if (nextState == null) throw new Exception("missing state");
            leftStack.Push(previousState);
            leftStack.Push(new Nonterminal(production.nonterminalType, value));
            leftStack.Push(nextState);

            // A reduction does not consume the lookahead token.
            rightStack.Push(tok);
        } else if (action.type == ActionType.ACCEPT) {
            break;
        }
    }

    // On ACCEPT the current state was popped and not pushed back, so the
    // element on top is expected to be the nonterminal of the last reduction.
    Nonterminal result = (Nonterminal)leftStack.Pop();
    return result.value;
}
// Counts how many times c appears in input.
//
// input:   the string to scan.
// c:       the character to look for.
// last:    receives the index of the final occurrence, or -1 if there is none.
// returns: the number of occurrences of c in input.
private static int CountOccurrences(String input, char c, out int last)
{
    int total = 0;
    last = -1;

    // Hop from one occurrence to the next instead of visiting every index.
    int position = input.IndexOf(c);
    while (position >= 0) {
        total++;
        last = position;
        position = input.IndexOf(c, position + 1);
    }
    return total;
}
// Returns true only for the line-feed character ('\n').
private static bool IsNewLine(char t)
{
    return '\n'.Equals(t);
}
// Splits input into tokens.
//
// At each step the token specifications in tokenList are tried in order and
// the first one whose regex matches a non-empty prefix of the remaining
// input wins. The matched text is removed from the front of input, so on a
// successful return input is the empty string.
//
// input:   the text to tokenize; consumed in place (see above).
// returns: an ArrayList of Token in source order, always terminated by a
//          token whose id is END_INPUT_MARKER_ID and whose value is null.
// throws:  Exception("unknown input: ...") when no specification matches the
//          start of the remaining input.
public ArrayList Lex(ref String input)
{
    ArrayList tokens = new ArrayList();
    int lineNumber = 1;   // 1-based line of the next unread character
    int charIndex = 1;    // 1-based column of the next unread character

    while (input.Length != 0) {
        bool matched = false;
        foreach (TokenType spec in tokenList) {
            Match match = spec.regex.Match(input);
            if (match == null || !match.Success) continue;

            // The matched length is removed from the FRONT of the input, so
            // only a match that starts at index 0 describes the text that is
            // actually consumed. An empty match would consume nothing and
            // make this loop spin forever, so it counts as "no match" too.
            if (match.Index != 0 || match.Length == 0) continue;

            String text = match.Value;
            input = input.Remove(0, text.Length);

            if (spec.lexer != null) {
                // The custom lexer may rewrite the token; setting its value
                // to null drops the token from the output.
                Token token = new Token(spec.type, text, lineNumber, charIndex);
                spec.lexer(token);
                if (token.value != null)
                    tokens.Add(token);
            } else {
                tokens.Add(new Token(spec.type, text, lineNumber, charIndex));
            }

            // Advance the position bookkeeping past the consumed text.
            int lastIndex;
            int occurrences = CountOccurrences(text, '\n', out lastIndex);
            if (occurrences > 0) {
                lineNumber += occurrences;
                // Columns are 1-based: the character right after the last
                // newline is column 1, hence Length - lastIndex rather than
                // Length - (lastIndex + 1).
                charIndex = text.Length - lastIndex;
            } else {
                charIndex += text.Length;
            }

            matched = true;
            break;
        }
        if (!matched) {
            throw new Exception("unknown input: " + input);
        }
    }

    tokens.Add(new Token(END_INPUT_MARKER_ID, null, lineNumber, charIndex));
    return tokens;
}
// Semantic action of a production. parse calls it with its left (operand)
// stack when the production is reduced and stores the returned object as the
// value of the resulting nonterminal.
// NOTE(review): the reducer is presumably expected to pop the production's
// right-hand side (symbols and the states between them) off the stack -
// confirm against the generated reducers.
public delegate Object Reducer(Stack stack);
// Optional per-token hook. Lex calls it with the freshly created token; if
// the hook leaves token.value null the token is not added to the output.
public delegate void Lexer(Token tok);
// One grammar production as seen by the parser driver: the nonterminal it
// produces and the semantic action to run when it is reduced.
public class Production
{
    // Type of the nonterminal on the left-hand side; used as the column index
    // into the goto table and as the id of the pushed Nonterminal.
    public int nonterminalType;
    // Semantic action invoked with the parser's left stack on reduction.
    public Reducer reduction;
    // nonterminalType: type id of the produced nonterminal.
    // reduction:       semantic action for this production.
    public Production(int nonterminalType, Reducer reduction)
    {
        this.nonterminalType = nonterminalType;
        this.reduction = reduction;
    }
}
// Base type of everything the parser keeps on its stacks. Carries a numeric
// id and an arbitrary payload.
public class StackElement
{
    // Numeric identity of the element; fixed at construction.
    public readonly int id;

    // Payload attached to the element; null until someone assigns it.
    public Object value;

    // id: the numeric identity to store.
    public StackElement(int id)
    {
        this.id = id;
    }

    // Renders just the id.
    public override string ToString()
    {
        return Convert.ToString(id);
    }
}
// A parser state. Its id is the state number, which parse uses as the row
// index into the action and goto tables.
public class State : StackElement
{
    // number: the state number, stored as the element id.
    public State(int number) : base(number) { }
}
// A lexed token. The id is the token type; value holds the matched text (or
// whatever a custom lexer replaced it with).
public class Token : StackElement
{
    // Line on which the token starts, as tracked by the lexer.
    public readonly int lineNumber;

    // Character position within the line at which the token starts.
    public readonly int charIndex;

    // When true, parse may skip this token if the current state has no
    // action for it. Defaults to false.
    public bool optional;

    // type:       token type id, stored as the element id.
    // value:      the token's payload.
    // lineNumber: line on which the token starts.
    // charIndex:  character position of the token within that line.
    public Token(int type, Object value, int lineNumber, int charIndex)
        : base(type)
    {
        this.lineNumber = lineNumber;
        this.charIndex = charIndex;
        this.value = value;
    }

    // Renders the token as "<value>:<id>".
    public override string ToString()
    {
        return String.Concat(value, ":", id);
    }
}
// Stack element pushed by parse after a reduction: the id is the
// nonterminal type of the reduced production and value is the object
// returned by its reducer.
private class Nonterminal : StackElement
{
    // type:  nonterminal type id, stored as the element id.
    // value: semantic value produced by the reduction.
    public Nonterminal(int type, Object value) : base(type) { this.value = value; }
}
// The kinds of entries that can appear in the action table.
public enum ActionType
{
    SHIFT,
    REDUCE,
    ACCEPT
}

// One entry of the action table: what to do, plus the number of the state
// to shift to (SHIFT) or of the production to reduce by (REDUCE).
// Two actions are equal when both fields are equal.
public class Action
{
    // What the parser should do.
    public ActionType type;

    // Target state number for SHIFT, production index for REDUCE.
    public int stateOrProduction;

    // type:              the kind of action.
    // stateOrProduction: state number or production index, depending on type.
    public Action(ActionType type, int stateOrProduction)
    {
        this.type = type;
        this.stateOrProduction = stateOrProduction;
    }

    // Combines both fields so that equal actions hash alike.
    public override int GetHashCode()
    {
        int typeHash = type.GetHashCode();
        return typeHash ^ stateOrProduction;
    }

    // Value equality on (type, stateOrProduction); anything that is not an
    // Action (including null) is unequal.
    public override bool Equals(object obj)
    {
        Action other = obj as Action;
        if ((object)other == null) {
            return false;
        }
        if (type != other.type) {
            return false;
        }
        return stateOrProduction == other.stateOrProduction;
    }

    // Renders the action as "<type> <stateOrProduction>".
    public override string ToString()
    {
        return String.Concat(type, " ", stateOrProduction);
    }
}
// Specification of one kind of token for Lex: how to recognise it and how to
// post-process it.
protected class TokenType
{
    // Token type id; becomes the id of every Token created from this spec.
    public int type;
    // Pattern Lex runs against the remaining input to recognise the token.
    public Regex regex;
    // Optional hook run on each freshly created token; may be null, in which
    // case the token is added to the output unchanged.
    public Lexer lexer;
    // type:  token type id.
    // regex: pattern that recognises the token.
    // lexer: optional post-processing hook, or null.
    public TokenType(int type, Regex regex, Lexer lexer)
    {
        this.type = type; this.regex = regex; this.lexer = lexer;
    }
}
}
}