///////////////////////////////////////////////////////////////////////////////
//
//  Microsoft Research Singularity
//
//  Copyright (c) Microsoft Corporation.  All rights reserved.
//
///////////////////////////////////////////////////////////////////////////////

using System;
using System.Collections;
using System.Text.RegularExpressions;

namespace Microsoft.Singularity.Applications
{
    /**
     * This is a general LR parser driver.
     * It parses the program by first lexing it into a number
     * of tokens for a token stack. The parser then pops tokens off
     * this stack, consults the parse tables and pushes the corresponding
     * nonterminals and states on the operand stack. This is the same
     * algorithm from the dragon book with some tweaks to support optional
     * tokens (i.e. newlines).
     **/
    abstract class Parser
    {
        // Token id given to the marker that Lex appends after the last real token.
        private const int END_INPUT_MARKER_ID = 2;

        // Parse tables; null here, so a subclass is expected to fill them in
        // before parse/Lex is called.
        protected Action[,] actionTable = null;       // indexed [state id, token id]
        protected State[,] gotoTable = null;          // indexed [state id, nonterminal type]
        protected Production[] productionTable = null; // indexed by Action.stateOrProduction
        protected TokenType[] tokenList = null;       // tried in order by Lex

        /// <summary>
        /// Raised by parse when the action table has no entry for the current
        /// state and token. The message carries the token's line and column.
        /// </summary>
        public class ParseException : Exception
        {
            public ParseException(Token token, String error)
                : base("Parse error on line " + token.lineNumber + ", char " + token.charIndex + ": " + error)
            {
            }
        }

        /// <summary>
        /// Lexes <paramref name="input"/> and runs the table-driven
        /// shift/reduce loop over the resulting tokens.
        /// </summary>
        /// <param name="input">The text to parse.</param>
        /// <returns>The value of the nonterminal on top of the operand stack
        /// when the loop ends.</returns>
        /// <exception cref="ParseException">No action exists for the current
        /// state and token.</exception>
        public Object parse(String input)
        {
            // Operand stack layout: State, symbol, State, symbol, ..., State.
            Stack leftStack = new Stack();
            leftStack.Push(new State(0));

            ArrayList list = Lex(ref input);
            list.Reverse(); // could return a stack out of lex instead
            Stack rightStack = new Stack(list);

            while (rightStack.Count != 0)
            {
                State st = (State)leftStack.Pop();
                Token tok = (Token)rightStack.Pop();
                Action action = actionTable[st.id, tok.id];

                // Optional tokens that the grammar cannot use here are dropped.
                while (tok.optional && action == null && rightStack.Count != 0)
                {
                    tok = (Token)rightStack.Pop();
                    action = actionTable[st.id, tok.id];
                }

                if (action == null)
                {
                    if (tok.id == END_INPUT_MARKER_ID)
                    {
                        throw new ParseException(tok, " unexpected EOF");
                    }
                    throw new ParseException(tok, tok.value + " unexpected input");
                }

                if (action.type == ActionType.SHIFT)
                {
                    leftStack.Push(st);
                    leftStack.Push(tok);
                    leftStack.Push(new State(action.stateOrProduction));
                }
                else if (action.type == ActionType.REDUCE)
                {
                    Production production = productionTable[action.stateOrProduction];
                    Object value = production.reduction(leftStack);

                    // After the reducer has consumed its symbols the top of the
                    // stack is the state beneath them. If the top is not a
                    // State (or the stack is empty because the reduction
                    // happened in the initial state) the reducer consumed
                    // nothing, so the state popped above is still current.
                    State previousState;
                    if (leftStack.Count != 0 && leftStack.Peek() is State)
                    {
                        previousState = (State)leftStack.Pop();
                    }
                    else
                    {
                        previousState = st; /* took an epsilon transition */
                    }

                    State nextState = gotoTable[previousState.id, production.nonterminalType];
                    if (nextState == null)
                    {
                        throw new Exception("missing state");
                    }

                    leftStack.Push(previousState);
                    leftStack.Push(new Nonterminal(production.nonterminalType, value));
                    leftStack.Push(nextState);

                    // The lookahead token was not consumed by the reduction.
                    rightStack.Push(tok);
                }
                else if (action.type == ActionType.ACCEPT)
                {
                    break;
                }
            }

            Nonterminal result = (Nonterminal)leftStack.Pop();
            return result.value;
        }

        /// <summary>
        /// Counts how many times <paramref name="c"/> occurs in
        /// <paramref name="input"/>.
        /// </summary>
        /// <param name="last">Index of the last occurrence, or -1 if none.</param>
        private static int CountOccurrences(String input, char c, out int last)
        {
            int count = 0;
            last = -1;
            for (int i = 0; i < input.Length; ++i)
            {
                if (c == input[i])
                {
                    ++count;
                    last = i;
                }
            }
            return count;
        }

        private static bool IsNewLine(char t)
        {
            return t == '\n';
        }

        /// <summary>
        /// Moves a 1-based (line, column) position past <paramref name="text"/>.
        /// </summary>
        private static void AdvancePosition(String text, ref int lineNumber, ref int charIndex)
        {
            int lastNewline;
            int newlines = CountOccurrences(text, '\n', out lastNewline);
            if (newlines > 0)
            {
                lineNumber += newlines;
                // Column of the character following the text, counted from 1
                // just like the initial column on the first line.
                charIndex = text.Length - lastNewline;
            }
            else
            {
                charIndex += text.Length;
            }
        }

        /// <summary>
        /// Splits <paramref name="input"/> into tokens by repeatedly trying
        /// each entry of tokenList, in order, against the start of the
        /// remaining text.
        /// </summary>
        /// <param name="input">Text to tokenize. Matched text is removed from
        /// the front as lexing proceeds, so it is empty on normal return.</param>
        /// <returns>The tokens in source order, followed by an end-of-input
        /// marker token.</returns>
        /// <exception cref="Exception">No token type matches the start of the
        /// remaining input.</exception>
        public ArrayList Lex(ref String input)
        {
            ArrayList tokens = new ArrayList();
            int lineNumber = 1;
            int charIndex = 1;

            while (input.Length != 0)
            {
                bool consumed = false;

                foreach (TokenType spec in tokenList)
                {
                    Match match = spec.regex.Match(input);

                    // Only a non-empty match at the very start is usable: the
                    // matched length is removed from the front of the input,
                    // and an empty match would never make progress.
                    if (!match.Success || match.Index != 0 || match.Length == 0)
                    {
                        continue;
                    }

                    String text = match.Value;
                    input = input.Remove(0, text.Length);

                    Token token = new Token(spec.type, text, lineNumber, charIndex);
                    if (spec.lexer != null)
                    {
                        spec.lexer(token);
                        // A lexer callback drops the token by nulling its value.
                        if (token.value != null)
                        {
                            tokens.Add(token);
                        }
                    }
                    else
                    {
                        tokens.Add(token);
                    }

                    AdvancePosition(text, ref lineNumber, ref charIndex);
                    consumed = true;
                    break;
                }

                if (!consumed)
                {
                    throw new Exception("unknown input: " + input);
                }
            }

            tokens.Add(new Token(END_INPUT_MARKER_ID, null, lineNumber, charIndex));
            return tokens;
        }

        /// <summary>
        /// Callback run for a REDUCE action. It receives the operand stack and
        /// returns the value stored in the resulting nonterminal.
        /// </summary>
        public delegate Object Reducer(Stack stack);

        /// <summary>
        /// Callback run on a freshly lexed token before it is added to the
        /// token list; setting the token's value to null discards it.
        /// </summary>
        public delegate void Lexer(Token tok);

        /// <summary>A grammar production: the nonterminal it yields and its reducer.</summary>
        public class Production
        {
            public int nonterminalType;
            public Reducer reduction;

            public Production(int nonterminalType, Reducer reduction)
            {
                this.nonterminalType = nonterminalType;
                this.reduction = reduction;
            }
        }

        /// <summary>Base type of everything stored on the parser's stacks.</summary>
        public class StackElement
        {
            public readonly int id;
            public Object value;

            public StackElement(int id)
            {
                this.id = id;
            }

            public override string ToString()
            {
                return id.ToString();
            }
        }

        /// <summary>A parser state; id is the row index into the parse tables.</summary>
        public class State : StackElement
        {
            public State(int number)
                : base(number)
            {
            }
        }

        /// <summary>
        /// A lexed token; id is the token type and value the matched text
        /// (or whatever a Lexer callback replaced it with).
        /// </summary>
        public class Token : StackElement
        {
            public readonly int lineNumber;
            public readonly int charIndex;

            // When true, parse silently skips this token if no action exists for it.
            public bool optional = false;

            public Token(int type, Object value, int lineNumber, int charIndex)
                : base(type)
            {
                this.value = value;
                this.lineNumber = lineNumber;
                this.charIndex = charIndex;
            }

            public override string ToString()
            {
                return value + ":" + id;
            }
        }

        /// <summary>Result of a reduction; id is the nonterminal type.</summary>
        private class Nonterminal : StackElement
        {
            public Nonterminal(int type, Object value)
                : base(type)
            {
                this.value = value;
            }
        }

        public enum ActionType
        {
            SHIFT,
            REDUCE,
            ACCEPT
        }

        /// <summary>
        /// An action table entry. stateOrProduction is the target state for
        /// SHIFT and the productionTable index for REDUCE.
        /// </summary>
        public class Action
        {
            public ActionType type;
            public int stateOrProduction;

            public Action(ActionType type, int stateOrProduction)
            {
                this.type = type;
                this.stateOrProduction = stateOrProduction;
            }

            public override int GetHashCode()
            {
                return type.GetHashCode() ^ stateOrProduction.GetHashCode();
            }

            public override bool Equals(object obj)
            {
                Action that = obj as Action;
                if (that == null)
                {
                    return false;
                }
                return this.type == that.type &&
                       this.stateOrProduction == that.stateOrProduction;
            }

            public override string ToString()
            {
                return type + " " + stateOrProduction;
            }
        }

        /// <summary>
        /// A lexical rule: the token type it produces, the pattern that
        /// recognizes it, and an optional callback run on each token.
        /// </summary>
        protected class TokenType
        {
            public int type;
            public Regex regex;
            public Lexer lexer;

            public TokenType(int type, Regex regex, Lexer lexer)
            {
                this.type = type;
                this.regex = regex;
                this.lexer = lexer;
            }
        }
    }
}