243 lines
9.8 KiB
C#
243 lines
9.8 KiB
C#
// ----------------------------------------------------------------------------
|
|
//
|
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
//
|
|
// ----------------------------------------------------------------------------
|
|
|
|
using System;
|
|
using System.Collections;
|
|
using System.Text.RegularExpressions;
|
|
using Microsoft.Contracts;
|
|
|
|
namespace Microsoft.Singularity.Applications
|
|
{
|
|
///
|
|
// This is a general lr parser driver.
|
|
// Parses the programming by first lexing it into a number
|
|
// of tokens for a token stack. Then parser then pops tokens off
|
|
// this stack, consults the parse tables and push the corresponding
|
|
// nonterminals and states on the operand stack. This is the same
|
|
// algorithm from the dragon book with some tweaks to support optional
|
|
// tokens (i.e. newlines).
|
|
//
|
|
abstract class Parser
|
|
{
|
|
private const int END_INPUT_MARKER_ID = 2;
|
|
|
|
protected abstract Action[,]! ActionTable { get; }
|
|
protected abstract State[,]! GotoTable { get; }
|
|
protected abstract Production[]! ProductionTable { get; }
|
|
protected abstract TokenType[]! TokenList { get; }
|
|
|
|
|
|
public class ParseException : Exception
|
|
{
|
|
public ParseException(Token! token, string! error)
|
|
: base("Parse error on line " + token.lineNumber + ", char " + token.charIndex + ": " + error) { }
|
|
}
|
|
|
|
public Object Parse(string! input)
|
|
{
|
|
Stack leftStack = new Stack();
|
|
leftStack.Push(new State(0));
|
|
|
|
ArrayList list = Lex(ref input);
|
|
list.Reverse(); //could return a stack out of lex instead
|
|
Stack rightStack = new Stack(list);
|
|
while (rightStack.Count != 0) {
|
|
State st = (State!)leftStack.Pop();
|
|
Token tok = (Token!)rightStack.Pop();
|
|
Action action = ActionTable[st.id, tok.id];
|
|
while (tok.optional && action == null && rightStack.Count != 0) {
|
|
tok = (Token!)rightStack.Pop();
|
|
action = ActionTable[st.id, tok.id];
|
|
}
|
|
|
|
if (action == null) {
|
|
if (tok.id == END_INPUT_MARKER_ID) {
|
|
throw new ParseException(tok, " unexpected EOF");
|
|
}
|
|
throw new ParseException(tok, "'" + tok.value + "' unexpected input");
|
|
}
|
|
|
|
if (action.type == ActionType.SHIFT) {
|
|
leftStack.Push(st);
|
|
leftStack.Push(tok);
|
|
leftStack.Push(new State(action.stateOrProduction));
|
|
}
|
|
else if (action.type == ActionType.REDUCE) {
|
|
Production production = (!)ProductionTable[action.stateOrProduction];
|
|
Object value = production.Reduction(leftStack);
|
|
StackElement topLeft = (StackElement)leftStack.Pop();
|
|
State previousState;
|
|
if (topLeft is State) {
|
|
previousState = (State)topLeft;
|
|
}
|
|
else {
|
|
leftStack.Push(topLeft);
|
|
previousState = st; // took an epsilon transition
|
|
}
|
|
State nextState = GotoTable[previousState.id, production.NonterminalType];
|
|
if (nextState == null) throw new Exception("missing state");
|
|
leftStack.Push(previousState);
|
|
leftStack.Push(new Nonterminal(production.NonterminalType, value));
|
|
leftStack.Push(nextState);
|
|
rightStack.Push(tok);
|
|
}
|
|
else if (action.type == ActionType.ACCEPT) {
|
|
break;
|
|
}
|
|
}
|
|
Nonterminal result = (Nonterminal!)leftStack.Pop();
|
|
return result.value;
|
|
}
|
|
|
|
private static int CountOccurrences(string! input, char c, out int last)
|
|
{
|
|
int count = 0;
|
|
last = -1;
|
|
for (int i = 0; i < input.Length; ++i) {
|
|
if (c == input[i]) {
|
|
++count;
|
|
last = i;
|
|
}
|
|
}
|
|
return count;
|
|
}
|
|
|
|
private static bool IsNewLine(char t) { return t == '\n'; }
|
|
public ArrayList! Lex(ref string! input)
|
|
{
|
|
ArrayList tokens = new ArrayList();
|
|
int lineNumber = 1;
|
|
int charIndex = 1;
|
|
outer:
|
|
while (input.Length != 0) {
|
|
foreach (TokenType! spec in TokenList) {
|
|
Match! match = (!)spec.Regex.Match(input);
|
|
if (match == null || !match.Success) continue;
|
|
input = input.Remove(0, ((!)match.Value).Length);
|
|
if (spec.Lexer != null) {
|
|
Token token = new Token(spec.Type, match.Value, lineNumber, charIndex);
|
|
spec.Lexer(token);
|
|
if (token.value != null)
|
|
tokens.Add(token);
|
|
}
|
|
else {
|
|
tokens.Add(new Token(spec.Type, match.Value, lineNumber, charIndex));
|
|
}
|
|
int lastIndex;
|
|
int occurrences = CountOccurrences(match.Value, '\n', out lastIndex);
|
|
if (occurrences > 0) {
|
|
lineNumber += occurrences;
|
|
charIndex = match.Value.Length - (lastIndex + 1);
|
|
}
|
|
else {
|
|
charIndex += match.Value.Length;
|
|
}
|
|
goto outer;
|
|
}
|
|
throw new Exception("unknown input: " + input);
|
|
}
|
|
tokens.Add(new Token(END_INPUT_MARKER_ID, null, lineNumber, charIndex));
|
|
return tokens;
|
|
}
|
|
|
|
public delegate Object Reducer(Stack! stack);
|
|
public delegate void Lexer(Token! tok);
|
|
|
|
public class Production
|
|
{
|
|
public readonly int NonterminalType;
|
|
public readonly Reducer Reduction;
|
|
public Production(int nonterminalType, [Delayed] Reducer reduction)
|
|
{
|
|
this.NonterminalType = nonterminalType;
|
|
this.Reduction = reduction;
|
|
}
|
|
}
|
|
|
|
public class StackElement
|
|
{
|
|
public readonly int id;
|
|
public Object value;
|
|
public StackElement(int id) { this.id = id; }
|
|
#if false
|
|
// Error: Bartok can not handle complicated override pattern of System.Object.ToString. [Specifically, it was overridden via a MethodImpl or replaced via interface method reimplementation, effectively merging two virtual slots. Then the original slot was overridden by Microsoft.Singularity.Applications.Parser.Token.ToString, effectively splitting the slots again. Bartok currently can not analyze the split.]
|
|
public override string! ToString()
|
|
{
|
|
return id.ToString();
|
|
}
|
|
#endif
|
|
}
|
|
public class State : StackElement
|
|
{
|
|
public State(int number) : base(number) { }
|
|
|
|
}
|
|
public class Token : StackElement
|
|
{
|
|
public readonly int lineNumber;
|
|
public readonly int charIndex;
|
|
public bool optional = false;
|
|
public Token(int type, Object value, int lineNumber, int charIndex)
|
|
: base(type)
|
|
{
|
|
this.value = value;
|
|
this.lineNumber = lineNumber;
|
|
this.charIndex = charIndex;
|
|
this.optional = optional;
|
|
}
|
|
|
|
#if false
|
|
// Error: Bartok can not handle complicated override pattern of System.Object.ToString. [Specifically, it was overridden via a MethodImpl or replaced via interface method reimplementation, effectively merging two virtual slots. Then the original slot was overridden by Microsoft.Singularity.Applications.Parser.Token.ToString, effectively splitting the slots again. Bartok currently can not analyze the split.]
|
|
public override string! ToString()
|
|
{
|
|
return value + ":" + id;
|
|
}
|
|
#endif
|
|
}
|
|
private class Nonterminal : StackElement
|
|
{
|
|
public Nonterminal(int type, Object value) : base(type) { this.value = value; }
|
|
}
|
|
|
|
public enum ActionType { SHIFT, REDUCE, ACCEPT }
|
|
public class Action
|
|
{
|
|
public ActionType type;
|
|
public int stateOrProduction;
|
|
public Action(ActionType type, int stateOrProduction)
|
|
{
|
|
this.type = type; this.stateOrProduction = stateOrProduction;
|
|
}
|
|
public override int GetHashCode()
|
|
{
|
|
return type.GetHashCode() ^ stateOrProduction.GetHashCode();
|
|
}
|
|
public override bool Equals(object obj)
|
|
{
|
|
Action that = obj as Action;
|
|
if (that == null) return false;
|
|
return this.type == that.type && this.stateOrProduction == that.stateOrProduction;
|
|
}
|
|
|
|
public override string! ToString()
|
|
{
|
|
return type + " " + stateOrProduction;
|
|
}
|
|
}
|
|
|
|
protected class TokenType
|
|
{
|
|
public readonly int Type;
|
|
public readonly Regex! Regex;
|
|
public readonly Lexer Lexer;
|
|
public TokenType(int type, Regex regex, [Delayed] Lexer lexer)
|
|
{
|
|
this.Type = type; this.Regex = (!)regex; this.Lexer = lexer;
|
|
}
|
|
}
|
|
}
|
|
}
|