singrdk/base/Applications/Shell/Parser.cs

243 lines
9.8 KiB
C#
Raw Permalink Normal View History

2008-11-17 18:29:00 -05:00
// ----------------------------------------------------------------------------
2008-03-05 09:52:00 -05:00
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
2008-11-17 18:29:00 -05:00
// ----------------------------------------------------------------------------
2008-03-05 09:52:00 -05:00
using System;
using System.Collections;
using System.Text.RegularExpressions;
using Microsoft.Contracts;
namespace Microsoft.Singularity.Applications
{
2008-11-17 18:29:00 -05:00
///
// This is a general lr parser driver.
// Parses the programming by first lexing it into a number
// of tokens for a token stack. Then parser then pops tokens off
// this stack, consults the parse tables and push the corresponding
// nonterminals and states on the operand stack. This is the same
// algorithm from the dragon book with some tweaks to support optional
// tokens (i.e. newlines).
//
2008-03-05 09:52:00 -05:00
abstract class Parser
{
private const int END_INPUT_MARKER_ID = 2;
protected abstract Action[,]! ActionTable { get; }
protected abstract State[,]! GotoTable { get; }
protected abstract Production[]! ProductionTable { get; }
protected abstract TokenType[]! TokenList { get; }
public class ParseException : Exception
{
public ParseException(Token! token, string! error)
: base("Parse error on line " + token.lineNumber + ", char " + token.charIndex + ": " + error) { }
}
public Object Parse(string! input)
{
Stack leftStack = new Stack();
leftStack.Push(new State(0));
ArrayList list = Lex(ref input);
list.Reverse(); //could return a stack out of lex instead
Stack rightStack = new Stack(list);
while (rightStack.Count != 0) {
State st = (State!)leftStack.Pop();
Token tok = (Token!)rightStack.Pop();
Action action = ActionTable[st.id, tok.id];
while (tok.optional && action == null && rightStack.Count != 0) {
tok = (Token!)rightStack.Pop();
action = ActionTable[st.id, tok.id];
}
if (action == null) {
if (tok.id == END_INPUT_MARKER_ID) {
throw new ParseException(tok, " unexpected EOF");
}
throw new ParseException(tok, "'" + tok.value + "' unexpected input");
}
if (action.type == ActionType.SHIFT) {
leftStack.Push(st);
leftStack.Push(tok);
leftStack.Push(new State(action.stateOrProduction));
2008-11-17 18:29:00 -05:00
}
else if (action.type == ActionType.REDUCE) {
2008-03-05 09:52:00 -05:00
Production production = (!)ProductionTable[action.stateOrProduction];
Object value = production.Reduction(leftStack);
StackElement topLeft = (StackElement)leftStack.Pop();
State previousState;
if (topLeft is State) {
previousState = (State)topLeft;
2008-11-17 18:29:00 -05:00
}
else {
2008-03-05 09:52:00 -05:00
leftStack.Push(topLeft);
2008-11-17 18:29:00 -05:00
previousState = st; // took an epsilon transition
2008-03-05 09:52:00 -05:00
}
State nextState = GotoTable[previousState.id, production.NonterminalType];
if (nextState == null) throw new Exception("missing state");
leftStack.Push(previousState);
leftStack.Push(new Nonterminal(production.NonterminalType, value));
leftStack.Push(nextState);
rightStack.Push(tok);
2008-11-17 18:29:00 -05:00
}
else if (action.type == ActionType.ACCEPT) {
2008-03-05 09:52:00 -05:00
break;
}
}
Nonterminal result = (Nonterminal!)leftStack.Pop();
return result.value;
}
private static int CountOccurrences(string! input, char c, out int last)
{
int count = 0;
last = -1;
for (int i = 0; i < input.Length; ++i) {
if (c == input[i]) {
++count;
last = i;
}
}
return count;
}
private static bool IsNewLine(char t) { return t == '\n'; }
public ArrayList! Lex(ref string! input)
{
ArrayList tokens = new ArrayList();
int lineNumber = 1;
int charIndex = 1;
outer:
while (input.Length != 0) {
foreach (TokenType! spec in TokenList) {
Match! match = (!)spec.Regex.Match(input);
if (match == null || !match.Success) continue;
input = input.Remove(0, ((!)match.Value).Length);
if (spec.Lexer != null) {
Token token = new Token(spec.Type, match.Value, lineNumber, charIndex);
spec.Lexer(token);
if (token.value != null)
tokens.Add(token);
2008-11-17 18:29:00 -05:00
}
else {
2008-03-05 09:52:00 -05:00
tokens.Add(new Token(spec.Type, match.Value, lineNumber, charIndex));
}
int lastIndex;
int occurrences = CountOccurrences(match.Value, '\n', out lastIndex);
if (occurrences > 0) {
lineNumber += occurrences;
charIndex = match.Value.Length - (lastIndex + 1);
2008-11-17 18:29:00 -05:00
}
else {
2008-03-05 09:52:00 -05:00
charIndex += match.Value.Length;
}
goto outer;
}
throw new Exception("unknown input: " + input);
}
tokens.Add(new Token(END_INPUT_MARKER_ID, null, lineNumber, charIndex));
return tokens;
}
public delegate Object Reducer(Stack! stack);
public delegate void Lexer(Token! tok);
public class Production
{
public readonly int NonterminalType;
public readonly Reducer Reduction;
public Production(int nonterminalType, [Delayed] Reducer reduction)
{
this.NonterminalType = nonterminalType;
this.Reduction = reduction;
}
}
public class StackElement
{
public readonly int id;
public Object value;
public StackElement(int id) { this.id = id; }
2008-11-17 18:29:00 -05:00
#if false
// Error: Bartok can not handle complicated override pattern of System.Object.ToString. [Specifically, it was overridden via a MethodImpl or replaced via interface method reimplementation, effectively merging two virtual slots. Then the original slot was overridden by Microsoft.Singularity.Applications.Parser.Token.ToString, effectively splitting the slots again. Bartok currently can not analyze the split.]
2008-03-05 09:52:00 -05:00
public override string! ToString()
{
return id.ToString();
}
2008-11-17 18:29:00 -05:00
#endif
2008-03-05 09:52:00 -05:00
}
public class State : StackElement
{
public State(int number) : base(number) { }
}
public class Token : StackElement
{
public readonly int lineNumber;
public readonly int charIndex;
public bool optional = false;
public Token(int type, Object value, int lineNumber, int charIndex)
: base(type)
{
this.value = value;
this.lineNumber = lineNumber;
this.charIndex = charIndex;
this.optional = optional;
}
2008-11-17 18:29:00 -05:00
#if false
// Error: Bartok can not handle complicated override pattern of System.Object.ToString. [Specifically, it was overridden via a MethodImpl or replaced via interface method reimplementation, effectively merging two virtual slots. Then the original slot was overridden by Microsoft.Singularity.Applications.Parser.Token.ToString, effectively splitting the slots again. Bartok currently can not analyze the split.]
2008-03-05 09:52:00 -05:00
public override string! ToString()
{
return value + ":" + id;
}
2008-11-17 18:29:00 -05:00
#endif
2008-03-05 09:52:00 -05:00
}
private class Nonterminal : StackElement
{
public Nonterminal(int type, Object value) : base(type) { this.value = value; }
}
public enum ActionType { SHIFT, REDUCE, ACCEPT }
public class Action
{
public ActionType type;
public int stateOrProduction;
public Action(ActionType type, int stateOrProduction)
{
this.type = type; this.stateOrProduction = stateOrProduction;
}
public override int GetHashCode()
{
return type.GetHashCode() ^ stateOrProduction.GetHashCode();
}
public override bool Equals(object obj)
{
Action that = obj as Action;
if (that == null) return false;
return this.type == that.type && this.stateOrProduction == that.stateOrProduction;
}
public override string! ToString()
{
return type + " " + stateOrProduction;
}
}
protected class TokenType
{
public readonly int Type;
public readonly Regex! Regex;
public readonly Lexer Lexer;
public TokenType(int type, Regex regex, [Delayed] Lexer lexer)
{
this.Type = type; this.Regex = (!)regex; this.Lexer = lexer;
}
}
}
}