singrdk/base/Windows/spg/Generator.cs

1134 lines
50 KiB
C#

// ----------------------------------------------------------------------------
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ----------------------------------------------------------------------------
using System;
using System.Collections;
using System.Text;
using System.IO;
using System.Reflection;
using System.Text.RegularExpressions;
namespace Microsoft.Singularity.Applications
{
class Generator
{
private String specFile;
private String outputFile;
private String className;
private String targetNamespace;
private const String fieldsSectionHeader = "[fields]";
private String fieldsSectionCode;
private static bool compileRegex = false;
private const String initSection = "[init]";
private const String tokensSection = "[tokens]";
private const String grammarSection = "[grammar]";
private const String EPSILON = "epsilon";
private const int EPSILON_ID = 1;
private Terminal epsilonTerm;
private const String END_MARKER = "$";
private const int END_MARKER_ID = 2;
private Terminal endMarkerTerm;
private ArrayList terminalList;
private Hashtable productionStrings;
private Hashtable grammarStringTable;
private Hashtable precedenceTable;
private Symbol[] grammarTable;
private int numTerminals = 0;
public Generator(String specFile,
String targetNamespace,
String className,
String outputFile)
{
this.specFile = specFile;
this.targetNamespace = targetNamespace;
this.className = className;
this.outputFile = outputFile;
terminalList = new System.Collections.ArrayList();
productionStrings = new Hashtable();
grammarStringTable = new Hashtable();
precedenceTable = new Hashtable();
}
public static void Main(String[] args)
{
if (args.Length < 4) {
Console.WriteLine("Usage: spg specfile targetNamespace targetClass outputFile");
return;
}
//ScriptEngine.Run(fileString, new ScriptEngine.CommandLineRunner(nothing));
Generator gen = new Generator(args[0], args[1], args[2], args[3]);
gen.Generate();
}
public static int nothing(String[] args, bool isBackground){ return 0; }
private static Regex comment = new Regex("#.*"); //will eat more than it should
public void Generate()
{
String actionTableCode, gotoTableCode, tokenTypeTableCode, lexerActionMethodsCode,
productionTable, productionActionMethods;
epsilonTerm = new Terminal(EPSILON, 1);
grammarStringTable[EPSILON] = epsilonTerm;
numTerminals++;
endMarkerTerm = new Terminal(END_MARKER, 2);
grammarStringTable[END_MARKER] = epsilonTerm;
numTerminals++;
ParseSpecification();
GenerateGrammarTable();
GenerateParseTables(out actionTableCode, out gotoTableCode, out tokenTypeTableCode, out lexerActionMethodsCode, out productionTable, out productionActionMethods);
DerivedParserClassTemplate template = GenerateParserClass(actionTableCode, gotoTableCode, tokenTypeTableCode, lexerActionMethodsCode, productionTable, productionActionMethods);
WriteClassFile(template);
}
private void ParseSpecification()
{
FileStream file = new FileStream(specFile, FileMode.Open, FileAccess.Read);
StreamReader fr = new StreamReader(file);
String fileString = fr.ReadToEnd();
StringReader sr = new StringReader(fileString);
String line;
bool parsedTokens = false, parsedGrammar = false;
while ((line = sr.ReadLine()) != null) {
line = line.Trim();
if (line.Length == 0) continue;
if (line[0] == '#') continue; //skip comments
switch (line) {
case fieldsSectionHeader:
ParseFieldsSection(sr);
break;
case initSection:
ParseInitSection(sr);
break;
case tokensSection:
ParseTokenSection(sr);
parsedTokens = true;
break;
case grammarSection:
ParseGrammarSection(sr);
parsedGrammar = true;
break;
default:
continue;
}
}
sr.Close();
file.Close();
if (!parsedTokens) {
throw new Exception("missing lexical spec");
}
else if (!parsedGrammar) {
throw new Exception("missing grammar spec");
}
}
private void GenerateGrammarTable()
{
grammarTable = new Symbol[Symbol.idCount];
grammarTable[1] = epsilonTerm;
grammarTable[2] = endMarkerTerm;
foreach (Terminal token in terminalList) {
grammarTable[token.id] = token;
}
foreach (System.Collections.DictionaryEntry entry in productionStrings) {
String name = entry.Key as String;
Nonterminal nonterm = grammarStringTable[name] as Nonterminal;
grammarTable[nonterm.id] = nonterm;
System.Collections.ArrayList prods = entry.Value as System.Collections.ArrayList;
int[][] rhss = new int[prods.Count][];
String[][] allBindings = new string[prods.Count][];
ProductionAction[] actions = new ProductionAction[prods.Count];
int i = 0;
foreach (ProductionString prod in prods) {
actions[i] = new ProductionAction(this, nonterm.id, i, prod.action);
ArrayList elems = new System.Collections.ArrayList();
ArrayList bindingsList = new ArrayList();
foreach (String elemAndBinding in prod.rhs.Split(new char[] { ' ' })) {
if (elemAndBinding == "") continue;
String[] split = elemAndBinding.Split(":".ToCharArray());
if (split[0] == EPSILON) continue;
Symbol sym = grammarStringTable[split[0]] as Symbol;
if (sym == null) throw new Exception("Undefined symbol: " + split[0]);
elems.Add(sym.id);
String binding = null;
if (split.Length > 1) {
binding = split[1];
}
bindingsList.Add(binding);
}
int[] rhs = new int[elems.Count];
for (int j = 0; j < rhs.Length; ++j) {
rhs[j] = (int)elems[j];
}
rhss[i] = rhs;
String[] bindings = new String[bindingsList.Count];
for (int j = 0; j < bindings.Length; ++j) {
bindings[j] = (String)bindingsList[j];
}
allBindings[i] = bindings;
++i;
}
nonterm.rhss = rhss;
nonterm.bindings = allBindings;
nonterm.actions = actions;
}
}
private void WriteClassFile(DerivedParserClassTemplate template)
{
String classText = template.Dump();
FileStream file = new FileStream(outputFile, FileMode.Create, FileAccess.Write);
StreamWriter sw = new StreamWriter(file);
sw.Write(classText);
sw.Close();
file.Close();
}
private DerivedParserClassTemplate GenerateParserClass(String actionTableCode, String gotoTableCode, String tokenTypeTableCode, String lexerActionMethodsCode, String productionTable, String productionActionMethods)
{
DerivedParserClassTemplate template = new DerivedParserClassTemplate(targetNamespace, className);
template.AddUsingCode("using System;\r\nusing System.Collections;\r\nusing System.Text;\r\nusing System.IO;\r\nusing System.Text.RegularExpressions;\r\nusing Microsoft.Contracts;\r\n");
template.AddFieldCode(fieldsSectionCode);
template.AddConstructorCode(actionTableCode + gotoTableCode + tokenTypeTableCode + productionTable);
template.AddMethodCode(lexerActionMethodsCode + productionActionMethods);
template.AddDefaultMembers();
return template;
}
private void GenerateParseTables(out String actionTableCode, out String gotoTableCode, out String tokenTypeTableCode, out String lexerActionMethodsCode, out String productionTable, out String productionActionMethods)
{
ISet setsOfItems = ComputeSetsOfItems();
computeFirst();
computeFollow();
String parserType = "Parser.";
ArrayList productions;
Parser.Action[][] actionTable = computeActionTable(setsOfItems, out productions);
actionTableCode = GetProductionActionTableCode(parserType, "actionTable", actionTable);
Parser.State[][] gotoTable = computeGotoTable(setsOfItems);
gotoTableCode = GetGotoTableCode(parserType, "gotoTable", gotoTable);
GetTokenTypeTableCode("tokenList", out tokenTypeTableCode, out lexerActionMethodsCode);
GetProductionListCode(parserType, productions, out productionTable, out productionActionMethods);
}
public class DerivedParserClassTemplate
{
private String nspace;
private String className;
StringBuilder usings;
StringBuilder derivations;
StringBuilder fields;
StringBuilder constructor;
StringBuilder methods;
public DerivedParserClassTemplate(String nspace, String className)
{
this.nspace = nspace; this.className = className;
usings = new StringBuilder();
derivations = new StringBuilder(",");
fields = new StringBuilder();
constructor = new StringBuilder();
methods = new StringBuilder();
}
public void AddDefaultMembers() {
fields.Append(" private Action[,]! actionTable;\n");
fields.Append(" private State[,]! gotoTable;\n");
fields.Append(" private Production[]! productionTable;\n");
fields.Append(" private TokenType[]! tokenList;\n");
methods.Append(" protected override Action[,]! ActionTable { get { return this.actionTable; } }\n");
methods.Append(" protected override State[,]! GotoTable { get { return this.gotoTable; } }\n");
methods.Append(" protected override Production[]! ProductionTable { get { return this.productionTable; } }\n");
methods.Append(" protected override TokenType[]! TokenList { get { return this.tokenList; } }\n");
}
public void AddDerivationCode(String code)
{
derivations.Append(code);
}
public void AddMethodCode(String code){
methods.Append(code);
}
public void AddFieldCode(String code)
{
fields.Append(code);
}
public void AddConstructorCode(String code)
{
constructor.Append(code);
}
public void AddUsingCode(String code)
{
usings.Append(code);
}
public String Dump()
{
StringBuilder dump = new StringBuilder();
dump.Append("//This file is automatically generated\r\n" + usings + "\r\n" +
"namespace " + nspace + "{\r\n"
+ "\r\n class " + className + " : Parser" + "\r\n" +
"{\r\n" +
fields +
// "[NotDelayed]\r\n" +
"public " + className + "(){\r\n " + constructor +
";\r\n" +
//"base();\r\n" +
"}\r\n" +
methods +
"\t}\r\n" +
"}");
return dump.ToString();
}
}
private void GetProductionListCode(String parserTypeString, ArrayList productions, out String productionTable, out String productionActionMethods)
{
parserTypeString = "";
StringBuilder array = new StringBuilder();
StringBuilder methods = new StringBuilder();
int i = 0;
array.Append(parserTypeString + "productionTable = new[Delayed] Production[]{");
foreach (ProductionAction action in productions) {
String methodName = "production_action_" + i;
StringBuilder method = new StringBuilder();
method.Append("private Object " + methodName + "(Stack! stack) {\r\n");
method.Append("\tObject value = null;\r\n");
String[] bindings = (grammarTable[action.nonterm] as Nonterminal).bindings[action.production];
for (int j = bindings.Length - 1; j >= 0; --j) {
if (bindings[j] != null) {
method.Append("\tObject " + bindings[j] + " = ( (StackElement!) stack.Pop()).value;");
}
else{
method.Append("\tstack.Pop(); //pop non binding element");
}
method.Append(j != 0 ? "\r\n\tstack.Pop(); //pop state\r\n" : "");
}
method.Append("\r\n\t\t//user code:\r\n" + action.action);
method.Append("\r\n\treturn value;");
method.Append("\r\n}\r\n");
methods.Append(method);
array.Append("new " + parserTypeString + "Production( " + action.nonterm + ", new " + parserTypeString + "Reducer(this." + methodName + ")),");
++i;
}
array.Remove(array.Length - 1, 1);
array.Append("};\r\n");
productionTable = array.ToString();
productionActionMethods = methods.ToString();
}
private String lexActionMethodPrefix = "LexActionMethod_";
private void GetTokenTypeTableCode(String tableName, out String tokenTableCode, out String lexerMethodsCode)
{
StringBuilder sb = new StringBuilder();
StringBuilder methods = new StringBuilder();
sb.Append(tableName + " = new[Delayed] TokenType[] \r\n\t\t{");
int i = 0;
int regExId = 0;
RegexCompilationInfo[] regexCompInfo = new RegexCompilationInfo[terminalList.Count];
foreach (Terminal term in terminalList) {
String methodName = lexActionMethodPrefix + i;
String id = term.id.ToString();
if (compileRegex) {
regexCompInfo[regExId] = new RegexCompilationInfo(term.regex, RegexOptions.None, "LexRegex" + regExId, targetNamespace, true);
}
String regex = compileRegex ? "(new LexRegex" + regExId++ + "())" : "new Regex(@" + term.regex + ")" ;
String lexer;
if (term.action != null && term.action != "") {
String method =
"\tprivate void " + methodName + "(Token! tok) { \r\n" + term.action + "\r\n\t}\r\n";
methods.Append(method);
lexer = "new Lexer(this." + methodName + ")";
++i;
}
else {
lexer = "null";
}
sb.Append("new TokenType(" + id + "," + regex + "," + lexer + " ),");
}
if (terminalList.Count != 0)
sb.Remove(sb.Length - 1, 1);
sb.Append("}\r\n;");
if (compileRegex) {
AssemblyName assembly = new AssemblyName();
assembly.Name = "LexerRegex";
Regex.CompileToAssembly(regexCompInfo, assembly);
}
tokenTableCode = sb.ToString();
lexerMethodsCode = methods.ToString();
}
private String GetGotoTableCode(String parserTypeString, String tableName, Parser.State[][] gotoTable)
{
StringBuilder sb = new StringBuilder();
sb.Append(tableName + " = new State[,]\r\n\t{\r\n");
foreach (Parser.State[] row in gotoTable) {
sb.Append("\r\n\t\t{");
foreach (Parser.State state in row) {
if (state == null) {
sb.Append("null,");
continue;
}
sb.Append("new " + parserTypeString + "State(" + state.id.ToString() + "),");
}
if (row.Length != 0)
sb.Remove(sb.Length - 1, 1);
sb.Append("},");
}
if (gotoTable.Length != 0)
sb.Remove(sb.Length - 1, 1);
sb.Append("\r\n\t};\r\n");
return sb.ToString();
}
private Parser.State[][] computeGotoTable(ISet setsOfItems)
{
Parser.State[][] gotoTable = new Parser.State[setsOfItems.Count][];
Hashtable/*ISet -> int*/ stateTable = new Hashtable();
int i = 0;
foreach (ISet set in setsOfItems) {
gotoTable[i] = new Parser.State[Symbol.idCount];
stateTable[set] = i++;
}
foreach (ISet items in setsOfItems) {
foreach (Symbol symbol in grammarTable) {
if (symbol is Terminal) {
gotoTable[(int)stateTable[items]][symbol.id] = null;
continue;
}
ISet gotoSet = computeGoto(items, symbol);
if (gotoSet.Count == 0) continue;
Object gotoState = stateTable[gotoSet];
if (gotoState == null) throw new Exception("missing state"); ;
gotoTable[(int)stateTable[items]][symbol.id] = new Parser.State((int)gotoState);
}
}
return gotoTable;
}
private String GetProductionActionTableCode(String parserTypeString, String tableName, Parser.Action[][] actionTable)
{
StringBuilder sb = new StringBuilder();
sb.Append(tableName);
sb.Append(" = new Parser.Action[,]\r\n\t{\r\n");
foreach (Parser.Action[] row in actionTable) {
sb.Append("\r\n\t\t{");
foreach (Parser.Action action in row) {
if (action == null) {
sb.Append("null,");
continue;
}
sb.Append("new " + parserTypeString + "Action(");
String actionType = "";
switch (action.type) {
case Parser.ActionType.ACCEPT:
actionType = parserTypeString + "ActionType.ACCEPT";
break;
case Parser.ActionType.REDUCE:
actionType = parserTypeString + "ActionType.REDUCE";
break;
case Parser.ActionType.SHIFT:
actionType = parserTypeString + "ActionType.SHIFT";
break;
default:
throw new Exception("missed action type");
}
sb.Append(actionType + "," + action.stateOrProduction.ToString() + "),");
}
if (row.Length != 0)
sb.Remove(sb.Length - 1, 1);
sb.Append("},");
}
if (actionTable.Length != 0)
sb.Remove(sb.Length - 1, 1);
sb.Append("\r\n\t};\r\n");
return sb.ToString();
}
private Parser.Action[][] computeActionTable(ISet setsOfItems, out ArrayList productions)
{
//TODO: could be reduced since we only consult on terminals
productions = new ArrayList();
Parser.Action[][] actionTable = new Parser.Action[setsOfItems.Count][];
Hashtable/*ISet -> int*/ stateTable = new Hashtable();
Object[][] productionCacheTable = new Object[ Symbol.idCount][];
int i = 0;
foreach (ISet set in setsOfItems) {
actionTable[i] = new Parser.Action[Symbol.idCount];
stateTable[set] = i++;
}
foreach (ISet items in setsOfItems) {
foreach (Item item in items) {
Nonterminal nonterm = grammarTable[item.nonterm] as Nonterminal;
int[] production = nonterm.rhss[item.production];
if (item.position == production.Length) {
if (item.nonterm == 0) {
Object obj = stateTable[items];
if (obj == null) throw new Exception("Missing state:" + items);
if (actionTable[(int)obj][END_MARKER_ID] != null) throw new Exception("not slr");
actionTable[(int)obj][END_MARKER_ID] = new Parser.Action(Parser.ActionType.ACCEPT, 0);
continue;
}
Object[] nontermProds = productionCacheTable[item.nonterm];
if (nontermProds == null) {
productionCacheTable[item.nonterm] = nontermProds = new Object[nonterm.rhss.Length];
}
Object prodIndexObject = nontermProds[item.production];
foreach (int followSym in followTable[item.nonterm] as HashSet) {
if (prodIndexObject == null) {
nontermProds[item.production] = prodIndexObject = productions.Add(nonterm.actions[item.production]);
}
int prodIndex = (int) prodIndexObject;
Object obj = stateTable[items];
if (obj == null) throw new Exception("Missing state:" + items);
Parser.Action newAction = new Parser.Action(Parser.ActionType.REDUCE, prodIndex);
Parser.Action currentAction = actionTable[(Int32)obj][followSym];
if (currentAction != null && !currentAction.Equals(newAction)) {
newAction = ResolveAction(currentAction, newAction, (Terminal) grammarTable[followSym], items, productions);
}
actionTable[(Int32)obj][followSym] = newAction;
}
}
else {
foreach (Symbol symbol in grammarTable) {
if (symbol is Nonterminal) continue;
ISet gotoSet = computeGoto(items, symbol);
Object nextState = (Object)stateTable[gotoSet];
if (nextState == null) continue;
Parser.Action newAction = new Parser.Action(Parser.ActionType.SHIFT, (Int32)nextState);
Parser.Action currentAction = actionTable[(Int32)stateTable[items]][symbol.id];
if (currentAction != null && !currentAction.Equals(newAction)) {
newAction = ResolveAction(currentAction, newAction, (Terminal) symbol ,items, productions);
}
actionTable[(Int32)stateTable[items]][symbol.id] = newAction;
}
}
}
}
return actionTable;
}
private Parser.Action ResolveAction(Parser.Action a1, Parser.Action a2, Terminal symbol, ISet items, ArrayList productions)
{
Parser.Action reducer;
Parser.Action shifter;
if (a1.type == Parser.ActionType.REDUCE) {
reducer = a1; shifter = a2;
}
else {
reducer = a2; shifter = a1;
}
ProductionAction reduction = productions[reducer.stateOrProduction] as ProductionAction;
int[] reductionSymbols = (grammarTable[reduction.nonterm] as Nonterminal).rhss[reduction.production];
int j;
for (j = reductionSymbols.Length - 1; j >= 0; --j) {
if (grammarTable[reductionSymbols[j]] is Terminal) break;
}
if (j == -1) throw new Exception("non terminal in reduction");
int reductionTerminal = reductionSymbols[j];
int precedence;
PrecedenceRelation precedenceRelation = precedenceTable[new HashSet(new int[] { symbol.id, reductionTerminal })] as PrecedenceRelation;
if (precedenceRelation == null) {
//Console.Write("no precedence relation between " + grammarTable[symbol.id].name + " and " + grammarTable[reductionTerminal].name);
//Console.WriteLine(" defaulting to equivalent precedence");
precedence = PrecedenceRelation.EQUAL;
}
else {
precedence = precedenceRelation.Precedence(symbol.id);
}
Parser.Action action;
switch (precedence) {
case PrecedenceRelation.EQUAL:
action = reducer; //left associative by default?
break;
case PrecedenceRelation.GREATER:
action = shifter;
break;
case PrecedenceRelation.LESS:
action = reducer;
break;
default:
throw new Exception("missing precedence type");
}
return action;
}
private Parser.Action PromptAction(Parser.Action a1, Parser.Action a2, Terminal symbol, ISet items, ArrayList productions)
{
Console.WriteLine("In State: ");
foreach (Item item in items) {
Console.WriteLine(item.ToString());
}
Console.WriteLine("On symbol: " + symbol.name);
Console.WriteLine("1) " + ActionString(a1, productions));
Console.WriteLine("2) " + ActionString(a2, productions));
Console.Write("Selection: ");
int selection;
do {
selection = Convert.ToInt32(Console.ReadLine());
} while (selection != 1 && selection != 2);
return selection == 1 ? a1 : a2;
}
private string ActionString(Parser.Action action, ArrayList productions)
{
switch (action.type) {
case Parser.ActionType.ACCEPT:
return "ACCEPT";
case Parser.ActionType.REDUCE:
return "REDUCE by " + productions[action.stateOrProduction];
case Parser.ActionType.SHIFT:
return "SHIFT";
default:
throw new Exception("missing action type");
}
}
private class Item
{
public int nonterm;
public int production;
public int position;
public Generator gen;
public Item(Generator gen, int nonterm, int production, int position)
{
this.gen = gen; this.nonterm = nonterm; this.production = production; this.position = position;
}
public override int GetHashCode()
{
return nonterm.GetHashCode() ^ production.GetHashCode() ^ position.GetHashCode();
}
public override bool Equals(object obj)
{
if (!(obj is Item)) return false;
Item that = obj as Item;
return this.nonterm == that.nonterm && this.production == that.production && this.position == that.position;
}
public override String ToString()
{
StringBuilder sb = new StringBuilder();
Nonterminal nonterm = gen.grammarTable[this.nonterm] as Nonterminal;
for (int i = 0; i < nonterm.rhss[production].Length; ++i) {
String spacer = i == this.position ? " . " : " ";
sb.Append(spacer + gen.grammarTable[nonterm.rhss[production][i]].ToString());
}
if (this.position == nonterm.rhss[production].Length) sb.Append(" . ");
return gen.grammarTable[this.nonterm] + "=>" + sb.ToString();
}
}
private ISet doClosure(ISet items)
{
Queue toProcess = new Queue(items);
ISet newItems = new HashSet(items);
while (toProcess.Count != 0) {
Item item = toProcess.Dequeue() as Item;
Nonterminal nonterm = grammarTable[item.nonterm] as Nonterminal;
if (item.position == nonterm.rhss[item.production].Length) continue;
Symbol nextSymbol = grammarTable[nonterm.rhss[item.production][item.position]];
if (nextSymbol is Nonterminal) {
for (int j = 0; j < ((Nonterminal)nextSymbol).rhss.Length; ++j) {
Item newItem = new Item(this, nextSymbol.id, j, 0);
if (!newItems.Contains(newItem)) {
toProcess.Enqueue(newItem);
newItems.Add(newItem);
}
}
}
}
return newItems;
}
private ISet computeGoto(ISet items, Symbol symbol)
{
ISet gotoSet = new HashSet();
foreach (Item item in items) {
Nonterminal nonterm = grammarTable[item.nonterm] as Nonterminal;
if (item.position == nonterm.rhss[item.production].Length) continue;
if (nonterm.rhss[item.production][item.position] != symbol.id) continue;
Item nextItem = new Item(this, item.nonterm, item.production, item.position + 1);
gotoSet.Add(nextItem);
}
gotoSet = doClosure(gotoSet);
return gotoSet;
}
// these need to be ordered by creation so set 0 is the initial state
private ISet ComputeSetsOfItems()
{
ISet sets = new HashSet();
ISet rootSet = new HashSet();
rootSet.Add(new Item(this, 0, 0, 0));
rootSet = doClosure(rootSet);
sets.Add(rootSet);
Queue toProcess = new Queue(sets);
while (toProcess.Count != 0) {
ISet set = toProcess.Dequeue() as ISet;
foreach (Symbol symbol in grammarTable) {
ISet gotoSet = computeGoto(set, symbol);
if (gotoSet.Count == 0)
continue;
if (sets.Contains(gotoSet)) continue;
toProcess.Enqueue(gotoSet);
sets.Add(gotoSet);
}
}
return sets;
}
ISet[] firstTable;
public void computeFirst()
{
firstTable = new ISet[Symbol.idCount];
bool changed = true;
while (changed) {
changed = false;
foreach (Symbol symbol in grammarTable) {
if (symbol is Terminal) {
ISet first = firstTable[symbol.id];
if (first == null) {
first = new HashSet();
changed = true;
}
first.Add(symbol.id);
firstTable[symbol.id] = first;
}
else if (symbol is Nonterminal) {
Nonterminal nonterm = symbol as Nonterminal;
foreach (int[] rhs in nonterm.rhss) {
ISet newFirst = firstTable[nonterm.id];
if (newFirst == null) {
firstTable[nonterm.id] = newFirst = new HashSet();
}
HashSet oldFirst = new HashSet(newFirst);
if (rhs.Length == 0) { // epsilon production
newFirst.Add(EPSILON_ID);
}
else {
bool epsilonPrior = true;
for (int i = 0; i < rhs.Length && epsilonPrior; ++i) {
Symbol sym = grammarTable[rhs[i]];
if (sym is Terminal) {
newFirst.Add(rhs[i]);
if (sym.id != EPSILON_ID)
epsilonPrior = false;
}
else if (sym is Nonterminal) {
ISet moreFirst = firstTable[sym.id];
if (moreFirst == null) {
moreFirst = new HashSet();
firstTable[sym.id] = moreFirst;
break;
}
newFirst.AddAll(moreFirst);
if (!moreFirst.Contains(EPSILON_ID))
epsilonPrior = false;
}
}
}
changed |= !oldFirst.Equals(newFirst);
}
}
}
}
}
private ISet[] followTable;
public void computeFollow()
{
followTable = new ISet[Symbol.idCount];
followTable[0] = new HashSet();
followTable[0].Add(END_MARKER_ID);
bool changed = false;
do {
changed = false;
foreach (Symbol symbol in grammarTable) {
if (!(symbol is Nonterminal)) continue;
Nonterminal nonterm = symbol as Nonterminal;
changed |= ComputeFollowForNonterminal(changed, nonterm);
}
} while (changed);
}
private bool ComputeFollowForNonterminal(bool changed, Nonterminal nonterm)
{
foreach (int[] rhs in nonterm.rhss) {
changed |= ComputeFollowForProduction(changed, nonterm, rhs);
}
return changed;
}
private bool ComputeFollowForProduction(bool changed, Nonterminal nonterm, int[] rhs)
{
ISet endFirst = new HashSet();
for (int i = rhs.Length - 1; i >= 0; --i) {
Symbol sym = grammarTable[rhs[i]];
if (sym is Nonterminal) {
ISet follow = followTable[sym.id];
if (follow == null) {
followTable[sym.id] = follow = new HashSet();
}
ISet oldFollow = new HashSet(follow);
follow.AddAll(endFirst);
follow.Remove(EPSILON_ID);
if (i == rhs.Length - 1 || endFirst.Contains(EPSILON_ID)) {
ISet prodFollow = followTable[nonterm.id];
if (prodFollow == null) {
followTable[nonterm.id] = prodFollow = new HashSet();
}
follow.AddAll(prodFollow);
}
if (!oldFollow.Equals(follow)) changed = true;
if (!firstTable[sym.id].Contains(EPSILON_ID))
endFirst = new HashSet();
endFirst.AddAll(firstTable[sym.id]);
}
else {
endFirst = new HashSet();
endFirst.Add(sym.id);
}
}
return changed;
}
public void ParseFieldsSection(TextReader sr)
{
fieldsSectionCode = ParseSection(sr);
}
public String ParseInitSection(TextReader sr)
{
return ParseSection(sr);
}
private const String tokenAssign = ":=";
class TokenSpec
{
public Terminal token;
public Regex regex;
public TokenSpec(Terminal token, Regex regex)
{
this.token = token; this.regex = regex;
}
}
private class PrecedenceRelation
{
public const int LESS = -1, EQUAL = 0, GREATER = 1;
public int type;
public int leftTerminal;
public int rightTerminal;
public PrecedenceRelation(int type, int leftTerminal, int rightTerminal)
{
this.type = type; this.leftTerminal = leftTerminal; this.rightTerminal = rightTerminal;
}
public override int GetHashCode()
{
return type.GetHashCode() ^ leftTerminal.GetHashCode() ^ rightTerminal.GetHashCode();
}
public override bool Equals(object obj)
{
if (!(obj is PrecedenceRelation)) return false;
PrecedenceRelation that = obj as PrecedenceRelation;
return this.type == that.type && this.leftTerminal == that.leftTerminal && this.rightTerminal == that.rightTerminal;
}
//assumes that you that right side of the caller and this instance match
public int Precedence(int leftSide)
{
if (this.leftTerminal == leftSide) return this.type;
else {
switch (this.type) {
case LESS:
return GREATER;
case EQUAL:
return EQUAL;
case GREATER:
return LESS;
default:
throw new Exception("missed type");
}
}
}
}
public void ParseTokenSection(TextReader sr)
{
String line = sr.ReadLine();
if (line == null || line.Trim() != "{") throw new Exception("missing left brace");
while ((line = sr.ReadLine()) != null) {
line = line.Trim();
if (line == "") continue;
if (line == "}") {
return;
}
int index = line.IndexOf(tokenAssign);
if (index == -1) {
int precedence = PrecedenceRelation.GREATER;
index = line.IndexOf(">");
if (index == -1) {
precedence = PrecedenceRelation.LESS;
index = line.IndexOf("<");
if (index == -1) {
precedence = PrecedenceRelation.EQUAL;
index = line.IndexOf("=");
if (index == -1) throw new Exception("invalid token line");
}
}
String[] leftTerminalNames = whitespace.Split(line.Substring(0, index));
String[] rightTerminalNames = whitespace.Split(line.Substring(index + 1));
Symbol[] rightTerminalSymbols = new Symbol[rightTerminalNames.Length];
for (int i = 0; i < rightTerminalSymbols.Length; ++i) {
if (rightTerminalNames[i] == "") continue;
Symbol rightSymbol = grammarStringTable[rightTerminalNames[i]] as Terminal;
if (rightSymbol == null || !(rightSymbol is Terminal))
throw new Exception("Symbol is either not defined for non a terminal:" + rightTerminalNames[i]);
rightTerminalSymbols[i] = rightSymbol;
}
foreach (String leftTerminalName in leftTerminalNames) {
if (leftTerminalName == "") continue;
Symbol leftSymbol = grammarStringTable[leftTerminalName] as Symbol;
if (leftSymbol == null || !(leftSymbol is Terminal))
throw new Exception("Symbol is either not defined for non a terminal:" + leftTerminalName);
foreach (Symbol rightSymbol in rightTerminalSymbols) {
if (rightSymbol == null) continue;
precedenceTable[new HashSet(new int[] { leftSymbol.id, rightSymbol.id })] = new PrecedenceRelation(precedence, leftSymbol.id, rightSymbol.id);
}
}
continue;
}
String regex = line.Substring(index + tokenAssign.Length).Trim();
String action = ParseSection(sr);
Terminal token = new Terminal(line.Substring(0, index).Trim(), regex, action);
grammarStringTable[token.name] = token;
numTerminals++;
terminalList.Add(token);
}
}
private const String startSymbol = "S";
private const String productionSym = "=>";
public void ParseGrammarSection(TextReader sr)
{
String line = sr.ReadLine();
if (line == null || line.Trim() != "{") throw new Exception("missing left brace");
bool foundStart = false;
while ((line = sr.ReadLine()) != null) {
line = line.Trim();
if (line == "") continue;
if (line == "}") {
if (!foundStart) {
throw new Exception("No start symbol S");
}
return;
}
int index = line.IndexOf(productionSym);
if (index == -1) throw new Exception("bad grammar format");
String name = line.Substring(0, index).Trim();
if (!foundStart && name == startSymbol) {
Nonterminal augment = new Nonterminal(startSymbol + "'", 0);
grammarStringTable[augment.name] = augment;
System.Collections.ArrayList augProdList = new System.Collections.ArrayList();
productionStrings[augment.name] = augProdList;
augProdList.Add(new ProductionString(augment, startSymbol, ""));
foundStart = true;
}
String rhs = line.Substring(index + productionSym.Length).Trim();
String action = ParseSection(sr);
System.Collections.ArrayList prodList = productionStrings[name] as System.Collections.ArrayList;
if (prodList == null) {
productionStrings[name] = prodList = new System.Collections.ArrayList();
}
Nonterminal nonterm = grammarStringTable[name] as Nonterminal;
if (nonterm == null) {
grammarStringTable[name] = nonterm = new Nonterminal(name);
}
prodList.Add(new ProductionString(nonterm, rhs, action));
}
throw new Exception("missing right brace");
}
private static Regex whitespace = new Regex("\\s");
public static String ParseSection(TextReader sr)
{
StringBuilder section = new StringBuilder();
while (true) {
int peek = sr.Peek();
if (peek == -1) return null;
char cpeek = (char)peek;
if (whitespace.IsMatch(((char)peek).ToString())) {
sr.Read();
}
else if (peek != '{') {
return null;
}
else
break;
}
sr.Read();
//if(line == null || line.Trim() != "{") throw new Exception();
int numBrackets = 1;
int last = -1;
bool inString = false;
bool inQuotes = false;
int curr;
while ((curr = sr.Read()) != -1) {
switch (curr) {
case '{':
if (!inString && !inQuotes && last != '\\') numBrackets++;
break;
case '}':
if (!inString && !inQuotes && last != '\\') numBrackets--;
break;
case '"':
if (!inString && last != '\\') {
inQuotes = !inQuotes;
}
break;
case '\'':
if (!inQuotes && last != '\\')
inString = !inString;
break;
}
if (numBrackets == 0) break;
section.Append((char)curr);
last = curr;
}
if (numBrackets > 0) throw new Exception("missing bracket");
return section.ToString();
}
private class Symbol
{
public static int idCount = 3;
public int id;
public String name;
protected Symbol(String name)
{
id = idCount++;
this.name = name;
}
protected Symbol(String name, int id)
{
this.name = name;
this.id = id;
}
public override int GetHashCode()
{
return name.GetHashCode();
}
public override bool Equals(object obj)
{
if (!(obj is Symbol)) {
return false;
}
return name.Equals(((Symbol)obj).name);
}
public override string ToString()
{
return name + ":" + id;
}
}
private class Nonterminal : Symbol
{
public int[][] rhss;
public String[][] bindings;
public ProductionAction[] actions;
public Nonterminal(String name) : base(name) { }
public Nonterminal(String name, int id) : base(name, id) { }
}
private class Terminal : Symbol
{
public String regex;
public String action;
public Terminal(String name, String regex, String action) : base(name) { this.regex = regex; this.action = action; }
public Terminal(String name, int id) : base(name, id) { }
}
private class ProductionAction
{
public String action;
public int nonterm;
public int production;
public Generator gen;
public ProductionAction(Generator gen, int nonterm, int production, String action)
{
this.gen = gen;
this.nonterm = nonterm;
this.production = production;
this.action = action;
}
public override String ToString()
{
StringBuilder sb = new StringBuilder(gen.grammarTable[nonterm].name + " => ");
foreach (int sym in (gen.grammarTable[nonterm] as Nonterminal).rhss[production]) {
sb.Append(gen.grammarTable[sym].name + " ");
}
return sb.ToString();
}
}
private class ProductionString
{
Nonterminal nonterm;
public String rhs;
public String action;
public ProductionString(Nonterminal nonterm, String rhs, String action)
{
this.nonterm = nonterm;
this.rhs = rhs;
this.action = action;
}
}
}
}