singrdk/base/Libraries/ProtoLisp/Lexer.cs

211 lines
5.9 KiB
C#

///////////////////////////////////////////////////////////////////////////////
//
// Microsoft Research Singularity
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
///////////////////////////////////////////////////////////////////////////////
using System;
using System.Text;
using System.IO;
using System.Diagnostics;
//
// The Lexer class tokenizes an input stream of text
// and can also read off complete Proto-Lisp expressions
// for evaluation.
//
// The lexer has a one-character lookahead buffer.
//
namespace ProtoLisp
{
class Lexer
{
    // Creates a lexer that reads its characters from the given stream.
    // No bytes are read until the first token or expression is requested.
    public Lexer(Stream stream)
    {
        inputStream = stream;
        haveLookAhead = false;
    }

    // Looks at the next character in the input stream without
    // (conceptually) advancing it: a following call to PeekNextChar
    // or GetNextChar yields the same character.
    //
    // Returns true and stores the character in val when one is
    // available. Returns false (with val set to '\0') when the end
    // of the stream has been reached.
    private bool PeekNextChar(out char val)
    {
        if (haveLookAhead)
        {
            // Already buffered by an earlier peek.
            val = lookAheadVal;
            return true;
        }

        int nextVal = inputStream.ReadByte();
        if (nextVal == -1)
        {
            // Nothing more to read
            val = '\0';
            return false;
        }

        // Each byte is cast directly to a char, so only single-byte
        // (ASCII-style) input is interpreted correctly.
        lookAheadVal = (char)nextVal;
        haveLookAhead = true;
        val = lookAheadVal;
        return true;
    }

    // Retrieves the next character and advances the stream past it.
    //
    // Returns true and stores the character in val on success.
    // Returns false (with val set to '\0') when the end of the
    // stream has already been reached.
    private bool GetNextChar(out char val)
    {
        char peeked;
        if (!PeekNextChar(out peeked))
        {
            val = '\0';
            return false;
        }

        // A successful peek always leaves the character buffered.
        Debug.Assert(haveLookAhead);
        val = lookAheadVal;
        haveLookAhead = false;
        return true;
    }

    // Advances the stream by one character without returning it.
    // Does nothing if the end of the stream has been reached.
    private void DiscardNextChar()
    {
        char ignored;
        GetNextChar(out ignored);
    }

    // Reads the next token from the stream.
    //
    // Leading whitespace is skipped. The characters '(', ')' and '\''
    // each form a one-character token when they start a token. Any
    // other token runs until whitespace, '(' , ')' or the end of the
    // stream; note that a '\'' in the middle of such a token does not
    // terminate it.
    //
    // Returns null when the stream ends before any token character
    // is found.
    private string GetToken()
    {
        char peekChar;

        // Skip whitespace
        if (!PeekNextChar(out peekChar))
        {
            return null;
        }

        while (Char.IsWhiteSpace(peekChar))
        {
            // Throw away the whitespace char
            DiscardNextChar();

            if (!PeekNextChar(out peekChar))
            {
                return null;
            }
        }

        // Special one-character tokens
        if ((peekChar == '(') || (peekChar == ')') || (peekChar == '\''))
        {
            // Consume and return this character
            DiscardNextChar();
            return new String(peekChar, 1);
        }

        // Accumulate in a StringBuilder rather than repeated string
        // concatenation, which would copy the token on every character.
        StringBuilder token = new StringBuilder();

        do
        {
            token.Append(peekChar);
            DiscardNextChar();

            if (!PeekNextChar(out peekChar))
            {
                // Ran into the end of the stream
                break;
            }
        } while ((!Char.IsWhiteSpace(peekChar)) && (peekChar != '(') && (peekChar != ')'));

        return token.ToString();
    }

    // Reads one complete expression from the stream.
    //
    // Returns:
    //   - null when the stream has no more tokens;
    //   - a two-element list (quote <expr>) when the next token is "'";
    //   - a list of sub-expressions when the next token is "(";
    //   - otherwise an atom built from the token: a PLNumberAtom if
    //     its constructor accepts the token, else a PLStringAtom.
    //     A ")" token read outside of a list takes this atom path too.
    //
    // Throws Exception when "'" is not followed by an expression, or
    // when the stream ends before a list's closing paren is found.
    public PLObject GetExpression()
    {
        string token = GetToken();

        if (token == null)
        {
            return null;
        }

        if (token.Equals("'"))
        {
            // The "'" character means "quote", and preserve
            // the next expression.
            PLList quoted = new PLList();
            quoted.Add(new PLStringAtom("quote"));

            PLObject quotedExpr = GetExpression();

            if (quotedExpr == null)
            {
                throw new Exception("'quote' was not followed by a complete expression");
            }

            quoted.Add(quotedExpr);
            return quoted;
        }

        if (!token.Equals("("))
        {
            // Not the beginning of a list; the expression is
            // simply this token. See if it's a number or a
            // plain string.
            try
            {
                return new PLNumberAtom(token);
            }
            catch (Exception)
            {
                // PLNumberAtom rejected the token;
                // just treat it as a regular string
                return new PLStringAtom(token);
            }
        }

        // We have the beginning of a list, which can contain
        // any number of expressions. Keep building our list
        // until we encounter the closing paren.
        PLList list = new PLList();

        while (true)
        {
            PLObject element = GetExpression();

            // Check for end-of-stream on EVERY element, including the
            // first one, so that input ending right after "(" reports
            // an incomplete expression instead of dereferencing null.
            if (element == null)
            {
                throw new Exception("Incomplete expression (unbalanced parens?");
            }

            // NOTE(review): detecting the closing paren relies on the
            // Equals implementation of the PL* types (defined outside
            // this file) matching the string ")" -- confirm.
            if (element.Equals(")"))
            {
                break;
            }

            list.Add(element);
        }

        return list;
    }

    // Source of input bytes.
    private readonly Stream inputStream;

    // One-character lookahead buffer: lookAheadVal is meaningful
    // only while haveLookAhead is true.
    private bool haveLookAhead;
    private char lookAheadVal;
}
}