// singrdk/base/Libraries/ProtoLisp/Lexer.cs

///////////////////////////////////////////////////////////////////////////////
//
//  Microsoft Research Singularity
//
//  Copyright (c) Microsoft Corporation.  All rights reserved.
//
///////////////////////////////////////////////////////////////////////////////

using System;
using System.Text;
using System.IO;
using System.Diagnostics;

//
// The Lexer class tokenizes an input stream of text
// and can also read off complete Proto-Lisp expressions
// for evaluation.
//
// The lexer has a one-character lookahead buffer.
//
namespace ProtoLisp
{
    // Tokenizes a byte stream of Proto-Lisp source text and assembles the
    // tokens into complete expressions.
    //
    // Every byte read from the stream is treated as one character, and a
    // single character of lookahead is buffered between reads.
    class Lexer
    {
        // Creates a lexer that reads from the given stream.
        //
        // Throws ArgumentNullException if stream is null, so the failure
        // surfaces here rather than on the first read.
        public Lexer(Stream stream)
        {
            if (stream == null)
            {
                throw new ArgumentNullException("stream");
            }

            inputStream = stream;
            haveLookAhead = false;
        }

        // Looks at the next character in the input stream without
        // (conceptually) advancing it: a following call to PeekNextChar
        // or GetNextChar yields the same character.
        //
        // Returns true and stores the character in val when one is
        // available. Returns false and sets val to '\0' when the end of
        // the stream has been reached.
        private bool PeekNextChar(out char val)
        {
            if (!haveLookAhead)
            {
                int nextVal = inputStream.ReadByte();

                if (nextVal == -1)
                {
                    // Nothing more to read
                    val = '\0';
                    return false;
                }

                // Hacky cast: each byte is taken as one character, i.e.
                // the input is assumed to be 1-byte ASCII. Multi-byte
                // encodings are not decoded.
                lookAheadVal = (char)nextVal;
                haveLookAhead = true;
            }

            val = lookAheadVal;
            return true;
        }

        // Retrieves the next character and advances the stream.
        //
        // Returns true and stores the character in val on success.
        // Returns false and sets val to '\0' if the end of the stream
        // has already been reached.
        private bool GetNextChar(out char val)
        {
            // PeekNextChar fills the lookahead buffer (if needed) and
            // already sets val to '\0' when it fails.
            if (!PeekNextChar(out val))
            {
                return false;
            }

            Debug.Assert(haveLookAhead);

            // Emptying the lookahead buffer is what "consumes" the character.
            haveLookAhead = false;
            return true;
        }

        // Advances the stream without returning the current
        // character. Does nothing at the end of the stream.
        private void DiscardNextChar()
        {
            char dummyChar;
            GetNextChar(out dummyChar);
        }

        // Reads the next token from the stream.
        //
        // Leading whitespace is skipped. The characters '(', ')' and '\''
        // are each returned as a one-character token. Any other token is
        // the run of characters up to (not including) the next whitespace
        // character, '(' or ')', or the end of the stream.
        //
        // Returns null if the stream ends before any token character
        // is found.
        private string GetToken()
        {
            char peekChar;

            // Skip whitespace
            while (true)
            {
                if (!PeekNextChar(out peekChar))
                {
                    return null;
                }

                if (!Char.IsWhiteSpace(peekChar))
                {
                    break;
                }

                // Throw away the whitespace char
                DiscardNextChar();
            }

            // Special one-character tokens
            if ((peekChar == '(') || (peekChar == ')') || (peekChar == '\''))
            {
                // Consume and return this character
                DiscardNextChar();
                return new String(peekChar, 1);
            }

            // Accumulate in a StringBuilder rather than concatenating
            // strings, which would copy the whole token on every character.
            StringBuilder token = new StringBuilder();

            do
            {
                token.Append(peekChar);
                DiscardNextChar();

                if (!PeekNextChar(out peekChar))
                {
                    // Ran into the end of the stream
                    break;
                }

                // Note that '\'' only starts a token; a quote character
                // in the middle of a token does not terminate it.
            } while ((!Char.IsWhiteSpace(peekChar)) && (peekChar != '(') && (peekChar != ')'));

            return token.ToString();
        }

        // Reads one complete expression from the stream.
        //
        // - "'" followed by an expression yields the two-element list
        //   (quote <expression>).
        // - "(" starts a list that collects expressions until the
        //   matching ")" is read.
        // - Any other token yields a PLNumberAtom if the PLNumberAtom
        //   constructor accepts it, and a PLStringAtom otherwise.
        //
        // Returns null if the stream holds no further tokens.
        //
        // Throws Exception if the stream ends after a "'" or in the
        // middle of a list.
        public PLObject GetExpression()
        {
            string token = GetToken();

            if (token == null)
            {
                return null;
            }

            if (token.Equals("'"))
            {
                // The "'" character means "quote", and preserve
                // the next expression.
                PLList retval = new PLList();
                retval.Add(new PLStringAtom("quote"));

                PLObject nextExpr = GetExpression();

                if (nextExpr == null)
                {
                    throw new Exception("'quote' was not followed by a complete expression");
                }

                retval.Add(nextExpr);
                return retval;
            }

            if (!token.Equals("("))
            {
                // Not the beginning of a list; the expression is
                // simply this token. See if it's a number or a
                // plain string.
                try
                {
                    return new PLNumberAtom(token);
                }
                catch (Exception)
                {
                    // Any failure to construct a number means we just
                    // treat the token as a regular string.
                    return new PLStringAtom(token);
                }
            }
            else
            {
                // We have the beginning of a list, which can contain
                // any number of expressions. Keep building our list
                // until we encounter the closing paren.
                PLList retval = new PLList();

                PLObject nextExpr = GetExpression();

                while (true)
                {
                    // Check for end of input before every use of
                    // nextExpr, including the very first element, so
                    // that input ending right after "(" is reported as
                    // an incomplete expression instead of dereferencing
                    // null.
                    if (nextExpr == null)
                    {
                        throw new Exception("Incomplete expression (unbalanced parens?");
                    }

                    // A ")" token comes back from the recursive call as
                    // an atom like any other token.
                    // NOTE(review): this relies on that atom's Equals
                    // (defined elsewhere) comparing equal to the string
                    // ")" -- confirm against PLObject/PLStringAtom.
                    if (nextExpr.Equals(")"))
                    {
                        break;
                    }

                    retval.Add(nextExpr);

                    nextExpr = GetExpression();
                }

                return retval;
            }
        }

        // Source of input bytes.
        private readonly Stream inputStream;

        // True while lookAheadVal holds a character that has been read
        // from the stream but not yet consumed.
        private bool haveLookAhead;

        // The buffered lookahead character; meaningful only while
        // haveLookAhead is true.
        private char lookAheadVal;
    }
}