singrdk/base/Libraries/ProtoLisp/Lexer.cs

///////////////////////////////////////////////////////////////////////////////
//
//  Microsoft Research Singularity
//
//  Copyright (c) Microsoft Corporation.  All rights reserved.
//
///////////////////////////////////////////////////////////////////////////////

using System;
using System.Text;
using System.IO;
using System.Diagnostics;

//
// The Lexer class tokenizes an input stream of text
// and can also read off complete Proto-Lisp expressions
// for evaluation.
//
// The lexer has a one-character lookahead buffer.
//
namespace ProtoLisp
{
    //
    // Tokenizer and expression reader for Proto-Lisp.
    //
    // Bytes are pulled from the underlying stream one at a time and
    // treated as single-byte characters. A one-character lookahead
    // buffer lets callers inspect the next character without
    // consuming it.
    //
    class Lexer
    {
        // Constructor. The lexer reads from 'stream' lazily; nothing
        // is read until the first token is requested.
        public Lexer(Stream stream)
        {
            inputStream = stream;
            haveLookAhead = false;
        }

        // Looks at the next character in the input stream without
        // (conceptually) advancing it: a following PeekNextChar or
        // GetNextChar call yields the same character.
        //
        // Returns true and sets 'val' to the character when one is
        // available. Returns false (with 'val' set to '\0') when the
        // end of the stream has been reached.
        private bool PeekNextChar(out char val)
        {
            if (!haveLookAhead)
            {
                int nextVal = inputStream.ReadByte();

                if (nextVal == -1)
                {
                    // Nothing more to read
                    val = '\0';
                    return false;
                }

                // Hacky cast: just assume everything is 1-byte ASCII
                lookAheadVal = (char)nextVal;
                haveLookAhead = true;
            }

            val = lookAheadVal;
            return true;
        }

        // Retrieves the next character and advances the stream.
        //
        // Returns true and sets 'val' to the character when one is
        // available. Returns false (with 'val' set to '\0') when the
        // end of the stream has already been reached.
        private bool GetNextChar(out char val)
        {
            // PeekNextChar fills the lookahead buffer if necessary and
            // already sets 'val' to '\0' on end of stream.
            if (!PeekNextChar(out val))
            {
                return false;
            }

            Debug.Assert(haveLookAhead);

            // Emptying the buffer is what "consumes" the character.
            haveLookAhead = false;
            return true;
        }

        // Advances the stream without returning the current
        // character. Does nothing at the end of the stream.
        private void DiscardNextChar()
        {
            char dummyChar;
            GetNextChar(out dummyChar);
        }

        // Gets the next token, skipping any leading whitespace.
        //
        // "(", ")" and "'" are returned as one-character tokens when
        // they start a token. Any other token extends until the next
        // whitespace character, "(", ")" or the end of the stream
        // (note that "'" does not terminate a token in progress).
        //
        // Returns null if the stream ends before a token starts.
        private string GetToken()
        {
            char peekChar;

            // Skip whitespace
            if (!PeekNextChar(out peekChar))
            {
                return null;
            }

            while (Char.IsWhiteSpace(peekChar))
            {
                // Throw away the whitespace char
                DiscardNextChar();
                if (!PeekNextChar(out peekChar))
                {
                    return null;
                }
            }

            // Special one-character tokens
            if ((peekChar == '(') || (peekChar == ')') || (peekChar == '\''))
            {
                // Consume and return this character
                DiscardNextChar();
                return new String(peekChar, 1);
            }

            // Accumulate in a StringBuilder so that long tokens do not
            // cost a fresh string allocation per character.
            StringBuilder token = new StringBuilder();

            do
            {
                token.Append(peekChar);
                DiscardNextChar();

                if (!PeekNextChar(out peekChar))
                {
                    // Ran into the end of the stream
                    break;
                }
            } while ((!Char.IsWhiteSpace(peekChar)) && (peekChar != '(') && (peekChar != ')'));

            return token.ToString();
        }

        // Reads one complete expression from the stream.
        //
        // Returns:
        //   - null if the stream is exhausted before any token is found;
        //   - a two-element list (quote <expr>) for "'<expr>";
        //   - a PLList holding the nested expressions for "( ... )";
        //   - otherwise a PLNumberAtom if the PLNumberAtom constructor
        //     accepts the token, or a PLStringAtom if it throws.
        //
        // Throws Exception if a quote is not followed by an expression,
        // or if the stream ends before a list's closing paren.
        public PLObject GetExpression()
        {
            string token = GetToken();

            if (token == null)
            {
                return null;
            }

            if (token.Equals("'"))
            {
                // The "'" character means "quote", and preserve
                // the next expression.
                PLList quoted = new PLList();
                quoted.Add(new PLStringAtom("quote"));

                PLObject quotedExpr = GetExpression();

                if (quotedExpr == null)
                {
                    throw new Exception("'quote' was not followed by a complete expression");
                }

                quoted.Add(quotedExpr);
                return quoted;
            }

            if (!token.Equals("("))
            {
                // Not the beginning of a list; the expression is
                // simply this token. See if it's a number or a
                // plain string.
                try
                {
                    return new PLNumberAtom(token);
                }
                catch (Exception)
                {
                    // Just treat it as a regular string
                    return new PLStringAtom(token);
                }
            }

            // We have the beginning of a list, which can contain
            // any number of expressions. Keep building our list
            // until we encounter the closing paren.
            PLList list = new PLList();

            while (true)
            {
                PLObject nextExpr = GetExpression();

                // Check for end of stream on EVERY element, including
                // the first one, so that input ending right after "("
                // reports an incomplete expression instead of failing
                // with a NullReferenceException on the Equals call.
                if (nextExpr == null)
                {
                    throw new Exception("Incomplete expression (unbalanced parens?");
                }

                // A ")" token comes back from the recursive call as an
                // atom. NOTE(review): this relies on PLObject.Equals
                // accepting a string to compare against - confirm
                // against the PLObject/PLStringAtom implementation.
                if (nextExpr.Equals(")"))
                {
                    break;
                }

                list.Add(nextExpr);
            }

            return list;
        }

        // Source of input bytes.
        private readonly Stream inputStream;

        // True when lookAheadVal holds a character that has been read
        // from the stream but not yet consumed.
        private bool haveLookAhead;

        // The buffered lookahead character; only meaningful while
        // haveLookAhead is true.
        private char lookAheadVal;
    }
}
RDK 1.1 2008-03-05 09:52:00 -05:00			`///////////////////////////////////////////////////////////////////////////////`
			`//`
			`// Microsoft Research Singularity`
			`//`
			`// Copyright (c) Microsoft Corporation. All rights reserved.`
			`//`
			`///////////////////////////////////////////////////////////////////////////////`

			`using System;`
			`using System.Text;`
			`using System.IO;`
			`using System.Diagnostics;`

			`//`
			`// The Lexer class tokenizes an input stream of text`
			`// and can also read off complete Proto-Lisp expressions`
			`// for evaluation.`
			`//`
			`// The lexer has a one-character lookahead buffer.`
			`//`
			`namespace ProtoLisp`
			`{`
			`class Lexer`
			`{`
			`// Constructor`
			`public Lexer(Stream stream)`
			`{`
			`inputStream = stream;`
			`haveLookAhead = false;`
			`}`

			`// Returns the next character in the input stream.`
			`// The stream is not (conceptually) advanced.`
			`// Calling PeekNextChar or GetNextChar will`
			`// return the same character.`
			`//`
			`// Returns null if the end of the stream has been`
			`// reached.`
			`private bool PeekNextChar(out char val)`
			`{`
			`if (haveLookAhead)`
			`{`
			`val = lookAheadVal;`
			`return true;`
			`}`
			`else`
			`{`
			`int nextVal = inputStream.ReadByte();`

			`if (nextVal == -1)`
			`{`
			`// Nothing more to read`
			`val = '\0';`
			`return false;`
			`}`
			`else`
			`{`
			`haveLookAhead = true;`
			`// Hacky cast: just assume everything is 1-byte ASCII`
			`lookAheadVal = (char)nextVal;`
			`val = lookAheadVal;`
			`return true;`
			`}`
			`}`
			`}`

			`// Retrieves the next character, and advances the stream.`
			`// Returns null if the end has already been reached.`
			`private bool GetNextChar(out char val)`
			`{`
			`char dummy;`

			`if (!PeekNextChar(out dummy))`
			`{`
			`val = '\0';`
			`return false;`
			`}`

			`Debug.Assert(haveLookAhead);`
			`val = lookAheadVal;`
			`haveLookAhead = false;`
			`return true;`
			`}`

			`// Advances the stream without returning the current`
			`// character.`
			`private void DiscardNextChar()`
			`{`
			`char dummyChar;`
			`GetNextChar(out dummyChar);`
			`}`

			`// Get the next token.`
			`private string GetToken()`
			`{`
			`char peekChar;`

			`// Skip whitespace`
			`if (!PeekNextChar(out peekChar))`
			`{`
			`return null;`
			`}`

			`while (Char.IsWhiteSpace(peekChar))`
			`{`
			`// Throw away the whitespace char`
			`DiscardNextChar();`
			`if (!PeekNextChar(out peekChar))`
			`{`
			`return null;`
			`}`
			`}`

			`// Special one-character tokens`
			`if ((peekChar == '(') \|\| (peekChar == ')') \|\| (peekChar == '\''))`
			`{`
			`// Consume and return this character`
			`DiscardNextChar();`
			`return new String(peekChar, 1);`
			`}`

			`String token = "";`

			`do`
			`{`
			`token += peekChar;`
			`DiscardNextChar();`

			`if (!PeekNextChar(out peekChar))`
			`{`
			`// Ran into the end of the stream`
			`return token;`
			`}`
			`} while ((!Char.IsWhiteSpace(peekChar)) && (peekChar != '(') && (peekChar != ')'));`

			`return token;`
			`}`

			`public PLObject GetExpression()`
			`{`
			`string token = GetToken();`

			`if (token == null)`
			`{`
			`return null;`
			`}`

			`if (token.Equals("'"))`
			`{`
			`// The "'" character means "quote", and preserve`
			`// the next expression.`
			`PLList retval = new PLList();`
			`retval.Add(new PLStringAtom("quote"));`

			`PLObject nextExpr = GetExpression();`

			`if (nextExpr == null)`
			`{`
			`throw new Exception("'quote' was not followed by a complete expression");`
			`}`

			`retval.Add(nextExpr);`
			`return retval;`
			`}`

			`if (!token.Equals("("))`
			`{`
			`// Not the beginning of a list; the expression is`
			`// simply this token. See if it's a number or a`
			`// plain string.`
			`try`
			`{`
			`return new PLNumberAtom(token);`
			`}`
			`catch (Exception)`
			`{`
			`// Just treat it as a regular string`
			`return new PLStringAtom(token);`
			`}`
			`}`
			`else`
			`{`
			`// We have the beginning of a list, which can contain`
			`// any number of expressions. Keep building our list`
			`// until we encounter the closing paren.`
			`PLList retval = new PLList();`

			`PLObject nextExpr = GetExpression();`

			`while (! nextExpr.Equals(")"))`
			`{`
			`retval.Add(nextExpr);`

			`nextExpr = GetExpression();`

			`if (nextExpr == null)`
			`{`
			`throw new Exception("Incomplete expression (unbalanced parens?");`
			`}`
			`}`

			`return retval;`
			`}`
			`}`

			`private Stream inputStream;`
			`private bool haveLookAhead;`
			`private char lookAheadVal;`
			`}`
			`}`