singrdk/base/Kernel/System/Text/RegularExpressions/regexrunner.cs

//------------------------------------------------------------------------------
// <copyright company='Microsoft Corporation'>
//
//      Copyright (c) Microsoft Corporation.  All rights reserved.
//
// </copyright>
//------------------------------------------------------------------------------

// This RegexRunner class is a base class for compiled regex code.
//

// Implementation notes:
//
// RegexRunner provides a common calling convention and a common
// runtime environment for the interpreter and the compiled code.
//
// It provides the driver code that calls the subclass's Go()
// method for either scanning or direct execution.
//
// It also maintains memory allocation for the backtracking stack,
// the grouping stack and the longjump crawlstack, and provides
// methods to push new subpattern match results into (or remove
// backtracked results from) the Match instance.
#define ECMA

namespace System.Text.RegularExpressions {

    using System.Collections;
    using System.Diagnostics;
    //using System.ComponentModel;

    //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner"]/*' />
    /// <internalonly/>
    abstract public class RegexRunner {
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.runtextbeg"]/*' />
        // The text-related fields below are loaded by Scan() from its arguments
        // before FindFirstChar() or Go() is called.
        protected internal int runtextbeg;         // beginning of text to search
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.runtextend"]/*' />
        protected internal int runtextend;         // end of text to search
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.runtextstart"]/*' />
        protected internal int runtextstart;       // starting point for search

        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.runtext"]/*' />
        protected internal String runtext;         // text to search
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.runtextpos"]/*' />
        protected internal int runtextpos;         // current position in text

        // The three stack arrays below are allocated by InitMatch(). Each
        // position field is set to its array's length when the stack is reset,
        // and the Double*() methods keep existing entries in the upper half of
        // the enlarged array.
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.runtrack"]/*' />
        protected internal int [] runtrack;        // backtracking stack
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.runtrackpos"]/*' />
        protected internal int runtrackpos;        // current position in backtracking stack

        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.runstack"]/*' />
        protected internal int [] runstack;        // ordinary stack
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.runstackpos"]/*' />
        protected internal int runstackpos;        // current position in ordinary stack

        // Crawl() pushes onto runcrawl by pre-decrementing runcrawlpos and
        // Popcrawl() pops by post-incrementing it.
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.runcrawl"]/*' />
        protected internal int [] runcrawl;        // longjump crawl stack
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.runcrawlpos"]/*' />
        protected internal int runcrawlpos;        // current position in crawl stack

        // runtrackcount is expected to be set by InitTrackCount(); InitMatch()
        // uses it to size runtrack/runstack and EnsureStorage() uses it as the
        // free-space threshold.
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.runtrackcount"]/*' />
        protected internal int runtrackcount;      // count of states that may do backtracking

        // runmatch is created or reset by InitMatch(); TidyMatch(false) hands it
        // to the caller and clears this field.
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.runmatch"]/*' />
        protected internal Match runmatch;         // result object
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.runregex"]/*' />
        protected internal Regex runregex;         // regex object

        // Parameterless constructor; all state is loaded later by Scan().
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.RegexRunner"]/*' />
        protected internal RegexRunner() {}

        // Scans the string to find the first match. Uses the Match object
        // both to feed text in and as a place to store matches that come out.
        //
        // All the action is in the abstract Go() method defined by subclasses. Our
        // responsibility is to load up the class members (as done here) before
        // calling Go.
        //
        //
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.Scan"]/*' />
        protected internal Match Scan(Regex regex, String text, int textbeg, int textend, int textstart, int prevlen, bool quick) {
            // Returns Match.Empty when no match is found, null when a match is
            // found in quick mode, and the tidied Match object otherwise.

            // Load the state that FindFirstChar() and Go() operate on.
            runregex     = regex;
            runtext      = text;
            runtextbeg   = textbeg;
            runtextend   = textend;
            runtextstart = textstart;

            // Direction of travel and the position at which scanning gives up.
            int step;
            int limit;

            if (runregex.RightToLeft) {
                step = -1;
            }
            else {
                step = 1;
            }

            if (runregex.RightToLeft) {
                limit = runtextbeg;
            }
            else {
                limit = runtextend;
            }

            runtextpos = textstart;

            // The Match object and the stacks are only set up once a candidate
            // position has actually been found.
            bool matchReady = false;

            // If previous match was empty or failed, advance by one before
            // matching; if there is nowhere left to advance to, report no match.
            if (prevlen == 0) {
                if (runtextpos == limit) {
                    return Match.Empty;
                }

                runtextpos += step;
            }

            while (true) {
#if DBG
                if (runregex.Debug) {
                    Debug.WriteLine("");
                    Debug.WriteLine("Search range: from " + runtextbeg.ToString() + " to " + runtextend.ToString());
                    Debug.WriteLine("Firstchar search starting at " + runtextpos.ToString() + " stopping at " + limit.ToString());
                }
#endif
                if (FindFirstChar()) {
                    if (!matchReady) {
                        InitMatch();
                        matchReady = true;
                    }
#if DBG
                    if (runregex.Debug) {
                        Debug.WriteLine("Executing engine starting at " + runtextpos.ToString());
                        Debug.WriteLine("");
                    }
#endif
                    Go();

                    // A non-zero count for group 0 means the whole pattern matched.
                    if (runmatch._matchcount[0] > 0) {
                        return TidyMatch(quick);
                    }

                    // No match here: empty all three stacks before the next attempt.
                    runtrackpos = runtrack.Length;
                    runstackpos = runstack.Length;
                    runcrawlpos = runcrawl.Length;
                }

                // Nothing matched at this position; stop if the limit was reached.
                if (runtextpos == limit) {
                    TidyMatch(true);
                    return Match.Empty;
                }

                // Otherwise move one position in the scan direction and retry.
                runtextpos += step;
            }
        }

        // The responsibility of Go() is to run the regular expression at
        // runtextpos and call Capture() on all the captured subexpressions,
        // then to leave runtextpos at the ending position. It should leave
        // runtextpos where it started if there was no match.
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.Go"]/*' />
        // Scan() calls this only after FindFirstChar() has returned true, and
        // afterwards treats runmatch._matchcount[0] > 0 as a successful match.
        protected abstract void Go();

        // The responsibility of FindFirstChar() is to advance runtextpos
        // until it is at the next position which is a candidate for the
        // beginning of a successful match.
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.FindFirstChar"]/*' />
        // Scan() runs Go() only when this returns true. When it returns false,
        // Scan() either stops (runtextpos has reached the stop position) or
        // bumps runtextpos by one and calls this again.
        protected abstract bool FindFirstChar();

        // InitTrackCount must initialize the runtrackcount field; this is
        // used to know how large the initial runtrack and runstack arrays
        // must be.
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.InitTrackCount"]/*' />
        // Called by InitMatch() only when the stack arrays have not been
        // allocated yet (runcrawl == null), just before they are sized.
        protected abstract void InitTrackCount();

        // Initializes all the data members that are used by Go()
        private void InitMatch() {
            // Obtain the Match that Go() will fill in: recycle the cached one if
            // there is one, otherwise create it. Sparse capture numbers
            // (runregex.caps != null) need the hashtable-backed MatchSparse.
            if (runmatch != null) {
                runmatch.Reset(runregex, runtext, runtextbeg, runtextend, runtextstart);
            }
            else if (runregex.caps == null) {
                runmatch = new Match(runregex, runregex.capsize, runtext, runtextbeg, runtextend - runtextbeg, runtextstart);
            }
            else {
                runmatch = new MatchSparse(runregex, runregex.caps, runregex.capsize, runtext, runtextbeg, runtextend - runtextbeg, runtextstart);
            }

            // runcrawl is the last of the three arrays to be allocated, so it is
            // the one we test: if an allocation failed part-way through on an
            // earlier call and this instance is reused, runcrawl is still null
            // and all three allocations are simply redone below.
            if (runcrawl != null) {
                // Arrays already exist; just mark every stack as empty.
                runtrackpos = runtrack.Length;
                runstackpos = runstack.Length;
                runcrawlpos = runcrawl.Length;
                return;
            }

            InitTrackCount();

            // Both stacks get eight slots per backtracking state, subject to a
            // minimum of 32 (track) and 16 (stack) entries.
            int wanted = runtrackcount * 8;
            int tracksize = (wanted < 32) ? 32 : wanted;
            int stacksize = (wanted < 16) ? 16 : wanted;

            // Allocation order matters: runcrawl must stay last (see above).
            runtrack = new int[tracksize];
            runtrackpos = runtrack.Length;

            runstack = new int[stacksize];
            runstackpos = runstack.Length;

            runcrawl = new int[32];
            runcrawlpos = runcrawl.Length;
        }

        // Put match in its canonical form before returning it.
        private Match TidyMatch(bool quick) {
            // In quick mode a successful match is reported as null and the
            // allocated Match object stays cached in runmatch for reuse.
            if (quick) {
                return null;
            }

            // Otherwise detach the Match from this runner, let it tidy itself
            // up for the final text position, and hand it to the caller.
            Match finished = runmatch;
            runmatch = null;

            finished.Tidy(runtextpos);
            return finished;
        }

        // Called by the implementation of Go() to increase the size of storage
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.EnsureStorage"]/*' />
        protected void EnsureStorage() {
            // Each stack is doubled (once per call) when its position has
            // dropped below four slots per backtracking state.
            int lowWater = runtrackcount * 4;

            if (runstackpos < lowWater) {
                DoubleStack();
            }

            if (runtrackpos < lowWater) {
                DoubleTrack();
            }
        }

        // Called by the implementation of Go() to decide whether the pos
        // at the specified index is a boundary or not. It's just not worth
        // emitting inline code for this logic.
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.IsBoundary"]/*' />
        protected bool IsBoundary(int index, int startpos, int endpos) {
            // A position outside (startpos, endpos) is treated as having no
            // word character on that side.
            bool wordBefore = index > startpos && RegexCharClass.IsWordChar(runtext[index - 1]);
            bool wordAfter  = index < endpos && RegexCharClass.IsWordChar(runtext[index]);

            // Boundary: exactly one of the two neighbours is a word character.
            return wordBefore != wordAfter;
        }

        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.IsECMABoundary"]/*' />
        protected bool IsECMABoundary(int index, int startpos, int endpos) {
            // Same test as IsBoundary(), but using the ECMA word-character
            // classification; out-of-range neighbours count as non-word.
            bool wordBefore = index > startpos && RegexCharClass.IsECMAWordChar(runtext[index - 1]);
            bool wordAfter  = index < endpos && RegexCharClass.IsECMAWordChar(runtext[index]);

            return wordBefore != wordAfter;
        }

        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.CharInSet"]/*' />
        protected static bool CharInSet(char ch, String set, String category) {
            // Forwards to RegexCharClass.CharInSet with the same arguments.
            bool isMember = RegexCharClass.CharInSet(ch, set, category);
            return isMember;
        }

        // Called by the implementation of Go() to increase the size of the
        // backtracking stack.
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.DoubleTrack"]/*' />
        protected void DoubleTrack() {
            int oldLength = runtrack.Length;
            int[] grown = new int[oldLength * 2];

            // Existing entries go into the upper half of the new array, so the
            // current position has to shift up by the old length as well.
            System.Array.Copy(runtrack, 0, grown, oldLength, oldLength);
            runtrack = grown;
            runtrackpos += oldLength;
        }

        // Called by the implementation of Go() to increase the size of the
        // grouping stack.
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.DoubleStack"]/*' />
        protected void DoubleStack() {
            int oldLength = runstack.Length;
            int[] grown = new int[oldLength * 2];

            // Existing entries go into the upper half of the new array, so the
            // current position has to shift up by the old length as well.
            System.Array.Copy(runstack, 0, grown, oldLength, oldLength);
            runstack = grown;
            runstackpos += oldLength;
        }

        // Increases the size of the longjump unrolling stack.
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.DoubleCrawl"]/*' />
        protected void DoubleCrawl() {
            int oldLength = runcrawl.Length;
            int[] grown = new int[oldLength * 2];

            // Existing entries go into the upper half of the new array, so the
            // current position has to shift up by the old length as well.
            System.Array.Copy(runcrawl, 0, grown, oldLength, oldLength);
            runcrawl = grown;
            runcrawlpos += oldLength;
        }

        // Save a number on the longjump unrolling stack
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.Crawl"]/*' />
        protected void Crawl(int i) {
            // Position 0 means there is no free slot left below the current top.
            if (runcrawlpos == 0) {
                DoubleCrawl();
            }

            // Push: move the position down one slot, then store the value there.
            runcrawlpos--;
            runcrawl[runcrawlpos] = i;
        }

        // Remove a number from the longjump unrolling stack
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.Popcrawl"]/*' />
        protected int Popcrawl() {
            // Pop: read the slot at the current position and move the position
            // up by one. No check is made for an empty stack.
            int slot = runcrawlpos++;
            return runcrawl[slot];
        }

        // Get the height of the stack
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.Crawlpos"]/*' />
        protected int Crawlpos() {
            // Number of entries currently held: the distance from the current
            // position up to the end of the array.
            int height = runcrawl.Length - runcrawlpos;
            return height;
        }

        // Called by Go() to capture a subexpression. Note that the
        // capnum used here has already been mapped to a non-sparse
        // index (by the code generator RegexWriter).
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.Capture"]/*' />
        protected void Capture(int capnum, int start, int end) {
            // The two positions may arrive in either order; normalise them so
            // that low <= high.
            int low = start;
            int high = end;

            if (high < low) {
                low = end;
                high = start;
            }

            // Record the group on the crawl stack so that Uncapture() can undo
            // this capture, then add the interval to the Match.
            Crawl(capnum);
            runmatch.AddMatch(capnum, low, high - low);
        }

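        // Called by Go() to capture a subexpression while balancing it against
        // group uncapnum: it calls runmatch.BalanceMatch(uncapnum) and, unless
        // capnum is -1, adds a capture for capnum computed from the two
        // intervals. Note that the capnum used here has already been mapped
        // to a non-sparse index (by the code generator RegexWriter).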
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.TransferCapture"]/*' />
        protected void TransferCapture(int capnum, int uncapnum, int start, int end) {
            // Normalise the incoming interval so that start <= end.
            if (start > end) {
                int swap = start;
                start = end;
                end = swap;
            }

            // Interval currently recorded for the group being balanced. These
            // are the two intervals that are cancelling each other.
            int otherStart = MatchIndex(uncapnum);
            int otherEnd = otherStart + MatchLength(uncapnum);

            // The new capture gets the innermost defined interval
            if (start >= otherEnd) {
                // Incoming interval starts at or after the end of the other one.
                end = start;
                start = otherEnd;
            }
            else if (end <= otherStart) {
                // Incoming interval ends at or before the start of the other one.
                start = otherStart;
            }
            else {
                // The intervals overlap: clip to their intersection.
                if (otherEnd < end) {
                    end = otherEnd;
                }
                if (start < otherStart) {
                    start = otherStart;
                }
            }

            // Both operations are logged on the crawl stack so they can be
            // undone by Uncapture().
            Crawl(uncapnum);
            runmatch.BalanceMatch(uncapnum);

            // capnum == -1 means there is no group to record the result in.
            if (capnum == -1) {
                return;
            }

            Crawl(capnum);
            runmatch.AddMatch(capnum, start, end - start);
        }

        // Called by Go() to revert the last capture
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.Uncapture"]/*' />
        protected void Uncapture() {
            // The top of the crawl stack names the group whose latest capture
            // is to be removed from the Match.
            runmatch.RemoveMatch(Popcrawl());
        }

        // Call out to runmatch to get around visibility issues
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.IsMatched"]/*' />
        protected bool IsMatched(int cap) {
            // Forwarded to the Match object being built.
            bool matched = runmatch.IsMatched(cap);
            return matched;
        }

        // Call out to runmatch to get around visibility issues
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.MatchIndex"]/*' />
        protected int MatchIndex(int cap) {
            // Forwarded to the Match object being built.
            int index = runmatch.MatchIndex(cap);
            return index;
        }

        // Call out to runmatch to get around visibility issues
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.MatchLength"]/*' />
        protected int MatchLength(int cap) {
            // Forwarded to the Match object being built.
            int length = runmatch.MatchLength(cap);
            return length;
        }

#if DBG
        // Dump the current state
        //| <include file='doc\RegexRunner.uex' path='docs/doc[@for="RegexRunner.DumpState"]/*' />
        public virtual void DumpState() {
            // One debug line each for the text position, the backtracking
            // stack and the ordinary stack, written in that order.
            String textLine = "Text:  " + TextposDescription();
            Debug.WriteLine(textLine);

            String trackLine = "Track: " + StackDescription(runtrack, runtrackpos);
            Debug.WriteLine(trackLine);

            String stackLine = "Stack: " + StackDescription(runstack, runstackpos);
            Debug.WriteLine(stackLine);
        }

        internal static String StackDescription(int[] A, int Index) {
            // Output shape: "<used>/<capacity>", padded with spaces to eight
            // characters, then the entries from Index to the end of the array
            // in parentheses, separated by single spaces.
            StringBuilder text = new StringBuilder();

            text.Append(A.Length - Index).Append('/').Append(A.Length);

            int padding = 8 - text.Length;
            if (padding > 0) {
                text.Append(' ', padding);
            }

            text.Append("(");

            for (int slot = Index; slot < A.Length; slot++) {
                // Separator goes before every entry except the first.
                if (slot != Index) {
                    text.Append(' ');
                }
                text.Append(A[slot]);
            }

            text.Append(')');

            return text.ToString();
        }

        internal virtual String TextposDescription() {
            // Output shape: the text position padded to eight characters, the
            // character before the position (or '^' at the beginning of the
            // search range), a '>' marker, then the text from the position to
            // the end of the range. The result is capped at 64 characters,
            // ending in "..." when cut short and in '$' otherwise.
            StringBuilder Sb = new StringBuilder();

            Sb.Append(runtextpos);

            if (Sb.Length < 8)
                Sb.Append(' ',8 - Sb.Length);

            if (runtextpos > runtextbeg)
                Sb.Append(RegexCharClass.CharDescription(runtext[runtextpos - 1]));
            else
                Sb.Append('^');

            Sb.Append('>');

            // Stop as soon as the buffer is long enough to be truncated below;
            // anything appended past that point would be discarded anyway.
            for (int i = runtextpos; i < runtextend && Sb.Length < 64; i++) {
                Sb.Append(RegexCharClass.CharDescription(runtext[i]));
            }
            if (Sb.Length >= 64) {
                Sb.Length = 61;
                Sb.Append("...");
            }
            else {
                Sb.Append('$');
            }

            return Sb.ToString();
        }
#endif
    }


}