singrdk/base/Kernel/System/Text/RegularExpressions/regexcharclass.cs

//------------------------------------------------------------------------------
// <copyright company='Microsoft Corporation'>
//
//      Copyright (c) Microsoft Corporation.  All rights reserved.
//
// </copyright>
//------------------------------------------------------------------------------

// This RegexCharClass class provides the "set of Unicode chars" functionality
// used by the regexp engine.
//

// RegexCharClass supports a "string representation" of a character class.
// The string representation is NOT human-readable. It is a sequence of
// strictly increasing Unicode characters that begin ranges of characters
// that are alternately included in and excluded from the class.
//
// Membership of a character in the class can be determined by binary
// searching the string representation and determining if the including
// range is at an even or odd index.
//
// The RegexCharClass class itself is a builder class. One can add char ranges
// or sets or invert the class; then, the class can be converted to its
// string representation via RegexCharClass.ToSet().
//
#define ECMA

namespace System.Text.RegularExpressions {

    using System.Collections;
    using System.Globalization;
    using System.Diagnostics;

    internal sealed class RegexCharClass {
        // Lowest and highest UTF-16 code unit values; Lastchar is used as the
        // implicit end of an open-ended final range.
        internal const char   Nullchar   = '\0';
        internal const char   Lastchar   = '\uFFFF';

        // Set strings for "every character" (a single range starting at \0 with
        // no end boundary) and "no characters".
        internal const String Any      = "\0";
        internal const String Empty    = "";
        // Marker that delimits a group of categories inside a category string
        // (see CharInCategory, which treats a zero entry as a group marker).
        internal const char GroupChar = (char) 0;
        internal static readonly RegexCharClass AnyClass      = new RegexCharClass("\0");
        internal static readonly RegexCharClass EmptyClass    = new RegexCharClass(String.Empty);

        // Category strings for word / non-word characters; assigned in the
        // static constructor.
        internal static readonly String Word;
        internal static readonly String NotWord;

        // Category codes that CharInCategory resolves through Char.IsWhiteSpace
        // (positive = whitespace, negative = not whitespace).
        internal const short SpaceConst = 100;
        internal const short NotSpaceConst = -100;
        internal static readonly String Space = ((char) SpaceConst).ToString();
        internal static readonly String NotSpace = NegateCategory(Space);

        // ECMA-mode classes, written in the set-string representation: each
        // character is a boundary at which membership flips, starting with
        // "included". The Not* variants prepend \0, which inverts every range.
        internal const String ECMASpace    = "\u0009\u000E\u0020\u0021";
        internal const String NotECMASpace = "\0\u0009\u000E\u0020\u0021";
        internal const String ECMAWord     = "\u0030\u003A\u0041\u005B\u005F\u0060\u0061\u007B\u0130\u0131";
        internal const String NotECMAWord  = "\0\u0030\u003A\u0041\u005B\u005F\u0060\u0061\u007B\u0130\u0131";
        internal const String ECMADigit    = "\u0030\u003A";
        internal const String NotECMADigit = "\0\u0030\u003A";

        // Maps Unicode category names ("Lu", "L", ...) to category strings;
        // populated in the static constructor.
        internal static Hashtable          _definedCategories;

        // Builder state.
        internal ArrayList          _rangelist;     // SingleRange entries added so far
        internal StringBuilder      _categories;    // accumulated category string
        internal bool               _canonical;     // when false, ToSet() calls Canonicalize() first
        internal bool               _negate;        // when true, ToSet() prefixes the set with two Nullchars


        // Static constructor.
        //
        // Populates _definedCategories with the category string for every
        // two-letter Unicode general category ("Lu", "Nd", ...) and for each
        // one-letter group ("L", "N", ...), then builds Word / NotWord.
        //
        // A single category is encoded as one char holding
        // (int)UnicodeCategory + 1. A group is the same codes wrapped in
        // GroupChar markers so that CharInCategory can treat it as a unit.
        static RegexCharClass() {
            _definedCategories = new Hashtable(31);

            // Scratch buffer reused for every group; slot 0 always holds the
            // opening GroupChar and the closing GroupChar is written after the
            // last member of the largest group seen so far.
            char[] groups = new char[9];
            StringBuilder word = new StringBuilder(11);

            word.Append(GroupChar);
            groups[0] = GroupChar;

            // We need the UnicodeCategory enum values as a char so we can put them in a string
            // in the hashtable.  In order to get there, we first must cast to an int,
            // then cast to a char
            // Also need to distinguish between positive and negative values.  UnicodeCategory is zero
            // based, so we add one to each value and subtract it off later

            // Others
            groups[1] = (char) ((int) UnicodeCategory.Control + 1);
            _definedCategories["Cc"] = groups[1].ToString();     // Control
            groups[2] = (char) ((int) UnicodeCategory.Format + 1);
            _definedCategories["Cf"] = groups[2].ToString();     // Format
            groups[3] = (char) ((int) UnicodeCategory.OtherNotAssigned + 1);
            _definedCategories["Cn"] = groups[3].ToString();     // Not assigned
            groups[4] = (char) ((int) UnicodeCategory.PrivateUse + 1);
            _definedCategories["Co"] = groups[4].ToString();     // Private use
            groups[5] = (char) ((int) UnicodeCategory.Surrogate + 1);
            _definedCategories["Cs"] = groups[5].ToString();     // Surrogate

            groups[6] = GroupChar;
            _definedCategories["C"] = new String(groups, 0, 7);

            // Letters
            groups[1] = (char) ((int) UnicodeCategory.LowercaseLetter + 1);
            _definedCategories["Ll"] = groups[1].ToString();     // Lowercase
            groups[2] = (char) ((int) UnicodeCategory.ModifierLetter + 1);
            _definedCategories["Lm"] = groups[2].ToString();     // Modifier
            groups[3] = (char) ((int) UnicodeCategory.OtherLetter + 1);
            _definedCategories["Lo"] = groups[3].ToString();     // Other
            groups[4] = (char) ((int) UnicodeCategory.TitlecaseLetter + 1);
            _definedCategories["Lt"] = groups[4].ToString();     // Titlecase
            groups[5] = (char) ((int) UnicodeCategory.UppercaseLetter + 1);
            _definedCategories["Lu"] = groups[5].ToString();     // Uppercase

            // groups[6] still holds the closing GroupChar written for "C".
            _definedCategories["L"] = new String(groups, 0, 7);
            // Word gets Ll plus Lo, Lt, Lu (slots 3..5); Lm is not added.
            word.Append(groups[1]);
            word.Append(new String(groups, 3, 3));

            // Marks
            groups[1] = (char) ((int) UnicodeCategory.SpacingCombiningMark + 1);
            _definedCategories["Mc"] = groups[1].ToString();     // Spacing combining
            groups[2] = (char) ((int) UnicodeCategory.EnclosingMark + 1);
            _definedCategories["Me"] = groups[2].ToString();     // Enclosing
            groups[3] = (char) ((int) UnicodeCategory.NonSpacingMark + 1);
            _definedCategories["Mn"] = groups[3].ToString();     // Non-spacing

            groups[4] = GroupChar;
            _definedCategories["M"] = new String(groups, 0, 5);

            // Numbers
            groups[1] = (char) ((int) UnicodeCategory.DecimalDigitNumber + 1);
            _definedCategories["Nd"] = groups[1].ToString();     // Decimal digit
            groups[2] = (char) ((int) UnicodeCategory.LetterNumber + 1);
            _definedCategories["Nl"] = groups[2].ToString();     // Letter
            groups[3] = (char) ((int) UnicodeCategory.OtherNumber + 1);
            _definedCategories["No"] = groups[3].ToString();     // Other

            // groups[4] still holds the closing GroupChar written for "M".
            _definedCategories["N"] = new String(groups, 0, 5);
            // Word gets decimal digits only.
            word.Append(groups[1]);

            // Punctuation
            groups[1] = (char) ((int) UnicodeCategory.ConnectorPunctuation + 1);
            _definedCategories["Pc"] = groups[1].ToString();     // Connector
            groups[2] = (char) ((int) UnicodeCategory.DashPunctuation + 1);
            _definedCategories["Pd"] = groups[2].ToString();     // Dash
            groups[3] = (char) ((int) UnicodeCategory.ClosePunctuation + 1);
            _definedCategories["Pe"] = groups[3].ToString();     // Close
            groups[4] = (char) ((int) UnicodeCategory.OtherPunctuation + 1);
            _definedCategories["Po"] = groups[4].ToString();     // Other
            groups[5] = (char) ((int) UnicodeCategory.OpenPunctuation + 1);
            _definedCategories["Ps"] = groups[5].ToString();     // Open
            // "Pi" is the initial-quote category and "Pf" the final-quote
            // category; these two were previously mapped to each other's
            // UnicodeCategory value.
            groups[6] = (char) ((int) UnicodeCategory.InitialQuotePunctuation + 1);
            _definedCategories["Pi"] = groups[6].ToString();     // Initial quote
            groups[7] = (char) ((int) UnicodeCategory.FinalQuotePunctuation + 1);
            _definedCategories["Pf"] = groups[7].ToString();     // Final quote

            groups[8] = GroupChar;
            _definedCategories["P"] = new String(groups, 0, 9);
            // Word gets connector punctuation (e.g. underscore).
            word.Append(groups[1]);

            // Symbols
            groups[1] = (char) ((int) UnicodeCategory.CurrencySymbol + 1);
            _definedCategories["Sc"] = groups[1].ToString();     // Currency
            groups[2] = (char) ((int) UnicodeCategory.ModifierSymbol + 1);
            _definedCategories["Sk"] = groups[2].ToString();     // Modifier
            groups[3] = (char) ((int) UnicodeCategory.MathSymbol + 1);
            _definedCategories["Sm"] = groups[3].ToString();     // Math
            groups[4] = (char) ((int) UnicodeCategory.OtherSymbol + 1);
            _definedCategories["So"] = groups[4].ToString();     // Other

            groups[5] = GroupChar;
            _definedCategories["S"] = new String(groups, 0, 6);

            // Separators
            groups[1] = (char) ((int) UnicodeCategory.LineSeparator + 1);
            _definedCategories["Zl"] = groups[1].ToString();     // Line
            groups[2] = (char) ((int) UnicodeCategory.ParagraphSeparator + 1);
            _definedCategories["Zp"] = groups[2].ToString();     // Paragraph
            groups[3] = (char) ((int) UnicodeCategory.SpaceSeparator + 1);
            _definedCategories["Zs"] = groups[3].ToString();     // Space

            groups[4] = GroupChar;
            _definedCategories["Z"] = new String(groups, 0, 5);


            // Close the word group and derive its negation.
            word.Append(GroupChar);
            Word = word.ToString();
            NotWord = NegateCategory(Word);

#if DBG
            // make sure the _propTable is correctly ordered
            int len = _propTable.GetLength(0);
            for (int i=0; i<len-1; i++)
                Debug.Assert(String.Compare(_propTable[i,0], _propTable[i+1,0], false, CultureInfo.InvariantCulture) < 0, "RegexCharClass _propTable is out of order at (" + _propTable[i,0] +", " + _propTable[i+1,0] + ")");
#endif
        }

        // RegexCharClass()
        //
        // Builds a class with no ranges and no categories. A class with
        // nothing in it is canonical by definition.
        internal RegexCharClass() {
            _categories = new StringBuilder();
            _canonical = true;
            _rangelist = new ArrayList(6);
        }

        // RegexCharClass(String)
        //
        // Builds a class from an existing set string. The range list is
        // pre-sized for one range per pair of boundary characters, and the
        // set's ranges are then loaded through AddSet.
        internal RegexCharClass(String set) {
            int expectedRanges = (set.Length + 1) / 2;

            _categories = new StringBuilder();
            _canonical = true;
            _rangelist = new ArrayList(expectedRanges);

            AddSet(set);
        }

        // RegexCharClass(char, char)
        //
        // Builds a class that holds exactly one range, [first, last].
        internal RegexCharClass(char first, char last) {
            ArrayList ranges = new ArrayList(1);
            ranges.Add(new SingleRange(first, last));

            _rangelist = ranges;
            _canonical = true;
            _categories = new StringBuilder();
        }

        // CreateFromCategory()
        //
        // Convenience factory: returns a new class containing only the named
        // category (see AddCategoryFromName for the meaning of the arguments).
        internal static RegexCharClass CreateFromCategory(string categoryName, bool invert, bool caseInsensitive, string pattern) {
            RegexCharClass result = new RegexCharClass();

            result.AddCategoryFromName(categoryName, invert, caseInsensitive, pattern);

            return result;
        }


        // AddCharClass()
        //
        // Appends every range and the category string of another class to
        // this one.
        //
        // The combined list stays canonical only if cc is itself canonical
        // and its first range starts after our last range ends. Previously
        // the check compared cc's *last* range end and ignored cc._canonical,
        // so overlapping or unsorted ranges could be appended while this
        // class still claimed to be canonical (and ToSet() would then skip
        // Canonicalize()).
        internal void AddCharClass(RegexCharClass cc) {
            // Snapshot the count so the loop is bounded by what cc held on entry.
            int incoming = cc.RangeCount();

            if (_canonical && incoming > 0) {
                if (!cc._canonical) {
                    // Appending unordered ranges makes us unordered too.
                    _canonical = false;
                }
                else if (RangeCount() > 0 &&
                         cc.Range(0)._first <= Range(RangeCount() - 1)._last) {
                    // cc begins at or before the end of our last range.
                    _canonical = false;
                }
            }

            for (int i = 0; i < incoming; i += 1) {
                _rangelist.Add(cc.Range(i));
            }

            _categories.Append(cc._categories.ToString());
        }

        // AddSet()
        //
        // Appends the ranges described by a set string. Characters are read
        // in pairs (inclusive start, exclusive end); an unpaired trailing
        // character starts a range that runs through Lastchar.
        internal void AddSet(String set) {
            int length = set.Length;

            // The list stops being canonical if the new set starts at or
            // before the end of the last range we already hold.
            if (length > 0 && _canonical && RangeCount() > 0) {
                if (set[0] <= Range(RangeCount() - 1)._last)
                    _canonical = false;
            }

            int pos = 0;
            while (pos + 1 < length) {
                char rangeStart = set[pos];
                char rangeEnd = (char)(set[pos + 1] - 1);
                _rangelist.Add(new SingleRange(rangeStart, rangeEnd));
                pos += 2;
            }

            if (pos < length)
                _rangelist.Add(new SingleRange(set[pos], Lastchar));
        }

        // AddRange()
        //
        // Appends the single range [first, last] to the class.
        //
        // The canonical check must look at the range that was last in the
        // list *before* the new one is appended. Previously the new range was
        // added first, so 'first' was compared with its own '_last' and every
        // call with first <= last cleared _canonical.
        //
        // A range that overlaps or directly touches the previous one clears
        // _canonical so that ToSet() runs Canonicalize() and never emits two
        // equal adjacent boundary characters.
        internal void AddRange(char first, char last) {
            int count = _rangelist.Count;

            if (_canonical && count > 0) {
                // int arithmetic: previousLast + 1 cannot wrap at Lastchar.
                int previousLast = ((SingleRange)_rangelist[count - 1])._last;
                if (first <= previousLast + 1)
                    _canonical = false;
            }

            _rangelist.Add(new SingleRange(first, last));
        }

        // Category
        //
        // The category string accumulated so far. The value is returned as
        // stored; the _negate flag is not applied here.
        internal string Category {
            get {
                string current = _categories.ToString();
                return current;
            }
        }

        // Negate
        //
        // Write-only flag; when set, ToSet() marks the produced set string
        // as negated.
        internal bool Negate {
            set {
                _negate = value;
            }
        }

        // AddCategoryFromName()
        //
        // Adds a category by name. Names found in _definedCategories are
        // appended to the category string (negated when 'invert' is set);
        // any other name is resolved through SetFromProperty and added as
        // ranges.
        internal void AddCategoryFromName(string categoryName, bool invert, bool caseInsensitive, string pattern) {
            object entry = _definedCategories[categoryName];

            if (entry == null) {
                // Not a general category: let SetFromProperty resolve the name.
                AddSet(SetFromProperty(categoryName, invert, pattern));
                return;
            }

            string definition = (string) entry;

            // Under case-insensitive matching, Lu and Lt are replaced by Ll.
            if (caseInsensitive && (categoryName.Equals("Lu") || categoryName.Equals("Lt")))
                definition = (string) _definedCategories["Ll"];

            if (invert)
                definition = NegateCategory(definition);

            _categories.Append(definition);
        }

        // AddCategory()
        //
        // Appends an already-encoded category string to this class.
        internal void AddCategory(string category) {
            this._categories.Append(category);
        }


        /***************************************************************************
            Let U be the set of Unicode character values and let L be the lowercase
            function, mapping from U to U. To perform case insensitive matching of
            character sets, we need to be able to map an interval I in U, say

                I = [chMin, chMax] = { ch : chMin <= ch <= chMax }

            to a set A such that A contains L(I) and A is contained in the union of
            I and L(I).

            The table below partitions U into intervals on which L is non-decreasing.
            Thus, for any interval J = [a, b] contained in one of these intervals,
            L(J) is contained in [L(a), L(b)].

            It is also true that for any such J, [L(a), L(b)] is contained in the
            union of J and L(J). This does not follow from L being non-decreasing on
            these intervals. It follows from the nature of the L on each interval.
            On each interval, L has one of the following forms:

                (1) L(ch) = constant            (LowercaseSet)
                (2) L(ch) = ch + offset         (LowercaseAdd)
                (3) L(ch) = ch | 1              (LowercaseBor)
                (4) L(ch) = ch + (ch & 1)       (LowercaseBad)

            It is easy to verify that for any of these forms [L(a), L(b)] is
            contained in the union of [a, b] and L([a, b]).
        ***************************************************************************/

        // Operation codes stored in LC._lcOp; AddLowercaseImpl switches on
        // them to map a range endpoint to its lowercase counterpart.
        internal const int LowercaseSet = 0;    // Set to arg.
        internal const int LowercaseAdd = 1;    // Add arg.
        internal const int LowercaseBor = 2;    // Bitwise or with 1.
        internal const int LowercaseBad = 3;    // Bitwise and with 1 and add original.

        // Lower case mapping descriptor: one row of _lcTable. It describes
        // how the characters in [_chMin, _chMax] map to lowercase.
        private sealed class LC {
            internal char _chMin;   // first character covered by this row
            internal char _chMax;   // last character covered by this row
            internal int _lcOp;     // one of the Lowercase* operation codes
            internal int _data;     // operand for LowercaseSet / LowercaseAdd

            internal LC(char chMin, char chMax, int lcOp, int data) {
                this._chMin = chMin;
                this._chMax = chMax;
                this._lcOp = lcOp;
                this._data = data;
            }
        }


        // Lowercase mapping table. Each row covers the character interval
        // [_chMin, _chMax] and names the operation (LowercaseSet/Add/Bor/Bad)
        // that maps it to lowercase. Rows are listed in increasing character
        // order; AddLowercaseImpl binary-searches the table on _chMax, so that
        // ordering must be preserved when editing.
        private static readonly LC[] _lcTable = new LC[]
        {
            new LC('\u0041', '\u005A', LowercaseAdd, 32),
            new LC('\u00C0', '\u00DE', LowercaseAdd, 32),
            new LC('\u0100', '\u012E', LowercaseBor, 0),
            new LC('\u0130', '\u0130', LowercaseSet, 0x0069),
            new LC('\u0132', '\u0136', LowercaseBor, 0),
            new LC('\u0139', '\u0147', LowercaseBad, 0),
            new LC('\u014A', '\u0176', LowercaseBor, 0),
            new LC('\u0178', '\u0178', LowercaseSet, 0x00FF),
            new LC('\u0179', '\u017D', LowercaseBad, 0),
            new LC('\u0181', '\u0181', LowercaseSet, 0x0253),
            new LC('\u0182', '\u0184', LowercaseBor, 0),
            new LC('\u0186', '\u0186', LowercaseSet, 0x0254),
            new LC('\u0187', '\u0187', LowercaseSet, 0x0188),
            new LC('\u0189', '\u018A', LowercaseAdd, 205),
            new LC('\u018B', '\u018B', LowercaseSet, 0x018C),
            new LC('\u018E', '\u018F', LowercaseAdd, 202),
            new LC('\u0190', '\u0190', LowercaseSet, 0x025B),
            new LC('\u0191', '\u0191', LowercaseSet, 0x0192),
            new LC('\u0193', '\u0193', LowercaseSet, 0x0260),
            new LC('\u0194', '\u0194', LowercaseSet, 0x0263),
            new LC('\u0196', '\u0196', LowercaseSet, 0x0269),
            new LC('\u0197', '\u0197', LowercaseSet, 0x0268),
            new LC('\u0198', '\u0198', LowercaseSet, 0x0199),
            new LC('\u019C', '\u019C', LowercaseSet, 0x026F),
            new LC('\u019D', '\u019D', LowercaseSet, 0x0272),
            new LC('\u01A0', '\u01A4', LowercaseBor, 0),
            new LC('\u01A7', '\u01A7', LowercaseSet, 0x01A8),
            new LC('\u01A9', '\u01A9', LowercaseSet, 0x0283),
            new LC('\u01AC', '\u01AC', LowercaseSet, 0x01AD),
            new LC('\u01AE', '\u01AE', LowercaseSet, 0x0288),
            new LC('\u01AF', '\u01AF', LowercaseSet, 0x01B0),
            new LC('\u01B1', '\u01B2', LowercaseAdd, 217),
            new LC('\u01B3', '\u01B5', LowercaseBad, 0),
            new LC('\u01B7', '\u01B7', LowercaseSet, 0x0292),
            new LC('\u01B8', '\u01B8', LowercaseSet, 0x01B9),
            new LC('\u01BC', '\u01BC', LowercaseSet, 0x01BD),
            new LC('\u01C4', '\u01C5', LowercaseSet, 0x01C6),
            new LC('\u01C7', '\u01C8', LowercaseSet, 0x01C9),
            new LC('\u01CA', '\u01CB', LowercaseSet, 0x01CC),
            new LC('\u01CD', '\u01DB', LowercaseBad, 0),
            new LC('\u01DE', '\u01EE', LowercaseBor, 0),
            new LC('\u01F1', '\u01F2', LowercaseSet, 0x01F3),
            new LC('\u01F4', '\u01F4', LowercaseSet, 0x01F5),
            new LC('\u01FA', '\u0216', LowercaseBor, 0),
            new LC('\u0386', '\u0386', LowercaseSet, 0x03AC),
            new LC('\u0388', '\u038A', LowercaseAdd, 37),
            new LC('\u038C', '\u038C', LowercaseSet, 0x03CC),
            new LC('\u038E', '\u038F', LowercaseAdd, 63),
            new LC('\u0391', '\u03AB', LowercaseAdd, 32),
            new LC('\u03E2', '\u03EE', LowercaseBor, 0),
            new LC('\u0401', '\u040F', LowercaseAdd, 80),
            new LC('\u0410', '\u042F', LowercaseAdd, 32),
            new LC('\u0460', '\u0480', LowercaseBor, 0),
            new LC('\u0490', '\u04BE', LowercaseBor, 0),
            new LC('\u04C1', '\u04C3', LowercaseBad, 0),
            new LC('\u04C7', '\u04C7', LowercaseSet, 0x04C8),
            new LC('\u04CB', '\u04CB', LowercaseSet, 0x04CC),
            new LC('\u04D0', '\u04EA', LowercaseBor, 0),
            new LC('\u04EE', '\u04F4', LowercaseBor, 0),
            new LC('\u04F8', '\u04F8', LowercaseSet, 0x04F9),
            new LC('\u0531', '\u0556', LowercaseAdd, 48),
            new LC('\u10A0', '\u10C5', LowercaseAdd, 48),
            new LC('\u1E00', '\u1EF8', LowercaseBor, 0),
            new LC('\u1F08', '\u1F0F', LowercaseAdd, -8),
            new LC('\u1F18', '\u1F1F', LowercaseAdd, -8),
            new LC('\u1F28', '\u1F2F', LowercaseAdd, -8),
            new LC('\u1F38', '\u1F3F', LowercaseAdd, -8),
            new LC('\u1F48', '\u1F4D', LowercaseAdd, -8),
            new LC('\u1F59', '\u1F59', LowercaseSet, 0x1F51),
            new LC('\u1F5B', '\u1F5B', LowercaseSet, 0x1F53),
            new LC('\u1F5D', '\u1F5D', LowercaseSet, 0x1F55),
            new LC('\u1F5F', '\u1F5F', LowercaseSet, 0x1F57),
            new LC('\u1F68', '\u1F6F', LowercaseAdd, -8),
            new LC('\u1F88', '\u1F8F', LowercaseAdd, -8),
            new LC('\u1F98', '\u1F9F', LowercaseAdd, -8),
            new LC('\u1FA8', '\u1FAF', LowercaseAdd, -8),
            new LC('\u1FB8', '\u1FB9', LowercaseAdd, -8),
            new LC('\u1FBA', '\u1FBB', LowercaseAdd, -74),
            new LC('\u1FBC', '\u1FBC', LowercaseSet, 0x1FB3),
            new LC('\u1FC8', '\u1FCB', LowercaseAdd, -86),
            new LC('\u1FCC', '\u1FCC', LowercaseSet, 0x1FC3),
            new LC('\u1FD8', '\u1FD9', LowercaseAdd, -8),
            new LC('\u1FDA', '\u1FDB', LowercaseAdd, -100),
            new LC('\u1FE8', '\u1FE9', LowercaseAdd, -8),
            new LC('\u1FEA', '\u1FEB', LowercaseAdd, -112),
            new LC('\u1FEC', '\u1FEC', LowercaseSet, 0x1FE5),
            new LC('\u1FF8', '\u1FF9', LowercaseAdd, -128),
            new LC('\u1FFA', '\u1FFB', LowercaseAdd, -126),
            new LC('\u1FFC', '\u1FFC', LowercaseSet, 0x1FF3),
            new LC('\u2160', '\u216F', LowercaseAdd, 16),
            new LC('\u24B6', '\u24D0', LowercaseAdd, 26),
            new LC('\uFF21', '\uFF3A', LowercaseAdd, 32),
        };

        // AddLowercase()
        //
        // Case-insensitivity support. Every single-character range already in
        // the class is replaced in place by its lowercase form; every wider
        // range is passed to AddLowercaseImpl, which appends the lowercase
        // ranges it needs. Only ranges present on entry are visited. The
        // 'culture' argument is forwarded but Char.ToLower is called without it.
        internal void AddLowercase(CultureInfo culture) {
            _canonical = false;

            int originalCount = _rangelist.Count;

            for (int index = 0; index < originalCount; index++) {
                SingleRange current = (SingleRange)_rangelist[index];

                if (current._first != current._last) {
                    AddLowercaseImpl(current._first, current._last, culture);
                }
                else {
                    char lowered = Char.ToLower(current._first);//, culture);
                    current._last = lowered;
                    current._first = lowered;
                }
            }
        }

        // AddLowercaseImpl()
        //
        // Given a range [chMin, chMax] that is already in the class, appends
        // the extra ranges needed so that the lowercase forms of its
        // characters are included as well. Uses _lcTable to map whole
        // sub-intervals at once.
        internal void AddLowercaseImpl(char chMin, char chMax, CultureInfo culture) {
            // Single character: lowercase it directly.
            if (chMin == chMax) {
                char lowered = Char.ToLower(chMin);//, culture);
                if (lowered != chMax)
                    AddRange(lowered, lowered);
                return;
            }

            // Binary search for the first table row whose _chMax is not below chMin.
            int low = 0;
            int high = _lcTable.Length;
            while (low < high) {
                int middle = (low + high) / 2;
                if (_lcTable[middle]._chMax < chMin)
                    low = middle + 1;
                else
                    high = middle;
            }

            // Walk every row that intersects [chMin, chMax].
            for (int row = low; row < _lcTable.Length; row++) {
                LC entry = _lcTable[row];
                if (entry._chMin > chMax)
                    break;

                // Clip the row to the requested range.
                char mappedMin = (entry._chMin < chMin) ? chMin : entry._chMin;
                char mappedMax = (entry._chMax > chMax) ? chMax : entry._chMax;

                // Map both clipped endpoints to lowercase.
                switch (entry._lcOp) {
                    case LowercaseSet:
                        mappedMin = (char)entry._data;
                        mappedMax = (char)entry._data;
                        break;
                    case LowercaseAdd:
                        mappedMin += (char)entry._data;
                        mappedMax += (char)entry._data;
                        break;
                    case LowercaseBor:
                        mappedMin |= (char)1;
                        mappedMax |= (char)1;
                        break;
                    case LowercaseBad:
                        mappedMin += (char)(mappedMin & 1);
                        mappedMax += (char)(mappedMax & 1);
                        break;
                }

                // Only add the mapped range if it reaches outside the original one.
                if (mappedMin < chMin || mappedMax > chMax)
                    AddRange(mappedMin, mappedMax);
            }
        }


        // ToSet()
        //
        // Produces the set string for this class. The range list is
        // canonicalized first if needed. Each range contributes its first
        // character and, unless it ends at Lastchar, the character after its
        // last. A negated class is marked by two leading Nullchars.
        internal String ToSet() {
            if (!_canonical)
                Canonicalize();

            int rangeCount = _rangelist.Count;
            StringBuilder result;

            if (!_negate) {
                result = new StringBuilder(rangeCount * 2);
            }
            else {
                result = new StringBuilder(rangeCount * 2 + 2);
                result.Append(Nullchar);
                result.Append(Nullchar);
            }

            for (int index = 0; index < rangeCount; index++) {
                SingleRange current = (SingleRange)_rangelist[index];

                result.Append(current._first);

                // A range ending at Lastchar is open-ended: no end boundary.
                if (current._last != Lastchar)
                    result.Append((char)(current._last + 1));
            }

            return result.ToString();
        }

        // ToSetCi()
        //
        // Same as ToSet(), but when caseInsensitive is true the class is
        // first extended with lowercase equivalents via AddLowercase.
        internal String ToSetCi(bool caseInsensitive, CultureInfo culture) {
            if (!caseInsensitive)
                return ToSet();

            AddLowercase(culture);
            return ToSet();
        }

        // SetSize()
        //
        // Counts the characters covered by a set string. Each boundary pair
        // contributes (end - start); an unpaired trailing boundary is an
        // open-ended range that runs to 0xFFFF inclusive.
        internal static int SetSize(String set) {
            int total = 0;
            int pos = 0;
            int lastPairStart = set.Length - 1;

            while (pos < lastPairStart) {
                total += set[pos + 1] - set[pos];
                pos += 2;
            }

            if (pos < set.Length)
                total += 0x10000 - set[pos];

            return total;
        }

        // SetInverse()
        //
        // Complements a set string without building a RegexCharClass.
        // A leading Nullchar boundary is toggled: it is added when absent and
        // removed when present, which flips every range.
        internal static String SetInverse(String set) {
            bool startsAtNull = set.Length > 0 && set[0] == Nullchar;

            if (!startsAtNull)
                return Any + set;

            // Only the Nullchar boundary: the inverse of "everything" is "nothing".
            if (set.Length == 1)
                return Empty;

            return set.Substring(1);
        }


        // SetUnion()
        //
        // Builds the union of two string representations of a class directly.
        //
        // Both inputs are set strings (boundary characters at which
        // membership flips). The two strings are merged in one pass; the
        // setI/setJ roles (and their cursors i/j) are swapped whenever
        // needed so that setJ is the string currently supplying the range
        // being emitted.
        internal static String SetUnion(String setI, String setJ) {
            int i;
            int j;
            int s;
            String swap;
            StringBuilder sb;
            char chExc;

            // Trivial cases: union with Empty or Any, or with itself.
            if (setI.Equals(Empty) || setJ.Equals(Any))
                return setJ;

            if (setJ.Equals(Empty) || setI.Equals(Any))
                return setI;

            if (setI == setJ)
                return setI;

            i = 0;
            j = 0;
            sb = new StringBuilder(setI.Length + setJ.Length);

            for (;;) {
                // One input exhausted: the rest of the other is copied verbatim.
                if (j == setJ.Length) {
                    sb.Append(setI, i, setI.Length - i);
                    break;
                }

                if (i == setI.Length) {
                    sb.Append(setJ, j, setJ.Length - j);
                    break;
                }

                // Make setJ the string whose next range starts first.
                if (setJ[j] > setI[i]) {
                    s = i;
                    i = j;
                    j = s;
                    swap = setI;
                    setI = setJ;
                    setJ = swap;
                }

                // Emit the start of the next included range.
                sb.Append(setJ[j++]);
                // No end boundary left: this range is open-ended, so the
                // union is complete.
                if (j == setJ.Length)
                    break;

                // Candidate (exclusive) end of the range being emitted.
                chExc = setJ[j++];

                for (;;) {
                    // Skip every boundary of the other string that lies at or
                    // before the candidate end.
                    while (i < setI.Length && setI[i] <= chExc)
                        i++;

                    // Even cursor: the other string is outside a range at
                    // chExc, so the emitted range really ends there.
                    if ((i & 0x1) == 0) {
                        sb.Append(chExc);
                        goto OuterContinue;
                    }
                    else {
                        // Odd cursor: the other string is inside a range that
                        // covers chExc. If it has no end boundary the range is
                        // open-ended and nothing more needs to be emitted.
                        if (i == setI.Length)
                            goto OuterBreak;

                        // Otherwise extend the emitted range to that range's end.
                        chExc = setI[i++];
                    }

                    // Swap roles so the string that supplied the new end is
                    // the one being followed.
                    s = i;
                    i = j;
                    j = s;
                    swap = setI;
                    setI = setJ;
                    setJ = swap;
                }

                OuterContinue:
                ;
            }

            OuterBreak:
            ;

            return sb.ToString();
        }

        // CategoryUnion()
        //
        // The union of two category strings is their concatenation.
        internal static String CategoryUnion(string catI, string catJ) {
            return String.Concat(catI, catJ);
        }

        // SetFromChar()
        //
        // Returns the set string that contains exactly one character:
        // the character itself followed by its successor as the end boundary.
        // For Lastchar there is no successor, so the range is left open-ended.
        internal static String SetFromChar(char ch) {
            if (ch == Lastchar)
                return new String(ch, 1);

            char[] bounds = new char[2];
            bounds[0] = ch;
            bounds[1] = (char)(ch + 1);

            return new String(bounds);
        }

        // SetInverseFromChar()
        //
        // Returns the set string that contains every character except 'ch':
        // the range from Nullchar up to 'ch' (omitted when ch is Nullchar)
        // followed by the open-ended range starting just after 'ch'
        // (omitted when ch is Lastchar).
        internal static String SetInverseFromChar(char ch) {
            String below = String.Empty;
            String above = String.Empty;

            if (ch != Nullchar)
                below = new String(new char[] { Nullchar, ch });

            if (ch != Lastchar)
                above = new String((char)(ch + 1), 1);

            return below + above;
        }

        // IsSingleton()
        //
        // True when the set string describes exactly one character, i.e. it
        // has two boundaries and the second directly follows the first.
        internal static bool IsSingleton(String set) {
            if (set.Length != 2)
                return false;

            return set[1] - set[0] == 1;
        }

        // SingletonChar()
        //
        // Returns the first boundary character of the set string, which for
        // a singleton set is the one character it contains.
        internal static char SingletonChar(String set) {
            char only = set[0];
            return only;
        }

        // IsECMAWordChar()
        //
        // True when 'ch' falls in the ECMAWord range set; no categories are
        // consulted.
        internal static bool IsECMAWordChar(char ch) {
            String noCategories = String.Empty;
            return CharInSet(ch, ECMAWord, noCategories);
        }

        // IsWordChar()
        //
        // True when 'ch' belongs to the Word category string built by the
        // static constructor.
        internal static bool IsWordChar(char ch) {
            bool isWord = CharInCategory(ch, Word);
            return isWord;
        }

        // CharInSet()
        //
        // Membership test for a character against a set string plus category
        // string. A set string that starts with two zero characters is the
        // marker for a negated class, in which case the result is inverted.
        internal static bool CharInSet(char ch, String set, String category) {
            bool found = CharInSetInternal(ch, set, category);
            bool negated = set.Length >= 2 && set[0] == 0 && set[1] == 0;

            return negated ? !found : found;
        }

        // CharInSetInternal()
        //
        // Membership test without negation handling. Binary-searches the set
        // string for the number of boundaries at or below 'ch'; an odd count
        // means 'ch' lies inside an included range. Otherwise the category
        // string decides.
        internal static bool CharInSetInternal(char ch, string set, String category) {
            int low = 0;
            int high = set.Length;

            while (low < high) {
                int middle = (low + high) / 2;

                if (set[middle] > ch)
                    high = middle;
                else
                    low = middle + 1;
            }

            bool insideRange = (low & 0x1) != 0;

            return insideRange || CharInCategory(ch, category);
        }

        // CharInCategory()
        //
        // True when 'ch' matches any entry of the category string. Each char
        // of the string, read as a signed 16-bit value, is one of:
        //   0                - start of a category group (CharInCategoryGroup)
        //   SpaceConst       - matches when Char.IsWhiteSpace(ch)
        //   NotSpaceConst    - matches when !Char.IsWhiteSpace(ch)
        //   n > 0            - matches when ch is in UnicodeCategory (n - 1)
        //   n < 0            - matches when ch is NOT in UnicodeCategory (-n - 1)
        internal static bool CharInCategory(char ch, string category) {

            if (category.Length == 0)
                return false;

            UnicodeCategory actual = char.GetUnicodeCategory(ch);

            for (int pos = 0; pos < category.Length; pos++) {
                int code = (short) category[pos];

                if (code == 0) {
                    // zero is our marker for a group of categories - treated as a unit;
                    // the helper advances 'pos' past the group's members.
                    if (CharInCategoryGroup(ch, actual, category, ref pos))
                        return true;
                    continue;
                }

                if (code == SpaceConst) {
                    if (Char.IsWhiteSpace(ch))
                        return true;
                    continue;
                }

                if (code == NotSpaceConst) {
                    if (!Char.IsWhiteSpace(ch))
                        return true;
                    continue;
                }

                if (code > 0) {
                    // positive entry: ch must be in the category
                    if (actual == (UnicodeCategory) (code - 1))
                        return true;
                }
                else {
                    // negative entry: ch must not be in the category
                    if (actual != (UnicodeCategory) (-code - 1))
                        return true;
                }
            }

            return false;
        }

       //  CharInCategoryGroup
       //  This is used for categories which are composed of other categories - L, N, Z, W...
       //  These groups need special treatment when they are negated
        private static bool CharInCategoryGroup(char ch, UnicodeCategory chcategory, string category, ref int i) {
            // Step past the zero that opened the group. The sign of the first
            // member decides whether the whole group is positive or negated.
            i++;

            int code = (short) category[i];
            bool positive = code > 0;
            bool hit = false;

            // Always scan through to the closing zero so the caller's index
            // ends up on it, even after the answer is already known.
            while (code != 0) {
                if (!hit) {
                    int wanted = positive ? code - 1 : -code - 1;
                    hit = (chcategory == (UnicodeCategory) wanted);
                }
                i++;
                code = (short) category[i];
            }

            // positive group: any member may match; negated group: none may match
            return positive ? hit : !hit;
        }

        // NegateCategory()
        //
        // Returns a category string of the same length in which every char,
        // read as a signed 16-bit code, has been replaced by its arithmetic
        // negation. A null input yields null.
        internal static string NegateCategory(string category) {
            if (category == null)
                return null;

            char[] negated = new char[category.Length];

            for (int pos = 0; pos < negated.Length; pos++) {
                int code = (short) category[pos];
                negated[pos] = (char) (-code);
            }

            return new String(negated);
        }

        // RangeCount()
        //
        // The number of single ranges that have been accumulated so far.
        private int RangeCount() {
            // Size of the backing range list.
            int count = _rangelist.Count;
            return count;
        }

        // Range(int i)
        //
        // The ith range.
        private SingleRange Range(int i) {
            // Fetch the list entry, then cast it to the range type.
            Object entry = _rangelist[i];
            return (SingleRange) entry;
        }

        // SingleRangeComparer
        //
        // For sorting ranges; compare based on the first char in the range.
        private sealed class SingleRangeComparer : IComparer {
            // Returns -1, 0 or 1 according to how x's first char orders
            // against y's first char. Both arguments must be SingleRange.
            public int Compare(Object x, Object y) {
                char left = ((SingleRange) x)._first;
                char right = ((SingleRange) y)._first;

                if (left < right)
                    return -1;
                if (left > right)
                    return 1;
                return 0;
            }
        }

        // SingleRange
        //
        // A first/last pair representing a single range of characters.
        private sealed class SingleRange {
            // First char of the range.
            internal char _first;

            // Last char of the range.
            internal char _last;

            internal SingleRange(char first, char last) {
                this._first = first;
                this._last = last;
            }
        }

        // Canonicalize()
        //
        // Logic to reduce a character class to a unique, sorted form.
        private void Canonicalize() {
            SingleRange CurrentRange;
            int i;      // read index: next range to examine
            int j;      // write index: slot of the merged range being built
            char last;  // running upper bound of the merged range in slot j
            bool Done;  // set once the input is exhausted or last has reached Lastchar

            // Mark the class canonical, then order the ranges by their first char.
            _canonical = true;
            _rangelist.Sort(0, _rangelist.Count, new SingleRangeComparer());

            //
            // Find and eliminate overlapping or abutting ranges
            //

            if (_rangelist.Count > 1) {
                Done = false;

                for (i = 1, j = 0; ; i++) {
                    // Absorb every following range that overlaps or directly
                    // abuts the range in slot j, widening last as needed.
                    for (last = ((SingleRange)_rangelist[j])._last; ; i++) {
                        // Nothing left to read, or the merged range already
                        // extends to the highest char: this is the final range.
                        if (i == _rangelist.Count || last == Lastchar) {
                            Done = true;
                            break;
                        }

                        // A gap of at least one char before range i ends the
                        // merge. (last + 1 is evaluated as int, so it cannot wrap.)
                        if ((CurrentRange = (SingleRange)_rangelist[i])._first > last + 1)
                            break;

                        if (last < CurrentRange._last)
                            last = CurrentRange._last;
                    }

                    // Commit the merged upper bound to slot j.
                    ((SingleRange)_rangelist[j])._last = last;

                    j++;

                    if (Done)
                        break;

                    // Range i starts the next merged range; move it down into
                    // slot j if earlier merges opened a hole. The outer i++
                    // then steps past it.
                    if (j < i)
                        _rangelist[j] = _rangelist[i];
                }
                // Drop the leftover entries beyond the last merged range.
                _rangelist.RemoveRange(j, _rangelist.Count - j);
            }
        }

        //   The property table contains all the block definitions defined in the
        //   XML schema spec (http://www.w3.org/TR/2001/PR-xmlschema-2-20010316/#charcter-classes), Unicode 3.0 spec (www.unicode.org),
        //   and Perl 5.6 (see Programming Perl, 3rd edition page 167).   Three blocks defined by Perl (and here) may
        //   not be in the Unicode: IsHighPrivateUseSurrogates, IsHighSurrogates, and IsLowSurrogates.
        //
        //   In addition, there was some inconsistency in the definition of IsTibetan and IsArabicPresentationForms-B.
        //   Regex goes with the XML spec on both of these, since it seems to be (oddly enough) more correct than the Unicode spec!
        //
        //   This is what we use:
        //   IsTibetan:  0xF00 - 0x0FFF
        //   IsArabicPresentationForms-B: 0xFE70-0xFEFE
        //
        //   The Unicode spec is inconsistent for IsTibetan.  Its range is 0x0F00 - 0x0FBF.  However, it clearly defines
        //   Tibetan characters above 0x0FBF.  This appears to be an error between the 2.0 and 3.0 spec.
        //
        //   The Unicode spec is also unclear on IsArabicPresentationForms-B, defining it as 0xFE70-0xFEFF.
        //   There is only one character different here, 0xFEFF, which is a byte-order mark character and
        //   is labeled in the spec as special.  I have excluded it from IsArabicPresentationForms-B and left it in IsSpecials.
        // Has to be sorted by the first column
        private static readonly String[,] _propTable = {
            {"_xmlC", /* Name Char              */   "\u002D\u002F\u0030\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00AA\u00AB\u00B2\u00B4\u00B5\u00B6\u00B9\u00BB\u00BC\u00BF\u00C0\u00D7\u00D8\u00F7\u00F8\u01AA\u01AB\u01BB\u01BC\u01BE\u01C4\u01F6\u01FA\u0218\u0250\u02A9\u0386\u0387\u0388\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F3\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0490\u04C0\u04C1\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0561\u0588\u0660\u066A\u06F0\u06FA\u0966\u0970\u09E6\u09F0\u09F4\u09FA\u0A66\u0A70\u0AE6\u0AF0\u0B66\u0B70\u0BE7\u0BF3\u0C66\u0C70\u0CE6\u0CF0\u0D66\u0D70\u0E50\u0E5A\u0ED0\u0EDA\u0F20\u0F34\u10A0\u10C6\u10D0\u10F7\u1E00\u1E9C\u1EA0\u1EFA\u1F00\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FBD\u1FBE\u1FBF\u1FC2\u1FC5\u1FC6\u1FCD\u1FD0\u1FD4\u1FD6\u1FDC\u1FE0\u1FED\u1FF2\u1FF5\u1FF6\u1FFD\u2070\u2071\u2074"
                +"\u207A\u207F\u208A\u20A8\u20A9\u2102\u2103\u2107\u2108\u210A\u2114\u2115\u211E\u2120\u2123\u2124\u2125\u2126\u2127\u2128\u2129\u212A\u2132\u2133\u2135\u2153\u2183\u2460\u249C\u24B6\u24EB\u2776\u2794\u3007\u3008\u3021\u302A\u3280\u328A\u3372\u3375\u3376\u3377\u3385\u338A\u338D\u3391\u3399\u339F\u33A9\u33AA\u33AD\u33AE\u33B0\u33B4\u33B9\u33BA\u33BF\u33C0\u33C1\u33C2\u33C3\u33C6\u33C7\u33C8\u33C9\u33D8\u33D9\u33DE"},
            {"_xmlD",                                "\u0030\u003A\u0660\u066A\u06F0\u06FA\u0966\u0970\u09E6\u09F0\u0A66\u0A70\u0AE6\u0AF0\u0B66\u0B70\u0BE7\u0BF0\u0C66\u0C70\u0CE6\u0CF0\u0D66\u0D70\u0E50\u0E5A\u0ED0\u0EDA\u0F20\u0F2A\u2070\u2071\u2074\u207A\u2080\u208A"},
            {"_xmlI", /* Start Name Char       */   "\u003A\u003B\u0041\u005B\u005F\u0060\u0061\u007B\u00A8\u00A9\u00AA\u00AB\u00AF\u00B0\u00B4\u00B6\u00B8\u00B9\u00BA\u00BB\u00C0\u00D7\u00D8\u00F7\u00F8\u01F6\u01FA\u0218\u0250\u02A9\u02B0\u02DF\u02E0\u02EA\u0374\u0375\u037A\u037B\u0384\u0387\u0388\u038B\u038C\u038D\u038E\u03A2\u03A3\u03CF\u03D0\u03D7\u03DA\u03DB\u03DC\u03DD\u03DE\u03DF\u03E0\u03E1\u03E2\u03F4\u0401\u040D\u040E\u0450\u0451\u045D\u045E\u0482\u0490\u04C5\u04C7\u04C9\u04CB\u04CD\u04D0\u04EC\u04EE\u04F6\u04F8\u04FA\u0531\u0557\u0559\u055A\u0561\u0588\u05D0\u05EB\u05F0\u05F3\u0621\u063B\u0640\u064B\u0671\u06B8\u06BA\u06BF\u06C0\u06CF\u06D0\u06D4\u06D5\u06D6\u06E5\u06E7\u0905\u093A\u0958\u0962\u0985\u098D\u098F\u0991\u0993\u09A9\u09AA\u09B1\u09B2\u09B3\u09B6\u09BA\u09DC\u09DE\u09DF\u09E2\u09F0\u09F2\u0A05\u0A0B\u0A0F\u0A11\u0A13\u0A29\u0A2A\u0A31\u0A32\u0A34\u0A35\u0A37\u0A38\u0A3A\u0A59\u0A5D\u0A5E\u0A5F\u0A85\u0A8C\u0A8D\u0A8E\u0A8F\u0A92\u0A93\u0AA9\u0AAA\u0AB1\u0AB2\u0AB4\u0AB5\u0ABA\u0AE0\u0AE1\u0B05"
                +"\u0B0D\u0B0F\u0B11\u0B13\u0B29\u0B2A\u0B31\u0B32\u0B34\u0B36\u0B3A\u0B5C\u0B5E\u0B5F\u0B62\u0B85\u0B8B\u0B8E\u0B91\u0B92\u0B96\u0B99\u0B9B\u0B9C\u0B9D\u0B9E\u0BA0\u0BA3\u0BA5\u0BA8\u0BAB\u0BAE\u0BB6\u0BB7\u0BBA\u0C05\u0C0D\u0C0E\u0C11\u0C12\u0C29\u0C2A\u0C34\u0C35\u0C3A\u0C60\u0C62\u0C85\u0C8D\u0C8E\u0C91\u0C92\u0CA9\u0CAA\u0CB4\u0CB5\u0CBA\u0CDE\u0CDF\u0CE0\u0CE2\u0D05\u0D0D\u0D0E\u0D11\u0D12\u0D29\u0D2A\u0D3A\u0D60\u0D62\u0E01\u0E31\u0E32\u0E34\u0E40\u0E47\u0E4F\u0E50\u0E5A\u0E5C\u0E81\u0E83\u0E84\u0E85\u0E87\u0E89\u0E8A\u0E8B\u0E8D\u0E8E\u0E94\u0E98\u0E99\u0EA0\u0EA1\u0EA4\u0EA5\u0EA6\u0EA7\u0EA8\u0EAA\u0EAC\u0EAD\u0EAF\u0EB0\u0EB1\u0EB2\u0EB4\u0EBD\u0EBE\u0EC0\u0EC5\u0EDC\u0EDE\u0F18\u0F1A\u0F40\u0F48\u0F49\u0F6A\u10A0\u10C6\u10D0\u10F7\u1100\u115A\u115F\u11A3\u11A8\u11FA\u1E00\u1E9C\u1EA0\u1EFA\u1F00\u1F16\u1F18\u1F1E\u1F20\u1F46\u1F48\u1F4E\u1F50\u1F58\u1F59\u1F5A\u1F5B\u1F5C\u1F5D\u1F5E\u1F5F\u1F7E\u1F80\u1FB5\u1FB6\u1FC5\u1FC6\u1FD4\u1FD6\u1FDC\u1FDD\u1FF0"
                +"\u1FF2\u1FF5\u1FF6\u1FFF\u207F\u2080\u20A8\u20A9\u2102\u2103\u2107\u2108\u210A\u2114\u2115\u211E\u2120\u2123\u2124\u2125\u2126\u2127\u2128\u2129\u212A\u2132\u2133\u2139\u24B6\u24EA\u3041\u3095\u309B\u309F\u30A1\u30FB\u30FC\u30FF\u3105\u312D\u3131\u318F\u3192\u31A0\u3260\u327C\u328A\u32B1\u32D0\u32FF\u3300\u3358\u3371\u3377\u337B\u3395\u3399\u339F\u33A9\u33AE\u33B0\u33C2\u33C3\u33C6\u33C7\u33D8\u33D9\u33DE\u4E00\u4E01\u9FA5\u9FA6\uAC00\uAC01\uD7A3\uD7A4\uF900"},
            {"_xmlW",                                "\u0023\u0025\u0026\u0027\u002A\u002C\u0030\u003A\u003C\u003F\u0040\u005B\u005E\u007B\u007C\u007D\u007E\u007F\u00A2\u00AB\u00AC\u00AD\u00AE\u00B7\u00B8\u00BB\u00BC\u00BF\u00C0\u037E\u037F\u0387\u0388\u055A\u0560\u0589\u058A\u05BE\u05BF\u05C0\u05C1\u05C3\u05C4\u05F3\u05F5\u060C\u060D\u061B\u061C\u061F\u0620\u06D4\u06D5\u093D\u093E\u0970\u0971\u0ABD\u0ABE\u0B3D\u0B3E\u0EAF\u0EB0\u0F04\u0F13\u0F3A\u0F3E\u0F85\u0F86\u10FB\u10FC\u2000\u202F\u2030\u203D\u2045\u2047\u206A\u2070\u207D\u207F\u208D\u208F\u2329\u232B\u3000\u3004\u3005\u3007\u3008\u3012\u3014\u301D\u3030\u3031\u30FB\u30FC\uD800\uD801\uDB7F\uDB81\uDBFF\uDC01\uDFFF\uE001\uF8FF\uF900\uFD3E\uFD40\uFE30\uFE33\uFE35\uFE45\uFE50\uFE53\uFE54\uFE5F\uFE63\uFE64\uFE68\uFE69\uFE6A\uFE6B\uFEFF\uFF00\uFF01\uFF03\uFF05\uFF06\uFF07\uFF0A\uFF0C\uFF10\uFF1A\uFF1C\uFF1F\uFF20\uFF3B\uFF3E\uFF5B\uFF5C\uFF5D\uFF5E\uFF61\uFF66"},

            {"IsAlphabeticPresentationForms",       "\uFB00\uFB50"},
            {"IsArabic",                            "\u0600\u0700"},
            {"IsArabicPresentationForms-A",         "\uFB50\uFE00"},
            {"IsArabicPresentationForms-B",         "\uFE70\uFEFF"},
            {"IsArmenian",                          "\u0530\u0590"},
            {"IsArrows",                            "\u2190\u2200"},
            {"IsBasicLatin",                        "\u0000\u0080"},
            {"IsBengali",                           "\u0980\u0A00"},
            {"IsBlockElements",                     "\u2580\u25A0"},
            {"IsBopomofo",                          "\u3100\u3130"},
            {"IsBopomofoExtended",                  "\u31A0\u31C0"},
            {"IsBoxDrawing",                        "\u2500\u2580"},
            {"IsBraillePatterns",                   "\u2800\u2900"},
            {"IsCherokee",                          "\u13A0\u1400"},
            {"IsCJKCompatibility",                  "\u3300\u3400"},
            {"IsCJKCompatibilityForms",             "\uFE30\uFE50"},
            {"IsCJKCompatibilityIdeographs",        "\uF900\uFB00"},
            {"IsCJKRadicalsSupplement",             "\u2E80\u2F00"},
            {"IsCJKSymbolsandPunctuation",          "\u3000\u3040"},
            {"IsCJKUnifiedIdeographs",              "\u4E00\uA000"},
            {"IsCJKUnifiedIdeographsExtensionA",    "\u3400\u4DB6"},
            {"IsCombiningDiacriticalMarks",         "\u0300\u0370"},
            {"IsCombiningHalfMarks",                "\uFE20\uFE30"},
            {"IsCombiningMarksforSymbols",          "\u20D0\u2100"},
            {"IsControlPictures",                   "\u2400\u2440"},
            {"IsCurrencySymbols",                   "\u20A0\u20D0"},
            {"IsCyrillic",                          "\u0400\u0500"},
            {"IsDevanagari",                        "\u0900\u0980"},
            {"IsDingbats",                          "\u2700\u27C0"},
            {"IsEnclosedAlphanumerics",             "\u2460\u2500"},
            {"IsEnclosedCJKLettersandMonths",       "\u3200\u3300"},
            {"IsEthiopic",                          "\u1200\u1380"},
            {"IsGeneralPunctuation",                "\u2000\u2070"},
            {"IsGeometricShapes",                   "\u25A0\u2600"},
            {"IsGeorgian",                          "\u10A0\u1100"},
            {"IsGreek",                             "\u0370\u0400"},
            {"IsGreekExtended",                     "\u1F00\u2000"},
            {"IsGujarati",                          "\u0A80\u0B00"},
            {"IsGurmukhi",                          "\u0A00\u0A80"},
            {"IsHalfwidthandFullwidthForms",        "\uFF00\uFFF0"},
            {"IsHangulCompatibilityJamo",           "\u3130\u3190"},
            {"IsHangulJamo",                        "\u1100\u1200"},
            {"IsHangulSyllables",                   "\uAC00\uD7A4"},
            {"IsHebrew",                            "\u0590\u0600"},
            {"IsHighPrivateUseSurrogates",          "\uDB80\uDC00"},
            {"IsHighSurrogates",                    "\uD800\uDB80"},
            {"IsHiragana",                          "\u3040\u30A0"},
            {"IsIdeographicDescriptionCharacters",  "\u2FF0\u3000"},
            {"IsIPAExtensions",                     "\u0250\u02B0"},
            {"IsKanbun",                            "\u3190\u31A0"},
            {"IsKangxiRadicals",                    "\u2F00\u2FE0"},
            {"IsKannada",                           "\u0C80\u0D00"},
            {"IsKatakana",                          "\u30A0\u3100"},
            {"IsKhmer",                             "\u1780\u1800"},
            {"IsLao",                               "\u0E80\u0F00"},
            {"IsLatin-1Supplement",                 "\u0080\u0100"},
            {"IsLatinExtended-A",                   "\u0100\u0180"},
            {"IsLatinExtendedAdditional",           "\u1E00\u1F00"},
            {"IsLatinExtended-B",                   "\u0180\u0250"},
            {"IsLetterlikeSymbols",                 "\u2100\u2150"},
            {"IsLowSurrogates",                     "\uDC00\uE000"},
            {"IsMalayalam",                         "\u0D00\u0D80"},
            {"IsMathematicalOperators",             "\u2200\u2300"},
            {"IsMiscellaneousSymbols",              "\u2600\u2700"},
            {"IsMiscellaneousTechnical",            "\u2300\u2400"},
            {"IsMongolian",                         "\u1800\u18B0"},
            {"IsMyanmar",                           "\u1000\u10A0"},
            {"IsNumberForms",                       "\u2150\u2190"},
            {"IsOgham",                             "\u1680\u16A0"},
            {"IsOpticalCharacterRecognition",       "\u2440\u2460"},
            {"IsOriya",                             "\u0B00\u0B80"},
            {"IsPrivateUse",                        "\uE000\uF900"},
            {"IsRunic",                             "\u16A0\u1700"},
            {"IsSinhala",                           "\u0D80\u0E00"},
            {"IsSmallFormVariants",                 "\uFE50\uFE70"},
            {"IsSpacingModifierLetters",            "\u02B0\u0300"},
            {"IsSpecials",                          "\uFEFF\uFF00\uFFF0\uFFFE"},
            {"IsSuperscriptsandSubscripts",         "\u2070\u20A0"},
            {"IsSyriac",                            "\u0700\u0750"},
            {"IsTamil",                             "\u0B80\u0C00"},
            {"IsTelugu",                            "\u0C00\u0C80"},
            {"IsThaana",                            "\u0780\u07C0"},
            {"IsThai",                              "\u0E00\u0E80"},
            {"IsTibetan",                           "\u0F00\u1000"},
            {"IsUnifiedCanadianAboriginalSyllabics","\u1400\u1680"},
            {"IsYiRadicals",                        "\uA490\uA4D0"},
            {"IsYiSyllables",                       "\uA000\uA490"},
        };

        // SetFromProperty()
        //
        // Looks up a named property (for example the block name "IsGreek")
        // in _propTable and returns its set string, or the inverse of that
        // set string when invert is true.
        //
        //   capname  property name; must match a table key exactly
        //   invert   true to return SetInverse() of the table entry
        //   pattern  not used by the current implementation
        //
        // Throws ArgumentException when capname is not in the table
        // (including when capname is null).
        internal static String SetFromProperty(String capname, bool invert, string pattern) {
            // Keys are matched by exact ordinal equality with a straight scan.
            // The previous binary search was steered by String.Compare(a, b, false),
            // so it only found an entry when the runtime's string collation
            // reproduced the hand-sorted (linguistic, not ordinal) order of the
            // table. Lookups happen while a pattern is parsed and the table is
            // small, so a linear scan is cheap and independent of row order.
            int count = _propTable.GetLength(0);

            for (int row = 0; row < count; row++) {
                // Called on the table key (never null) so a null capname
                // simply fails to match instead of raising a null reference.
                if (_propTable[row,0].Equals(capname)) {
                    String set = _propTable[row,1];
                    return invert ? SetInverse(set): set;
                }
            }

            throw new ArgumentException("Unknown property");//XXX: SR.GetString(SR.MakeException, pattern, SR.GetString(SR.UnknownProperty, capname)), pattern);
        }

#if DBG
        // SetDescription()
        //
        // Produces a human-readable description for a set string.
        internal static String SetDescription(String set) {
            // The two degenerate sets have fixed spellings.
            if (set.Equals(Any))
                return "[^]";

            if (set.Equals(Empty))
                return "[]";

            StringBuilder text = new StringBuilder("[");
            int pos = 0;

            // A leading null char is rendered as '^' and skipped.
            if (set[0] == Nullchar) {
                text.Append('^');
                pos = 1;
            }

            // Consume the boundary chars pairwise: set[pos] opens a range and
            // the char just below set[pos + 1] closes it. An unpaired final
            // boundary runs to Lastchar.
            for (; pos < set.Length; pos += 2) {
                char lower = set[pos];
                char upper = (pos + 1 < set.Length) ? (char)(set[pos + 1] - 1) : Lastchar;

                text.Append(CharDescription(lower));

                if (upper != lower) {
                    // Two adjacent chars are written side by side, without a dash.
                    if (lower + 1 != upper)
                        text.Append('-');
                    text.Append(CharDescription(upper));
                }
            }

            text.Append(']');

            return text.ToString();
        }

        // Lowercase hexadecimal digit chars, indexed by nibble value (0-15).
        internal static readonly char [] Hex = new char [] {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};

        // CharDescription()
        //
        // Produces a human-readable description for a single character.
        internal static String CharDescription(char ch) {
            // A backslash is shown doubled.
            if (ch == '\\')
                return "\\\\";

            // Printable ASCII (space through tilde) is shown as itself.
            if (ch >= ' ' && ch <= '~')
                return ch.ToString();

            // Everything else becomes an escape: \xHH below 256, \uHHHH otherwise.
            StringBuilder text = new StringBuilder();
            int bits;

            if (ch < 256) {
                text.Append("\\x");
                bits = 8;
            }
            else {
                text.Append("\\u");
                bits = 16;
            }

            // Emit one hex digit per nibble, most significant first.
            for (int shift = bits - 4; shift >= 0; shift -= 4)
                text.Append(Hex[(ch >> shift) & 0xF]);

            return text.ToString();
        }
#endif

    }

}