// ==++== // // Copyright (c) Microsoft Corporation. All rights reserved. // // ==--== namespace System.Text { using System; using System.Collections; using System.Runtime.CompilerServices; using System.Globalization; // This abstract base class represents a character encoding. The class provides // methods to convert arrays and strings of Unicode characters to and from // arrays of bytes. A number of Encoding implementations are provided in // the System.Text package, including: // // ASCIIEncoding, which encodes Unicode characters as single 7-bit // ASCII characters. This encoding only supports character values between 0x00 // and 0x7F. // UnicodeEncoding, which encodes each Unicode character as two // consecutive bytes. Both little-endian (code page 1200) and big-endian (code // page 1201) encodings are supported. // UTF8Encoding, which encodes Unicode characters using the UTF-8 // encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This // encoding supports all Unicode character values, and can also be accessed // as code page 65001. // // In addition to directly instantiating Encoding objects, an // application can use the ForCodePage, GetASCII, // GetDefault, GetUnicode, and GetUTF8 // methods in this class to obtain encodings. // // Through an encoding, the GetBytes method is used to convert arrays // of characters to arrays of bytes, and the GetChars method is used to // convert arrays of bytes to arrays of characters. The GetBytes and // GetChars methods maintain no state between conversions, and are // generally intended for conversions of complete blocks of bytes and // characters in one operation. When the data to be converted is only available // in sequential blocks (such as data read from a stream) or when the amount of // data is so large that it needs to be divided into smaller blocks, an // application may choose to use a Decoder or an Encoder to // perform the conversion. Decoders and encoders allow sequential blocks of // data to be converted and they maintain the state required to support // conversions of data that spans adjacent blocks. Decoders and encoders are // obtained using the GetDecoder and GetEncoder methods. // // The core GetBytes and GetChars methods require the caller // to provide the destination buffer and ensure that the buffer is large enough // to hold the entire result of the conversion. When using these methods, // either directly on an Encoding object or on an associated // Decoder or Encoder, an application can use one of two methods // to allocate destination buffers. // // The GetByteCount and GetCharCount methods can be used to // compute the exact size of the result of a particular conversion, and an // appropriately sized buffer for that conversion can then be allocated. // The GetMaxByteCount and GetMaxCharCount methods can be // be used to compute the maximum possible size of a conversion of a given // number of bytes or characters, and a buffer of that size can then be reused // for multiple conversions. // // The first method generally uses less memory, whereas the second method // generally executes faster. // //| public abstract class Encoding { private static Encoding unicodeEncoding; private static Encoding bigEndianUnicode; private static Encoding utf8Encoding; private static Encoding asciiEncoding; private static Hashtable encodings; internal int m_codePage; // Useful for Encodings whose GetPreamble method must return an // empty byte array. internal static readonly byte[] emptyByteArray = new byte[0]; //| protected Encoding() : this(0) { } //| protected Encoding(int codePage) { if (codePage < 0) { throw new ArgumentOutOfRangeException("codePage"); } m_codePage = codePage; } // Converts a byte array from one encoding to another. The bytes in the // bytes array are converted from srcEncoding to // dstEncoding, and the returned value is a new byte array // containing the result of the conversion. // //| public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding, byte[] bytes) { if (bytes == null) throw new ArgumentNullException("bytes"); return Convert(srcEncoding, dstEncoding, bytes, 0, bytes.Length); } // Converts a range of bytes in a byte array from one encoding to another. // This method converts count bytes from bytes starting at // index index from srcEncoding to dstEncoding, and // returns a new byte array containing the result of the conversion. // //| public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding, byte[] bytes, int index, int count) { if (srcEncoding == null || dstEncoding == null) { throw new ArgumentNullException((srcEncoding == null ? "srcEncoding" : "dstEncoding"), "ArgumentNull_Array"); } if (bytes == null) { throw new ArgumentNullException("bytes", "ArgumentNull_Array"); } return dstEncoding.GetBytes(srcEncoding.GetChars(bytes, index, count)); } //| public virtual byte[] GetPreamble() { return emptyByteArray; } // Returns an encoding for the ASCII character set. The returned encoding // will be an instance of the ASCIIEncoding class. // //| public static Encoding ASCII { get { if (asciiEncoding == null) asciiEncoding = new ASCIIEncoding(); return asciiEncoding; } } // Returns the number of bytes required to encode the given character // array. // //| public virtual int GetByteCount(char[] chars) { if (chars == null) { throw new ArgumentNullException("chars", "ArgumentNull_Array"); } return GetByteCount(chars, 0, chars.Length); } //| public virtual int GetByteCount(String s) { if (s == null) throw new ArgumentNullException("s"); char[] chars = s.ToCharArray(); return GetByteCount(chars, 0, chars.Length); } // Returns the number of bytes required to encode a range of characters in // a character array. // //| public abstract int GetByteCount(char[] chars, int index, int count); // Returns a byte array containing the encoded representation of the given // character array. // //| public virtual byte[] GetBytes(char[] chars) { if (chars == null) { throw new ArgumentNullException("chars", "ArgumentNull_Array"); } return GetBytes(chars, 0, chars.Length); } // Returns a byte array containing the encoded representation of a range // of characters in a character array. // //| public virtual byte[] GetBytes(char[] chars, int index, int count) { byte[] result = new byte[GetByteCount(chars, index, count)]; GetBytes(chars, index, count, result, 0); return result; } // Encodes a range of characters in a character array into a range of bytes // in a byte array. An exception occurs if the byte array is not large // enough to hold the complete encoding of the characters. The // GetByteCount method can be used to determine the exact number of // bytes that will be produced for a given range of characters. // Alternatively, the GetMaxByteCount method can be used to // determine the maximum number of bytes that will be produced for a given // number of characters, regardless of the actual character values. // //| public abstract int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex); // Returns a byte array containing the encoded representation of the given // string. // //| public virtual byte[] GetBytes(String s) { if (s == null) { throw new ArgumentNullException("s", "ArgumentNull_String"); } char[] chars = s.ToCharArray(); return GetBytes(chars, 0, chars.Length); } //| public virtual int GetBytes(String s, int charIndex, int charCount, byte[] bytes, int byteIndex) { if (s == null) throw new ArgumentNullException("s"); return GetBytes(s.ToCharArray(), charIndex, charCount, bytes, byteIndex); } // Returns the number of characters produced by decoding the given byte // array. // //| public virtual int GetCharCount(byte[] bytes) { if (bytes == null) { throw new ArgumentNullException("bytes", "ArgumentNull_Array"); } return GetCharCount(bytes, 0, bytes.Length); } // Returns the number of characters produced by decoding a range of bytes // in a byte array. // //| public abstract int GetCharCount(byte[] bytes, int index, int count); // Returns a character array containing the decoded representation of a // given byte array. // //| public virtual char[] GetChars(byte[] bytes) { if (bytes == null) { throw new ArgumentNullException("bytes", "ArgumentNull_Array"); } return GetChars(bytes, 0, bytes.Length); } // Returns a character array containing the decoded representation of a // range of bytes in a byte array. // //| public virtual char[] GetChars(byte[] bytes, int index, int count) { char[] result = new char[GetCharCount(bytes, index, count)]; GetChars(bytes, index, count, result, 0); return result; } // Decodes a range of bytes in a byte array into a range of characters in a // character array. An exception occurs if the character array is not large // enough to hold the complete decoding of the bytes. The // GetCharCount method can be used to determine the exact number of // characters that will be produced for a given range of bytes. // Alternatively, the GetMaxCharCount method can be used to // determine the maximum number of characters that will be produced for a // given number of bytes, regardless of the actual byte values. // //| public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex); // Returns a Decoder object for this encoding. The returned object // can be used to decode a sequence of bytes into a sequence of characters. // Contrary to the GetChars family of methods, a Decoder can // convert partial sequences of bytes into partial sequences of characters // by maintaining the appropriate state between the conversions. // // This default implementation returns a Decoder that simply // forwards calls to the GetCharCount and GetChars methods to // the corresponding methods of this encoding. Encodings that require state // to be maintained between successive conversions should override this // method and return an instance of an appropriate Decoder // implementation. // //| public virtual Decoder GetDecoder() { return new DefaultDecoder(this); } // Returns an encoding for the system's current ANSI code page. // Singularity always defaults to UTF8 encoding. // //| public static Encoding Default { get { return UTF8; } } // Returns an Encoder object for this encoding. The returned object // can be used to encode a sequence of characters into a sequence of bytes. // Contrary to the GetBytes family of methods, an Encoder can // convert partial sequences of characters into partial sequences of bytes // by maintaining the appropriate state between the conversions. // // This default implementation returns an Encoder that simply // forwards calls to the GetByteCount and GetBytes methods to // the corresponding methods of this encoding. Encodings that require state // to be maintained between successive conversions should override this // method and return an instance of an appropriate Encoder // implementation. // //| public virtual Encoder GetEncoder() { return new DefaultEncoder(this); } // Returns the maximum number of bytes required to encode a given number of // characters. This method can be used to determine an appropriate buffer // size for byte arrays passed to the GetBytes method of this // encoding or the GetBytes method of an Encoder for this // encoding. All encodings must guarantee that no buffer overflow // exceptions will occur if buffers are sized according to the results of // this method. // //| public abstract int GetMaxByteCount(int charCount); // Returns the maximum number of characters produced by decoding a given // number of bytes. This method can be used to determine an appropriate // buffer size for character arrays passed to the GetChars method of // this encoding or the GetChars method of a Decoder for this // encoding. All encodings must guarantee that no buffer overflow // exceptions will occur if buffers are sized according to the results of // this method. // //| public abstract int GetMaxCharCount(int byteCount); // Returns a string containing the decoded representation of a given byte // array. // //| public virtual String GetString(byte[] bytes) { if (bytes == null) { throw new ArgumentNullException("bytes", "ArgumentNull_Array"); } return new String(GetChars(bytes)); } // Returns a string containing the decoded representation of a range of // bytes in a byte array. // //| public virtual String GetString(byte[] bytes, int index, int count) { return new String(GetChars(bytes, index, count)); } // Returns an encoding for Unicode format. The returned encoding will be // an instance of the UnicodeEncoding class. // // It will use little endian byte order, but will detect // input in big endian if it finds a byte order mark // (see Unicode 2.0 spec). // //| public static Encoding Unicode { get { if (unicodeEncoding == null) unicodeEncoding = new UnicodeEncoding(false, true); return unicodeEncoding; } } // Returns an encoding for Unicode format. The returned encoding will be // an instance of the UnicodeEncoding class. // // It will use big endian byte order, but will detect // input in little endian if it finds a byte order mark // (see Unicode 2.0 spec). // //| public static Encoding BigEndianUnicode { get { if (bigEndianUnicode == null) bigEndianUnicode = new UnicodeEncoding(true, true); return bigEndianUnicode; } } // Returns an encoding for the UTF-8 format. The returned encoding will be // an instance of the UTF8Encoding class. // //| public static Encoding UTF8 { get { if (utf8Encoding == null) utf8Encoding = new UTF8Encoding(true); return utf8Encoding; } } //| public override bool Equals(Object value) { Encoding enc = value as Encoding; if (enc != null) return (m_codePage == enc.m_codePage); return (false); } //| public override int GetHashCode() { return m_codePage; } // Default decoder implementation. The GetCharCount and // GetChars methods simply forward to the corresponding methods of // the encoding. This implementation is appropriate for encodings that do // not require state to be maintained between successive conversions. private class DefaultDecoder : Decoder { private Encoding encoding; public DefaultDecoder(Encoding encoding) { this.encoding = encoding; } public override int GetCharCount(byte[] bytes, int index, int count) { return encoding.GetCharCount(bytes, index, count); } public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex) { return encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex); } } // Default encoder implementation. The GetByteCount and // GetBytes methods simply forward to the corresponding methods of // the encoding. This implementation is appropriate for encodings that do // not require state to be maintained between successive conversions. private class DefaultEncoder : Encoder { private Encoding encoding; public DefaultEncoder(Encoding encoding) { this.encoding = encoding; } public override int GetByteCount(char[] chars, int index, int count, bool flush) { return encoding.GetByteCount(chars, index, count); } public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, bool flush) { return encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex); } } } }