singrdk/base/Libraries/System.IO/StreamReader.cs

569 lines
25 KiB
C#
Raw Permalink Normal View History

2008-03-05 09:52:00 -05:00
// ==++==
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// ==--==
2008-11-17 18:29:00 -05:00
//============================================================
//
// Class: StreamReader
//
// Purpose: For reading text from streams in a particular
// encoding.
//
//===========================================================
2008-03-05 09:52:00 -05:00
using System;
using System.Diagnostics;
using System.Text;
using System.Runtime.InteropServices;
2008-11-17 18:29:00 -05:00
namespace System.IO
{
2008-03-05 09:52:00 -05:00
// This class implements a TextReader for reading characters from a Stream.
// It is designed for character input in a particular Encoding,
// whereas the Stream class is designed for byte input and output.
//
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader"]/*' />
public class StreamReader : TextReader
{
#if DONT
// This declaration is compiled only when the DONT symbol is defined;
// otherwise StreamReader declares no Null field of its own.
// Note StreamReader.Null is threadsafe.
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.Null"]/*' />
public new static readonly StreamReader Null = new NullStreamReader();
#endif
// Using a 1K byte buffer and a 4K FileStream buffer works out pretty well
// perf-wise. On even a 40 MB text file, any perf loss by using a 4K
// buffer is negated by the win of allocating a smaller byte[], which
// saves construction time. This does break the adaptive buffering,
// but that shouldn't be a problem since this is slightly faster. The
// web services guys will benefit here the most.
internal const int DefaultBufferSize = 1024; // Byte buffer size
// Buffer size passed to the FileStream opened by the path-based constructors.
private const int DefaultFileStreamBufferSize = 4096;
// Init raises any smaller requested buffer size up to this value.
private const int MinBufferSize = 128;
// Underlying stream. Dispose sets this to null, and the public read
// methods test it for null to detect a closed reader.
private Stream stream;
// Encoding currently used for decoding; DetectEncoding may replace it.
private Encoding encoding;
// Decoder obtained from the current encoding.
private Decoder decoder;
// Bytes returned by the most recent stream.Read call.
private byte[] byteBuffer;
// Decoded characters that have not yet been handed to the caller.
private char[] charBuffer;
private byte[] _preamble; // Encoding's preamble, which identifies this encoding.
// Index into charBuffer of the next character to return.
private int charPos;
// Number of valid characters in charBuffer.
private int charLen;
// Record the number of valid bytes in the byteBuffer, for a few checks.
private int byteLen;
// This is the maximum number of chars we can get from one call to
// ReadBuffer. Used so ReadBuffer can tell when to copy data into
// a user's char[] directly, instead of our internal char[].
private int _maxCharsPerBuffer;
// We will support looking for byte order marks in the stream and trying
// to decide what the encoding might be from the byte order marks, IF they
// exist. But that's all we'll do. Note this is fragile.
private bool _detectEncoding;
// Whether we must still check for the encoding's given preamble at the
// beginning of this file.
private bool _checkPreamble;
// Whether the stream is most likely not going to give us back as much
// data as we want the next time we call it. We must do the computation
// before we do any byte order mark handling and save the result. Note that
// we need this to allow people to handle streams where they block waiting
// for you to send a response, like logging in on a Unix machine.
private bool _isBlocked;
// Parameterless constructor that performs no initialization. Within this
// file it is only reached as the implicit base constructor of
// NullStreamReader.
[Microsoft.Contracts.NotDelayed]
internal StreamReader()
{
}
// Convenience overload: decodes the stream as UTF-8 with byte-order-mark
// detection enabled and the default buffer size.
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.StreamReader"]/*' />
[Microsoft.Contracts.NotDelayed]
public StreamReader(Stream stream)
    : this(stream, Encoding.UTF8, true, DefaultBufferSize)
{
}
// Convenience overload: decodes the stream as UTF-8 with the default
// buffer size; the caller chooses whether byte order marks are inspected.
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.StreamReader8"]/*' />
[Microsoft.Contracts.NotDelayed]
public StreamReader(Stream stream, bool detectEncodingFromByteOrderMarks)
    : this(stream, Encoding.UTF8, detectEncodingFromByteOrderMarks, DefaultBufferSize)
{
}
// Convenience overload: decodes the stream with the given encoding, with
// byte-order-mark detection enabled and the default buffer size.
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.StreamReader1"]/*' />
[Microsoft.Contracts.NotDelayed]
public StreamReader(Stream stream, Encoding encoding)
    : this(stream, encoding, true, DefaultBufferSize)
{
}
// Convenience overload: decodes the stream with the given encoding and
// detection setting, using the default buffer size.
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.StreamReader2"]/*' />
[Microsoft.Contracts.NotDelayed]
public StreamReader(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks)
    : this(stream, encoding, detectEncodingFromByteOrderMarks, DefaultBufferSize)
{
}
// Creates a new StreamReader over the given stream. Text is decoded with
// the supplied encoding, and bufferSize is the requested size of the
// internal byte buffer (Init raises values below MinBufferSize).
//
// Note that detectEncodingFromByteOrderMarks is a very loose attempt at
// detecting the encoding by looking at the leading bytes of the stream
// for a byte order mark. It will recognize UTF-8, little endian unicode,
// and big endian unicode text, but that's it. If none of those three
// match, the Encoding you provided is used.
//
// Throws ArgumentNullException for a null stream or encoding,
// ArgumentException when the stream cannot be read, and
// ArgumentOutOfRangeException when bufferSize is not positive.
//
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.StreamReader3"]/*' />
[Microsoft.Contracts.NotDelayed]
public StreamReader(Stream stream, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
{
    // The stream is validated before the encoding, so a null stream is
    // the one reported when both arguments are null.
    if (stream == null)
        throw new ArgumentNullException("stream");
    if (encoding == null)
        throw new ArgumentNullException("encoding");
    if (!stream.CanRead)
        throw new ArgumentException("Argument_StreamNotReadable");
    if (bufferSize <= 0)
        throw new ArgumentOutOfRangeException("bufferSize", "ArgumentOutOfRange_NeedPosNum");

    Init(stream, encoding, detectEncodingFromByteOrderMarks, bufferSize);
}
// Convenience overload: opens the file at path and decodes it as UTF-8
// with byte-order-mark detection enabled and the default buffer size.
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.StreamReader4"]/*' />
[Microsoft.Contracts.NotDelayed]
public StreamReader(String path)
    : this(path, Encoding.UTF8, true, DefaultBufferSize)
{
}
// Convenience overload: opens the file at path and decodes it as UTF-8
// with the default buffer size; the caller chooses whether byte order
// marks are inspected.
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.StreamReader9"]/*' />
[Microsoft.Contracts.NotDelayed]
public StreamReader(String path, bool detectEncodingFromByteOrderMarks)
    : this(path, Encoding.UTF8, detectEncodingFromByteOrderMarks, DefaultBufferSize)
{
}
// Convenience overload: opens the file at path and decodes it with the
// given encoding, with byte-order-mark detection enabled and the default
// buffer size.
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.StreamReader5"]/*' />
[Microsoft.Contracts.NotDelayed]
public StreamReader(String path, Encoding encoding)
    : this(path, encoding, true, DefaultBufferSize)
{
}
// Convenience overload: opens the file at path and decodes it with the
// given encoding and detection setting, using the default buffer size.
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.StreamReader6"]/*' />
[Microsoft.Contracts.NotDelayed]
public StreamReader(String path, Encoding encoding, bool detectEncodingFromByteOrderMarks)
    : this(path, encoding, detectEncodingFromByteOrderMarks, DefaultBufferSize)
{
}
// Opens the file at path for shared reading and creates a StreamReader
// over it. bufferSize is the requested size of the reader's byte buffer;
// the FileStream itself always uses DefaultFileStreamBufferSize.
//
// Throws ArgumentNullException for a null path or encoding,
// ArgumentException for an empty path, and ArgumentOutOfRangeException
// when bufferSize is not positive. Anything thrown while opening the
// file or initializing the reader propagates to the caller.
//
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.StreamReader7"]/*' />
[Microsoft.Contracts.NotDelayed]
public StreamReader(String path, Encoding encoding, bool detectEncodingFromByteOrderMarks, int bufferSize)
{
    // Don't open a Stream before checking for invalid arguments,
    // or we'll create a FileStream on disk and we won't close it until
    // the finalizer runs, causing problems for applications.
    if (path == null || encoding == null)
        throw new ArgumentNullException((path==null ? "path" : "encoding"));
    if (path.Length == 0)
        throw new ArgumentException("Argument_EmptyPath");
    if (bufferSize <= 0)
        throw new ArgumentOutOfRangeException("bufferSize", "ArgumentOutOfRange_NeedPosNum");

    Stream stream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read, DefaultFileStreamBufferSize);

    // Init allocates the buffers and calls into the encoding, either of
    // which can throw (for example on a very large bufferSize). The caller
    // never receives the reader in that case, so close the file here
    // rather than leaving it open until a finalizer runs.
    bool initialized = false;
    try {
        Init(stream, encoding, detectEncodingFromByteOrderMarks, bufferSize);
        initialized = true;
    }
    finally {
        if (!initialized)
            stream.Close();
    }
}
// Shared constructor body: records the stream and encoding, allocates the
// byte and char buffers, and resets the detection/preamble state.
// A bufferSize below MinBufferSize is raised to MinBufferSize.
private void Init(Stream stream, Encoding! encoding, bool detectEncodingFromByteOrderMarks, int bufferSize) {
    this.stream = stream;
    this.encoding = encoding;
    this.decoder = encoding.GetDecoder();

    int byteCapacity = (bufferSize < MinBufferSize) ? MinBufferSize : bufferSize;
    this.byteBuffer = new byte[byteCapacity];

    // Size the char buffer for the most characters a full byte buffer
    // can decode to.
    this._maxCharsPerBuffer = encoding.GetMaxCharCount(byteCapacity);
    this.charBuffer = new char[this._maxCharsPerBuffer];
    this.byteLen = 0;

    this._detectEncoding = detectEncodingFromByteOrderMarks;
    this._preamble = encoding.GetPreamble();
    // Only encodings that actually have a preamble need the check.
    this._checkPreamble = (this._preamble.Length > 0);
    this._isBlocked = false;
}
// Closes the reader by running the disposing path of Dispose.
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.Close"]/*' />
public override void Close()
{
    this.Dispose(true);
}
// Releases the reader. When disposing is true the underlying stream is
// closed as well. In every case the reader's references and buffer
// positions are cleared and the base class is disposed, even if closing
// the stream throws; that exception still propagates to the caller.
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.Dispose"]/*' />
protected override void Dispose(bool disposing)
{
    try {
        if (disposing && stream != null)
            stream.Close();
    }
    finally {
        // Runs even when Close() fails so the reader is never left
        // half-disposed with a non-null stream and live buffers.
        if (stream != null) {
            stream = null;
            encoding = null;
            decoder = null;
            byteBuffer = null;
            charBuffer = null;
            charPos = 0;
            charLen = 0;
        }
        base.Dispose(disposing);
    }
}
// The encoding currently used for decoding. This can differ from the
// constructor argument once DetectEncoding has recognized a byte order
// mark, and it is null after Dispose.
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.CurrentEncoding"]/*' />
public virtual Encoding CurrentEncoding
{
    get
    {
        return this.encoding;
    }
}
// The stream this reader reads from; null after Dispose.
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.BaseStream"]/*' />
public virtual Stream BaseStream
{
    get
    {
        return this.stream;
    }
}
// Throws away everything the reader has buffered: the byte count, the
// pending characters, any partial decoder state (a fresh decoder is
// requested from the current encoding) and the blocked flag.
//
// Intended for callers that seek the underlying stream to a known
// position and want reading to resume from there, for example to re-read
// content a second time. Use it sparingly, since discarding buffered
// data can lead to very poor performance.
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.DiscardBufferedData"]/*' />
public void DiscardBufferedData()
{
    // Chained assignment runs right to left: byteLen, charLen, charPos.
    charPos = charLen = byteLen = 0;
    decoder = encoding.GetDecoder();
    _isBlocked = false;
}
// Returns the next character without consuming it, or -1 when none is
// available. If the buffer is empty and the previous stream read came
// back short (_isBlocked), -1 is returned without issuing another read.
// A closed reader is reported through __Error.ReaderClosed().
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.Peek"]/*' />
public override int Peek()
{
    if (stream == null)
        __Error.ReaderClosed();

    bool bufferExhausted = (charPos == charLen);
    if (bufferExhausted && (_isBlocked || ReadBuffer() == 0))
        return -1;

    return charBuffer[charPos];
}
// Returns the next character and advances past it, refilling the buffer
// from the stream when it is empty. Returns -1 once the refill yields no
// characters. A closed reader is reported through __Error.ReaderClosed().
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.Read"]/*' />
public override int Read()
{
    if (stream == null)
        __Error.ReaderClosed();

    if (charPos == charLen && ReadBuffer() == 0)
        return -1;

    int next = charBuffer[charPos];
    charPos++;
    return next;
}
// Reads up to count characters into buffer starting at index and returns
// how many were stored; 0 means no characters could be read (end of
// stream, or count was 0).
//
// Throws ArgumentNullException for a null buffer,
// ArgumentOutOfRangeException for a negative index or count, and
// ArgumentException when index + count runs past the end of buffer.
// A closed reader is reported through __Error.ReaderClosed().
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.Read1"]/*' />
public override int Read([In, Out] char[] buffer, int index, int count)
{
    if (stream == null)
        __Error.ReaderClosed();
    if (buffer == null)
        throw new ArgumentNullException("buffer", "ArgumentNull_Buffer");
    if (index < 0)
        throw new ArgumentOutOfRangeException("index", "ArgumentOutOfRange_NeedNonNegNum");
    if (count < 0)
        throw new ArgumentOutOfRangeException("count", "ArgumentOutOfRange_NeedNonNegNum");
    if (count > buffer.Length - index)
        throw new ArgumentException("Argument_InvalidOffLen");

    const int BytesPerChar = 2; // Buffer.BlockCopy works in byte offsets.
    int total = 0;
    // Set by ReadBuffer when it decoded straight into the caller's array,
    // in which case there is nothing left to copy.
    bool decodedIntoCallerBuffer = false;

    while (count > 0) {
        int available = charLen - charPos;
        if (available == 0) {
            available = ReadBuffer(buffer, index + total, count, out decodedIntoCallerBuffer);
            if (available == 0)
                break; // We're at EOF
        }

        int take = (available > count) ? count : available;
        if (!decodedIntoCallerBuffer) {
            Buffer.BlockCopy(charBuffer, charPos * BytesPerChar,
                             buffer, (index + total) * BytesPerChar,
                             take * BytesPerChar);
            charPos += take;
        }
        total += take;
        count -= take;

        // Don't block for an indefinite amount of time, or reading from
        // a network stream won't work right: if the last stream read
        // came back short, return what we have so far.
        if (_isBlocked)
            break;
    }
    return total;
}
// Reads everything from the current position to the end of the stream
// and returns it as a single string. A closed reader is reported through
// __Error.ReaderClosed().
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.ReadToEnd"]/*' />
public override String ReadToEnd()
{
    if (stream == null)
        __Error.ReaderClosed();

    // The scratch array is as large as the reader's own char buffer so
    // that Read(char[], int, int) can take its decode-directly-into-the-
    // caller's-buffer fast path.
    char[] chunk = new char[charBuffer.Length];
    StringBuilder text = new StringBuilder(charBuffer.Length);
    for (;;) {
        int got = Read(chunk, 0, chunk.Length);
        if (got == 0)
            break;
        text.Append(chunk, 0, got);
    }
    return text.ToString();
}
// Drops the first n bytes of byteBuffer by sliding the remaining valid
// bytes down to offset 0 and shrinking byteLen accordingly.
private void CompressBuffer(int n)
{
    int remaining = byteLen - n;
    Buffer.BlockCopy(byteBuffer, n, byteBuffer, 0, remaining);
    byteLen = remaining;
}
// Returns true when buffer begins with every byte of compareTo, in order.
// buffer is expected to be at least as long as compareTo; that is only
// verified by a debug assertion.
private static bool BytesMatch(byte[]! buffer, byte[]! compareTo)
{
    Debug.Assert(buffer.Length >= compareTo.Length, "Your Encoding's Preamble array is pretty darn huge!");
    int index = 0;
    while (index < compareTo.Length) {
        if (buffer[index] != compareTo[index])
            return false;
        index++;
    }
    return true;
}
// Looks for a byte order mark at the start of byteBuffer. When one is
// found, the reader switches to the matching encoding, takes a new
// decoder from it, strips the mark from the buffer and resizes the char
// buffer for the new encoding. Does nothing with fewer than 2 bytes.
private void DetectEncoding()
{
    if (byteLen < 2)
        return;

    _detectEncoding = false;

    byte first = byteBuffer[0];
    byte second = byteBuffer[1];
    Encoding detected = null;
    int markLength = 0;

    if (first == 0xFE && second == 0xFF) {
        // Big Endian Unicode
        detected = new UnicodeEncoding(true, true);
        markLength = 2;
    }
    else if (first == 0xFF && second == 0xFE) {
        // Little Endian Unicode
        detected = new UnicodeEncoding(false, true);
        markLength = 2;
    }
    else if (byteLen >= 3 && first == 0xEF && second == 0xBB && byteBuffer[2] == 0xBF) {
        // UTF-8
        detected = Encoding.UTF8;
        markLength = 3;
    }
    else if (byteLen == 2) {
        // Only two bytes so far: the three-byte UTF-8 mark cannot be
        // ruled out yet, so leave detection enabled for a later call.
        _detectEncoding = true;
    }

    // Note: in the future, if we change this algorithm significantly,
    // we can support checking for the preamble of the given encoding.
    if (detected != null) {
        encoding = detected;
        decoder = detected.GetDecoder();
        CompressBuffer(markLength);
        _maxCharsPerBuffer = detected.GetMaxCharCount(byteBuffer.Length);
        charBuffer = new char[_maxCharsPerBuffer];
    }
}
// Refills the internal char buffer from the stream and returns the number
// of characters now available; 0 means the stream is at its end. Any
// characters previously held in the buffer are discarded.
private int ReadBuffer()
{
    charPos = 0;
    charLen = 0;
    byteLen = 0;

    // charLen was just reset, so the body always runs at least once. It
    // repeats while the bytes read so far decoded to no characters.
    while (charLen == 0) {
        int bytesRead = stream.Read(byteBuffer, 0, byteBuffer.Length);
        byteLen = bytesRead;
        if (bytesRead == 0) // We're at EOF
            return charLen;

        // _isBlocked == whether we read fewer bytes than we asked for.
        // This must be recorded here, because CompressBuffer (called
        // directly below or from DetectEncoding) changes byteLen.
        _isBlocked = (bytesRead < byteBuffer.Length);

        // Strip the configured encoding's own preamble first; a match
        // also turns off byte-order-mark detection.
        bool preambleCheckDue = _checkPreamble && byteLen >= _preamble.Length;
        if (preambleCheckDue) {
            _checkPreamble = false;
            if (BytesMatch(byteBuffer, _preamble)) {
                _detectEncoding = false;
                CompressBuffer(_preamble.Length);
            }
        }

        // If we're supposed to detect the encoding and haven't done so
        // yet, do it. Note this may need to be called more than once.
        if (_detectEncoding && byteLen >= 2)
            DetectEncoding();

        charLen += decoder.GetChars(byteBuffer, 0, byteLen, charBuffer, charLen);
    }
    return charLen;
}
// This version has a perf optimization to decode data DIRECTLY into the
// user's buffer, bypassing StreamReader's own buffer.
// This gives a > 20% perf improvement for our encodings across the board,
// but only when asking for at least the number of characters that one
// buffer's worth of bytes could produce.
// This optimization, if run, will break SwitchEncoding, so we must not do
// this on the first call to ReadBuffer.
//
// userBuffer/userOffset: where decoded characters go on the direct path.
// desiredChars: how many characters the caller still wants.
// readToUserBuffer: set to true when the characters were written into
//   userBuffer, false when they were left in the internal charBuffer.
// Returns the number of characters decoded; 0 means end of stream.
private int ReadBuffer(char[] userBuffer, int userOffset, int desiredChars, out bool readToUserBuffer) {
    charLen = 0;
    byteLen = 0;
    charPos = 0;
    int charsRead = 0;
    // As a perf optimization, we can decode characters DIRECTLY into a
    // user's char[]. We absolutely must not write more characters
    // into the user's buffer than they asked for. Calculating
    // encoding.GetMaxCharCount(byteLen) each time is potentially very
    // expensive - instead, cache the number of chars a full buffer's
    // worth of data may produce. Yes, this makes the perf optimization
    // less aggressive, in that all reads that asked for fewer than AND
    // returned fewer than _maxCharsPerBuffer chars won't get the user
    // buffer optimization. This affects reads where the end of the
    // Stream comes in the middle somewhere, and when you ask for
    // fewer chars than your buffer could produce.
    readToUserBuffer = desiredChars >= _maxCharsPerBuffer;
    do {
        byteLen = stream.Read(byteBuffer, 0, byteBuffer.Length);
        if (byteLen == 0) // EOF
            return charsRead;
        // _isBlocked == whether we read fewer bytes than we asked for.
        // Note we must check it here because CompressBuffer or
        // DetectEncoding will change byteLen.
        _isBlocked = (byteLen < byteBuffer.Length);
        // Check for the configured encoding's preamble BEFORE detecting
        // an encoding, exactly as the parameterless ReadBuffer does. A
        // matching preamble disables detection, so the caller-supplied
        // Encoding instance is kept and only one mark is stripped from
        // the data, whichever Read overload touches the stream first.
        if (_checkPreamble && byteLen >= _preamble.Length) {
            _checkPreamble = false;
            if (BytesMatch(byteBuffer, _preamble)) {
                _detectEncoding = false;
                CompressBuffer(_preamble.Length);
            }
        }
        // On the first call to ReadBuffer, if we're supposed to detect the encoding, do it.
        if (_detectEncoding && byteLen >= 2) {
            DetectEncoding();
            // DetectEncoding may change _maxCharsPerBuffer. Recompute this.
            readToUserBuffer = desiredChars >= _maxCharsPerBuffer;
        }
        charPos = 0;
        if (readToUserBuffer) {
            charsRead += decoder.GetChars(byteBuffer, 0, byteLen, userBuffer, userOffset + charsRead);
            charLen = 0; // StreamReader's buffer is empty.
        }
        else {
            charsRead = decoder.GetChars(byteBuffer, 0, byteLen, charBuffer, charsRead);
            charLen += charsRead; // Number of chars in StreamReader's buffer.
        }
    } while (charsRead == 0);
    return charsRead;
}
// Reads one line of text. A line ends at a carriage return ('\r'), a
// line feed ('\n'), or a carriage return immediately followed by a line
// feed; the terminator is consumed but not included in the result. Text
// remaining at the end of the stream without a terminator is returned as
// a final line. Returns null when no characters are left to read.
// A closed reader is reported through __Error.ReaderClosed().
//
//| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.ReadLine"]/*' />
public override String ReadLine()
{
    if (stream == null)
        __Error.ReaderClosed();
    if (charPos == charLen && ReadBuffer() == 0)
        return null;

    // Collects the pieces of a line that spans more than one buffer;
    // stays null while the line fits in the current buffer.
    StringBuilder pending = null;
    do {
        for (int scan = charPos; scan < charLen; scan++) {
            char current = charBuffer[scan];
            // Note the following common line feed chars:
            // \n - UNIX \r\n - DOS \r - Mac
            if (current != '\r' && current != '\n')
                continue;

            int segmentLength = scan - charPos;
            String line;
            if (pending == null) {
                line = new String(charBuffer, charPos, segmentLength);
            }
            else {
                pending.Append(charBuffer, charPos, segmentLength);
                line = pending.ToString();
            }

            charPos = scan + 1;
            // After '\r', swallow an immediately following '\n'. That
            // may require refilling the buffer to see the next char.
            if (current == '\r' && (charPos < charLen || ReadBuffer() > 0)) {
                if (charBuffer[charPos] == '\n')
                    charPos++;
            }
            return line;
        }

        // No terminator in this buffer: stash what we have and refill.
        int remaining = charLen - charPos;
        if (pending == null)
            pending = new StringBuilder(remaining + 80);
        pending.Append(charBuffer, charPos, remaining);
    } while (ReadBuffer() > 0);

    return pending.ToString();
}
// A StreamReader with nothing to read: every read reports end of input.
// It holds no data of its own (so it doesn't need to be serializable)
// and every override returns a constant.
private class NullStreamReader : StreamReader
{
    [Microsoft.Contracts.NotDelayed]
    public NullStreamReader()
    {
    }

    // Always Stream.Null.
    public override Stream BaseStream
    {
        get
        {
            return Stream.Null;
        }
    }

    // Always Encoding.Unicode.
    public override Encoding CurrentEncoding
    {
        get
        {
            return Encoding.Unicode;
        }
    }

    // There is never a character to peek at.
    public override int Peek()
    {
        return -1;
    }

    // There is never a character to read.
    public override int Read()
    {
        return -1;
    }

    // Stores nothing in buffer and reports zero characters read.
    //| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.NullStreamReader.Read"]/*' />
    public override int Read(char[] buffer, int index, int count)
    {
        return 0;
    }

    // There is never a line to read.
    //| <include file='doc\StreamReader.uex' path='docs/doc[@for="StreamReader.NullStreamReader.ReadLine"]/*' />
    public override String ReadLine()
    {
        return null;
    }

    // The remaining content is always the empty string.
    public override String ReadToEnd()
    {
        return String.Empty;
    }
}
}
}