369 lines
11 KiB
Plaintext
369 lines
11 KiB
Plaintext
|
///////////////////////////////////////////////////////////////////////////////
|
||
|
//
|
||
|
// Microsoft Research Singularity
|
||
|
//
|
||
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||
|
//
|
||
|
// Note: Anti-virus patterns.
|
||
|
//
|
||
|
|
||
|
using System;
|
||
|
using System.Collections;
|
||
|
using System.Diagnostics;
|
||
|
using System.IO;
|
||
|
using System.Text;
|
||
|
|
||
|
namespace Microsoft.Singularity.Email
|
||
|
{
|
||
|
// Patterns
|
||
|
//
|
||
|
// A pattern is a restricted regular expression that can be matched
|
||
|
// against a location in an input file. It is represented as a linked
|
||
|
// list of Pattern subclasses that should be matched in sequence.
|
||
|
//
|
||
|
// EmptyPattern:
|
||
|
// Terminates every pattern, indicating successful match.
|
||
|
// Stores the name of the matched virus.
|
||
|
//
|
||
|
// SeqPattern:
|
||
|
// Holds a sequence of bytes that must be matched in order.
|
||
|
//
|
||
|
// AltPattern:
|
||
|
// Holds an array of bytes, one of which must match the next
|
||
|
// input byte.
|
||
|
//
|
||
|
// WildPattern:
|
||
|
// Matches an arbitrary sequence of bytes whose length is between
|
||
|
// lower and upper (inclusive).
|
||
|
//
|
||
|
// NibblePattern:
|
||
|
// Matches a masked version of the next input byte.
|
||
|
//
|
||
|
// Each pattern contains a Match method, which matches the pattern
|
||
|
// against an input array at a given index. If the pattern matches, it
|
||
|
// should call the "enqueue" delegate to queue up the next pattern in
|
||
|
// the list for matching. This delegate is also given the lower and
|
||
|
// upper bounds (inclusive) where the next pattern might potentially
|
||
|
// start.
|
||
|
|
||
|
class PatternError : ApplicationException
|
||
|
{
|
||
|
public PatternError(string message) { }
|
||
|
public PatternError(string message, Object o1)
|
||
|
: this(String.Format(message, o1)) { }
|
||
|
public PatternError(string message, Object o1, Object o2)
|
||
|
: this(String.Format(message, o1, o2)) { }
|
||
|
}
|
||
|
|
||
|
delegate void EnqueueDelegate(Pattern pattern, int lower, int upper);
|
||
|
|
||
|
abstract class Pattern
|
||
|
{
|
||
|
public Pattern Next;
|
||
|
|
||
|
abstract public void Match(byte[]! input, int index, EnqueueDelegate! enqueue);
|
||
|
|
||
|
virtual public string GetName()
|
||
|
{
|
||
|
return Next != null ? Next.GetName() : null;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
class EmptyPattern : Pattern
|
||
|
{
|
||
|
public EmptyPattern(string name)
|
||
|
{
|
||
|
Name = name;
|
||
|
}
|
||
|
|
||
|
override public void Match(byte[]! input, int index, EnqueueDelegate! enqueue)
|
||
|
{
|
||
|
Debug.Assert(false);
|
||
|
}
|
||
|
|
||
|
override public string GetName()
|
||
|
{
|
||
|
return Name;
|
||
|
}
|
||
|
|
||
|
public string Name;
|
||
|
}
|
||
|
|
||
|
class SeqPattern : Pattern
|
||
|
{
|
||
|
public SeqPattern(byte[] bytes)
|
||
|
{
|
||
|
Bytes = bytes;
|
||
|
}
|
||
|
|
||
|
override public void Match(byte[]! input, int index, EnqueueDelegate! enqueue)
|
||
|
{
|
||
|
int patternLength = Bytes.Length;
|
||
|
int inputLength = input.Length;
|
||
|
|
||
|
if (index + patternLength <= inputLength) {
|
||
|
for (int i = 0; i < patternLength; i++) {
|
||
|
if (input[index + i] != Bytes[i]) {
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
enqueue(Next, index + patternLength, index + patternLength);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
public byte[] Bytes;
|
||
|
}
|
||
|
|
||
|
class AltPattern : Pattern
|
||
|
{
|
||
|
public AltPattern(byte[] options)
|
||
|
{
|
||
|
Options = options;
|
||
|
}
|
||
|
|
||
|
override public void Match(byte[]! input, int index, EnqueueDelegate! enqueue)
|
||
|
{
|
||
|
int length = Options.Length;
|
||
|
for (int i = 0; i < length; i++) {
|
||
|
if (input[index] == Options[i]) {
|
||
|
enqueue(Next, index + 1, index + 1);
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
public byte[] Options;
|
||
|
}
|
||
|
|
||
|
class WildPattern : Pattern
|
||
|
{
|
||
|
public WildPattern(int lower, int upper)
|
||
|
{
|
||
|
Lower = lower;
|
||
|
Upper = upper;
|
||
|
}
|
||
|
|
||
|
override public void Match(byte[]! input, int index, EnqueueDelegate! enqueue)
|
||
|
{
|
||
|
// Make sure upper doesn't overflow.
|
||
|
int lower = index + Lower;
|
||
|
int upper = index <= Int32.MaxValue - Upper ? index + Upper : Int32.MaxValue;
|
||
|
enqueue(Next, lower, upper);
|
||
|
|
||
|
// If the lower bound is zero, we need to try matching with the next pattern, too.
|
||
|
if (Lower == 0) {
|
||
|
Next.Match(input, index, enqueue);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
public int Lower;
|
||
|
public int Upper;
|
||
|
}
|
||
|
|
||
|
class NibblePattern : Pattern
|
||
|
{
|
||
|
public NibblePattern(int mask, int val)
|
||
|
{
|
||
|
Mask = mask;
|
||
|
Value = val;
|
||
|
}
|
||
|
|
||
|
override public void Match(byte[]! input, int index, EnqueueDelegate! enqueue)
|
||
|
{
|
||
|
if ((input[index] & Mask) == Value) {
|
||
|
enqueue(Next, index + 1, index + 1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
public int Mask;
|
||
|
public int Value;
|
||
|
}
|
||
|
|
||
|
// PatternParser
|
||
|
//
|
||
|
// Given the name of a virus and an input pattern, construct a Pattern
|
||
|
// object as described above.
|
||
|
|
||
|
class PatternParser
|
||
|
{
|
||
|
public Pattern Parse(string name, string! input)
|
||
|
{
|
||
|
Debug.Assert(curPattern == null);
|
||
|
|
||
|
int index = 0;
|
||
|
|
||
|
while (index < input.Length) {
|
||
|
char cur = input[index];
|
||
|
if (cur == '*') {
|
||
|
Add(new WildPattern(0, Int32.MaxValue));
|
||
|
index += 1;
|
||
|
} else if (cur == '?') {
|
||
|
char next = input[index + 1]; // TODO: exists?
|
||
|
if (next == '?') {
|
||
|
Add(new WildPattern(1, 1));
|
||
|
} else if (IsHex(next)) {
|
||
|
Add(new NibblePattern(0xf, ToByte(next)));
|
||
|
} else {
|
||
|
throw new PatternError("Invalid byte: {0}{1}", cur, next);
|
||
|
}
|
||
|
index += 2;
|
||
|
} else if (IsHex(cur)) {
|
||
|
char next = input[index + 1]; // TODO: exists?
|
||
|
if (next == '?') {
|
||
|
Add(new NibblePattern(0xf0, ToByte(cur)));
|
||
|
} else if (IsHex(next)) {
|
||
|
AddByte(ToByte(cur, next));
|
||
|
} else {
|
||
|
throw new PatternError("Invalid byte: {0}{1}", cur, next);
|
||
|
}
|
||
|
index += 2;
|
||
|
} else if (cur == '(') {
|
||
|
Debug.Assert(byteOptions.Count == 0);
|
||
|
|
||
|
do {
|
||
|
index += 1;
|
||
|
char first = input[index];
|
||
|
char second = input[index + 1];
|
||
|
if (IsHex(first) && IsHex(second)) {
|
||
|
AddOption(ToByte(first, second));
|
||
|
} else {
|
||
|
throw new PatternError("Alternation does not contain literal byte: {0}{1}", first, second);
|
||
|
}
|
||
|
index += 2;
|
||
|
} while (input[index] == '|');
|
||
|
|
||
|
if (input[index] != ')') {
|
||
|
throw new PatternError("Alternation not terminated by paren: {0}", input[index]);
|
||
|
}
|
||
|
index += 1;
|
||
|
|
||
|
FinishOptions();
|
||
|
} else if (cur == '{') {
|
||
|
index += 1;
|
||
|
|
||
|
int lower = ParseNum(input, ref index);
|
||
|
|
||
|
int upper;
|
||
|
if (input[index] == '-') {
|
||
|
index += 1;
|
||
|
int num = ParseNum(input, ref index);
|
||
|
upper = num >= 0 ? num : Int32.MaxValue;
|
||
|
} else {
|
||
|
upper = lower;
|
||
|
}
|
||
|
|
||
|
if (input[index] != '}') {
|
||
|
throw new PatternError("Wildcard not terminated by brace: {0}", input[index]);
|
||
|
}
|
||
|
index += 1;
|
||
|
|
||
|
Add(new WildPattern(lower, upper));
|
||
|
} else {
|
||
|
throw new PatternError("Unrecognized character: {0}", cur);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
FinishSequence();
|
||
|
|
||
|
Add(new EmptyPattern(name));
|
||
|
|
||
|
Debug.Assert(byteBuffer.Count == 0);
|
||
|
Debug.Assert(byteOptions.Count == 0);
|
||
|
|
||
|
Pattern result = curPattern;
|
||
|
curPattern = null;
|
||
|
|
||
|
return Reverse(result);
|
||
|
}
|
||
|
|
||
|
private int ParseNum(string! input, ref int index)
|
||
|
{
|
||
|
int endIndex = input.IndexOfAny(new char[] { '-', '}' }, index);
|
||
|
string numStr = input.Substring(index, endIndex - index);
|
||
|
index = endIndex;
|
||
|
return numStr.Length > 0 ? Int32.Parse(numStr) : -1;
|
||
|
}
|
||
|
|
||
|
private void Add(Pattern! pattern)
|
||
|
{
|
||
|
FinishSequence();
|
||
|
AddCore(pattern);
|
||
|
}
|
||
|
|
||
|
private void AddCore(Pattern! pattern)
|
||
|
{
|
||
|
Debug.Assert(pattern.Next == null);
|
||
|
pattern.Next = curPattern;
|
||
|
curPattern = pattern;
|
||
|
}
|
||
|
|
||
|
private void AddByte(byte val)
|
||
|
{
|
||
|
byteBuffer.Add(val);
|
||
|
}
|
||
|
|
||
|
private void AddOption(byte val)
|
||
|
{
|
||
|
byteOptions.Add(val);
|
||
|
}
|
||
|
|
||
|
private void FinishOptions()
|
||
|
{
|
||
|
Add(new AltPattern((byte[])byteOptions.ToArray(typeof(byte))));
|
||
|
byteOptions.Clear();
|
||
|
}
|
||
|
|
||
|
private void FinishSequence()
|
||
|
{
|
||
|
if (byteBuffer.Count > 0) {
|
||
|
AddCore(new SeqPattern((byte[])byteBuffer.ToArray(typeof(byte))));
|
||
|
byteBuffer.Clear();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private Pattern Reverse(Pattern pattern)
|
||
|
{
|
||
|
Pattern reversed = null;
|
||
|
|
||
|
while (pattern != null) {
|
||
|
Pattern next = pattern.Next;
|
||
|
pattern.Next = reversed;
|
||
|
reversed = pattern;
|
||
|
pattern = next;
|
||
|
}
|
||
|
|
||
|
return reversed;
|
||
|
}
|
||
|
|
||
|
private bool IsHex(char c)
|
||
|
{
|
||
|
return ('0' <= c && c <= '9') ||
|
||
|
('a' <= c && c <= 'f') ||
|
||
|
('A' <= c && c <= 'F');
|
||
|
}
|
||
|
|
||
|
private byte ToByte(char c)
|
||
|
{
|
||
|
if ('0' <= c && c <= '9') {
|
||
|
return (byte)(c - '0');
|
||
|
} else if ('a' <= c && c <= 'f') {
|
||
|
return (byte)(c - 'a' + 10);
|
||
|
} else if ('A' <= c && c <= 'F') {
|
||
|
return (byte)(c - 'A' + 10);
|
||
|
} else {
|
||
|
throw new PatternError("Invalid hex digit: {0}", c);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private byte ToByte(char c1, char c2)
|
||
|
{
|
||
|
return (byte)((ToByte(c1) << 4) | ToByte(c2));
|
||
|
}
|
||
|
|
||
|
private ArrayList byteOptions = new ArrayList();
|
||
|
private ArrayList byteBuffer = new ArrayList();
|
||
|
private Pattern curPattern = null;
|
||
|
}
|
||
|
}
|