singrdk/base/Applications/EmailServer/AntiVirus/Matcher.sg

313 lines
10 KiB
Plaintext
Raw Normal View History

2008-11-17 18:29:00 -05:00
///////////////////////////////////////////////////////////////////////////////
//
// Microsoft Research Singularity
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// Note: Anti-virus pattern matcher.
//
using System;
using System.Collections;
using System.Diagnostics;
using System.IO;
using System.Text;
namespace Microsoft.Singularity.Email
{
// FutureTable
//
// The future table stores a list of patterns that we should try to match
// against an upcoming range of bytes in the input. This range is
// indicated by lower and upper bounds (inclusive).
//
// Callers can add patterns to the table with AddPattern, and they can
// match against the patterns in the table by calling Start.
//
// Internally, the future table stores a list of futures sorted by lower
// bound.
class Future
{
public Future(Pattern pattern, int lower, int upper)
{
Pattern = pattern;
Lower = lower;
Upper = upper;
}
public Pattern Pattern;
public int Lower;
public int Upper;
public Future Next;
}
class FutureTable
{
// AddPattern
//
// Add a pattern with specified file offset bounds to the table.
// We first attempt to consolidate with any overlapping entries for
// this pattern that are already in the table. We then insert the
// new future at the appropriate location.
public void AddPattern(Pattern pattern, int lower, int upper)
{
Future prev;
Future cur;
// Remove any overlapping ranges for this pattern, updating lower and upper as appropriate.
for (prev = null, cur = head;
cur != null && cur.Lower - 1 <= upper;
prev = cur, cur = cur.Next) {
// If the new pattern overlaps with an existing one...
if (pattern == cur.Pattern && lower - 1 <= cur.Upper) {
// Remove it.
if (prev != null) {
prev.Next = cur.Next;
} else {
head = cur.Next;
}
// Widen the bounds if necessary.
lower = Math.Min(lower, cur.Lower);
upper = Math.Max(upper, cur.Upper);
}
}
// Find the location where we should insert this new item.
for (prev = null, cur = head;
cur != null && cur.Lower < lower;
prev = cur, cur = cur.Next) {
// Do nothing; we're just searching for the insert location.
}
// Insert the item.
Future item = new Future(pattern, lower, upper);
item.Next = cur;
if (prev != null) {
prev.Next = item;
} else {
head = item;
}
}
// Start
//
// Given an input file and an index in that file, try to match all
// applicable patterns. We use the fact that the list of futures
// is sorted in order to avoid trying to match against all patterns.
// We also discard any patterns that will no longer be matched.
//
// We assume that the index will increase with each call to Start.
public void Start(byte[]! input, int index, EnqueueDelegate! enqueue)
{
ArrayList runnable = new ArrayList();
for (Future prev = null, cur = head;
cur != null && cur.Lower <= index;
prev = cur, cur = cur.Next) {
if (index <= cur.Upper) {
// The index is between lower and upper, so check this pattern.
runnable.Add(cur.Pattern);
} else {
// This pattern is no longer relevant; remove it.
if (prev != null) {
prev.Next = cur.Next;
} else {
head = cur.Next;
}
}
}
foreach (Pattern! pattern in runnable) {
pattern.Match(input, index, enqueue);
}
}
public void Clear()
{
head = null;
}
public int Count()
{
int i = 0;
for (Future cur = head; cur != null; cur = cur.Next) {
i += 1;
}
return i;
}
// List of future patterns to match, sorted by lower bound.
private Future head;
}
// OffsetTable
//
// The offset table stores patterns that should be applied at a particular
// offset in the file. This offset can be positive or negative, where
// the latter indicates an offset from the end of the file.
//
// Callers can add pattersn to the table with AddPattern, and they can
// match against patterns in the table with Start.
class OffsetTable
{
public void AddPattern(Pattern pattern, int offset)
{
if (table[offset] == null) {
table[offset] = new ArrayList();
}
ArrayList list = (!)(table[offset] as ArrayList);
list.Add(pattern);
}
public void Start(byte[]! input, int index, EnqueueDelegate! enqueue)
{
StartList(table[index] as ArrayList, input, index, enqueue);
StartList(table[index - input.Length] as ArrayList, input, index, enqueue);
}
private void StartList(ArrayList list, byte[]! input, int index, EnqueueDelegate! enqueue)
{
if (list != null) {
foreach (Pattern! pattern in list) {
pattern.Match(input, index, enqueue);
}
}
}
private Hashtable table = new Hashtable();
}
// StartTable
//
// The start table stores patterns that should be applied at *any* offset
// in the input file. For efficiency, we construct an n-level table
// mapping the first n bytes of the input to the relevant set of patterns.
// If a pattern doesn't start with a sequence of n bytes (e.g., it has a
// wildcard in front), we'll look for a shorter prefix.
//
// Callers can add patterns to the table with AddPattern, and they can
// match against the patterns in the table with Start.
class StartTable
{
public StartTable(int d)
{
Debug.Assert(d >= 0);
depth = d;
}
public void AddPattern(Pattern pattern)
{
AddPattern(pattern, 0);
}
private void AddPattern(Pattern pattern, int offset)
{
SeqPattern seq = pattern as SeqPattern;
if (depth > 0 && seq != null && offset < seq.Bytes.Length) {
byte b = seq.Bytes[offset];
if (table == null) {
table = new StartTable[256];
}
if (table[b] == null) {
table[b] = new StartTable(depth - 1);
}
StartTable subTable = (!)table[b];
subTable.AddPattern(pattern, offset + 1);
} else {
patterns.Add(pattern);
}
}
public void Start(byte[]! input, int index, EnqueueDelegate! enqueue)
{
Start(input, index, 0, enqueue);
}
private void Start(byte[]! input, int index, int offset, EnqueueDelegate! enqueue)
{
foreach (Pattern! pattern in patterns) {
pattern.Match(input, index, enqueue);
}
if (index + offset < input.Length) {
byte b = input[index + offset];
if (table != null && table[b] != null) {
StartTable subTable = (!)table[b];
subTable.Start(input, index, offset + 1, enqueue);
}
}
}
private int depth;
private StartTable[] table;
private ArrayList patterns = new ArrayList();
}
// PatternMatcher
//
// The pattern matcher stores one of each of the above tables:
//
// FutureTable: Pattern matches in progress.
// OffsetTable: Patterns to be matched at a specific offset.
// StartTable: Patterns to be matched at every offset.
//
// We set up the matcher by calling AddPattern, which adds patterns
// to the offset table or the start table. We then call Match to
// match an input array against the provided patterns.
class PatternMatcher
{
public void AddPattern(Pattern pattern)
{
startTable.AddPattern(pattern);
}
public void AddPattern(Pattern pattern, int offset)
{
offsetTable.AddPattern(pattern, offset);
}
public string Match(byte[]! input)
{
found = null;
futureTable.Clear();
int length = input.Length;
for (int index = 0; found == null && index < length; index++) {
startTable.Start(input, index, Enqueue);
offsetTable.Start(input, index, Enqueue);
futureTable.Start(input, index, Enqueue);
}
futureTable.Clear();
return found;
}
private void Enqueue(Pattern pattern, int lower, int upper)
{
Debug.Assert(pattern != null);
if (pattern is EmptyPattern) {
found = ((EmptyPattern)pattern).Name;
} else {
futureTable.AddPattern(pattern, lower, upper);
}
}
private string found;
const int TableDepth = 2;
private StartTable startTable = new StartTable(TableDepth);
private OffsetTable offsetTable = new OffsetTable();
private FutureTable futureTable = new FutureTable();
}
}