working runlength implementation and tests
This commit is contained in:
parent
38ee2e4eb5
commit
8fbef6ff51
|
@ -1,189 +0,0 @@
|
|||
using Logging;
|
||||
using NuGet.Frameworks;
|
||||
using NUnit.Framework;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Numerics;
|
||||
using Utils;
|
||||
|
||||
namespace FrameworkTests.Utils
|
||||
{
|
||||
[TestFixture]
|
||||
public class IndexEncodingTests
|
||||
{
|
||||
private readonly Random random = new Random();
|
||||
|
||||
/// <summary>
/// Runs every case produced by <c>CreateTests</c> and writes the size of each
/// encoding to a log file named after this test in the current directory.
/// </summary>
[Test]
public void IndexRanging()
{
    const int rerunCount = 1;

    var logPath = Path.Combine(Environment.CurrentDirectory, nameof(IndexRanging) + ".log");
    var log = new FileLog(logPath);

    var cases = CreateTests().ToArray();
    log.Log($"Tests: {cases.Length}");

    foreach (var testCase in cases)
    {
        log.Log($"Running {testCase.GetName()}");

        // Each rerun adds one sample to the case's result bags.
        Parallel.For(0, rerunCount, _ => RunTest(log, testCase));
    }

    log.Log("Results:");
    foreach (var testCase in cases)
    {
        testCase.PrintResult(log);
    }
}
|
||||
|
||||
/// <summary>
/// Generates one random presence set for <paramref name="test"/> and records the
/// length of its run-length encoding and of its flip-map encoding.
/// </summary>
/// <param name="log">Log handed to the presence generator.</param>
/// <param name="test">Case whose result bags receive the measured lengths.</param>
private void RunTest(FileLog log, IndexTest test)
{
    var presence = new BlockPresence(log, test.NumIndices, test.PresenceFactor);
    var presentIndices = presence.Present.ToArray();

    // Timing wrappers (Stopwatch.Measure) around both encoders are currently disabled.
    var runLengthEncoded = RunLengthEncode(presentIndices);
    test.RunLengthEncodingLengths.Add(runLengthEncoded.Length);

    var flipMapEncoded = FlipMapEncode(presentIndices, test.NumIndices);
    test.FlipMapLengths.Add(flipMapEncoded.Length);
}
|
||||
|
||||
/// <summary>
/// Run-length encodes an ascending list of indices as a flat array of
/// (start, length) pairs, one pair per run of consecutive values.
/// </summary>
/// <param name="indices">Indices to encode; consecutive values are merged into one run.</param>
/// <returns>
/// <c>[start0, length0, start1, length1, ...]</c>, or an empty array when
/// <paramref name="indices"/> is empty.
/// </returns>
private int[] RunLengthEncode(int[] indices)
{
    var result = new List<int>();
    if (indices.Length == 0) return result.ToArray();

    var runStart = indices[0];
    var runLength = 1;
    for (var i = 1; i < indices.Length; i++)
    {
        var nextValue = indices[i];

        // The run continues only when the value directly follows the run's current end.
        if (nextValue == runStart + runLength)
        {
            runLength++;
            continue;
        }

        result.Add(runStart);
        result.Add(runLength);

        runStart = nextValue;
        runLength = 1;
    }

    // The loop only emits a run when the next one starts, so the last run is flushed here.
    result.Add(runStart);
    result.Add(runLength);

    return result.ToArray();
}
|
||||
|
||||
/// <summary>
/// Encodes presence as a "flip map": the list of indices in
/// <c>[0, numIndices)</c> at which the present/absent state changes, starting
/// from the absent state.
/// </summary>
/// <param name="presentIndices">Indices that are present.</param>
/// <param name="numIndices">Exclusive upper bound of the index range that is scanned.</param>
/// <returns>
/// The indices at which the state flips, in ascending order; empty when
/// <paramref name="presentIndices"/> is empty.
/// </returns>
private int[] FlipMapEncode(int[] presentIndices, int numIndices)
{
    var flips = new List<int>();
    if (presentIndices.Length == 0) return flips.ToArray();

    // Hash lookup instead of scanning the array once per index.
    var present = new HashSet<int>(presentIndices);

    var current = false;
    for (var i = 0; i < numIndices; i++)
    {
        var isPresent = present.Contains(i);
        if (current != isPresent)
        {
            flips.Add(i);
            current = isPresent;
        }
    }

    return flips.ToArray();
}
|
||||
|
||||
/// <summary>
/// Produces the cases to run. Currently a single case: 100000 indices with a
/// presence factor of 0.5.
/// </summary>
private IEnumerable<IndexTest> CreateTests()
{
    // A wider sweep is disabled here. It iterated numIndices from 1000 upwards
    // (multiplying by 100 while below 10,000,000) and, for each size, presence
    // factors from 0.0 to 1.0 in steps of 0.1.
    // Original sizing note: 10,000,000 indices * 64k = 610 GB dataset.
    yield return new IndexTest(numIndices: 100000, presenceFactor: 0.5f);
}
|
||||
|
||||
/// <summary>
/// One benchmark case: a number of indices, the fraction of them that is present,
/// and the encoded lengths collected while running the case.
/// </summary>
public class IndexTest
{
    public IndexTest(int numIndices, float presenceFactor)
    {
        (NumIndices, PresenceFactor) = (numIndices, presenceFactor);
    }

    /// <summary>Total number of indices in the case.</summary>
    public int NumIndices { get; }

    /// <summary>Fraction of indices that is present, as passed to the constructor.</summary>
    public float PresenceFactor { get; }

    /// <summary>Bytes needed for a bitmap with one bit per index, rounded up.</summary>
    public int BitMapLength
    {
        get { return Convert.ToInt32(Math.Ceiling(NumIndices / 8.0)); }
    }

    /// <summary>Number of entries in a plain array of present indices, rounded up.</summary>
    public int PresenceArrayLength
    {
        get { return Convert.ToInt32(Math.Ceiling(NumIndices * PresenceFactor)); }
    }

    /// <summary>Lengths of the run-length encodings, one entry per run of the case.</summary>
    public ConcurrentBag<int> RunLengthEncodingLengths { get; } = new ConcurrentBag<int>();

    /// <summary>Lengths of the flip-map encodings, one entry per run of the case.</summary>
    public ConcurrentBag<int> FlipMapLengths { get; } = new ConcurrentBag<int>();

    /// <summary>
    /// Logs the case name, the two reference lengths and the average of each
    /// collected encoding length, followed by an empty line.
    /// </summary>
    public void PrintResult(ILog log)
    {
        var name = GetName();
        log.Log(name);
        log.Log($"BitmapLength: {BitMapLength}");
        log.Log($"PresenceArrayLength: {PresenceArrayLength}");
        log.Log($"RunLength: {RunLengthEncodingLengths.Average()}");
        log.Log($"FlipMap: {FlipMapLengths.Average()}");
        log.Log("");
    }

    /// <summary>Returns the display name of the case.</summary>
    public string GetName() => $"Test: {NumIndices} indices, {PresenceFactor * 100.0f}% present.";
}
|
||||
|
||||
/// <summary>
/// A randomly chosen, sorted subset of the indices <c>0 .. length - 1</c>.
/// </summary>
public class BlockPresence
{
    /// <param name="log">Not used by the current implementation (timing wrapper is disabled).</param>
    /// <param name="length">Size of the index range to pick from.</param>
    /// <param name="factor">Fraction of the range to pick; the count is rounded to the nearest integer.</param>
    public BlockPresence(ILog log, int length, float factor)
    {
        var candidates = Enumerable.Range(0, length).ToList();
        var targetCount = Convert.ToInt32(Math.Round((float)length * factor));

        Present = targetCount >= length
            ? candidates.ToArray()
            : PickSorted(candidates, targetCount);
    }

    /// <summary>The present indices in ascending order.</summary>
    public int[] Present { get; private set; } = Array.Empty<int>();

    // Draws `count` entries from the pool with PickOneRandom and returns them sorted.
    private static int[] PickSorted(List<int> pool, int count)
    {
        var picked = new List<int>();
        for (var n = 0; n < count; n++)
        {
            picked.Add(pool.PickOneRandom());
        }

        picked.Sort();
        return picked.ToArray();
    }
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,193 @@
|
|||
using NUnit.Framework;
|
||||
using NUnit.Framework.Interfaces;
|
||||
using static FrameworkTests.Utils.RunLengthEncodingTests;
|
||||
|
||||
namespace FrameworkTests.Utils
|
||||
{
|
||||
[TestFixture]
|
||||
public class RunLengthEncodingRunTests
|
||||
{
|
||||
/// <summary>
/// A run includes exactly the indices from its start up to, but not including,
/// start + length.
/// </summary>
[Test]
[Combinatorial]
public void RunIncludes(
    [Values(0, 1, 2, 3)] int start,
    [Values(1, 2, 3, 4)] int length)
{
    var run = new Run(start, length);

    var insideIndices = Enumerable.Range(start, length).ToArray();
    var justBefore = insideIndices.Min() - 1;
    var justAfter = insideIndices.Max() + 1;

    foreach (var inside in insideIndices)
    {
        Assert.That(run.Includes(inside), Is.True);
    }

    // The direct neighbours on either side must be rejected.
    Assert.That(run.Includes(justBefore), Is.False);
    Assert.That(run.Includes(justAfter), Is.False);
}
|
||||
|
||||
/// <summary>
/// A run only grows when asked to include the index directly after its end.
/// </summary>
[Test]
public void RunExpandToInclude()
{
    var run = new Run(2, 3);
    Assert.That(run.Includes(2), Is.True);
    Assert.That(run.Includes(4), Is.True);
    Assert.That(run.Includes(5), Is.False);

    // Before the run, inside the run, and beyond the adjacent index: no expansion.
    foreach (var rejected in new[] { 1, 2, 4, 6 })
    {
        Assert.That(run.ExpandToInclude(rejected), Is.False);
    }

    Assert.That(run.ExpandToInclude(5), Is.True);
    Assert.That(run.Includes(5), Is.True);
    Assert.That(run.Includes(6), Is.False);
}
|
||||
|
||||
/// <summary>
/// Unsetting the last index shrinks the run in place and reports no run changes.
/// </summary>
[Test]
public void RunCanUnsetLastIndex()
{
    const int lastIndex = 2;
    var run = new Run(0, 3);
    Assert.That(run.Includes(lastIndex), Is.True);

    var update = run.Unset(lastIndex);

    Assert.That(run.Includes(lastIndex), Is.False);
    Assert.That(update.NewRuns.Length, Is.EqualTo(0));
    Assert.That(update.RemoveRuns.Length, Is.EqualTo(0));
}
|
||||
|
||||
/// <summary>
/// Unsetting an index in the middle keeps the head in the original run and
/// reports the tail as a new run.
/// </summary>
[Test]
public void RunCanSplit()
{
    // Covers 0, 1, 2, 3, 4, 5.
    var run = new Run(0, 6);

    var update = run.Unset(2);

    // Head: 0, 1.
    Assert.That(run.Start, Is.EqualTo(0));
    Assert.That(run.Length, Is.EqualTo(2));
    Assert.That(run.Includes(2), Is.False);

    Assert.That(update.NewRuns.Length, Is.EqualTo(1));
    Assert.That(update.RemoveRuns.Length, Is.EqualTo(0));

    // Tail: 3, 4, 5.
    var tail = update.NewRuns[0];
    Assert.That(tail.Includes(2), Is.False);
    Assert.That(tail.Start, Is.EqualTo(3));
    Assert.That(tail.Length, Is.EqualTo(3));
    Assert.That(tail.Includes(6), Is.False);
}
|
||||
|
||||
/// <summary>
/// Unsetting the first index asks the owner to remove the run and add a
/// replacement that starts one index later.
/// </summary>
[Test]
public void RunReplacesSelfWhenUnsetFirstIndex()
{
    var run = new Run(0, 5);

    var update = run.Unset(0);

    Assert.That(update.NewRuns.Length, Is.EqualTo(1));
    Assert.That(update.RemoveRuns.Length, Is.EqualTo(1));

    var removed = update.RemoveRuns[0];
    var replacement = update.NewRuns[0];
    Assert.That(removed, Is.SameAs(run));
    Assert.That(replacement.Start, Is.EqualTo(1));
    Assert.That(replacement.Length, Is.EqualTo(4));
}
|
||||
|
||||
/// <summary>
/// Iterating a run visits each of its indices once, in ascending order.
/// </summary>
[Test]
public void CanIterateIndices()
{
    var expected = new[] { 2, 3, 4, 5 };
    var visited = new List<int>();

    new Run(2, 4).Iterate(visited.Add);

    CollectionAssert.AreEqual(expected, visited);
}
|
||||
}
|
||||
|
||||
/// <summary>
/// A contiguous range of indices, described by its first index and its length.
/// The start is fixed; the length changes when the run is expanded or trimmed.
/// </summary>
public class Run
{
    public Run(int start, int length)
    {
        Start = start;
        Length = length;
    }

    /// <summary>First index covered by the run.</summary>
    public int Start { get; }

    /// <summary>Number of consecutive indices covered, beginning at <see cref="Start"/>.</summary>
    public int Length { get; private set; }

    /// <summary>Returns true when <paramref name="index"/> lies within the run.</summary>
    public bool Includes(int index) => Start <= index && index < Start + Length;

    /// <summary>
    /// Grows the run by one when <paramref name="index"/> is the index directly
    /// after its end.
    /// </summary>
    /// <returns>True when the run was expanded; false otherwise.</returns>
    public bool ExpandToInclude(int index)
    {
        if (index != Start + Length)
        {
            return false;
        }

        Length++;
        return true;
    }

    /// <summary>
    /// Removes <paramref name="index"/> from the run and reports which runs the
    /// owner has to add or remove as a consequence.
    /// </summary>
    /// <returns>
    /// An empty update when the index is not in the run or was the last index
    /// (the run shrinks in place); an update that removes this run, and adds a
    /// replacement unless the run becomes empty, when the index was the first
    /// one; otherwise an update that adds the tail after shrinking this run to
    /// the head.
    /// </returns>
    public RunUpdate Unset(int index)
    {
        if (!Includes(index))
        {
            return new RunUpdate();
        }

        var lastIndex = Start + Length - 1;

        if (index == Start)
        {
            // The start cannot move, so the run is replaced by one that begins
            // at the next index. A single-index run is removed without replacement.
            var replacement = Length == 1
                ? Array.Empty<Run>()
                : new[] { new Run(Start + 1, Length - 1) };
            return new RunUpdate(replacement, new[] { this });
        }

        if (index == lastIndex)
        {
            Length--;
            return new RunUpdate();
        }

        // Split: this run keeps everything before the index; the rest becomes a new run.
        var tailLength = lastIndex - index;
        Length = index - Start;
        return new RunUpdate(new[] { new Run(index + 1, tailLength) }, Array.Empty<Run>());
    }

    /// <summary>Invokes <paramref name="action"/> for every index of the run, in ascending order.</summary>
    public void Iterate(Action<int> action)
    {
        var offset = 0;
        while (offset < Length)
        {
            action(Start + offset);
            offset++;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Result of changing a run: the runs that have to be added to and removed from
/// the collection that owns it.
/// </summary>
public class RunUpdate
{
    /// <summary>Runs to add.</summary>
    public Run[] NewRuns { get; }

    /// <summary>Runs to remove.</summary>
    public Run[] RemoveRuns { get; }

    /// <summary>Creates an update that adds nothing and removes nothing.</summary>
    public RunUpdate()
        : this(newRuns: Array.Empty<Run>(), removeRuns: Array.Empty<Run>())
    {
    }

    public RunUpdate(Run[] newRuns, Run[] removeRuns)
    {
        (NewRuns, RemoveRuns) = (newRuns, removeRuns);
    }
}
|
||||
}
|
|
@ -0,0 +1,320 @@
|
|||
using Logging;
|
||||
using Microsoft.VisualStudio.TestPlatform.Common;
|
||||
using NuGet.Frameworks;
|
||||
using NUnit.Framework;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Numerics;
|
||||
using Utils;
|
||||
|
||||
namespace FrameworkTests.Utils
|
||||
{
|
||||
[TestFixture]
|
||||
public class RunLengthEncodingTests
|
||||
{
|
||||
private readonly Random random = new Random();
|
||||
|
||||
/// <summary>
/// A freshly created set reports no index as set and iterates nothing.
/// </summary>
[Test]
public void EmptySet()
{
    var set = new IndexSet();

    foreach (var index in Enumerable.Range(0, 1000))
    {
        Assert.That(set.IsSet(index), Is.False);
    }

    var callCount = 0;
    set.Iterate(_ => callCount++);
    Assert.That(callCount, Is.EqualTo(0));
}
|
||||
|
||||
/// <summary>
/// An index is reported as set after it has been set.
/// </summary>
[Test]
public void SetsIndex()
{
    const int index = 1234;
    var set = new IndexSet();

    set.Set(index);

    Assert.That(set.IsSet(index), Is.True);
}
|
||||
|
||||
/// <summary>
/// An index is no longer reported as set after it has been set and then unset.
/// </summary>
[Test]
public void UnsetsIndex()
{
    const int index = 1234;
    var set = new IndexSet();

    set.Set(index);
    set.Unset(index);

    Assert.That(set.IsSet(index), Is.False);
}
|
||||
|
||||
/// <summary>
/// A set built from random indices contains exactly those indices.
/// </summary>
[Test]
public void RandomIndices()
{
    var expected = GenerateRandomIndices();

    AssertEqual(new IndexSet(expected), expected);
}
|
||||
|
||||
/// <summary>
/// Encoding a random set and decoding the result yields the original indices.
/// </summary>
[Test]
public void RandomRunLengthEncoding()
{
    var expected = GenerateRandomIndices();
    var original = new IndexSet(expected);

    var roundTripped = IndexSet.FromRunLengthEncoded(original.RunLengthEncoded());

    AssertEqual(roundTripped, expected);
}
|
||||
|
||||
/// <summary>
/// Consecutive indices are encoded as (start, length) pairs.
/// </summary>
[Test]
public void RunLengthEncoding()
{
    var set = new IndexSet(new[] { 0, 1, 2, 4, 6, 7 });
    int[] expected =
    {
        0, 3,
        4, 1,
        6, 2
    };

    var actual = set.RunLengthEncoded();

    CollectionAssert.AreEqual(expected, actual);
}
|
||||
|
||||
/// <summary>
/// Decoding (start, length) pairs yields every index of every run, in order.
/// </summary>
[Test]
public void RunLengthDecoding()
{
    int[] encoded =
    {
        2, 4, // 2, 3, 4, 5
        7, 1, // 7
        9, 2  // 9, 10
    };
    int[] expected = { 2, 3, 4, 5, 7, 9, 10 };

    var decoded = new List<int>();
    IndexSet.FromRunLengthEncoded(encoded).Iterate(decoded.Add);

    CollectionAssert.AreEqual(expected, decoded);
}
|
||||
|
||||
/// <summary>
/// Setting the index directly before a run extends that run at the front.
/// </summary>
[Test]
public void SetIndexBeforeRun()
{
    var set = new IndexSet(new[] { 12, 13, 14 });
    var expected = new[] { 11, 4 };

    set.Set(11);

    CollectionAssert.AreEqual(expected, set.RunLengthEncoded());
}
|
||||
|
||||
/// <summary>
/// Setting the index directly after a run extends that run at the back.
/// </summary>
[Test]
public void SetIndexAfterRun()
{
    var set = new IndexSet(new[] { 12, 13, 14 });
    var expected = new[] { 12, 4 };

    set.Set(15);

    CollectionAssert.AreEqual(expected, set.RunLengthEncoded());
}
|
||||
|
||||
/// <summary>
/// Unsetting the first index of a run leaves a run that starts one index later.
/// </summary>
[Test]
public void UnsetIndexAtStartOfRun()
{
    var set = new IndexSet(new[] { 11, 12, 13, 14 });
    var expected = new[] { 12, 3 };

    set.Unset(11);

    CollectionAssert.AreEqual(expected, set.RunLengthEncoded());
}
|
||||
|
||||
/// <summary>
/// Unsetting the last index of a run leaves a run that is one index shorter.
/// </summary>
[Test]
public void UnsetIndexAtEndOfRun()
{
    var set = new IndexSet(new[] { 11, 12, 13, 14 });
    var expected = new[] { 11, 3 };

    set.Unset(14);

    CollectionAssert.AreEqual(expected, set.RunLengthEncoded());
}
|
||||
|
||||
/// <summary>
/// Unsetting an index inside a run splits it into the part before and the part after.
/// </summary>
[Test]
public void UnsetIndexInRun()
{
    var set = new IndexSet(new[] { 11, 12, 13, 14 });
    int[] expected =
    {
        11, 1,
        13, 2
    };

    set.Unset(12);

    CollectionAssert.AreEqual(expected, set.RunLengthEncoded());
}
|
||||
|
||||
/// <summary>
/// Asserts that <paramref name="set"/> contains exactly <paramref name="indices"/>:
/// every index from 0 up to the largest expected one is checked with
/// <c>IsSet</c>, and iterating the set must yield <paramref name="indices"/> in
/// the same order.
/// </summary>
/// <param name="set">Set under test.</param>
/// <param name="indices">Expected indices, in the order the set should iterate them.</param>
private void AssertEqual(IndexSet set, int[] indices)
{
    // Hash lookup instead of scanning the array once per probed index.
    var expected = new HashSet<int>(indices);

    // Max() throws on an empty sequence; an empty expectation probes nothing.
    var upperBound = indices.Length == 0 ? 0 : indices.Max() + 1;
    for (var i = 0; i < upperBound; i++)
    {
        Assert.That(set.IsSet(i), Is.EqualTo(expected.Contains(i)));
    }

    var seen = new List<int>();
    set.Iterate(i => seen.Add(i));

    CollectionAssert.AreEqual(indices, seen);
}
|
||||
|
||||
/// <summary>
/// Picks up to 1000 random indices from the range 0 .. 1999 and returns them
/// in ascending order.
/// </summary>
/// <returns>The picked indices, sorted ascending.</returns>
private int[] GenerateRandomIndices()
{
    const int number = 1000;
    const int max = 2000;

    var candidates = Enumerable.Range(0, max).ToList();
    var result = new List<int>(number);

    // NOTE(review): assumes PickOneRandom removes the picked element from the
    // list, which the non-empty guard suggests — confirm against Utils.
    while (candidates.Count > 0 && result.Count < number)
    {
        result.Add(candidates.PickOneRandom());
    }

    // Return the picked values; previously the leftover pool was sorted and
    // returned, and the picked values were discarded.
    result.Sort();
    return result.ToArray();
}
|
||||
|
||||
/// <summary>
/// A set of integer indices stored as runs of consecutive values, keyed by the
/// first index of each run. Runs never overlap, and adjacent runs created
/// through <see cref="Set"/> are merged.
/// </summary>
public class IndexSet
{
    // Key: Run.Start. Sorted so runs can be iterated in order and searched by position.
    private readonly SortedList<int, Run> runs = new SortedList<int, Run>();

    /// <summary>Creates an empty set.</summary>
    public IndexSet()
    {
    }

    /// <summary>Creates a set that contains the given indices, in any order.</summary>
    public IndexSet(int[] indices)
    {
        foreach (var i in indices) Set(i);
    }

    /// <summary>
    /// Rebuilds a set from the output of <see cref="RunLengthEncoded"/>.
    /// </summary>
    /// <param name="rle">Flat array of (start, length) pairs.</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="rle"/> has an odd number of elements, or two runs share a start.
    /// </exception>
    public static IndexSet FromRunLengthEncoded(int[] rle)
    {
        if (rle.Length % 2 != 0)
        {
            throw new ArgumentException(
                "Run-length encoded data must consist of (start, length) pairs.", nameof(rle));
        }

        var set = new IndexSet();
        for (var i = 0; i < rle.Length; i += 2)
        {
            var start = rle[i];
            var length = rle[i + 1];
            set.runs.Add(start, new Run(start, length));
        }

        return set;
    }

    /// <summary>Returns true when <paramref name="index"/> is in the set.</summary>
    public bool IsSet(int index)
    {
        if (runs.ContainsKey(index)) return true;

        var run = GetRunBefore(index);
        return run != null && run.Includes(index);
    }

    /// <summary>
    /// Adds <paramref name="index"/> to the set. Does nothing when it is already set.
    /// </summary>
    public void Set(int index)
    {
        if (runs.ContainsKey(index)) return;

        var before = GetRunBefore(index);
        if (before != null)
        {
            // Already covered by the preceding run: adding a run here would overlap it.
            if (before.Includes(index)) return;

            if (before.ExpandToInclude(index))
            {
                // The expanded run may now touch the run that follows it.
                MergeWithFollowingRun(before);
                return;
            }
        }

        CreateNewRun(index);
    }

    /// <summary>
    /// Removes <paramref name="index"/> from the set. Does nothing when it is not set.
    /// </summary>
    public void Unset(int index)
    {
        var run = runs.TryGetValue(index, out var startingHere)
            ? startingHere
            : GetRunBefore(index);
        if (run == null) return;

        HandleUpdate(run.Unset(index));
    }

    /// <summary>Invokes <paramref name="onIndex"/> for every index in the set, in ascending order.</summary>
    public void Iterate(Action<int> onIndex)
    {
        foreach (var run in runs.Values)
        {
            run.Iterate(onIndex);
        }
    }

    /// <summary>
    /// Returns the set as a flat array of (start, length) pairs, ordered by start.
    /// </summary>
    public int[] RunLengthEncoded()
    {
        return Encode().ToArray();
    }

    private IEnumerable<int> Encode()
    {
        foreach (var pair in runs)
        {
            yield return pair.Value.Start;
            yield return pair.Value.Length;
        }
    }

    // Returns the run with the greatest start that is strictly less than `index`,
    // or null when no run starts before it. Binary search over the sorted keys.
    private Run? GetRunBefore(int index)
    {
        var keys = runs.Keys;
        var low = 0;
        var high = keys.Count; // exclusive
        while (low < high)
        {
            var mid = low + (high - low) / 2;
            if (keys[mid] < index) low = mid + 1;
            else high = mid;
        }

        // `low` is now the number of runs that start before `index`.
        return low == 0 ? null : runs.Values[low - 1];
    }

    // Applies the consequences of Run.Unset to the run list.
    private void HandleUpdate(RunUpdate runUpdate)
    {
        foreach (var removeRun in runUpdate.RemoveRuns) runs.Remove(removeRun.Start);
        foreach (var newRun in runUpdate.NewRuns) runs.Add(newRun.Start, newRun);
    }

    // Joins `run` with the run that starts directly after its end, if there is one.
    private void MergeWithFollowingRun(Run run)
    {
        var followingStart = run.Start + run.Length;
        if (!runs.TryGetValue(followingStart, out var following)) return;

        runs.Remove(followingStart);
        // Run has no public way to grow by more than one, so the entry is replaced.
        runs[run.Start] = new Run(run.Start, run.Length + following.Length);
    }

    // Adds a run for an index that no preceding run could absorb. When a run
    // starts directly after the index, that run is replaced by one that starts
    // at the index instead.
    private void CreateNewRun(int index)
    {
        if (runs.TryGetValue(index + 1, out var following))
        {
            runs.Remove(index + 1);
            runs.Add(index, new Run(index, following.Length + 1));
        }
        else
        {
            runs.Add(index, new Run(index, 1));
        }
    }
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue