working runlength implementation and tests

This commit is contained in:
Ben 2024-08-27 13:32:33 +02:00
parent 38ee2e4eb5
commit 8fbef6ff51
No known key found for this signature in database
GPG Key ID: 0F16E812E736C24B
3 changed files with 513 additions and 189 deletions

View File

@ -1,189 +0,0 @@
using Logging;
using NuGet.Frameworks;
using NUnit.Framework;
using System.Collections.Concurrent;
using System.Numerics;
using Utils;
namespace FrameworkTests.Utils
{
[TestFixture]
public class IndexEncodingTests
{
private readonly Random random = new Random();
[Test]
public void IndexRanging()
{
    // Number of times each test case is executed (in parallel).
    const int reruns = 1;

    // Results are written to "<current directory>/IndexRanging.log".
    var logPath = Path.Combine(Environment.CurrentDirectory, nameof(IndexRanging) + ".log");
    var log = new FileLog(logPath);
    var cases = CreateTests().ToArray();

    log.Log($"Tests: {cases.Length}");
    foreach (var current in cases)
    {
        log.Log($"Running {current.GetName()}");
        Parallel.For(0, reruns, _ => RunTest(log, current));
    }

    // Print the collected measurements once every case has been run.
    log.Log("Results:");
    foreach (var finished in cases)
    {
        finished.PrintResult(log);
    }
}
// Generates one random presence set for the given test case, encodes it with
// both encoders and records the length of each encoding on the test case.
private void RunTest(FileLog log, IndexTest test)
{
    var presence = new BlockPresence(log, test.NumIndices, test.PresenceFactor);
    var present = presence.Present.ToArray();

    // Timing via Stopwatch.Measure is currently disabled for both encoders.
    var runLengthEncoded = RunLengthEncode(present);
    test.RunLengthEncodingLengths.Add(runLengthEncoded.Length);

    var flipMapEncoded = FlipMapEncode(present, test.NumIndices);
    test.FlipMapLengths.Add(flipMapEncoded.Length);
}
/// <summary>
/// Run-length encodes a sequence of indices as flat (start, length) pairs.
/// </summary>
/// <param name="indices">
/// Indices to encode. Consecutive elements that differ by exactly one are
/// folded into the same run; any other step starts a new run.
/// </param>
/// <returns>
/// An array of the form [start0, length0, start1, length1, ...]. Empty when
/// <paramref name="indices"/> is empty.
/// </returns>
private int[] RunLengthEncode(int[] indices)
{
    var result = new List<int>();
    if (indices.Length == 0) return result.ToArray();

    var runStart = indices[0];
    var runLength = 1;
    for (var i = 1; i < indices.Length; i++)
    {
        // Compare against the previous element, not the start of the run,
        // so that runs longer than two elements keep growing.
        if (indices[i] == indices[i - 1] + 1)
        {
            runLength++;
            continue;
        }

        result.Add(runStart);
        result.Add(runLength);
        runStart = indices[i];
        runLength = 1;
    }

    // The run that is still open when the input ends must be emitted too.
    result.Add(runStart);
    result.Add(runLength);
    return result.ToArray();
}
/// <summary>
/// Encodes a presence set as the list of positions where presence toggles.
/// </summary>
/// <param name="presentIndices">Indices that are present.</param>
/// <param name="numIndices">Positions 0 .. numIndices - 1 are scanned.</param>
/// <returns>
/// Every position whose presence differs from the position before it, with
/// the scan starting in the "absent" state. Empty when
/// <paramref name="presentIndices"/> is empty.
/// </returns>
private int[] FlipMapEncode(int[] presentIndices, int numIndices)
{
    var flips = new List<int>();
    if (presentIndices.Length == 0) return flips.ToArray();

    // Hash lookup instead of scanning the array once per position.
    var present = new HashSet<int>(presentIndices);
    var current = false;
    for (var i = 0; i < numIndices; i++)
    {
        var isPresent = present.Contains(i);
        if (current != isPresent)
        {
            flips.Add(i);
            current = isPresent;
        }
    }
    return flips.ToArray();
}
// Yields the test cases to run. Only a single case is active; the wider
// parameter sweep below is kept commented out.
private IEnumerable<IndexTest> CreateTests()
{
    //// 10,000,000 indices * 64k = 610 GB dataset
    //for (var numIndices = 1000; numIndices < 10000000; numIndices *= 100)
    //{
    //    for (float factor = 0.0f; factor < 1.0f; factor += 0.1f)
    //    {
    //        yield return new IndexTest(numIndices, factor);
    //    }
    //    yield return new IndexTest(numIndices, 1.0f);
    //}

    const int numIndices = 100000;
    const float presenceFactor = 0.5f;
    yield return new IndexTest(numIndices, presenceFactor);
}
// One test case: how many indices exist, which fraction of them is present,
// and the encoding lengths measured for that configuration.
public class IndexTest
{
    public IndexTest(int numIndices, float presenceFactor)
    {
        NumIndices = numIndices;
        PresenceFactor = presenceFactor;
    }

    public int NumIndices { get; }

    public float PresenceFactor { get; }

    // Number of bytes needed for one bit per index.
    public int BitMapLength
    {
        get { return Convert.ToInt32(Math.Ceiling(NumIndices / 8.0)); }
    }

    // Number of entries needed to list every present index.
    public int PresenceArrayLength
    {
        get { return Convert.ToInt32(Math.Ceiling(NumIndices * PresenceFactor)); }
    }

    // Concurrent collections: results are added from Parallel.For bodies.
    public ConcurrentBag<int> RunLengthEncodingLengths { get; } = new ConcurrentBag<int>();

    public ConcurrentBag<int> FlipMapLengths { get; } = new ConcurrentBag<int>();

    // Writes the test name, the two reference sizes and the average measured
    // length of each encoding to the log, followed by an empty line.
    public void PrintResult(ILog log)
    {
        log.Log(GetName());
        log.Log($"BitmapLength: {BitMapLength}");
        log.Log($"PresenceArrayLength: {PresenceArrayLength}");

        var averageRunLength = RunLengthEncodingLengths.Average();
        log.Log($"RunLength: {averageRunLength}");

        var averageFlipMap = FlipMapLengths.Average();
        log.Log($"FlipMap: {averageFlipMap}");

        log.Log("");
    }

    public string GetName() => $"Test: {NumIndices} indices, {PresenceFactor * 100.0f}% present.";
}
// Builds a sorted array of "present" indices: round(length * factor) values
// picked at random from 0 .. length - 1.
public class BlockPresence
{
    public BlockPresence(ILog log, int length, float factor)
    {
        // Timing via Stopwatch.Measure is currently disabled here.
        var candidates = Enumerable.Range(0, length).ToList();

        float total = length;
        var wanted = Convert.ToInt32(Math.Round(total * factor));

        if (wanted < length)
        {
            var picked = new List<int>();
            for (var count = 0; count < wanted; count++)
            {
                picked.Add(candidates.PickOneRandom());
            }
            picked.Sort();
            Present = picked.ToArray();
        }
        else
        {
            // Everything is present; no random picking needed.
            Present = candidates.ToArray();
        }
    }

    public int[] Present { get; private set; } = Array.Empty<int>();
}
}
}

View File

@ -0,0 +1,193 @@
using NUnit.Framework;
using NUnit.Framework.Interfaces;
using static FrameworkTests.Utils.RunLengthEncodingTests;
namespace FrameworkTests.Utils
{
[TestFixture]
public class RunLengthEncodingRunTests
{
[Test]
[Combinatorial]
public void RunIncludes(
    [Values(0, 1, 2, 3)] int start,
    [Values(1, 2, 3, 4)] int length)
{
    var run = new Run(start, length);
    var firstAfter = start + length;

    // Every index in [start, start + length) belongs to the run.
    for (var inside = start; inside < firstAfter; inside++)
    {
        Assert.That(run.Includes(inside));
    }

    // The direct neighbours on either side do not.
    Assert.That(!run.Includes(start - 1));
    Assert.That(!run.Includes(firstAfter));
}
[Test]
public void RunExpandToInclude()
{
    var run = new Run(2, 3); // 2, 3, 4
    Assert.That(run.Includes(2), Is.True);
    Assert.That(run.Includes(4), Is.True);
    Assert.That(run.Includes(5), Is.False);

    // Only the index directly after the run can be absorbed.
    foreach (var rejected in new[] { 1, 2, 4, 6 })
    {
        Assert.That(run.ExpandToInclude(rejected), Is.False);
    }
    Assert.That(run.ExpandToInclude(5), Is.True);

    Assert.That(run.Includes(5), Is.True);
    Assert.That(run.Includes(6), Is.False);
}
[Test]
public void RunCanUnsetLastIndex()
{
    const int lastIndex = 2;
    var run = new Run(0, 3); // 0, 1, 2
    Assert.That(run.Includes(lastIndex), Is.True);

    var update = run.Unset(lastIndex);

    // The run shrinks in place; no runs need to be added or removed.
    Assert.That(run.Includes(lastIndex), Is.False);
    Assert.That(update.NewRuns, Is.Empty);
    Assert.That(update.RemoveRuns, Is.Empty);
}
[Test]
public void RunCanSplit()
{
    var run = new Run(0, 6); // 0, 1, 2, 3, 4, 5

    var update = run.Unset(2);

    // The original run keeps the part before the removed index: 0, 1
    Assert.That(run.Start, Is.EqualTo(0));
    Assert.That(run.Length, Is.EqualTo(2));
    Assert.That(run.Includes(2), Is.False);

    Assert.That(update.NewRuns, Has.Length.EqualTo(1));
    Assert.That(update.RemoveRuns, Is.Empty);

    // The new run holds the part after the removed index: 3, 4, 5
    var tail = update.NewRuns[0];
    Assert.That(tail.Includes(2), Is.False);
    Assert.That(tail.Start, Is.EqualTo(3));
    Assert.That(tail.Length, Is.EqualTo(3));
    Assert.That(tail.Includes(6), Is.False);
}
[Test]
public void RunReplacesSelfWhenUnsetFirstIndex()
{
    var run = new Run(0, 5); // 0, 1, 2, 3, 4

    var update = run.Unset(0);

    // The run asks to be removed...
    Assert.That(update.NewRuns, Has.Length.EqualTo(1));
    Assert.That(update.RemoveRuns, Has.Length.EqualTo(1));
    Assert.That(update.RemoveRuns[0], Is.SameAs(run));

    // ...and replaced by a run covering the remaining indices: 1, 2, 3, 4
    var replacement = update.NewRuns[0];
    Assert.That(replacement.Start, Is.EqualTo(1));
    Assert.That(replacement.Length, Is.EqualTo(4));
}
[Test]
public void CanIterateIndices()
{
    var visited = new List<int>();
    var expected = new[] { 2, 3, 4, 5 };

    new Run(2, 4).Iterate(visited.Add);

    CollectionAssert.AreEqual(expected, visited);
}
}
// A contiguous range of indices: Start, Start + 1, ..., Start + Length - 1.
// Start is fixed; Length can grow (ExpandToInclude) or shrink (Unset).
public class Run
{
    public Run(int start, int length)
    {
        Start = start;
        Length = length;
    }

    public int Start { get; }

    public int Length { get; private set; }

    // True when index lies inside the range covered by this run.
    public bool Includes(int index) => Start <= index && index < (Start + Length);

    // Grows the run by one when index is the position directly after it.
    // Returns whether the run was grown.
    public bool ExpandToInclude(int index)
    {
        if (index != (Start + Length))
        {
            return false;
        }

        Length++;
        return true;
    }

    // Removes index from this run and reports which runs the owner has to
    // add and/or remove as a consequence.
    public RunUpdate Unset(int index)
    {
        if (!Includes(index))
        {
            return new RunUpdate();
        }

        var lastIndex = Start + Length - 1;

        if (index == Start)
        {
            // First index: Start cannot change, so this run is removed and,
            // unless nothing remains, replaced by a run starting one later.
            var replacements = Length == 1
                ? Array.Empty<Run>()
                : new[] { new Run(Start + 1, Length - 1) };
            return new RunUpdate(replacements, new[] { this });
        }

        if (index == lastIndex)
        {
            // Last index: shrink in place.
            Length--;
            return new RunUpdate();
        }

        // Somewhere in the middle: keep the head in this run and hand the
        // tail back as a new run.
        var tail = new Run(index + 1, lastIndex - index);
        Length = index - Start;
        return new RunUpdate(new[] { tail }, Array.Empty<Run>());
    }

    // Invokes action for every index in the run, in ascending order.
    public void Iterate(Action<int> action)
    {
        var offset = 0;
        while (offset < Length)
        {
            action(Start + offset);
            offset++;
        }
    }
}
// Result of modifying a Run: the runs that have to be added and the runs
// that have to be removed. Both arrays are empty when nothing changes.
public class RunUpdate
{
    public RunUpdate()
    {
        NewRuns = Array.Empty<Run>();
        RemoveRuns = Array.Empty<Run>();
    }

    public RunUpdate(Run[] newRuns, Run[] removeRuns)
    {
        NewRuns = newRuns;
        RemoveRuns = removeRuns;
    }

    public Run[] NewRuns { get; }

    public Run[] RemoveRuns { get; }
}
}

View File

@ -0,0 +1,320 @@
using Logging;
using Microsoft.VisualStudio.TestPlatform.Common;
using NuGet.Frameworks;
using NUnit.Framework;
using System.Collections.Concurrent;
using System.Numerics;
using Utils;
namespace FrameworkTests.Utils
{
[TestFixture]
public class RunLengthEncodingTests
{
private readonly Random random = new Random();
[Test]
public void EmptySet()
{
    var emptySet = new IndexSet();

    // Nothing is reported as set...
    foreach (var index in Enumerable.Range(0, 1000))
    {
        Assert.That(emptySet.IsSet(index), Is.False);
    }

    // ...and iteration never invokes the callback.
    var visited = 0;
    emptySet.Iterate(_ => visited++);
    Assert.That(visited, Is.EqualTo(0));
}
[Test]
public void SetsIndex()
{
    const int target = 1234;
    var indexSet = new IndexSet();

    indexSet.Set(target);

    Assert.That(indexSet.IsSet(target), Is.True);
}
[Test]
public void UnsetsIndex()
{
    const int target = 1234;
    var indexSet = new IndexSet();

    indexSet.Set(target);
    indexSet.Unset(target);

    Assert.That(indexSet.IsSet(target), Is.False);
}
[Test]
public void RandomIndices()
{
    var expected = GenerateRandomIndices();

    // A set built from the indices must report exactly those indices.
    AssertEqual(new IndexSet(expected), expected);
}
[Test]
public void RandomRunLengthEncoding()
{
    var expected = GenerateRandomIndices();

    // Round trip: indices -> set -> encoding -> set.
    var encoded = new IndexSet(expected).RunLengthEncoded();
    var roundTripped = IndexSet.FromRunLengthEncoded(encoded);

    AssertEqual(roundTripped, expected);
}
[Test]
public void RunLengthEncoding()
{
    var indexSet = new IndexSet(new[] { 0, 1, 2, 4, 6, 7 });

    // Pairs of (start, length).
    var expected = new[]
    {
        0, 3, // 0, 1, 2
        4, 1, // 4
        6, 2  // 6, 7
    };

    CollectionAssert.AreEqual(expected, indexSet.RunLengthEncoded());
}
[Test]
public void RunLengthDecoding()
{
    // Pairs of (start, length).
    var encoded = new[]
    {
        2, 4,
        7, 1,
        9, 2
    };
    var expected = new[]
    {
        2, 3, 4, 5,
        7,
        9, 10
    };

    var decoded = IndexSet.FromRunLengthEncoded(encoded);

    var visited = new List<int>();
    decoded.Iterate(visited.Add);
    CollectionAssert.AreEqual(expected, visited);
}
[Test]
public void SetIndexBeforeRun()
{
    var indexSet = new IndexSet(new[] { 12, 13, 14 });

    indexSet.Set(11);

    // The new index joins the front of the run: start 11, length 4.
    var expected = new[] { 11, 4 };
    CollectionAssert.AreEqual(expected, indexSet.RunLengthEncoded());
}
[Test]
public void SetIndexAfterRun()
{
    var indexSet = new IndexSet(new[] { 12, 13, 14 });

    indexSet.Set(15);

    // The new index joins the back of the run: start 12, length 4.
    var expected = new[] { 12, 4 };
    CollectionAssert.AreEqual(expected, indexSet.RunLengthEncoded());
}
[Test]
public void UnsetIndexAtStartOfRun()
{
    var indexSet = new IndexSet(new[] { 11, 12, 13, 14 });

    indexSet.Unset(11);

    // The run now starts one later: start 12, length 3.
    var expected = new[] { 12, 3 };
    CollectionAssert.AreEqual(expected, indexSet.RunLengthEncoded());
}
[Test]
public void UnsetIndexAtEndOfRun()
{
    var indexSet = new IndexSet(new[] { 11, 12, 13, 14 });

    indexSet.Unset(14);

    // The run loses its last index: start 11, length 3.
    var expected = new[] { 11, 3 };
    CollectionAssert.AreEqual(expected, indexSet.RunLengthEncoded());
}
[Test]
public void UnsetIndexInRun()
{
    var indexSet = new IndexSet(new[] { 11, 12, 13, 14 });

    indexSet.Unset(12);

    // The run is split around the removed index.
    var expected = new[]
    {
        11, 1, // 11
        13, 2  // 13, 14
    };
    CollectionAssert.AreEqual(expected, indexSet.RunLengthEncoded());
}
// Asserts that the set contains exactly the given indices: IsSet must agree
// for every position from 0 up to the largest index, and Iterate must yield
// the indices in the same order as the array.
private void AssertEqual(IndexSet set, int[] indices)
{
    var limit = indices.Max() + 1;
    var position = 0;
    while (position < limit)
    {
        var actual = set.IsSet(position);
        var expected = indices.Contains(position);
        Assert.That(actual, Is.EqualTo(expected));
        position++;
    }

    var visited = new List<int>();
    set.Iterate(visited.Add);
    CollectionAssert.AreEqual(indices, visited);
}
/// <summary>
/// Produces a sorted array of randomly chosen indices in the range 0 .. 1999.
/// </summary>
/// <returns>
/// The picked indices in ascending order; at most 1000 of them.
/// </returns>
private int[] GenerateRandomIndices()
{
    const int number = 1000;
    const int max = 2000;

    var all = Enumerable.Range(0, max).ToList();
    var result = new List<int>(number);

    // NOTE(review): PickOneRandom is defined outside this file; the
    // all.Any() guard suggests it removes the picked element from the pool,
    // which would make the picks unique - confirm.
    while (all.Any() && result.Count < number)
    {
        result.Add(all.PickOneRandom());
    }

    // Return the picked indices, not the pool they were drawn from.
    result.Sort();
    return result.ToArray();
}
/// <summary>
/// A set of integer indices stored as runs of consecutive values. Runs are
/// kept in a list sorted by their start index.
/// </summary>
public class IndexSet
{
    // Key: start index of the run. Value: the run itself.
    private readonly SortedList<int, Run> runs = new SortedList<int, Run>();

    /// <summary>Creates an empty set.</summary>
    public IndexSet()
    {
    }

    /// <summary>Creates a set containing every index in <paramref name="indices"/>.</summary>
    public IndexSet(int[] indices)
    {
        foreach (var i in indices) Set(i);
    }

    /// <summary>
    /// Rebuilds a set from the output of <see cref="RunLengthEncoded"/>.
    /// </summary>
    /// <param name="rle">Flat array of (start, length) pairs.</param>
    /// <exception cref="ArgumentException">
    /// The array has an odd number of elements, a run length is smaller
    /// than one, or two runs share the same start index.
    /// </exception>
    public static IndexSet FromRunLengthEncoded(int[] rle)
    {
        if (rle.Length % 2 != 0)
        {
            throw new ArgumentException("Run-length encoding must consist of (start, length) pairs.", nameof(rle));
        }

        var set = new IndexSet();
        for (var i = 0; i < rle.Length; i += 2)
        {
            var start = rle[i];
            var length = rle[i + 1];
            if (length < 1)
            {
                // An empty run would still register its start as a key and
                // make IsSet report that index as present.
                throw new ArgumentException($"Run at {start} has invalid length {length}.", nameof(rle));
            }
            set.runs.Add(start, new Run(start, length));
        }
        return set;
    }

    /// <summary>Returns whether <paramref name="index"/> is in the set.</summary>
    public bool IsSet(int index)
    {
        // Every run has at least one element, so a run starting at index
        // always includes it.
        if (runs.ContainsKey(index)) return true;

        var run = GetRunBefore(index);
        return run != null && run.Includes(index);
    }

    /// <summary>
    /// Adds <paramref name="index"/> to the set. Does nothing when it is
    /// already present. Runs that become adjacent are merged.
    /// </summary>
    public void Set(int index)
    {
        if (runs.ContainsKey(index)) return;

        var before = GetRunBefore(index);
        if (before != null)
        {
            // Already covered by the preceding run: adding a new run here
            // would make the index appear twice.
            if (before.Includes(index)) return;

            if (before.ExpandToInclude(index))
            {
                MergeWithFollowingRun(before);
                return;
            }
        }

        CreateNewRun(index);
    }

    /// <summary>
    /// Removes <paramref name="index"/> from the set. Does nothing when it
    /// is not present.
    /// </summary>
    public void Unset(int index)
    {
        if (!runs.TryGetValue(index, out var run))
        {
            run = GetRunBefore(index);
        }
        if (run == null) return;

        HandleUpdate(run.Unset(index));
    }

    /// <summary>
    /// Invokes <paramref name="onIndex"/> for every index in the set, in
    /// ascending order.
    /// </summary>
    public void Iterate(Action<int> onIndex)
    {
        foreach (var run in runs.Values)
        {
            run.Iterate(onIndex);
        }
    }

    /// <summary>
    /// Returns the set as a flat array of (start, length) pairs, ordered by
    /// start index.
    /// </summary>
    public int[] RunLengthEncoded()
    {
        return Encode().ToArray();
    }

    private IEnumerable<int> Encode()
    {
        foreach (var run in runs.Values)
        {
            yield return run.Start;
            yield return run.Length;
        }
    }

    // Returns the run with the largest start index that is smaller than
    // index, or null when there is none. Binary search over the sorted keys.
    private Run? GetRunBefore(int index)
    {
        var keys = runs.Keys;
        var low = 0;
        var high = keys.Count - 1;
        Run? result = null;

        while (low <= high)
        {
            var mid = low + (high - low) / 2;
            if (keys[mid] < index)
            {
                result = runs.Values[mid];
                low = mid + 1;
            }
            else
            {
                high = mid - 1;
            }
        }

        return result;
    }

    // Applies the bookkeeping a Run asked for after it was modified.
    private void HandleUpdate(RunUpdate runUpdate)
    {
        foreach (var removeRun in runUpdate.RemoveRuns) runs.Remove(removeRun.Start);
        foreach (var newRun in runUpdate.NewRuns) runs.Add(newRun.Start, newRun);
    }

    // Starts a run at index. When another run starts directly after index,
    // that run is replaced by one that also covers index.
    private void CreateNewRun(int index)
    {
        if (runs.TryGetValue(index + 1, out var next))
        {
            runs.Remove(index + 1);
            runs.Add(index, new Run(index, next.Length + 1));
        }
        else
        {
            runs.Add(index, new Run(index, 1));
        }
    }

    // After run has grown by one: when another run starts exactly where run
    // now ends, replace both by a single run covering the combined range.
    private void MergeWithFollowingRun(Run run)
    {
        var followingStart = run.Start + run.Length;
        if (!runs.TryGetValue(followingStart, out var following)) return;

        runs.Remove(followingStart);
        runs[run.Start] = new Run(run.Start, run.Length + following.Length);
    }
}
}
}