Start parallel versions of general TSV serialization/parsing
They mostly work, but are not actually parallelized yet and likely mishandle some edge cases. Also, the soon-to-be-parallel version of parsing is currently much slower than the original.
This commit is contained in:
@ -1,7 +1,7 @@
|
||||
using NathanMcRae;
|
||||
using System.Text;
|
||||
|
||||
internal class Program
|
||||
internal class Program : SaneTsv
|
||||
{
|
||||
public class TestRecord : SaneTsv.TsvRecord
|
||||
{
|
||||
@ -349,7 +349,7 @@ internal class Program
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "Check parallel serialization";
|
||||
string testName = "Check parallel Simple TSV serialization";
|
||||
|
||||
int N = 100000;
|
||||
var records = new StringTestRecord[N];
|
||||
@ -398,7 +398,7 @@ internal class Program
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "Check parallel parsing";
|
||||
string testName = "Check Simple TSV parallel parsing";
|
||||
|
||||
int N = 100000;
|
||||
var records = new StringTestRecord[N];
|
||||
@ -423,8 +423,8 @@ internal class Program
|
||||
(string[] headers, string[][] data) = SaneTsv.ParseSimpleTsvParallel(serialized);
|
||||
TimeSpan parallelTime = DateTime.Now - lastTime;
|
||||
|
||||
Console.WriteLine($"Unparallel serialization time: {unparallelTime}");
|
||||
Console.WriteLine($"Parallel serialization time: {parallelTime}");
|
||||
Console.WriteLine($"Unparallel parse time: {unparallelTime}");
|
||||
Console.WriteLine($"Parallel parse time: {parallelTime}");
|
||||
|
||||
bool matching = true;
|
||||
for (int j = 0; j < Math.Min(headers2.Length, headers.Length); j++)
|
||||
@ -458,6 +458,112 @@ internal class Program
|
||||
}
|
||||
}
|
||||
|
||||
{
    // Checks that SaneTsv.SerializeTsvParallel emits exactly the same bytes as
    // SaneTsv.SerializeTsv for the same BoolTestRecord array in COMMENTED_TSV
    // format, and prints the wall-clock time taken by each call.
    string testName = "Check parallel serialization";

    int N = 1000;
    var records = new BoolTestRecord[N];
    // Fixed seed so every run builds the same records.
    var rand = new Random(1);

    for (int i = 0; i < N; i++)
    {
        // Random payload of 0 to 49 bytes (the upper bound of Next is exclusive).
        byte[] bytes = new byte[rand.Next(50)];
        rand.NextBytes(bytes);
        records[i] = new BoolTestRecord()
        {
            Column1 = rand.NextDouble() > 0.5,
            column2 = bytes,
            Column3 = rand.Next().ToString(),
        };
    }

    DateTime lastTime = DateTime.Now;
    byte[] serialized1 = SaneTsv.SerializeTsv<BoolTestRecord>(records, FormatType.COMMENTED_TSV);
    TimeSpan unparallelTime = DateTime.Now - lastTime;
    lastTime = DateTime.Now;
    byte[] serialized2 = SaneTsv.SerializeTsvParallel<BoolTestRecord>(records, FormatType.COMMENTED_TSV);
    TimeSpan parallelTime = DateTime.Now - lastTime;

    Console.WriteLine($"Unparallel serialization time: {unparallelTime}");
    Console.WriteLine($"Parallel serialization time: {parallelTime}");

    // Outputs of different lengths can never be equal. Without this check an
    // output that is merely a prefix of the other one would be reported as a pass.
    bool matching = serialized1.Length == serialized2.Length;
    for (int i = 0; matching && i < serialized1.Length; i++)
    {
        // The loop condition stops at the first differing byte.
        matching = serialized1[i] == serialized2[i];
    }

    if (matching)
    {
        Console.WriteLine($"Passed {testName}");
    }
    else
    {
        Console.WriteLine($"Failed {testName}");
    }
}
|
||||
|
||||
{
    // Checks that SaneTsv.ParseParallel yields the same result as SaneTsv.Parse
    // when both are given the same COMMENTED_TSV bytes, and prints the
    // wall-clock time taken by each call.
    string testName = "Check parallel parsing";

    int N = 1000;
    var records = new BoolTestRecord[N];
    // Fixed seed so every run builds the same records.
    var rand = new Random(1);

    for (int i = 0; i < N; i++)
    {
        // Random payload of 0 to 49 bytes (the upper bound of Next is exclusive).
        byte[] bytes = new byte[rand.Next(50)];
        rand.NextBytes(bytes);
        records[i] = new BoolTestRecord()
        {
            Column1 = rand.NextDouble() > 0.5,
            column2 = bytes,
            Column3 = rand.Next().ToString(),
        };
    }

    // Input shared by both parsers.
    byte[] serialized2 = SaneTsv.SerializeTsvParallel<BoolTestRecord>(records, FormatType.COMMENTED_TSV);

    DateTime lastTime = DateTime.Now;
    CommentedTsv<BoolTestRecord> parsed = (CommentedTsv<BoolTestRecord>)SaneTsv.Parse<BoolTestRecord>(serialized2, FormatType.COMMENTED_TSV);
    TimeSpan unparallelTime = DateTime.Now - lastTime;
    lastTime = DateTime.Now;
    CommentedTsv<BoolTestRecord> parsed2 = (CommentedTsv<BoolTestRecord>)SaneTsv.ParseParallel<BoolTestRecord>(serialized2, FormatType.COMMENTED_TSV);
    TimeSpan parallelTime = DateTime.Now - lastTime;

    Console.WriteLine($"Unparallel parsing time: {unparallelTime}");
    Console.WriteLine($"Parallel parsing time: {parallelTime}");

    bool matching = parsed.FileComment == parsed2.FileComment;

    matching &= parsed.Records.Count == parsed2.Records.Count;

    // "matching" guards both loops: they stop at the first mismatch, and the
    // indexers are only used once the corresponding counts/lengths agree.
    for (int i = 0; matching && i < parsed.Records.Count; i++)
    {
        matching &= parsed.Records[i].Comment == parsed2.Records[i].Comment;
        matching &= parsed.Records[i].Column1 == parsed2.Records[i].Column1;
        // Column3 is populated for every record above, so it has to be part of
        // the comparison; otherwise a parser that mangles it would still pass.
        matching &= parsed.Records[i].Column3 == parsed2.Records[i].Column3;
        matching &= parsed.Records[i].column2.Length == parsed2.Records[i].column2.Length;
        for (int j = 0; matching && j < parsed.Records[i].column2.Length; j++)
        {
            matching &= parsed.Records[i].column2[j] == parsed2.Records[i].column2[j];
        }
    }

    if (matching)
    {
        Console.WriteLine($"Passed {testName}");
    }
    else
    {
        Console.WriteLine($"Failed {testName}");
    }
}
|
||||
|
||||
|
||||
Console.WriteLine("Done with tests");
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user