Add parallel parsing/serialization for Simple TSV
It doesn't give as much of a performance boost as hoped.
This commit is contained in:
@ -101,6 +101,18 @@ internal class Program
|
||||
public double BinFloat { get; set; }
|
||||
}
|
||||
|
||||
/// <summary>
/// Record type with three string properties, used as the typed ("specced") record
/// for the Simple TSV timing and parallel parsing/serialization tests in this file.
/// </summary>
public class StringTestRecord : SaneTsv.TsvRecord
|
||||
{
|
||||
// Presumably binds this property to the column named "column1" - confirm against TypedTsvColumn.
[SaneTsv.TypedTsvColumn("column1")]
|
||||
public string Column1 { get; set; }
|
||||
|
||||
// NOTE(review): no explicit name is given here; the tests use the header "column2" for this
// position, so the column name presumably falls back to the (lowercase) property name.
// Confirm before renaming the property.
[SaneTsv.TypedTsvColumn]
|
||||
public string column2 { get; set; }
|
||||
|
||||
// The column name contains an embedded newline character ("columnthree\nyep").
[SaneTsv.TypedTsvColumn("columnthree\nyep")]
|
||||
public string Column3 { get; set; }
|
||||
}
|
||||
|
||||
private static void Main(string[] args)
|
||||
{
|
||||
{
|
||||
@ -275,7 +287,7 @@ internal class Program
|
||||
{
|
||||
string testName = "Try to parsed a Typed TSV as a Simple TSV";
|
||||
|
||||
string testString1 =
|
||||
string testString1 =
|
||||
"column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
@ -292,6 +304,160 @@ internal class Program
|
||||
}
|
||||
}
|
||||
|
||||
{
    // Test: "Timing comparison of simple parse methods and comparison of simple serialization methods".
    // Informational only: prints how long each Simple TSV call takes for the typed-record
    // ("specced") overloads and the raw string-array ("unspecced") overloads. Nothing is asserted.

    int N = 1000000;
    var records = new StringTestRecord[N];
    // Fixed seed so every run is timed against the same generated data.
    var rand = new Random(1);

    for (int i = 0; i < N; i++)
    {
        records[i] = new StringTestRecord()
        {
            Column1 = rand.Next().ToString(),
            column2 = rand.Next().ToString(),
            Column3 = rand.Next().ToString(),
        };
    }

    string[][] recordStrings = records.Select(record => new string[] { record.Column1, record.column2, record.Column3 }).ToArray();

    // Stopwatch is the tool intended for measuring elapsed time; DateTime.Now is a
    // coarse wall-clock reading and is not reliable for short intervals.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    byte[] serialized1 = SaneTsv.SerializeSimpleTsv<StringTestRecord>(records);

    TimeSpan speccedSerializationTime = stopwatch.Elapsed;
    Console.WriteLine($"Specced serialization time: {speccedSerializationTime}");
    stopwatch.Restart();

    byte[] serialized2 = SaneTsv.SerializeSimpleTsv(new string[] { "column1", "column2", "columnthree\nyep" }, recordStrings);

    TimeSpan unspeccedSerializationTime = stopwatch.Elapsed;
    Console.WriteLine($"Unspecced serialization time: {unspeccedSerializationTime}");
    stopwatch.Restart();

    // Only the elapsed time matters here, so the parse results are discarded.
    _ = SaneTsv.ParseSimpleTsv<StringTestRecord>(serialized1);

    TimeSpan speccedParseTime = stopwatch.Elapsed;
    Console.WriteLine($"Specced parse time: {speccedParseTime}");
    stopwatch.Restart();

    _ = SaneTsv.ParseSimpleTsv(serialized2);

    TimeSpan unspeccedParseTime = stopwatch.Elapsed;
    Console.WriteLine($"Unspecced parse time: {unspeccedParseTime}");
}
|
||||
|
||||
{
    // Verifies that SerializeSimpleTsvParallel produces exactly the same bytes as
    // SerializeSimpleTsv for the same input, and prints the elapsed time of each.
    string testName = "Check parallel serialization";

    int N = 100000;
    var records = new StringTestRecord[N];
    // Fixed seed so the generated data is the same on every run.
    var rand = new Random(1);

    for (int i = 0; i < N; i++)
    {
        records[i] = new StringTestRecord()
        {
            Column1 = rand.Next().ToString(),
            column2 = rand.Next().ToString(),
            Column3 = rand.Next().ToString(),
        };
    }

    string[][] recordStrings = records.Select(record => new string[] { record.Column1, record.column2, record.Column3 }).ToArray();
    string[] header = new string[] { "column1", "column2", "columnthree\nyep" };

    // Stopwatch is the tool intended for measuring elapsed time; DateTime.Now is a
    // coarse wall-clock reading and is not reliable for short intervals.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    byte[] serialized1 = SaneTsv.SerializeSimpleTsv(header, recordStrings);
    TimeSpan unparallelTime = stopwatch.Elapsed;

    stopwatch.Restart();
    byte[] serialized2 = SaneTsv.SerializeSimpleTsvParallel(header, recordStrings);
    TimeSpan parallelTime = stopwatch.Elapsed;

    Console.WriteLine($"Unparallel serialization time: {unparallelTime}");
    Console.WriteLine($"Parallel serialization time: {parallelTime}");

    // SequenceEqual also fails when the lengths differ. Comparing only the common
    // prefix would report "Passed" for a truncated or over-long parallel result.
    bool matching = serialized1.SequenceEqual(serialized2);

    if (matching)
    {
        Console.WriteLine($"Passed {testName}");
    }
    else
    {
        Console.WriteLine($"Failed {testName}");
    }
}
|
||||
|
||||
{
    // Verifies that ParseSimpleTsvParallel returns the same headers and cell values as
    // ParseSimpleTsv for the same serialized input, and prints the elapsed time of each.
    string testName = "Check parallel parsing";

    int N = 100000;
    var records = new StringTestRecord[N];
    // Fixed seed so the generated data is the same on every run.
    var rand = new Random(1);

    for (int i = 0; i < N; i++)
    {
        records[i] = new StringTestRecord()
        {
            Column1 = rand.Next().ToString(),
            column2 = rand.Next().ToString(),
            Column3 = rand.Next().ToString(),
        };
    }

    byte[] serialized = SaneTsv.SerializeSimpleTsv<StringTestRecord>(records);

    // Stopwatch is the tool intended for measuring elapsed time; DateTime.Now is a
    // coarse wall-clock reading and is not reliable for short intervals.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    (string[] headers2, string[][] data2) = SaneTsv.ParseSimpleTsv(serialized);
    TimeSpan unparallelTime = stopwatch.Elapsed;

    stopwatch.Restart();
    (string[] headers, string[][] data) = SaneTsv.ParseSimpleTsvParallel(serialized);
    TimeSpan parallelTime = stopwatch.Elapsed;

    // This block times parsing, so the labels say "parse" rather than "serialization".
    Console.WriteLine($"Unparallel parse time: {unparallelTime}");
    Console.WriteLine($"Parallel parse time: {parallelTime}");

    // A differing header count or row count is a mismatch in its own right.
    // Comparing only the common prefix would hide missing or extra entries.
    bool matching = headers.SequenceEqual(headers2) && data.Length == data2.Length;

    // Compare each row in full, so a row with a different number of cells fails
    // instead of being partly compared or indexing out of range.
    for (int i = 0; matching && i < data.Length; i++)
    {
        matching = data[i].SequenceEqual(data2[i]);
    }

    if (matching)
    {
        Console.WriteLine($"Passed {testName}");
    }
    else
    {
        Console.WriteLine($"Failed {testName}");
    }
}
|
||||
|
||||
Console.WriteLine("Done with tests");
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user