sane-tsv/SaneTsv/SaneTsvTest/Program.cs
Nathan McRae 4ddb8dc44d Add parallel parsing/serialization for Simple TSV
Doesn't give as much of a performance bonus as hoped
2024-02-25 11:24:30 -08:00

466 lines
15 KiB
C#

using NathanMcRae;
using System.Text;
internal class Program
{
/// <summary>
/// Record type used by the typed-TSV round-trip ("Serde") test in <c>Main</c>.
/// Every property carries a <c>TypedTsvColumn</c> attribute naming its column; the two
/// "-le" columns additionally pass an explicit column type.
/// </summary>
public class TestRecord : SaneTsv.TsvRecord
{
    [SaneTsv.TypedTsvColumn("string-test")]
    public string StringTest { get; set; }

    [SaneTsv.TypedTsvColumn("bool-test")]
    public bool BoolTest { get; set; }

    [SaneTsv.TypedTsvColumn("float32-test")]
    public float Float32Test { get; set; }

    // Explicit column type: Float32LEType instead of whatever the attribute picks by default.
    [SaneTsv.TypedTsvColumn("float32-le-test", typeof(SaneTsv.Float32LEType))]
    public float Float32LETest { get; set; }

    [SaneTsv.TypedTsvColumn("float64-test")]
    public double Float64Test { get; set; }

    // Explicit column type: Float64LEType instead of whatever the attribute picks by default.
    [SaneTsv.TypedTsvColumn("float64-le-test", typeof(SaneTsv.Float64LEType))]
    public double Float64LETest { get; set; }

    [SaneTsv.TypedTsvColumn("uint32-test")]
    public uint UInt32Test { get; set; }

    [SaneTsv.TypedTsvColumn("uint64-test")]
    public ulong UInt64Test { get; set; }

    [SaneTsv.TypedTsvColumn("int32-test")]
    public int Int32Test { get; set; }

    [SaneTsv.TypedTsvColumn("int64-test")]
    public long Int64Test { get; set; }

    [SaneTsv.TypedTsvColumn("binary-test")]
    public byte[] BinaryTest { get; set; }

    /// <summary>
    /// Creates a record with every column populated; each argument is stored in the
    /// property of the same name.
    /// </summary>
    public TestRecord(
        string stringTest,
        bool boolTest,
        float float32Test,
        float float32LETest,
        double float64Test,
        double float64LETest,
        uint uInt32Test,
        ulong uInt64Test,
        int int32Test,
        long int64Test,
        byte[] binaryTest)
    {
        StringTest = stringTest;
        BoolTest = boolTest;
        Float32Test = float32Test;
        Float32LETest = float32LETest;
        Float64Test = float64Test;
        Float64LETest = float64LETest;
        UInt32Test = uInt32Test;
        UInt64Test = uInt64Test;
        Int32Test = int32Test;
        Int64Test = int64Test;
        BinaryTest = binaryTest;
    }

    /// <summary>
    /// Creates an empty record with all properties at their default values.
    /// NOTE(review): presumably required so the parser can instantiate records - confirm.
    /// </summary>
    public TestRecord() { }
}
/// <summary>
/// Three-column commented record used by the "Bool test". The declared column names
/// contain a ':' and '#' (first column) and a newline character (third column).
/// </summary>
public class BoolTestRecord : SaneTsv.CommentedTsvRecord
{
    [SaneTsv.TypedTsvColumn("column1:ty#pe")]
    public bool Column1 { get; set; }

    // No explicit column name is given here.
    // NOTE(review): presumably the property name "column2" is used as the column name - confirm.
    [SaneTsv.TypedTsvColumn]
    public byte[] column2 { get; set; }

    [SaneTsv.TypedTsvColumn("columnthree\nyep")]
    public string Column3 { get; set; }
}
/// <summary>
/// Variant of <c>BoolTestRecord</c> whose first column is named "column1:type"
/// (no '#'); used by the tests in <c>Main</c> that feed a "column1:type:boolean" header
/// to the various parsers.
/// </summary>
public class BoolTestRecord2 : SaneTsv.CommentedTsvRecord
{
    [SaneTsv.TypedTsvColumn("column1:type")]
    public bool Column1 { get; set; }

    // No explicit column name is given here.
    // NOTE(review): presumably the property name "column2" is used as the column name - confirm.
    [SaneTsv.TypedTsvColumn]
    public byte[] column2 { get; set; }

    [SaneTsv.TypedTsvColumn("columnthree\nyep")]
    public string Column3 { get; set; }
}
/// <summary>
/// Three-column commented record whose first column is plainly named "column1".
/// Within this file it is only referenced from a commented-out test in <c>Main</c>.
/// </summary>
public class SerdeTestRecord : SaneTsv.CommentedTsvRecord
{
    [SaneTsv.TypedTsvColumn("column1")]
    public bool Column1 { get; set; }

    // No explicit column name is given here.
    // NOTE(review): presumably the property name "column2" is used as the column name - confirm.
    [SaneTsv.TypedTsvColumn]
    public byte[] column2 { get; set; }

    [SaneTsv.TypedTsvColumn("columnthree\nyep")]
    public string Column3 { get; set; }
}
/// <summary>
/// Two-column record used by the "Float binary test": the same double is carried once in
/// the "somefloat" column and once in the "binfloat" column, which is declared with the
/// explicit column type <c>Float64LEType</c>.
/// </summary>
public class FloatTestRecord : SaneTsv.CommentedTsvRecord
{
    [SaneTsv.TypedTsvColumn("somefloat")]
    public double SomeFloat { get; set; }

    [SaneTsv.TypedTsvColumn("binfloat", typeof(SaneTsv.Float64LEType))]
    public double BinFloat { get; set; }
}
/// <summary>
/// All-string, three-column record used by the timing and parallel-vs-sequential tests
/// in <c>Main</c>.
/// </summary>
public class StringTestRecord : SaneTsv.TsvRecord
{
    [SaneTsv.TypedTsvColumn("column1")]
    public string Column1 { get; set; }

    // No explicit column name is given here.
    // NOTE(review): presumably the property name "column2" is used as the column name - confirm.
    [SaneTsv.TypedTsvColumn]
    public string column2 { get; set; }

    [SaneTsv.TypedTsvColumn("columnthree\nyep")]
    public string Column3 { get; set; }
}
/// <summary>
/// Runs every SaneTsv smoke test in a fixed order. Each test prints
/// "Passed {name}" or "Failed {name}" to the console; the timing tests additionally print
/// elapsed times. Finishes by printing "Done with tests".
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
private static void Main(string[] args)
{
    RunBoolTest();
    RunBadBoolTest();
    RunCommentTest();

    // NOTE(review): the test below was already disabled in the original file; it is kept
    // verbatim because it is unclear from this file alone whether the API it calls still exists.
    //{
    //  string testName = "Serde test";
    //  string testString1 = "column1\tcolumn2\tcolumnthree\\nyep" +
    //    "\nTRUE\tvalue\\\\twoo\tvaluetrhee" +
    //    "\nFALSE\tnother\tno\\ther";
    //  Tsv<SerdeTestRecord> parsed = SaneTsv.ParseSimpleTsv<SerdeTestRecord>(Encoding.UTF8.GetBytes(testString1));
    //  string serialized = Encoding.UTF8.GetString(SaneTsv.SerializeSimpleTsv(parsed.ColumnNames, parsed.Records.Select(r => r.Fields.Select(f => f.ToString()).ToArray()).ToArray()));
    //  if (testString1 == serialized)
    //  {
    //    Console.WriteLine($"Passed {testName}");
    //  }
    //  else
    //  {
    //    Console.WriteLine($"Failed {testName}");
    //  }
    //}

    RunFloatBinaryTest();
    RunSerdeTest();

    // "Trying to parse a not commented record as a Commented TSV" is a compile-time check.
    // These should not compile:
    //byte[] serialized = SaneTsv.SerializeCommentedTsv(data);
    // Gives this error: error CS7036: There is no argument given that corresponds to the required parameter 'fileComment' of 'SaneTsv.SerializeCommentedTsv<T>(IList<T>, string)'
    //Tsv<TestRecord> parsed = SaneTsv.ParseCommentedTsv<TestRecord>(serialized);
    // Gives this error: error CS0311: The type 'Program.TestRecord' cannot be used as type parameter 'T' in the generic type or method 'SaneTsv.ParseCommentedTsv<T>(byte[])'. There is no implicit reference conversion from 'Program.TestRecord' to 'NathanMcRae.SaneTsv.CommentedTsvRecord'.

    RunCommentedAsSimpleTest();
    RunCommentedAsTypedTest();
    RunTypedAsSimpleTest();
    RunSimpleTimingComparison();
    RunParallelSerializationCheck();
    RunParallelParsingCheck();

    Console.WriteLine("Done with tests");
}

// Typed TSV with two file-comment lines and two comment lines ahead of the first record;
// shared by every test that feeds commented input to a parser.
private const string CommentedTsvText =
    "#This is a file comment\n" +
    "#One more file comment line\n" +
    "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
    "\n#This is a comment" +
    "\n#Another comment line" +
    "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
    "\nFALSE\tnother\tno\\ther";

/// <summary>Prints the pass/fail line for a single test.</summary>
private static void Report(string testName, bool passed)
{
    Console.WriteLine(passed ? $"Passed {testName}" : $"Failed {testName}");
}

/// <summary>Runs <paramref name="action"/> and reports a pass only if it throws.</summary>
private static void ExpectException(string testName, Action action)
{
    bool threw = false;
    try
    {
        action();
    }
    catch (Exception)
    {
        // Any exception counts: these tests only check that invalid input is rejected.
        threw = true;
    }
    Report(testName, threw);
}

/// <summary>Returns the IEEE-754 bytes of <paramref name="value"/> in little-endian order on any host.</summary>
private static byte[] GetLittleEndianBytes(double value)
{
    byte[] bytes = BitConverter.GetBytes(value);
    if (!BitConverter.IsLittleEndian)
    {
        // BitConverter uses host byte order; the float64-le column needs little-endian.
        Array.Reverse(bytes);
    }
    return bytes;
}

/// <summary>Builds <paramref name="count"/> records of pseudo-random numeric strings from a fixed seed.</summary>
private static StringTestRecord[] MakeRandomStringRecords(int count)
{
    var records = new StringTestRecord[count];
    var rand = new Random(1); // fixed seed keeps every run on identical data
    for (int i = 0; i < count; i++)
    {
        records[i] = new StringTestRecord()
        {
            Column1 = rand.Next().ToString(),
            column2 = rand.Next().ToString(),
            Column3 = rand.Next().ToString(),
        };
    }
    return records;
}

/// <summary>Projects records to the plain string rows taken by the untyped serializer.</summary>
private static string[][] ToStringRows(StringTestRecord[] records)
{
    return records.Select(record => new string[] { record.Column1, record.column2, record.Column3 }).ToArray();
}

/// <summary>Parses a typed TSV whose header needs escaping and checks the first boolean is true.</summary>
private static void RunBoolTest()
{
    string testString1 = "column1:ty\\#pe:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
        "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
        "\nFALSE\tnother\tno\\ther";
    Tsv<BoolTestRecord> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord>(Encoding.UTF8.GetBytes(testString1));
    Report("Bool test", parsed.Records[0].Column1);
}

/// <summary>Checks that an invalid boolean value ("TUE") is rejected by the typed parser.</summary>
private static void RunBadBoolTest()
{
    string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
        "\nTUE\tvalue\\\\t\0woo\tvaluetrhee" +
        "\nFALSE\tnother\tno\\ther";
    // BoolTestRecord2 declares "column1:type", matching this header, so the bad boolean is
    // the only invalid part of the input. BoolTestRecord declares "column1:ty#pe", which
    // does not match this header and could make the test pass for the wrong reason.
    ExpectException(
        "Bad bool test",
        () => SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1)));
}

/// <summary>Checks that the commented parser accepts input with file and record comments.</summary>
private static void RunCommentTest()
{
    const string testName = "Comment test";
    try
    {
        SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(CommentedTsvText));
        Report(testName, true);
    }
    catch (Exception)
    {
        // Report instead of aborting the run so the remaining tests still execute.
        Report(testName, false);
    }
}

/// <summary>Checks that a float64 text column and a float64-le binary column yield the same value.</summary>
private static void RunFloatBinaryTest()
{
    var bytes = new List<byte>();
    bytes.AddRange(Encoding.UTF8.GetBytes("somefloat:float64\tbinfloat:float64-le" +
        "\n1.5\t"));
    bytes.AddRange(GetLittleEndianBytes(1.5));
    bytes.AddRange(Encoding.UTF8.GetBytes("\n-8.0000005E-14\t"));
    bytes.AddRange(GetLittleEndianBytes(-8.0000005E-14));
    Tsv<FloatTestRecord> parsed = SaneTsv.ParseTypedTsv<FloatTestRecord>(bytes.ToArray());
    // Exact comparison is intended: both columns encode the same double.
    Report("Float binary test", parsed.Records[0].BinFloat == parsed.Records[0].SomeFloat);
}

/// <summary>Serializes records covering every column type and checks a float survives the round trip.</summary>
private static void RunSerdeTest()
{
    TestRecord[] data =
    {
        new TestRecord("test", true, 44.5f, 44.5f, -88e-3, -88e-3, 7773, 88888888, -7773, -88888888, new byte[] { 0, 1, 2, 3 }),
        new TestRecord("test2", false, 44.5000005f, 44.5000005f, -88e-30, -88e-30, 7773, 88888888, -7773, -88888888, new byte[] { 0, 1, 2, 3, 4 }),
        new TestRecord("test2", false, float.NaN, float.NaN, double.NaN, double.NaN, 7773, 88888888, -7773, -88888888, new byte[] { 0, 1, 2, 3, 4 }),
        new TestRecord("test2", false, float.NegativeInfinity, float.NegativeInfinity, double.NegativeInfinity, double.NegativeInfinity, 7773, 88888888, -7773, -88888888, new byte[] { 0, 1, 2, 3, 4 }),
        new TestRecord("test2", false, float.PositiveInfinity, float.PositiveInfinity, double.PositiveInfinity, double.PositiveInfinity, 7773, 88888888, -7773, -88888888, new byte[] { 0, 1, 2, 3, 4 }),
    };
    byte[] serialized = SaneTsv.SerializeTypedTsv(data);
    Tsv<TestRecord> parsed = SaneTsv.ParseTypedTsv<TestRecord>(serialized);
    // Exact comparison is intended: the round trip must reproduce the float bit-for-bit.
    Report("Serde test", (float)parsed.Records[1].Float32Test == 44.5000005f);
}

/// <summary>Checks that the simple parser rejects commented input.</summary>
private static void RunCommentedAsSimpleTest()
{
    ExpectException(
        "Try to parsed a Commented TSV as a Simple TSV",
        () => SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(CommentedTsvText)));
}

/// <summary>Checks that the typed parser rejects commented input.</summary>
private static void RunCommentedAsTypedTest()
{
    ExpectException(
        "Try to parsed a Commented TSV as a Typed TSV",
        () => SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(CommentedTsvText)));
}

/// <summary>Checks that the simple parser rejects input with a typed header.</summary>
private static void RunTypedAsSimpleTest()
{
    string testString1 =
        "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
        "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
        "\nFALSE\tnother\tno\\ther";
    ExpectException(
        "Try to parsed a Typed TSV as a Simple TSV",
        () => SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1)));
}

/// <summary>
/// Prints how long the record-based ("specced") and string-array ("unspecced") simple
/// serializers and parsers take on one million records. Reports no pass/fail result.
/// </summary>
private static void RunSimpleTimingComparison()
{
    StringTestRecord[] records = MakeRandomStringRecords(1000000);
    string[][] recordStrings = ToStringRows(records);

    // Stopwatch is monotonic and high-resolution, unlike differences of DateTime.Now.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    byte[] serialized1 = SaneTsv.SerializeSimpleTsv<StringTestRecord>(records);
    TimeSpan speccedSerializationTime = stopwatch.Elapsed;
    Console.WriteLine($"Specced serialization time: {speccedSerializationTime}");

    stopwatch.Restart();
    byte[] serialized2 = SaneTsv.SerializeSimpleTsv(new string[] { "column1", "column2", "columnthree\nyep" }, recordStrings);
    TimeSpan unspeccedSerializationTime = stopwatch.Elapsed;
    Console.WriteLine($"Unspecced serialization time: {unspeccedSerializationTime}");

    stopwatch.Restart();
    _ = SaneTsv.ParseSimpleTsv<StringTestRecord>(serialized1);
    TimeSpan speccedParseTime = stopwatch.Elapsed;
    Console.WriteLine($"Specced parse time: {speccedParseTime}");

    stopwatch.Restart();
    _ = SaneTsv.ParseSimpleTsv(serialized2);
    TimeSpan unspeccedParseTime = stopwatch.Elapsed;
    Console.WriteLine($"Unspecced parse time: {unspeccedParseTime}");
}

/// <summary>Checks that parallel and sequential simple serialization produce identical bytes.</summary>
private static void RunParallelSerializationCheck()
{
    string[][] recordStrings = ToStringRows(MakeRandomStringRecords(100000));

    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    byte[] serialized1 = SaneTsv.SerializeSimpleTsv(new string[] { "column1", "column2", "columnthree\nyep" }, recordStrings);
    TimeSpan unparallelTime = stopwatch.Elapsed;

    stopwatch.Restart();
    byte[] serialized2 = SaneTsv.SerializeSimpleTsvParallel(new string[] { "column1", "column2", "columnthree\nyep" }, recordStrings);
    TimeSpan parallelTime = stopwatch.Elapsed;

    Console.WriteLine($"Unparallel serialization time: {unparallelTime}");
    Console.WriteLine($"Parallel serialization time: {parallelTime}");

    // SequenceEqual also compares lengths, so truncated or over-long output is a failure
    // rather than being hidden by comparing only the common prefix.
    Report("Check parallel serialization", serialized1.SequenceEqual(serialized2));
}

/// <summary>Checks that parallel and sequential simple parsing produce identical headers and rows.</summary>
private static void RunParallelParsingCheck()
{
    byte[] serialized = SaneTsv.SerializeSimpleTsv<StringTestRecord>(MakeRandomStringRecords(100000));

    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    (string[] headers2, string[][] data2) = SaneTsv.ParseSimpleTsv(serialized);
    TimeSpan unparallelTime = stopwatch.Elapsed;

    stopwatch.Restart();
    (string[] headers, string[][] data) = SaneTsv.ParseSimpleTsvParallel(serialized);
    TimeSpan parallelTime = stopwatch.Elapsed;

    Console.WriteLine($"Unparallel parse time: {unparallelTime}");
    Console.WriteLine($"Parallel parse time: {parallelTime}");

    // Header count, row count and every row's width must agree, not just the overlapping part.
    bool matching = headers.SequenceEqual(headers2) && data.Length == data2.Length;
    for (int i = 0; matching && i < data.Length; i++)
    {
        matching = data[i].SequenceEqual(data2[i]);
    }
    Report("Check parallel parsing", matching);
}
}