Compare commits
10 Commits
a7e6f1c0e9
...
8e3332b484
Author | SHA1 | Date | |
---|---|---|---|
|
8e3332b484 | ||
|
32393e704d | ||
|
d7720d8cde | ||
|
aff4b353bb | ||
|
203458fdf7 | ||
|
4148475031 | ||
|
e38baa9167 | ||
|
a66f6a1368 | ||
|
96af5ae82c | ||
|
11c7daec8e |
779
SaneTsv.cs
779
SaneTsv.cs
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,8 @@
|
||||
using NathanMcRae;
|
||||
using System.Reflection;
|
||||
using System.Text;
|
||||
|
||||
internal class Program
|
||||
internal class Program : SaneTsv
|
||||
{
|
||||
public class TestRecord : SaneTsv.TsvRecord
|
||||
{
|
||||
@ -80,6 +81,18 @@ internal class Program
|
||||
public string Column3 { get; set; }
|
||||
}
|
||||
|
||||
public class BoolTestRecord3 : SaneTsv.CommentedTsvRecord
|
||||
{
|
||||
[SaneTsv.TsvColumn("column1")]
|
||||
public string Column1 { get; set; }
|
||||
|
||||
[SaneTsv.TsvColumn]
|
||||
public string column2 { get; set; }
|
||||
|
||||
[SaneTsv.TsvColumn("columnthree\nyep")]
|
||||
public string Column3 { get; set; }
|
||||
}
|
||||
|
||||
public class SerdeTestRecord : SaneTsv.CommentedTsvRecord
|
||||
{
|
||||
[SaneTsv.TypedTsvColumn("column1")]
|
||||
@ -349,45 +362,26 @@ internal class Program
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "Check parallel serialization";
|
||||
string testName = "With and without file comment";
|
||||
|
||||
int N = 100000;
|
||||
var records = new StringTestRecord[N];
|
||||
var rand = new Random(1);
|
||||
string testString1 = "#This is a file comment\n" +
|
||||
"#One more file comment line\n" +
|
||||
"column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\n#This is a comment" +
|
||||
"\n#Another comment line" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
records[i] = new StringTestRecord()
|
||||
{
|
||||
Column1 = rand.Next().ToString(),
|
||||
column2 = rand.Next().ToString(),
|
||||
Column3 = rand.Next().ToString(),
|
||||
};
|
||||
}
|
||||
string testString2 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\n#This is a comment" +
|
||||
"\n#Another comment line" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
string[][] recordStrings = records.Select(record => new string[] { record.Column1, record.column2, record.Column3 }).ToArray();
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
CommentedTsv<BoolTestRecord2> parsed2 = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString2));
|
||||
|
||||
DateTime lastTime = DateTime.Now;
|
||||
byte[] serialized1 = SaneTsv.SerializeSimpleTsv(new string[] { "column1", "column2", "columnthree\nyep" }, recordStrings);
|
||||
TimeSpan unparallelTime = DateTime.Now - lastTime;
|
||||
lastTime = DateTime.Now;
|
||||
byte[] serialized2 = SaneTsv.SerializeSimpleTsvParallel(new string[] { "column1", "column2", "columnthree\nyep" }, recordStrings);
|
||||
TimeSpan parallelTime = DateTime.Now - lastTime;
|
||||
|
||||
Console.WriteLine($"Unparallel serialization time: {unparallelTime}");
|
||||
Console.WriteLine($"Parallel serialization time: {parallelTime}");
|
||||
|
||||
bool matching = true;
|
||||
for (int i = 0; i < Math.Min(serialized1.Length, serialized2.Length); i++)
|
||||
{
|
||||
if (serialized1[i] != serialized2[i])
|
||||
{
|
||||
matching = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (matching)
|
||||
if (parsed.FileComment == "This is a file comment\nOne more file comment line" && parsed2.FileComment == null)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName}");
|
||||
}
|
||||
@ -398,63 +392,370 @@ internal class Program
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "Check parallel parsing";
|
||||
string testName = "With and without types";
|
||||
|
||||
int N = 100000;
|
||||
var records = new StringTestRecord[N];
|
||||
var rand = new Random(1);
|
||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\nTRUE\tvalue\\\\twoo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
for (int i = 0; i < N; i++)
|
||||
try
|
||||
{
|
||||
records[i] = new StringTestRecord()
|
||||
{
|
||||
Column1 = rand.Next().ToString(),
|
||||
column2 = rand.Next().ToString(),
|
||||
Column3 = rand.Next().ToString(),
|
||||
};
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Passed {testName} 1A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 1A");
|
||||
}
|
||||
|
||||
byte[] serialized = SaneTsv.SerializeSimpleTsv<StringTestRecord>(records);
|
||||
|
||||
DateTime lastTime = DateTime.Now;
|
||||
(string[] headers2, string[][] data2) = SaneTsv.ParseSimpleTsv(serialized);
|
||||
TimeSpan unparallelTime = DateTime.Now - lastTime;
|
||||
lastTime = DateTime.Now;
|
||||
(string[] headers, string[][] data) = SaneTsv.ParseSimpleTsvParallel(serialized);
|
||||
TimeSpan parallelTime = DateTime.Now - lastTime;
|
||||
|
||||
Console.WriteLine($"Unparallel serialization time: {unparallelTime}");
|
||||
Console.WriteLine($"Parallel serialization time: {parallelTime}");
|
||||
|
||||
bool matching = true;
|
||||
for (int j = 0; j < Math.Min(headers2.Length, headers.Length); j++)
|
||||
try
|
||||
{
|
||||
if (headers[j] != headers2[j])
|
||||
Tsv<BoolTestRecord2> parsed2 = SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1B");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1C");
|
||||
}
|
||||
|
||||
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||
"\nTRUE\tvalue\\\\twoo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 2A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 2A");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed2 = SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 2B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 2B");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 2C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 2C");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "With and without line comment";
|
||||
|
||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\n#This is a comment" +
|
||||
"\n#Another comment line" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
try
|
||||
{
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Passed {testName} 1A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 1A");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1B");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed2 = SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1C");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1D");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1D");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "End of file comment";
|
||||
|
||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\n# Hey, you're not supposed to have comments at the end of the tsv!";
|
||||
|
||||
try
|
||||
{
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1A");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1B");
|
||||
}
|
||||
|
||||
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\n# Hey, you're not supposed to have comments at the end of the tsv!";
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord3> parsed3 = SaneTsv.ParseSimpleTsv<BoolTestRecord3>(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1C");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1D");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1D");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "Partial parsing";
|
||||
|
||||
string line1 = "column1\tcolumn2\tcolumnthree\\nyep";
|
||||
string line2 = "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee";
|
||||
string line3 = "\nFALSE\tnother\tno\\ther";
|
||||
|
||||
byte[] inputBuffer = Encoding.UTF8.GetBytes(line1 + line2 + line3);
|
||||
|
||||
var headerTypes = new List<Type>();
|
||||
var headerNames = new List<string>();
|
||||
var headerPropertyInfos = new List<PropertyInfo>();
|
||||
int columnCount = 0;
|
||||
|
||||
foreach (PropertyInfo property in typeof(BoolTestRecord3).GetProperties())
|
||||
{
|
||||
TsvColumnAttribute attribute = (TsvColumnAttribute)Attribute.GetCustomAttribute(property, typeof(TsvColumnAttribute));
|
||||
if (attribute == null)
|
||||
{
|
||||
matching = false;
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
|
||||
headerNames.Add(attribute.ColumnName ?? property.Name);
|
||||
headerTypes.Add(attribute.ColumnType ?? GetColumnFromType(property.PropertyType));
|
||||
headerPropertyInfos.Add(property);
|
||||
// TODO: Check that the property type and given column type are compatible
|
||||
columnCount++;
|
||||
}
|
||||
|
||||
for (int i = 0; i < Math.Min(data.Length, data2.Length) && matching; i++)
|
||||
{
|
||||
for (int j = 0; j < data[0].Length; j++)
|
||||
{
|
||||
if (data[i][j] != data2[i][j])
|
||||
{
|
||||
matching = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
BoolTestRecord3[] records = SaneTsv.Parse<BoolTestRecord3>(inputBuffer,
|
||||
FormatType.SIMPLE_TSV,
|
||||
headerPropertyInfos.ToArray(),
|
||||
headerTypes.ToArray(),
|
||||
line1.Length + line2.Length + 1,
|
||||
inputBuffer.Length);
|
||||
|
||||
if (matching)
|
||||
if (records.Length == 0 )
|
||||
{
|
||||
Console.WriteLine($"Passed {testName}");
|
||||
Console.WriteLine($"Passed {testName} 1");
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"Failed {testName}");
|
||||
Console.WriteLine($"Failed {testName} 1");
|
||||
}
|
||||
|
||||
BoolTestRecord3[] records2 = SaneTsv.Parse<BoolTestRecord3>(inputBuffer,
|
||||
FormatType.SIMPLE_TSV,
|
||||
headerPropertyInfos.ToArray(),
|
||||
headerTypes.ToArray(),
|
||||
line1.Length,
|
||||
line1.Length + 3);
|
||||
|
||||
if (records2[0].Column3 == "valuetrhee")
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 2");
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 2");
|
||||
}
|
||||
|
||||
string[][] data = SaneTsv.ParseSimpleTsv(inputBuffer, 3, line1.Length + line2.Length + 1, inputBuffer.Length);
|
||||
|
||||
if (data[0][1] == "nother")
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 3");
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 3");
|
||||
}
|
||||
|
||||
string[][] data2 = SaneTsv.ParseSimpleTsv(inputBuffer, 3, line1.Length, line1.Length + 3);
|
||||
|
||||
if (data2.Length == 0)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 4");
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 4");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "End of file \\n";
|
||||
|
||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\n";
|
||||
|
||||
try
|
||||
{
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1A");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1B");
|
||||
}
|
||||
|
||||
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\n";
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord3> parsed3 = SaneTsv.ParseSimpleTsv<BoolTestRecord3>(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1C");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1D");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1D");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "End of file partial record";
|
||||
|
||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\nTRUE\t";
|
||||
|
||||
try
|
||||
{
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1A");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1B");
|
||||
}
|
||||
|
||||
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\nTRUE\t";
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord3> parsed3 = SaneTsv.ParseSimpleTsv<BoolTestRecord3>(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1C");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1D");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1D");
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user