Compare commits
12 Commits
78eaa5dbab
...
master
Author | SHA1 | Date | |
---|---|---|---|
d9ef2a4bb6 | |||
a80206767e | |||
b8ae3ce65d | |||
0fd092685d | |||
55fa00a6e7 | |||
d428af51bb | |||
aef92e87d4 | |||
b56236cbb7 | |||
7230f982ac | |||
f4145bacd2 | |||
f98a40a173 | |||
0c61128e0e |
1216
SaneTsv/SaneTsv.cs
1216
SaneTsv/SaneTsv.cs
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,5 @@
|
||||
using NathanMcRae;
|
||||
using System.Reflection;
|
||||
using System.Text;
|
||||
|
||||
internal class Program : SaneTsv
|
||||
@ -80,6 +81,18 @@ internal class Program : SaneTsv
|
||||
public string Column3 { get; set; }
|
||||
}
|
||||
|
||||
public class BoolTestRecord3 : SaneTsv.CommentedTsvRecord
|
||||
{
|
||||
[SaneTsv.TsvColumn("column1")]
|
||||
public string Column1 { get; set; }
|
||||
|
||||
[SaneTsv.TsvColumn]
|
||||
public string column2 { get; set; }
|
||||
|
||||
[SaneTsv.TsvColumn("columnthree\nyep")]
|
||||
public string Column3 { get; set; }
|
||||
}
|
||||
|
||||
public class SerdeTestRecord : SaneTsv.CommentedTsvRecord
|
||||
{
|
||||
[SaneTsv.TypedTsvColumn("column1")]
|
||||
@ -349,45 +362,26 @@ internal class Program : SaneTsv
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "Check parallel Simple TSV serialization";
|
||||
string testName = "With and without file comment";
|
||||
|
||||
int N = 100000;
|
||||
var records = new StringTestRecord[N];
|
||||
var rand = new Random(1);
|
||||
string testString1 = "#This is a file comment\n" +
|
||||
"#One more file comment line\n" +
|
||||
"column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\n#This is a comment" +
|
||||
"\n#Another comment line" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
for (int i = 0; i < N; i++)
|
||||
{
|
||||
records[i] = new StringTestRecord()
|
||||
{
|
||||
Column1 = rand.Next().ToString(),
|
||||
column2 = rand.Next().ToString(),
|
||||
Column3 = rand.Next().ToString(),
|
||||
};
|
||||
}
|
||||
string testString2 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\n#This is a comment" +
|
||||
"\n#Another comment line" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
string[][] recordStrings = records.Select(record => new string[] { record.Column1, record.column2, record.Column3 }).ToArray();
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
CommentedTsv<BoolTestRecord2> parsed2 = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString2));
|
||||
|
||||
DateTime lastTime = DateTime.Now;
|
||||
byte[] serialized1 = SaneTsv.SerializeSimpleTsv(new string[] { "column1", "column2", "columnthree\nyep" }, recordStrings);
|
||||
TimeSpan unparallelTime = DateTime.Now - lastTime;
|
||||
lastTime = DateTime.Now;
|
||||
byte[] serialized2 = SaneTsv.SerializeSimpleTsvParallel(new string[] { "column1", "column2", "columnthree\nyep" }, recordStrings);
|
||||
TimeSpan parallelTime = DateTime.Now - lastTime;
|
||||
|
||||
Console.WriteLine($"Unparallel serialization time: {unparallelTime}");
|
||||
Console.WriteLine($"Parallel serialization time: {parallelTime}");
|
||||
|
||||
bool matching = true;
|
||||
for (int i = 0; i < Math.Min(serialized1.Length, serialized2.Length); i++)
|
||||
{
|
||||
if (serialized1[i] != serialized2[i])
|
||||
{
|
||||
matching = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (matching)
|
||||
if (parsed.FileComment == "This is a file comment\nOne more file comment line" && parsed2.FileComment == null)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName}");
|
||||
}
|
||||
@ -398,162 +392,388 @@ internal class Program : SaneTsv
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "Check Simple TSV parallel parsing";
|
||||
string testName = "With and without types";
|
||||
|
||||
int N = 100000;
|
||||
var records = new StringTestRecord[N];
|
||||
var rand = new Random(1);
|
||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\nTRUE\tvalue\\\\twoo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
for (int i = 0; i < N; i++)
|
||||
try
|
||||
{
|
||||
records[i] = new StringTestRecord()
|
||||
{
|
||||
Column1 = rand.Next().ToString(),
|
||||
column2 = rand.Next().ToString(),
|
||||
Column3 = rand.Next().ToString(),
|
||||
};
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Passed {testName} 1A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 1A");
|
||||
}
|
||||
|
||||
byte[] serialized = SaneTsv.SerializeSimpleTsv<StringTestRecord>(records);
|
||||
|
||||
DateTime lastTime = DateTime.Now;
|
||||
(string[] headers2, string[][] data2) = SaneTsv.ParseSimpleTsv(serialized);
|
||||
TimeSpan unparallelTime = DateTime.Now - lastTime;
|
||||
lastTime = DateTime.Now;
|
||||
(string[] headers, string[][] data) = SaneTsv.ParseSimpleTsvParallel(serialized);
|
||||
TimeSpan parallelTime = DateTime.Now - lastTime;
|
||||
|
||||
Console.WriteLine($"Unparallel parse time: {unparallelTime}");
|
||||
Console.WriteLine($"Parallel parse time: {parallelTime}");
|
||||
|
||||
bool matching = true;
|
||||
for (int j = 0; j < Math.Min(headers2.Length, headers.Length); j++)
|
||||
try
|
||||
{
|
||||
if (headers[j] != headers2[j])
|
||||
{
|
||||
matching = false;
|
||||
break;
|
||||
}
|
||||
Tsv<BoolTestRecord2> parsed2 = SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1B");
|
||||
}
|
||||
|
||||
for (int i = 0; i < Math.Min(data.Length, data2.Length) && matching; i++)
|
||||
try
|
||||
{
|
||||
for (int j = 0; j < data[0].Length; j++)
|
||||
{
|
||||
if (data[i][j] != data2[i][j])
|
||||
{
|
||||
matching = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1C");
|
||||
}
|
||||
|
||||
if (matching)
|
||||
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||
"\nTRUE\tvalue\\\\twoo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
try
|
||||
{
|
||||
Console.WriteLine($"Passed {testName}");
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 2A");
|
||||
}
|
||||
else
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Failed {testName}");
|
||||
Console.WriteLine($"Passed {testName} 2A");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed2 = SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 2B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 2B");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 2C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 2C");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "Check parallel serialization";
|
||||
string testName = "With and without line comment";
|
||||
|
||||
int N = 1000;
|
||||
var records = new BoolTestRecord[N];
|
||||
var rand = new Random(1);
|
||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\n#This is a comment" +
|
||||
"\n#Another comment line" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
for (int i = 0; i < N; i++)
|
||||
try
|
||||
{
|
||||
byte[] bytes = new byte[rand.Next(50)];
|
||||
rand.NextBytes(bytes);
|
||||
records[i] = new BoolTestRecord()
|
||||
{
|
||||
Column1 = rand.NextDouble() > 0.5,
|
||||
column2 = bytes,
|
||||
Column3 = rand.Next().ToString(),
|
||||
};
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Passed {testName} 1A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 1A");
|
||||
}
|
||||
|
||||
DateTime lastTime = DateTime.Now;
|
||||
byte[] serialized1 = SaneTsv.SerializeTsv<BoolTestRecord>(records, FormatType.COMMENTED_TSV);
|
||||
TimeSpan unparallelTime = DateTime.Now - lastTime;
|
||||
lastTime = DateTime.Now;
|
||||
byte[] serialized2 = SaneTsv.SerializeTsvParallel<BoolTestRecord>(records, FormatType.COMMENTED_TSV);
|
||||
TimeSpan parallelTime = DateTime.Now - lastTime;
|
||||
|
||||
Console.WriteLine($"Unparallel serialization time: {unparallelTime}");
|
||||
Console.WriteLine($"Parallel serialization time: {parallelTime}");
|
||||
|
||||
bool matching = true;
|
||||
for (int i = 0; i < Math.Min(serialized1.Length, serialized2.Length); i++)
|
||||
try
|
||||
{
|
||||
if (serialized1[i] != serialized2[i])
|
||||
{
|
||||
matching = false;
|
||||
break;
|
||||
}
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1B");
|
||||
}
|
||||
|
||||
if (matching)
|
||||
try
|
||||
{
|
||||
Console.WriteLine($"Passed {testName}");
|
||||
Tsv<BoolTestRecord2> parsed2 = SaneTsv.ParseSimpleTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1C");
|
||||
}
|
||||
else
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Failed {testName}");
|
||||
Console.WriteLine($"Passed {testName} 1C");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1D");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1D");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "Check parallel parsing";
|
||||
string testName = "End of file comment";
|
||||
|
||||
int N = 1000;
|
||||
var records = new BoolTestRecord[N];
|
||||
var rand = new Random(1);
|
||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\n# Hey, you're not supposed to have comments at the end of the tsv!";
|
||||
|
||||
for (int i = 0; i < N; i++)
|
||||
try
|
||||
{
|
||||
byte[] bytes = new byte[rand.Next(50)];
|
||||
rand.NextBytes(bytes);
|
||||
records[i] = new BoolTestRecord()
|
||||
{
|
||||
Column1 = rand.NextDouble() > 0.5,
|
||||
column2 = bytes,
|
||||
Column3 = rand.Next().ToString(),
|
||||
};
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1A");
|
||||
}
|
||||
|
||||
byte[] serialized2 = SaneTsv.SerializeTsvParallel<BoolTestRecord>(records, FormatType.COMMENTED_TSV);
|
||||
|
||||
DateTime lastTime = DateTime.Now;
|
||||
CommentedTsv<BoolTestRecord> parsed = (CommentedTsv<BoolTestRecord>)SaneTsv.Parse<BoolTestRecord>(serialized2, FormatType.COMMENTED_TSV);
|
||||
TimeSpan unparallelTime = DateTime.Now - lastTime;
|
||||
lastTime = DateTime.Now;
|
||||
CommentedTsv<BoolTestRecord> parsed2 = (CommentedTsv<BoolTestRecord>)SaneTsv.ParseParallel<BoolTestRecord>(serialized2, FormatType.COMMENTED_TSV);
|
||||
TimeSpan parallelTime = DateTime.Now - lastTime;
|
||||
|
||||
Console.WriteLine($"Unparallel parsing time: {unparallelTime}");
|
||||
Console.WriteLine($"Parallel parsing time: {parallelTime}");
|
||||
|
||||
bool matching = parsed.FileComment == parsed2.FileComment;
|
||||
|
||||
matching &= parsed.Records.Count == parsed2.Records.Count;
|
||||
|
||||
for (int i = 0; matching && i < parsed.Records.Count; i++)
|
||||
try
|
||||
{
|
||||
matching &= parsed.Records[i].Comment == parsed2.Records[i].Comment;
|
||||
matching &= parsed.Records[i].Column1 == parsed2.Records[i].Column1;
|
||||
matching &= parsed.Records[i].column2.Length == parsed2.Records[i].column2.Length;
|
||||
for (int j = 0; matching && j < parsed.Records[i].column2.Length; j++)
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1B");
|
||||
}
|
||||
|
||||
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\n# Hey, you're not supposed to have comments at the end of the tsv!";
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord3> parsed3 = SaneTsv.ParseSimpleTsv<BoolTestRecord3>(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1C");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1D");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1D");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "Partial parsing";
|
||||
|
||||
string line1 = "column1\tcolumn2\tcolumnthree\\nyep";
|
||||
string line2 = "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee";
|
||||
string line3 = "\nFALSE\tnother\tno\\ther";
|
||||
|
||||
byte[] inputBuffer = Encoding.UTF8.GetBytes(line1 + line2 + line3);
|
||||
|
||||
var headerTypes = new List<Type>();
|
||||
var headerNames = new List<string>();
|
||||
var headerPropertyInfos = new List<PropertyInfo>();
|
||||
int columnCount = 0;
|
||||
|
||||
foreach (PropertyInfo property in typeof(BoolTestRecord3).GetProperties())
|
||||
{
|
||||
TsvColumnAttribute attribute = (TsvColumnAttribute)Attribute.GetCustomAttribute(property, typeof(TsvColumnAttribute));
|
||||
if (attribute == null)
|
||||
{
|
||||
matching &= parsed.Records[i].column2[j] == parsed2.Records[i].column2[j];
|
||||
continue;
|
||||
}
|
||||
|
||||
headerNames.Add(attribute.ColumnName ?? property.Name);
|
||||
headerTypes.Add(attribute.ColumnType ?? GetColumnFromType(property.PropertyType));
|
||||
headerPropertyInfos.Add(property);
|
||||
// TODO: Check that the property type and given column type are compatible
|
||||
columnCount++;
|
||||
}
|
||||
|
||||
if (matching)
|
||||
BoolTestRecord3[] records = SaneTsv.Parse<BoolTestRecord3>(inputBuffer,
|
||||
FormatType.SIMPLE_TSV,
|
||||
headerPropertyInfos.ToArray(),
|
||||
headerTypes.ToArray(),
|
||||
line1.Length + line2.Length + 1,
|
||||
inputBuffer.Length);
|
||||
|
||||
if (records.Length == 0 )
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1");
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 1");
|
||||
}
|
||||
|
||||
BoolTestRecord3[] records2 = SaneTsv.Parse<BoolTestRecord3>(inputBuffer,
|
||||
FormatType.SIMPLE_TSV,
|
||||
headerPropertyInfos.ToArray(),
|
||||
headerTypes.ToArray(),
|
||||
line1.Length,
|
||||
line1.Length + 3);
|
||||
|
||||
if (records2[0].Column3 == "valuetrhee")
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 2");
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 2");
|
||||
}
|
||||
|
||||
string[][] data = SaneTsv.ParseSimpleTsv(inputBuffer, 3, line1.Length + line2.Length + 1, inputBuffer.Length);
|
||||
|
||||
if (data[0][1] == "nother")
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 3");
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 3");
|
||||
}
|
||||
|
||||
string[][] data2 = SaneTsv.ParseSimpleTsv(inputBuffer, 3, line1.Length, line1.Length + 3);
|
||||
|
||||
if (data2.Length == 0)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 4");
|
||||
}
|
||||
else
|
||||
{
|
||||
Console.WriteLine($"Failed {testName} 4");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "End of file \\n";
|
||||
|
||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\n";
|
||||
|
||||
try
|
||||
{
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1A");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1B");
|
||||
}
|
||||
|
||||
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\n";
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord3> parsed3 = SaneTsv.ParseSimpleTsv<BoolTestRecord3>(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1C");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1D");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1D");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "End of file partial record";
|
||||
|
||||
string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\nTRUE\t";
|
||||
|
||||
try
|
||||
{
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1A");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1A");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord2> parsed = SaneTsv.ParseTypedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
Console.WriteLine($"Failed {testName} 1B");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1B");
|
||||
}
|
||||
|
||||
string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther" +
|
||||
"\nTRUE\t";
|
||||
|
||||
try
|
||||
{
|
||||
Tsv<BoolTestRecord3> parsed3 = SaneTsv.ParseSimpleTsv<BoolTestRecord3>(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1C");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1C");
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
(string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2));
|
||||
Console.WriteLine($"Failed {testName} 1D");
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName} 1D");
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
string testName = "File comment serde";
|
||||
|
||||
string testString1 = "#this is a file comment" +
|
||||
"\n# and one more line since you're such a good customer" +
|
||||
"\ncolumn1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" +
|
||||
"\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" +
|
||||
"\nFALSE\tnother\tno\\ther";
|
||||
|
||||
|
||||
CommentedTsv<BoolTestRecord2> parsed = SaneTsv.ParseCommentedTsv<BoolTestRecord2>(Encoding.UTF8.GetBytes(testString1));
|
||||
|
||||
string reserialized = Encoding.UTF8.GetString(SaneTsv.SerializeCommentedTsv<BoolTestRecord2>(parsed.Records, parsed.FileComment));
|
||||
|
||||
if (reserialized == testString1)
|
||||
{
|
||||
Console.WriteLine($"Passed {testName}");
|
||||
}
|
||||
@ -563,7 +783,6 @@ internal class Program : SaneTsv
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Console.WriteLine("Done with tests");
|
||||
}
|
||||
}
|
||||
|
41
readme.md
41
readme.md
@ -3,14 +3,43 @@
|
||||
## Roadmap
|
||||
|
||||
- Improve error reporting by including line/column information in exceptions
|
||||
- Come up with a static-typing interface
|
||||
- Use this to get line numbers for parallel parsing implementations
|
||||
- [x] Come up with a static-typing interface
|
||||
|
||||
Something that doesn't require an array of objects
|
||||
|
||||
Use a class with SaneTsv attributes
|
||||
|
||||
- Check numeric formatting matches spec
|
||||
- Do parallel parsing / serializing implementation
|
||||
- Next task: Refactor parsing so that it will start and end at arbitrary indices and return an array of SaneTsvRecords. The refactor should ignore the current record (unless at the start of the buffer) and continue parsing the record the end index is in.
|
||||
- More optimization and making parsing modular:
|
||||
- [x] Maybe add a binary representation for f32/f64. It should specify that it is Little-endian (since we have to pick one). That way we can guarantee bit-compatibility between implementations where an application might require that.
|
||||
- [x] Add Column name/type specification to API
|
||||
- So you can tell it what columns to expect
|
||||
- [ ] Lax/strict versions
|
||||
|
||||
See the attributes thing above
|
||||
- Generate test cases
|
||||
- [x] File comment / no file comment
|
||||
- [x] header types / no header types
|
||||
- [x] Line comments / no line comments
|
||||
- [x] end of file comment
|
||||
- [x] Test with the start index of parallel methods in last record
|
||||
- end index in first record
|
||||
- [x] Extra \n at end of file
|
||||
- [x] Wrong number of fields
|
||||
- Wrong number of fields at end of file
|
||||
|
||||
- [x] Do parallel parsing / serializing implementation
|
||||
- [x] Next task: Refactor parsing so that it will start and end at arbitrary indices and return an array of SaneTsvRecords. The refactor should ignore the current record (unless at the start of the buffer) and continue parsing the record the end index is in.
|
||||
- ~~More optimization and making parsing modular:~~
|
||||
- Have callbacks for header parsing and field parsing
|
||||
- That way other formats (like ExtraTSV) don't have to iterate through the entire set of data again.
|
||||
- Finish ExtraTSV implementation
|
||||
- Do zig implementation
|
||||
- [x] Make untyped Simple TSV (De)serialization
|
||||
- [x] ~~Finish~~ Minimal ExtraTSV implementation
|
||||
- [ ] Do Zig implementation
|
||||
- Make a C interface from that
|
||||
- Make a command-line interface
|
||||
- Make a viewer / editor
|
||||
- Streaming interface
|
||||
So you can start processing your data while it finishes parsing?
|
||||
- [ ] Decoding a binary stream with a \0 in it via UTF-8 doesn't seem to cause any issues. I thought that valid UTF-8 wouldn't have a \0?
|
||||
- [ ] Instead of exceptions when parsing, we should parse as much as possible and reflect parsing errors in the returned data structure
|
||||
|
Reference in New Issue
Block a user