From 0807f0ffc266cef2369711f74133083fbd89d862 Mon Sep 17 00:00:00 2001 From: Nathan McRae Date: Sun, 10 Mar 2024 22:43:11 -0700 Subject: [PATCH] Add iso8601 parsing --- SaneTsv.cs | 132 +++++++- SaneTsvTest/Program.cs | 737 +---------------------------------------- 2 files changed, 137 insertions(+), 732 deletions(-) diff --git a/SaneTsv.cs b/SaneTsv.cs index 71df6ea..86bf535 100644 --- a/SaneTsv.cs +++ b/SaneTsv.cs @@ -1,5 +1,7 @@ -using System.Reflection; +using System.Globalization; +using System.Reflection; using System.Text; +using System.Text.RegularExpressions; namespace NathanMcRae; @@ -32,6 +34,51 @@ public class SaneTsv public class Int32Type : ColumnType { } public class Int64Type : ColumnType { } public class BinaryType : ColumnType { } + public class Iso8601Type : ColumnType { } + public class PhysicalUnitsType : ColumnType + { + public string Units { get; } + public PhysicalUnitsType(string Units) { } + } + + public static readonly string[] ValidUnits = + { + "m", + "s", + "A", + "K", + "cd", + "mol", + "kg", + "Hz", + "rad", + "sr", + "N", + "Pa", + "J", + "W", + "C", + "V", + "F", + "Ω", + "S", + "Wb", + "T", + "H", + "°C", + "lm", + "lx", + "Bq", + "Gy", + "Sv", + "kat" + }; + + public static readonly int MajorVersion = 0; + public static readonly int MinorVersion = 0; + public static readonly int PatchVersion = 1; + + public static Regex VersionRegex = new Regex(@"^ ExtraTSV V(\d+)\.(\d+)\.(\d+)"); protected enum FormatType { @@ -71,6 +118,31 @@ public class SaneTsv return (CommentedTsv)Parse(inputBuffer, FormatType.COMMENTED_TSV); } + public static CommentedTsv ParseExtraTsv(byte[] inputBuffer) where T : CommentedTsvRecord, new() + { + CommentedTsv parsed = (CommentedTsv)Parse(inputBuffer, FormatType.COMMENTED_TSV); + + if (parsed.FileComment == null) + { + throw new Exception($"ExtraTSV expects the file to start with '# ExtraTSV Vx.y.z' where x.y.z is a version compatible with {MajorVersion}.{MinorVersion}.{PatchVersion}"); + } + + Match match = VersionRegex.Match(parsed.FileComment); + if (!match.Success) + { + throw new Exception($"ExtraTSV expects the file to start with '# ExtraTSV Vx.y.z' where x.y.z is a version compatible with {MajorVersion}.{MinorVersion}.{PatchVersion}"); + } + + int fileMajorVersion = int.Parse(match.Groups[1].Value); + + if (fileMajorVersion != MajorVersion) + { + throw new Exception($"File has major version ({fileMajorVersion}) which is newer than this parser's version {MajorVersion}"); + } + + return parsed; + } + protected static Tsv Parse(byte[] inputBuffer, FormatType format) where T : TsvRecord, new() { Tsv parsed; @@ -169,7 +241,7 @@ public class SaneTsv throw new Exception($"Header field {fields.Count} is not valid UTF-8", e); } - string columnTypeString; + string[] columnTypeStrings; string columnName; if (columnString.Contains(':')) { @@ -177,8 +249,8 @@ public class SaneTsv { throw new Exception($"Header field {j} contains ':', which is not allowed for column names"); } - columnTypeString = columnString.Split(":").Last(); - columnName = columnString.Substring(0, columnString.Length - columnTypeString.Length - 1); + columnTypeStrings = columnString.Split(":"); + columnName = string.Join(":", columnTypeStrings.Take(columnTypeStrings.Length - 1)); } else { @@ -186,20 +258,30 @@ public class SaneTsv { throw new Exception($"Header field {fields.Count} has no type"); } - columnTypeString = ""; + columnTypeStrings = new string[] { "" }; columnName = columnString; } ColumnType type; - switch (columnTypeString) + switch (columnTypeStrings.Last()) { case "": numTypesBlank++; type = new StringType(); break; case "string": - type = new StringType(); + if (columnTypeStrings[columnTypeStrings.Length - 2] == "iso8601") + { + type = new Iso8601Type(); + columnName = string.Join(":", columnTypeStrings.Take(columnTypeStrings.Length - 2)); + } + // TODO: ISO8601 time spans + // TODO: ISO8601 time durations + else + { + type = new StringType(); + } break; case "boolean": type = new BooleanType(); @@ -232,9 +314,11 @@ public class SaneTsv type = new BinaryType(); break; default: - throw new Exception($"Invalid type '{columnTypeString}' for column {j}"); + throw new Exception($"Invalid type '{columnTypeStrings.Last()}' for column {j}"); } + // TODO: physical unit types + // TODO: Allow lax parsing (only worry about parsing columns that are given in the specifying type if (columnNames[j] != columnName) @@ -359,12 +443,12 @@ public class SaneTsv int relativeLine = 0; int i = startIndex; - while (i < inputBuffer.Length - 1 && inputBuffer[i] != '\n' && inputBuffer[i + 1] != '#') + while (i < inputBuffer.Length - 1 && inputBuffer[i] != '\n' && inputBuffer[i + 1] != '#') { i++; } - if (i >= inputBuffer.Length - 1) + if (i >= inputBuffer.Length - 1) { return Array.Empty(); } @@ -484,7 +568,7 @@ public class SaneTsv } fields.Add(fieldBytes.ToArray()); - + if (fields.Count == 0) { // TODO @@ -692,6 +776,15 @@ public class SaneTsv properties[j].SetValue(record, parsedInt64); } + else if (columnTypes[j].GetType() == typeof(Iso8601Type)) + { + if (!DateTime.TryParseExact(fieldString, "yyyy-MM-ddTHH:mm:ss.ffff", CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime parsed)) + { + throw new Exception($"ISO 8601 timestamp format error on line {line}, field {j}"); + } + + properties[j].SetValue(record, parsed); + } else { throw new Exception($"Unexpected type {columnTypes[j]}"); @@ -1156,6 +1249,10 @@ public class SaneTsv { return new BinaryType(); } + else if (type == typeof(DateTime)) + { + return new Iso8601Type(); + } else { throw new Exception($"Invalid type: {type.GetType()}"); @@ -1208,6 +1305,10 @@ public class SaneTsv { return "binary"; } + else if (type.GetType() == typeof(Iso8601Type)) + { + return "iso8601:string"; + } else { throw new Exception($"Invalid type: {type.GetType()}"); @@ -1229,6 +1330,11 @@ public class SaneTsv return SerializeTsv(data, FormatType.COMMENTED_TSV, fileComment); } + public static byte[] SerializeExtraTsv(IList data) where T : TsvRecord + { + return SerializeTsv(data, FormatType.COMMENTED_TSV, $" ExtraTSV V{MajorVersion}.{MinorVersion}.{PatchVersion}"); + } + protected static byte[] SerializeTsv(IList data, FormatType tsvFormat, string fileComment = null) { var bytes = new List(); @@ -1468,6 +1574,10 @@ public class SaneTsv { fieldEncoded = (byte[])datum; } + else if (columnTypes[j].GetType() == typeof(Iso8601Type)) + { + fieldEncoded = Encoding.UTF8.GetBytes(((DateTime)datum).ToString("yyyy-MM-ddTHH:mm:ss.ffff")); + } else { throw new Exception($"Unexpected column type {columnTypes[j]} for column {j}"); diff --git a/SaneTsvTest/Program.cs b/SaneTsvTest/Program.cs index 98ac12b..ddfecbd 100644 --- a/SaneTsvTest/Program.cs +++ b/SaneTsvTest/Program.cs @@ -1,63 +1,9 @@ using NathanMcRae; using System.Reflection; using System.Text; - internal class Program : SaneTsv { - public class TestRecord : SaneTsv.TsvRecord - { - [SaneTsv.TypedTsvColumn("string-test")] - public string StringTest { get; set; } - - [SaneTsv.TypedTsvColumn("bool-test")] - public bool BoolTest { get; set; } - - [SaneTsv.TypedTsvColumn("float32-test")] - public float Float32Test { get; set; } - - [SaneTsv.TypedTsvColumn("float32-le-test", typeof(SaneTsv.Float32LEType))] - public float Float32LETest { get; set; } - - [SaneTsv.TypedTsvColumn("float64-test")] - public double Float64Test { get; set; } - - [SaneTsv.TypedTsvColumn("float64-le-test", typeof(SaneTsv.Float64LEType))] - public double Float64LETest { get; set; } - - [SaneTsv.TypedTsvColumn("uint32-test")] - public UInt32 UInt32Test { get; set; } - - [SaneTsv.TypedTsvColumn("uint64-test")] - public UInt64 UInt64Test { get; set; } - - [SaneTsv.TypedTsvColumn("int32-test")] - public Int32 Int32Test { get; set; } - - [SaneTsv.TypedTsvColumn("int64-test")] - public Int64 Int64Test { get; set; } - - [SaneTsv.TypedTsvColumn("binary-test")] - public byte[] BinaryTest { get; set; } - - public TestRecord(string stringTest, bool boolTest, float float32Test, float float32LETest, double float64Test, double float64LETest, UInt32 uInt32Test, UInt64 uInt64Test, Int32 int32Test, Int64 int64Test, byte[] binaryTest) - { - StringTest = stringTest; - BoolTest = boolTest; - Float32Test = float32Test; - Float32LETest = float32LETest; - Float64Test = float64Test; - Float64LETest = float64LETest; - UInt32Test = uInt32Test; - UInt64Test = uInt64Test; - Int32Test = int32Test; - Int64Test = int64Test; - BinaryTest = binaryTest; - } - - public TestRecord() { } - } - - public class BoolTestRecord : SaneTsv.CommentedTsvRecord + public class DateTest : SaneTsv.CommentedTsvRecord { [SaneTsv.TypedTsvColumn("column1:ty#pe")] public bool Column1 { get; set; } @@ -66,75 +12,19 @@ internal class Program : SaneTsv public byte[] column2 { get; set; } [SaneTsv.TypedTsvColumn("columnthree\nyep")] - public string Column3 { get; set; } - } - - public class BoolTestRecord2 : SaneTsv.CommentedTsvRecord - { - [SaneTsv.TypedTsvColumn("column1:type")] - public bool Column1 { get; set; } - - [SaneTsv.TypedTsvColumn] - public byte[] column2 { get; set; } - - [SaneTsv.TypedTsvColumn("columnthree\nyep")] - public string Column3 { get; set; } - } - - public class BoolTestRecord3 : SaneTsv.CommentedTsvRecord - { - [SaneTsv.TsvColumn("column1")] - public string Column1 { get; set; } - - [SaneTsv.TsvColumn] - public string column2 { get; set; } - - [SaneTsv.TsvColumn("columnthree\nyep")] - public string Column3 { get; set; } - } - - public class SerdeTestRecord : SaneTsv.CommentedTsvRecord - { - [SaneTsv.TypedTsvColumn("column1")] - public bool Column1 { get; set; } - - [SaneTsv.TypedTsvColumn] - public byte[] column2 { get; set; } - - [SaneTsv.TypedTsvColumn("columnthree\nyep")] - public string Column3 { get; set; } - } - - public class FloatTestRecord : SaneTsv.CommentedTsvRecord - { - [SaneTsv.TypedTsvColumn("somefloat")] - public double SomeFloat { get; set; } - - [SaneTsv.TypedTsvColumn("binfloat", typeof(SaneTsv.Float64LEType))] - public double BinFloat { get; set; } - } - - public class StringTestRecord : SaneTsv.TsvRecord - { - [SaneTsv.TypedTsvColumn("column1")] - public string Column1 { get; set; } - - [SaneTsv.TypedTsvColumn] - public string column2 { get; set; } - - [SaneTsv.TypedTsvColumn("columnthree\nyep")] - public string Column3 { get; set; } + public DateTime Column3 { get; set; } } private static void Main(string[] args) { { - string testName = "Bool test"; - string testString1 = "column1:ty\\#pe:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" + - "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther"; + string testName = "Parse date"; + string testString1 = "# ExtraTSV V0.0.1\n" + + "column1:ty\\#pe:boolean\tcolumn2:binary\tcolumnthree\\nyep:iso8601:string" + + "\nTRUE\tvalue\\\\t\0woo\t2024-02-15T18:03:30.0000" + + "\nFALSE\tnother\t2024-02-15T18:03:39.0001"; - Tsv parsed = SaneTsv.ParseTypedTsv(Encoding.UTF8.GetBytes(testString1)); + CommentedTsv parsed = SaneTsv.ParseExtraTsv(Encoding.UTF8.GetBytes(testString1)); if (parsed.Records[0].Column1) { Console.WriteLine($"Passed {testName}"); @@ -146,64 +36,15 @@ internal class Program : SaneTsv } { - string testName = "Bad bool test"; - try - { - string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" + - "\nTUE\tvalue\\\\t\0woo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther"; + string testName = "Serde date"; + string testString1 = "# ExtraTSV V0.0.1\n" + + "column1:ty\\#pe:boolean\tcolumn2:binary\tcolumnthree\\nyep:iso8601:string" + + "\nTRUE\tvalue\\\\t\0woo\t2024-02-15T18:03:30.0000" + + "\nFALSE\tnother\t2024-02-15T18:03:39.0001"; - Tsv parsed = SaneTsv.ParseTypedTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Failed {testName}"); - } - catch (Exception) - { - Console.WriteLine($"Passed {testName}"); - } - - } - - { - string testName = "Comment test"; - string testString1 = "#This is a file comment\n" + - "#One more file comment line\n" + - "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" + - "\n#This is a comment" + - "\n#Another comment line" + - "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther"; - - CommentedTsv parsed = SaneTsv.ParseCommentedTsv(Encoding.UTF8.GetBytes(testString1)); - } - - //{ - // string testName = "Serde test"; - // string testString1 = "column1\tcolumn2\tcolumnthree\\nyep" + - // "\nTRUE\tvalue\\\\twoo\tvaluetrhee" + - // "\nFALSE\tnother\tno\\ther"; - - // Tsv parsed = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1)); - // string serialized = Encoding.UTF8.GetString(SaneTsv.SerializeSimpleTsv(parsed.ColumnNames, parsed.Records.Select(r => r.Fields.Select(f => f.ToString()).ToArray()).ToArray())); - - // if (testString1 == serialized) - // { - // Console.WriteLine($"Passed {testName}"); - // } - // else - // { - // Console.WriteLine($"Failed {testName}"); - // } - //} - - { - string testName = "Float binary test"; - var bytes = new List(); - bytes.AddRange(Encoding.UTF8.GetBytes("somefloat:float64\tbinfloat:float64-le" + - "\n1.5\t")); bytes.AddRange(BitConverter.GetBytes(1.5)); - bytes.AddRange(Encoding.UTF8.GetBytes("\n-8.0000005E-14\t")); bytes.AddRange(BitConverter.GetBytes(-8.0000005E-14)); - - Tsv parsed = SaneTsv.ParseTypedTsv(bytes.ToArray()); - if (parsed.Records[0].BinFloat == parsed.Records[0].SomeFloat) + CommentedTsv parsed = SaneTsv.ParseExtraTsv(Encoding.UTF8.GetBytes(testString1)); + string serialized = Encoding.UTF8.GetString(SaneTsv.SerializeExtraTsv(parsed.Records)); + if (serialized == testString1) { Console.WriteLine($"Passed {testName}"); } @@ -213,552 +54,6 @@ internal class Program : SaneTsv } } - { - string testName = "Serde test"; - - TestRecord[] data = - { - new TestRecord("test", true, 44.5f, 44.5f, -88e-3, -88e-3, 7773, 88888888, -7773, -88888888, new byte[] { 0, 1, 2, 3 }), - new TestRecord("test2", false, 44.5000005f, 44.5000005f, -88e-30, -88e-30, 7773, 88888888, -7773, -88888888, new byte[] { 0, 1, 2, 3, 4 }), - new TestRecord("test2", false, float.NaN, float.NaN, double.NaN, double.NaN, 7773, 88888888, -7773, -88888888, new byte[] { 0, 1, 2, 3, 4 }), - new TestRecord("test2", false, float.NegativeInfinity, float.NegativeInfinity, double.NegativeInfinity, double.NegativeInfinity, 7773, 88888888, -7773, -88888888, new byte[] { 0, 1, 2, 3, 4 }), - new TestRecord("test2", false, float.PositiveInfinity, float.PositiveInfinity, double.PositiveInfinity, double.PositiveInfinity, 7773, 88888888, -7773, -88888888, new byte[] { 0, 1, 2, 3, 4 }), - }; - - byte[] serialized = SaneTsv.SerializeTypedTsv(data); - - Tsv parsed = SaneTsv.ParseTypedTsv(serialized); - - if ((float)parsed.Records[1].Float32Test == 44.5000005f) - { - Console.WriteLine($"Passed {testName}"); - } - else - { - Console.WriteLine($"Failed {testName}"); - } - } - - { - string testName = "Trying to parse a not commented record as a Commented TSV test"; - - // These should not compile: - - //byte[] serialized = SaneTsv.SerializeCommentedTsv(data); - // Gives this error: error CS7036: There is no argument given that corresponds to the required parameter 'fileComment' of 'SaneTsv.SerializeCommentedTsv(IList, string)' - - //Tsv parsed = SaneTsv.ParseCommentedTsv(serialized); - // Gives this error: error CS0311: The type 'Program.TestRecord' cannot be used as type parameter 'T' in the generic type or method 'SaneTsv.ParseCommentedTsv(byte[])'. There is no implicit reference conversion from 'Program.TestRecord' to 'NathanMcRae.SaneTsv.CommentedTsvRecord'. - } - - { - string testName = "Try to parsed a Commented TSV as a Simple TSV"; - - string testString1 = "#This is a file comment\n" + - "#One more file comment line\n" + - "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" + - "\n#This is a comment" + - "\n#Another comment line" + - "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther"; - - try - { - Tsv parsed = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1)); - - Console.WriteLine($"Failed {testName}"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName}"); - } - } - - { - string testName = "Try to parsed a Commented TSV as a Typed TSV"; - - string testString1 = "#This is a file comment\n" + - "#One more file comment line\n" + - "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" + - "\n#This is a comment" + - "\n#Another comment line" + - "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther"; - - try - { - Tsv parsed = SaneTsv.ParseTypedTsv(Encoding.UTF8.GetBytes(testString1)); - - Console.WriteLine($"Failed {testName}"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName}"); - } - } - - { - string testName = "Try to parsed a Typed TSV as a Simple TSV"; - - string testString1 = - "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" + - "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther"; - - try - { - Tsv parsed = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1)); - - Console.WriteLine($"Failed {testName}"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName}"); - } - } - - { - string testName = "Timing comparison of simple parse methods and comparison of simple serialization methods"; - - int N = 1000000; - var records = new StringTestRecord[N]; - var rand = new Random(1); - - for (int i = 0; i < N; i++) - { - records[i] = new StringTestRecord() - { - Column1 = rand.Next().ToString(), - column2 = rand.Next().ToString(), - Column3 = rand.Next().ToString(), - }; - } - - string[][] recordStrings = records.Select(record => new string[] { record.Column1, record.column2, record.Column3 }).ToArray(); - - DateTime lastTime = DateTime.Now; - byte[] serialized1 = SaneTsv.SerializeSimpleTsv(records); - - TimeSpan speccedSerializationTime = DateTime.Now - lastTime; - Console.WriteLine($"Specced serialization time: {speccedSerializationTime}"); - lastTime = DateTime.Now; - - byte[] serialized2 = SaneTsv.SerializeSimpleTsv(new string[] { "column1", "column2", "columnthree\nyep" }, recordStrings); - - TimeSpan unspeccedSerializationTime = DateTime.Now - lastTime; - Console.WriteLine($"Unspecced serialization time: {unspeccedSerializationTime}"); - lastTime = DateTime.Now; - - Tsv parsed = SaneTsv.ParseSimpleTsv(serialized1); - - TimeSpan speccedParseTime = DateTime.Now - lastTime; - Console.WriteLine($"Specced parse time: {speccedParseTime}"); - lastTime = DateTime.Now; - - (string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(serialized2); - - TimeSpan unspeccedParseTime = DateTime.Now - lastTime; - Console.WriteLine($"Unspecced parse time: {unspeccedParseTime}"); - } - - { - string testName = "With and without file comment"; - - string testString1 = "#This is a file comment\n" + - "#One more file comment line\n" + - "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" + - "\n#This is a comment" + - "\n#Another comment line" + - "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther"; - - string testString2 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" + - "\n#This is a comment" + - "\n#Another comment line" + - "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther"; - - CommentedTsv parsed = SaneTsv.ParseCommentedTsv(Encoding.UTF8.GetBytes(testString1)); - CommentedTsv parsed2 = SaneTsv.ParseCommentedTsv(Encoding.UTF8.GetBytes(testString2)); - - if (parsed.FileComment == "This is a file comment\nOne more file comment line" && parsed2.FileComment == null) - { - Console.WriteLine($"Passed {testName}"); - } - else - { - Console.WriteLine($"Failed {testName}"); - } - } - - { - string testName = "With and without types"; - - string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" + - "\nTRUE\tvalue\\\\twoo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther"; - - try - { - Tsv parsed = SaneTsv.ParseTypedTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Passed {testName} 1A"); - } - catch (Exception e) - { - Console.WriteLine($"Failed {testName} 1A"); - } - - try - { - Tsv parsed2 = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Failed {testName} 1B"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1B"); - } - - try - { - (string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Failed {testName} 1C"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1C"); - } - - string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" + - "\nTRUE\tvalue\\\\twoo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther"; - - try - { - Tsv parsed = SaneTsv.ParseTypedTsv(Encoding.UTF8.GetBytes(testString2)); - Console.WriteLine($"Failed {testName} 2A"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 2A"); - } - - try - { - Tsv parsed2 = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Failed {testName} 2B"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 2B"); - } - - try - { - (string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Failed {testName} 2C"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 2C"); - } - } - - { - string testName = "With and without line comment"; - - string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" + - "\n#This is a comment" + - "\n#Another comment line" + - "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther"; - - try - { - CommentedTsv parsed = SaneTsv.ParseCommentedTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Passed {testName} 1A"); - } - catch (Exception e) - { - Console.WriteLine($"Failed {testName} 1A"); - } - - try - { - Tsv parsed = SaneTsv.ParseTypedTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Failed {testName} 1B"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1B"); - } - - try - { - Tsv parsed2 = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Failed {testName} 1C"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1C"); - } - - try - { - (string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Failed {testName} 1D"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1D"); - } - } - - { - string testName = "End of file comment"; - - string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" + - "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther" + - "\n# Hey, you're not supposed to have comments at the end of the tsv!"; - - try - { - CommentedTsv parsed = SaneTsv.ParseCommentedTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Failed {testName} 1A"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1A"); - } - - try - { - Tsv parsed = SaneTsv.ParseTypedTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Failed {testName} 1B"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1B"); - } - - string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" + - "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther" + - "\n# Hey, you're not supposed to have comments at the end of the tsv!"; - - try - { - Tsv parsed3 = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2)); - Console.WriteLine($"Failed {testName} 1C"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1C"); - } - - try - { - (string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2)); - Console.WriteLine($"Failed {testName} 1D"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1D"); - } - } - - { - string testName = "Partial parsing"; - - string line1 = "column1\tcolumn2\tcolumnthree\\nyep"; - string line2 = "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee"; - string line3 = "\nFALSE\tnother\tno\\ther"; - - byte[] inputBuffer = Encoding.UTF8.GetBytes(line1 + line2 + line3); - - var headerTypes = new List(); - var headerNames = new List(); - var headerPropertyInfos = new List(); - int columnCount = 0; - - foreach (PropertyInfo property in typeof(BoolTestRecord3).GetProperties()) - { - TsvColumnAttribute attribute = (TsvColumnAttribute)Attribute.GetCustomAttribute(property, typeof(TsvColumnAttribute)); - if (attribute == null) - { - continue; - } - - headerNames.Add(attribute.ColumnName ?? property.Name); - headerTypes.Add(attribute.ColumnType ?? GetColumnFromType(property.PropertyType)); - headerPropertyInfos.Add(property); - // TODO: Check that the property type and given column type are compatible - columnCount++; - } - - BoolTestRecord3[] records = SaneTsv.Parse(inputBuffer, - FormatType.SIMPLE_TSV, - headerPropertyInfos.ToArray(), - headerTypes.ToArray(), - line1.Length + line2.Length + 1, - inputBuffer.Length); - - if (records.Length == 0 ) - { - Console.WriteLine($"Passed {testName} 1"); - } - else - { - Console.WriteLine($"Failed {testName} 1"); - } - - BoolTestRecord3[] records2 = SaneTsv.Parse(inputBuffer, - FormatType.SIMPLE_TSV, - headerPropertyInfos.ToArray(), - headerTypes.ToArray(), - line1.Length, - line1.Length + 3); - - if (records2[0].Column3 == "valuetrhee") - { - Console.WriteLine($"Passed {testName} 2"); - } - else - { - Console.WriteLine($"Failed {testName} 2"); - } - - string[][] data = SaneTsv.ParseSimpleTsv(inputBuffer, 3, line1.Length + line2.Length + 1, inputBuffer.Length); - - if (data[0][1] == "nother") - { - Console.WriteLine($"Passed {testName} 3"); - } - else - { - Console.WriteLine($"Failed {testName} 3"); - } - - string[][] data2 = SaneTsv.ParseSimpleTsv(inputBuffer, 3, line1.Length, line1.Length + 3); - - if (data2.Length == 0) - { - Console.WriteLine($"Passed {testName} 4"); - } - else - { - Console.WriteLine($"Failed {testName} 4"); - } - } - - { - string testName = "End of file \\n"; - - string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" + - "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther" + - "\n"; - - try - { - CommentedTsv parsed = SaneTsv.ParseCommentedTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Failed {testName} 1A"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1A"); - } - - try - { - Tsv parsed = SaneTsv.ParseTypedTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Failed {testName} 1B"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1B"); - } - - string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" + - "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther" + - "\n"; - - try - { - Tsv parsed3 = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2)); - Console.WriteLine($"Failed {testName} 1C"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1C"); - } - - try - { - (string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2)); - Console.WriteLine($"Failed {testName} 1D"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1D"); - } - } - - { - string testName = "End of file partial record"; - - string testString1 = "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" + - "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther" + - "\nTRUE\t"; - - try - { - CommentedTsv parsed = SaneTsv.ParseCommentedTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Failed {testName} 1A"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1A"); - } - - try - { - Tsv parsed = SaneTsv.ParseTypedTsv(Encoding.UTF8.GetBytes(testString1)); - Console.WriteLine($"Failed {testName} 1B"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1B"); - } - - string testString2 = "column1\tcolumn2\tcolumnthree\\nyep" + - "\nTRUE\tvalue\\\\t\0woo\tvaluetrhee" + - "\nFALSE\tnother\tno\\ther" + - "\nTRUE\t"; - - try - { - Tsv parsed3 = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2)); - Console.WriteLine($"Failed {testName} 1C"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1C"); - } - - try - { - (string[] columns, string[][] data) = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString2)); - Console.WriteLine($"Failed {testName} 1D"); - } - catch (Exception e) - { - Console.WriteLine($"Passed {testName} 1D"); - } - } - Console.WriteLine("Done with tests"); } }