diff --git a/SaneTsv/SaneTsv.cs b/SaneTsv/SaneTsv.cs index b1b1756..2418de6 100644 --- a/SaneTsv/SaneTsv.cs +++ b/SaneTsv/SaneTsv.cs @@ -22,7 +22,7 @@ public class SaneTsv protected enum FormatType { - SANE_TSV = 0, + SIMPLE_TSV = 0, TYPED_TSV = 1, COMMENTED_TSV = 2, } @@ -34,9 +34,9 @@ public class SaneTsv public List Records { get; protected set; } public string FileComment { get; protected set; } = null; - public static SaneTsv ParseSaneTsv(byte[] inputBuffer) + public static SaneTsv ParseSimpleTsv(byte[] inputBuffer) { - return Parse(inputBuffer, FormatType.SANE_TSV); + return Parse(inputBuffer, FormatType.SIMPLE_TSV); } public static SaneTsv ParseTypedTsv(byte[] inputBuffer) @@ -135,7 +135,7 @@ public class SaneTsv string columnTypeString; string columnName; if (columnString.Contains(':')) { - if (format == FormatType.SANE_TSV) + if (format == FormatType.SIMPLE_TSV) { throw new Exception($"Header {fields.Count} contain ':', which is not allowed for column names"); } @@ -144,7 +144,7 @@ public class SaneTsv } else { - if (format > FormatType.SANE_TSV) + if (format > FormatType.SIMPLE_TSV) { throw new Exception($"Header {fields.Count} has no type"); } @@ -401,7 +401,7 @@ public class SaneTsv return parsedFields; } - public static byte[] SerializeSaneTsv(IList header, IList> data) + public static byte[] SerializeSimpleTsv(IList header, IList> data) { var escapedString = new StringBuilder(); diff --git a/SaneTsv/SaneTsvTest/Program.cs b/SaneTsv/SaneTsvTest/Program.cs index 7878575..059d338 100644 --- a/SaneTsv/SaneTsvTest/Program.cs +++ b/SaneTsv/SaneTsvTest/Program.cs @@ -39,7 +39,7 @@ using System.Text; { string testName = "Comment test"; string testString1 = "#This is a file comment\n" + - " #One more file comment line\n" + + "#One more file comment line\n" + "column1:type:boolean\tcolumn2:binary\tcolumnthree\\nyep:string" + "\n#This is a comment" + "\n#Another comment line" + @@ -49,4 +49,23 @@ using System.Text; SaneTsv parsed = 
SaneTsv.ParseCommentedTsv(Encoding.UTF8.GetBytes(testString1)); } +{ + string testName = "Serde test"; + string testString1 = "column1\tcolumn2\tcolumnthree\\nyep" + + "\nTRUE\tvalue\\\\twoo\tvaluetrhee" + + "\nFALSE\tnother\tno\\ther"; + + SaneTsv parsed = SaneTsv.ParseSimpleTsv(Encoding.UTF8.GetBytes(testString1)); + string serialized = Encoding.UTF8.GetString(SaneTsv.SerializeSimpleTsv(parsed.ColumnNames, parsed.Records.Select(r => r.Fields.Select(f => f.ToString()).ToArray()).ToArray())); + + if (testString1 == serialized) + { + Console.WriteLine($"Passed {testName}"); + } + else + { + Console.WriteLine($"Failed {testName}"); + } +} + Console.WriteLine("Done with tests"); diff --git a/SaneTsv/readme.md b/SaneTsv/readme.md index df15f83..c162070 100644 --- a/SaneTsv/readme.md +++ b/SaneTsv/readme.md @@ -1,6 +1,10 @@ # Sane TSV -Sane TSV is a strict format for tabular data. +Sane Tab-Separated Values is a series of tabular formats as an alternative to the under-specified TSV / CSV quagmire. + +# Simple TSV + +Simple TSV is a strict format for tabular data. '\n' (0x0A) character delimit lines, and '\t' (0x09) characters delimit fields within a line. @@ -20,7 +24,7 @@ Implementations of the format do not need to handle file reading and writing dir # Typed TSV -Typed TSV builds on Sane TSV to allow for typing of columns. All column names in a typed TSV must end with ':' (0x3A) and then one of the following types: +Typed TSV builds on Simple TSV to allow for typing of columns. All column names in a typed TSV must end with ':' (0x3A) and then one of the following types: - 'string' - 'boolean' @@ -34,7 +38,7 @@ Typed TSV builds on Sane TSV to allow for typing of columns. All column names in Any other values are an error, however, the portion of the name prior to the last ':' may be anything and may include ':' characters. -All fields in the rest of the file must be of the type corresponding the their column. 
+All fields in the rest of the file must be of the type corresponding to their column. Aside from the 'binary' column type, all fields must be UTF-8 encoded text. Each type has the following restrictions: @@ -55,7 +59,7 @@ Typed TSV files should have the .ytsv extension (.ttsv is already used). Commented TSV builds on Typed TSV and allows for more flexibility in the format by including line comments. They are kept distinct so that some applications of it can take advantage of the extra flexibility, while others can stick with the more restricted Typed TSV format. -Commented lines start with a '#' character at the beginning of the line. Unescaped '#' characters are not allowed on a line that does not start with a '#'. Any '#' characters in fields must be escaped. Any unescaped '#' after the start of a line are errors. +Commented lines start with a '#' character at the beginning of the line. Unescaped '#' characters are not allowed on a line that does not start with a '#'. Any '#' characters in fields must be escaped. Comments must be UTF-8 encoded text.