From ddb873e21f246117b41d8e9e458094e6529640f7 Mon Sep 17 00:00:00 2001
From: Nathan McRae
Date: Thu, 15 Feb 2024 20:26:40 -0800
Subject: [PATCH] Add ExtraTSV

---
 ExtraTsv/ExtraTsv.cs             | 163 +++++++++++++++++++++++++++++++
 ExtraTsv/ExtraTsv.csproj         |  14 ++++
 ExtraTsv/readme.md               |  44 +++++++++++
 ExtraTsvTest/ExtraTsvTest.csproj |  14 ++++
 ExtraTsvTest/Program.cs          |  20 +++++
 SaneTsv.csproj                   |   6 ++
 SaneTsv.sln                      |  14 +++-
 7 files changed, 274 insertions(+), 1 deletion(-)
 create mode 100644 ExtraTsv/ExtraTsv.cs
 create mode 100644 ExtraTsv/ExtraTsv.csproj
 create mode 100644 ExtraTsv/readme.md
 create mode 100644 ExtraTsvTest/ExtraTsvTest.csproj
 create mode 100644 ExtraTsvTest/Program.cs

diff --git a/ExtraTsv/ExtraTsv.cs b/ExtraTsv/ExtraTsv.cs
new file mode 100644
index 0000000..f911bae
--- /dev/null
+++ b/ExtraTsv/ExtraTsv.cs
@@ -0,0 +1,163 @@
+
+using System.Globalization;
+using System.Text.RegularExpressions;
+
+namespace NathanMcRae;
+
+/// <summary>
+/// Parser for ExtraTSV: commented SaneTsv input whose file comment starts with
+/// ' ExtraTSV Vx.y.z' and whose columns may use the extra types declared below.
+/// </summary>
+public class ExtraTsv : SaneTsv
+{
+    /// <summary>Type assigned to ':iso8601' string columns; their fields are replaced with DateTime values.</summary>
+    public class Iso8601Type : ColumnType { }
+
+    /// <summary>Column type intended for values carrying a physical unit (not yet assigned by the parser).</summary>
+    public class PhysicalUnitsType : ColumnType
+    {
+        /// <summary>Unit symbol passed to the constructor.</summary>
+        public string Units { get; }
+
+        /// <param name="Units">Unit symbol to record for the column.</param>
+        public PhysicalUnitsType(string Units)
+        {
+            // Store the argument; without this the get-only property would stay null.
+            this.Units = Units;
+        }
+    }
+
+    /// <summary>Unit symbols accepted directly before a trailing ':units' in a column name.</summary>
+    public static readonly string[] ValidUnits =
+    {
+        "m",
+        "s",
+        "A",
+        "K",
+        "cd",
+        "mol",
+        "kg",
+        "Hz",
+        "rad",
+        "sr",
+        "N",
+        "Pa",
+        "J",
+        "W",
+        "C",
+        "V",
+        "F",
+        "Ω",
+        "S",
+        "Wb",
+        "T",
+        "H",
+        "°C",
+        "lm",
+        "lx",
+        "Bq",
+        "Gy",
+        "Sv",
+        "kat"
+    };
+
+    // Version of the ExtraTSV format this parser reports in its error messages and checks against.
+    public static readonly int MajorVersion = 0;
+    public static readonly int MinorVersion = 0;
+    public static readonly int PatchVersion = 1;
+
+    /// <summary>Matches a file comment beginning ' ExtraTSV Vx.y.z', capturing the three version numbers.</summary>
+    public static Regex VersionRegex = new Regex(@"^ ExtraTSV V(\d+)\.(\d+)\.(\d+)");
+
+    /// <summary>
+    /// Parses the buffer as commented TSV, then applies the ExtraTSV rules: the file comment must
+    /// carry the parser's major version, ':iso8601' string columns become DateTime values, and
+    /// records may not have comments.
+    /// </summary>
+    /// <param name="inputBuffer">Raw file bytes, passed straight to ParseCommentedTsv.</param>
+    /// <returns>The parsed table, cast to ExtraTsv.</returns>
+    /// <exception cref="Exception">
+    /// The version header is missing or does not match, a ':units' column names an unknown unit,
+    /// a record has a comment, or a timestamp is not in yyyy-MM-ddTHH:mm:ss.ffff form.
+    /// </exception>
+    public static ExtraTsv ParseExtraTsv(byte[] inputBuffer)
+    {
+        SaneTsv tsv = ParseCommentedTsv(inputBuffer);
+
+        string versionError = $"ExtraTSV expects the file to start with '# ExtraTSV Vx.y.z' where x.y.z is a version compatible with {MajorVersion}.{MinorVersion}.{PatchVersion}";
+
+        if (tsv.FileComment == null)
+        {
+            throw new Exception(versionError);
+        }
+
+        Match match = VersionRegex.Match(tsv.FileComment);
+        if (!match.Success)
+        {
+            throw new Exception(versionError);
+        }
+
+        int fileMajorVersion = int.Parse(match.Groups[1].Value);
+
+        if (fileMajorVersion != MajorVersion)
+        {
+            // The check is '!=', so the file may be older or newer than the parser.
+            throw new Exception($"File has major version ({fileMajorVersion}) which is incompatible with this parser's version {MajorVersion}");
+        }
+
+        for (int i = 0; i < tsv.ColumnNames.Count(); i++)
+        {
+            string[] typeParts = tsv.ColumnNames[i].Split(":");
+            string lastPart = typeParts[typeParts.Length - 1];
+
+            // Require a name part before the suffix so a column literally named 'iso8601'
+            // does not reach Substring with a negative length.
+            if (typeParts.Length > 1 && lastPart == "iso8601" && tsv.ColumnTypes[i] == typeof(StringType))
+            {
+                string columnName = tsv.ColumnNames[i].Substring(0, tsv.ColumnNames[i].Length - ":iso8601".Length);
+                tsv.ColumnNames[i] = columnName;
+                tsv.ColumnTypes[i] = typeof(Iso8601Type);
+            }
+            // TODO: ISO8601 time spans
+            // TODO: ISO8601 time durations
+            else if (lastPart == "units" && (tsv.ColumnTypes[i] == typeof(Float64Type) || tsv.ColumnTypes[i] == typeof(Float32Type)))
+            {
+                // A bare 'units' name has no unit part; treat it as empty instead of indexing at -1.
+                string units = typeParts.Length > 1 ? typeParts[typeParts.Length - 2] : "";
+                if (!ValidUnits.Contains(units))
+                {
+                    throw new Exception($"Invalid units type '{units}' for column {i}");
+                }
+
+                // TODO: How to store type information since the ColumnTypes is of type Type?
+            }
+        }
+
+        CultureInfo provider = CultureInfo.InvariantCulture;
+
+        for (int i = 0; i < tsv.Records.Count; i++)
+        {
+            if (tsv.Records[i].Comment != null)
+            {
+                throw new Exception($"Line {tsv.Records[i].Line} has comment above it which is not allowed");
+            }
+
+            for (int j = 0; j < tsv.ColumnNames.Count(); j++)
+            {
+                if (tsv.ColumnTypes[j] == typeof(Iso8601Type))
+                {
+                    if (!DateTime.TryParseExact((string)tsv.Records[i][j], "yyyy-MM-ddTHH:mm:ss.ffff", provider, DateTimeStyles.None, out DateTime parsed))
+                    {
+                        throw new Exception($"ISO 8601 timestamp format error on line {tsv.Records[i].Line}, field {j}");
+                    }
+
+                    tsv.Records[i].Fields[j] = parsed;
+                }
+            }
+        }
+
+        // NOTE(review): this cast only succeeds if ParseCommentedTsv returns an ExtraTsv
+        // instance; that method is not visible here - confirm.
+        return (ExtraTsv)tsv;
+    }
+}
\ No newline at end of file
diff --git a/ExtraTsv/ExtraTsv.csproj b/ExtraTsv/ExtraTsv.csproj
new file mode 100644
index 0000000..1cf4d45
--- /dev/null
+++ b/ExtraTsv/ExtraTsv.csproj
@@ -0,0 +1,14 @@
+
+
+ net6.0
+ enable
+ enable
+ NathanMcRae
+
+
+
+
+
+
+
diff --git a/ExtraTsv/readme.md b/ExtraTsv/readme.md
new file mode 100644
index 0000000..00cccc2
--- /dev/null
+++ b/ExtraTsv/readme.md
@@ -0,0 +1,44 @@
+Extra TSV adds many convenience types to Sane TSV:
+
+- Timestamps
+  Just this format for now: yyyy-MM-ddTHH:mm:ss.ffff
+- Timespans
+- Time durations
+- Multiformats
+  - Multihashes
+  - Multiprotocols
+  - ...
+- Physical units
+  To start with, just use SI base and derived units
+  - Base units
+    - m
+    - s
+    - A
+    - K
+    - cd
+    - mol
+    - kg
+  - Derived units
+    - Hz
+    - rad
+    - sr
+    - N
+    - Pa
+    - J
+    - W
+    - C
+    - V
+    - F
+    - Ω
+    - S
+    - Wb
+    - T
+    - H
+    - °C
+    - lm
+    - lx
+    - Bq
+    - Gy
+    - Sv
+    - kat
+  How to handle derived units?
\ No newline at end of file
diff --git a/ExtraTsvTest/ExtraTsvTest.csproj b/ExtraTsvTest/ExtraTsvTest.csproj
new file mode 100644
index 0000000..4eae1c4
--- /dev/null
+++ b/ExtraTsvTest/ExtraTsvTest.csproj
@@ -0,0 +1,14 @@
+
+
+ Exe
+ net6.0
+ enable
+ enable
+
+
+
+
+
+
+
diff --git a/ExtraTsvTest/Program.cs b/ExtraTsvTest/Program.cs
new file mode 100644
index 0000000..91b61df
--- /dev/null
+++ b/ExtraTsvTest/Program.cs
@@ -0,0 +1,20 @@
+using NathanMcRae;
+using System.Text;
+
+{
+    // Smoke test: parse a small ExtraTSV document and check the first record's boolean field.
+    string testName = "Bool test";
+    string document = "# ExtraTSV V0.0.1\n" +
+        "column1:ty\\#pe:boolean\tcolumn2:binary\tcolumnthree\\nyep:iso8601:string" +
+        "\nTRUE\tvalue\\\\t\0woo\t2024-02-15T18:03:30.0000" +
+        "\nFALSE\tnother\t2024-02-15T18:03:39.0001";
+
+    byte[] encoded = Encoding.UTF8.GetBytes(document);
+    ExtraTsv table = ExtraTsv.ParseExtraTsv(encoded);
+
+    // Passes only when the field is a bool and that bool is true.
+    bool passed = table.Records[0]["column1:ty#pe"] is bool flag && flag;
+    string message = passed ? $"Passed {testName}" : $"Failed {testName}";
+
+    Console.WriteLine(message);
+}
diff --git a/SaneTsv.csproj b/SaneTsv.csproj
index fbf1f20..c231904 100644
--- a/SaneTsv.csproj
+++ b/SaneTsv.csproj
@@ -10,8 +10,14 @@
+
+
+
+
+
+
diff --git a/SaneTsv.sln b/SaneTsv.sln
index 4a1fd60..e383f6d 100644
--- a/SaneTsv.sln
+++ b/SaneTsv.sln
@@ -5,7 +5,11 @@ VisualStudioVersion = 17.7.34024.191
 MinimumVisualStudioVersion = 10.0.40219.1
 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsv", "SaneTsv.csproj", "{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}"
 EndProject
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SaneTsvTest", "SaneTsvTest\SaneTsvTest.csproj", "{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsvTest", "SaneTsvTest\SaneTsvTest.csproj", "{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ExtraTsv", "ExtraTsv\ExtraTsv.csproj", "{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ExtraTsvTest", "ExtraTsvTest\ExtraTsvTest.csproj", "{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}"
 EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -21,6 +25,14 @@ Global
 		{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.Build.0 = Release|Any CPU
+		{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Release|Any CPU.Build.0 = Release|Any CPU
+		{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE