using System.Globalization;
using System.Text.RegularExpressions;

namespace NathanMcRae;

/// <summary>
/// ExtraTSV parsing on top of <see cref="SaneTsv"/>: checks the "ExtraTSV Vx.y.z" file comment and
/// interprets the additional <c>:iso8601</c> and <c>:units</c> column-name suffixes.
/// </summary>
public class ExtraTsv : SaneTsv
{
    /// <summary>Marker column type for columns whose fields are parsed into <see cref="DateTime"/> values.</summary>
    public class Iso8601Type : ColumnType { }

    /// <summary>Column type that carries the unit symbol of a numeric column.</summary>
    public class PhysicalUnitsType : ColumnType
    {
        /// <summary>The unit symbol passed to the constructor.</summary>
        public string Units { get; }

        /// <summary>Creates a units column type for the given unit symbol.</summary>
        /// <param name="Units">Unit symbol, e.g. one of <see cref="ValidUnits"/>.</param>
        public PhysicalUnitsType(string Units)
        {
            // The original constructor body was empty, which left the property permanently null.
            this.Units = Units;
        }
    }

    /// <summary>Unit symbols accepted in front of a <c>:units</c> column-name suffix.</summary>
    public static readonly string[] ValidUnits =
    {
        "m", "s", "A", "K", "cd", "mol", "kg",
        "Hz", "rad", "sr", "N", "Pa", "J", "W", "C", "V", "F", "Ω", "S", "Wb", "T", "H",
        "°C", "lm", "lx", "Bq", "Gy", "Sv", "kat"
    };

    public static readonly int MajorVersion = 0;
    public static readonly int MinorVersion = 0;
    public static readonly int PatchVersion = 1;

    /// <summary>Matches the version marker expected at the start of the file comment.</summary>
    public static Regex VersionRegex = new Regex(@"^ ExtraTSV V(\d+)\.(\d+)\.(\d+)");

    /// <summary>
    /// Parses <paramref name="inputBuffer"/> with <c>ParseCommentedTsv</c> and then applies the ExtraTSV rules:
    /// the file comment must carry a version marker with a matching major version, records must not have
    /// comments, and fields of <c>:iso8601</c> string columns are converted to <see cref="DateTime"/>.
    /// </summary>
    /// <param name="inputBuffer">Raw bytes of the TSV file.</param>
    /// <returns>The parsed table.</returns>
    /// <exception cref="Exception">
    /// The version marker is missing or malformed, the major version differs from <see cref="MajorVersion"/>,
    /// a <c>:units</c> column has a missing or unknown unit, a record has a comment, or an ISO 8601 field
    /// does not match the expected timestamp format.
    /// </exception>
    public static ExtraTsv ParseExtraTsv(byte[] inputBuffer)
    {
        SaneTsv tsv = ParseCommentedTsv(inputBuffer);

        string versionError = $"ExtraTSV expects the file to start with '# ExtraTSV Vx.y.z' where x.y.z is a version compatible with {MajorVersion}.{MinorVersion}.{PatchVersion}";

        if (tsv.FileComment == null)
        {
            throw new Exception(versionError);
        }

        Match match = VersionRegex.Match(tsv.FileComment);
        if (!match.Success)
        {
            throw new Exception(versionError);
        }

        // \d also matches non-ASCII digits and arbitrarily long runs, so parse defensively
        // rather than letting FormatException/OverflowException escape from int.Parse.
        if (!int.TryParse(match.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int fileMajorVersion))
        {
            throw new Exception(versionError);
        }

        if (fileMajorVersion != MajorVersion)
        {
            throw new Exception($"File has major version ({fileMajorVersion}) which is newer than this parser's version {MajorVersion}");
        }

        int columnCount = tsv.ColumnNames.Count();

        for (int i = 0; i < columnCount; i++)
        {
            string[] typeParts = tsv.ColumnNames[i].Split(":");
            string suffix = typeParts[typeParts.Length - 1];

            // A column literally named "iso8601" (no "name:" prefix) is left as an ordinary string column;
            // stripping ":iso8601" from it would ask Substring for a negative length.
            if (suffix == "iso8601" && typeParts.Length > 1 && tsv.ColumnTypes[i] == typeof(StringType))
            {
                string columnName = tsv.ColumnNames[i].Substring(0, tsv.ColumnNames[i].Length - ":iso8601".Length);
                tsv.ColumnNames[i] = columnName;
                tsv.ColumnTypes[i] = typeof(Iso8601Type);
            }
            // TODO: ISO8601 time spans
            // TODO: ISO8601 time durations
            else if (suffix == "units"
                && (tsv.ColumnTypes[i] == typeof(Float64Type) || tsv.ColumnTypes[i] == typeof(Float32Type)))
            {
                // Without a part in front of "units" there is nothing to validate (and nothing to index).
                if (typeParts.Length < 2)
                {
                    throw new Exception($"Column {i} has a 'units' suffix but does not specify any units");
                }

                string units = typeParts[typeParts.Length - 2];
                if (!ValidUnits.Contains(units))
                {
                    throw new Exception($"Invalid units type '{units}' for column {i}");
                }

                // TODO: How to store type information since the ColumnTypes is of type Type?
            }
        }

        CultureInfo provider = CultureInfo.InvariantCulture;

        for (int i = 0; i < tsv.Records.Count; i++)
        {
            if (tsv.Records[i].Comment != null)
            {
                throw new Exception($"Line {tsv.Records[i].Line} has comment above it which is not allowed");
            }

            for (int j = 0; j < columnCount; j++)
            {
                if (tsv.ColumnTypes[j] != typeof(Iso8601Type))
                {
                    continue;
                }

                if (!DateTime.TryParseExact((string)tsv.Records[i][j], "yyyy-MM-ddTHH:mm:ss.ffff", provider, DateTimeStyles.None, out DateTime parsed))
                {
                    throw new Exception($"ISO 8601 timestamp format error on line {tsv.Records[i].Line}, field {j}");
                }

                // Replace the raw string with the parsed timestamp.
                tsv.Records[i].Fields[j] = parsed;
            }
        }

        // NOTE(review): this is a downcast of the object returned by ParseCommentedTsv. It throws
        // InvalidCastException unless that object's runtime type is ExtraTsv — confirm against SaneTsv.
        return (ExtraTsv)tsv;
    }
}