Add ExtraTSV
This commit is contained in:
125
ExtraTsv/ExtraTsv.cs
Normal file
125
ExtraTsv/ExtraTsv.cs
Normal file
@ -0,0 +1,125 @@
|
||||
|
||||
using System.Globalization;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace NathanMcRae;
|
||||
|
||||
/// <summary>
/// Parser for ExtraTSV: a commented TSV file whose file comment starts with
/// " ExtraTSV Vx.y.z" and whose column names may carry an extra type suffix
/// (<c>:iso8601</c> on string columns, <c>:&lt;unit&gt;:units</c> on float columns).
/// </summary>
public class ExtraTsv : SaneTsv
{
    /// <summary>
    /// Column type marker assigned to string columns whose name ends in <c>:iso8601</c>.
    /// Fields in such columns are replaced by parsed <see cref="DateTime"/> values.
    /// </summary>
    public class Iso8601Type : ColumnType { }

    /// <summary>
    /// Column type describing a numeric column measured in a physical unit.
    /// </summary>
    public class PhysicalUnitsType : ColumnType
    {
        /// <summary>The unit symbol this column is measured in.</summary>
        public string Units { get; }

        /// <summary>Creates a units column type for the given unit symbol.</summary>
        /// <param name="Units">Unit symbol, e.g. one of <see cref="ValidUnits"/>.</param>
        public PhysicalUnitsType(string Units)
        {
            // The parameter shadows the property, so the assignment must be qualified.
            this.Units = Units;
        }
    }

    /// <summary>
    /// Unit symbols accepted in a <c>:&lt;unit&gt;:units</c> column suffix.
    /// </summary>
    public static readonly string[] ValidUnits =
    {
        "m",
        "s",
        "A",
        "K",
        "cd",
        "mol",
        "kg",
        "Hz",
        "rad",
        "sr",
        "N",
        "Pa",
        "J",
        "W",
        "C",
        "V",
        "F",
        "Ω",
        "S",
        "Wb",
        "T",
        "H",
        "°C",
        "lm",
        "lx",
        "Bq",
        "Gy",
        "Sv",
        "kat"
    };

    /// <summary>Major version of the ExtraTSV format this parser accepts.</summary>
    public static readonly int MajorVersion = 0;

    /// <summary>Minor version of the ExtraTSV format this parser implements.</summary>
    public static readonly int MinorVersion = 0;

    /// <summary>Patch version of the ExtraTSV format this parser implements.</summary>
    public static readonly int PatchVersion = 1;

    /// <summary>
    /// Matches the version header at the start of the file comment and captures
    /// the major, minor and patch numbers in groups 1 to 3.
    /// </summary>
    public static Regex VersionRegex = new Regex(@"^ ExtraTSV V(\d+)\.(\d+)\.(\d+)");

    /// <summary>
    /// Builds the exception thrown when the version header is missing or malformed.
    /// </summary>
    private static Exception CreateVersionHeaderException()
    {
        return new Exception($"ExtraTSV expects the file to start with '# ExtraTSV Vx.y.z' where x.y.z is a version compatible with {MajorVersion}.{MinorVersion}.{PatchVersion}");
    }

    /// <summary>
    /// Parses <paramref name="inputBuffer"/> with <c>ParseCommentedTsv</c>, validates the
    /// ExtraTSV version header, resolves the extra column types and converts the
    /// fields of ISO 8601 columns to <see cref="DateTime"/>.
    /// </summary>
    /// <param name="inputBuffer">Raw bytes of the ExtraTSV file.</param>
    /// <returns>The parsed table.</returns>
    /// <exception cref="Exception">
    /// The version header is missing, malformed or has a different major version;
    /// a units column names an unknown unit; a record carries a comment; or an
    /// ISO 8601 field does not match <c>yyyy-MM-ddTHH:mm:ss.ffff</c>.
    /// </exception>
    public static ExtraTsv ParseExtraTsv(byte[] inputBuffer)
    {
        SaneTsv tsv = ParseCommentedTsv(inputBuffer);

        if (tsv.FileComment == null)
        {
            throw CreateVersionHeaderException();
        }

        Match match = VersionRegex.Match(tsv.FileComment);
        if (!match.Success)
        {
            throw CreateVersionHeaderException();
        }

        // \d+ can match digit runs that do not fit in an int (or non-ASCII digits);
        // report those as a malformed header instead of leaking an
        // OverflowException/FormatException from int.Parse.
        if (!int.TryParse(match.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int fileMajorVersion))
        {
            throw CreateVersionHeaderException();
        }

        if (fileMajorVersion != MajorVersion)
        {
            throw new Exception($"File has major version ({fileMajorVersion}) which is newer than this parser's version {MajorVersion}");
        }

        int columnCount = tsv.ColumnNames.Count();

        for (int i = 0; i < columnCount; i++)
        {
            string[] typeParts = tsv.ColumnNames[i].Split(":");

            // An extra type is always a ':'-separated suffix. A column whose whole
            // name is "iso8601" or "units" has no suffix and is left untouched;
            // without this guard the code below would index out of range.
            if (typeParts.Length < 2)
            {
                continue;
            }

            string extraType = typeParts[^1];

            if (extraType == "iso8601" && tsv.ColumnTypes[i] == typeof(StringType))
            {
                string columnName = tsv.ColumnNames[i].Substring(0, tsv.ColumnNames[i].Length - ":iso8601".Length);
                tsv.ColumnNames[i] = columnName;
                tsv.ColumnTypes[i] = typeof(Iso8601Type);
            }
            // TODO: ISO8601 time spans
            // TODO: ISO8601 time durations
            else if (extraType == "units" && (tsv.ColumnTypes[i] == typeof(Float64Type) || tsv.ColumnTypes[i] == typeof(Float32Type)))
            {
                string units = typeParts[^2];
                if (!ValidUnits.Contains(units))
                {
                    throw new Exception($"Invalid units type '{units}' for column {i}");
                }

                // TODO: How to store type information since the ColumnTypes is of type Type?
            }
        }

        CultureInfo provider = CultureInfo.InvariantCulture;

        for (int i = 0; i < tsv.Records.Count; i++)
        {
            if (tsv.Records[i].Comment != null)
            {
                throw new Exception($"Line {tsv.Records[i].Line} has comment above it which is not allowed");
            }

            for (int j = 0; j < columnCount; j++)
            {
                if (tsv.ColumnTypes[j] != typeof(Iso8601Type))
                {
                    continue;
                }

                if (!DateTime.TryParseExact((string)tsv.Records[i][j], "yyyy-MM-ddTHH:mm:ss.ffff", provider, DateTimeStyles.None, out DateTime parsed))
                {
                    throw new Exception($"ISO 8601 timestamp format error on line {tsv.Records[i].Line}, field {j}");
                }

                // Replace the raw string field with the parsed timestamp.
                tsv.Records[i].Fields[j] = parsed;
            }
        }

        // NOTE(review): this downcast only succeeds if ParseCommentedTsv returns an
        // object whose runtime type is ExtraTsv; that method is not visible here,
        // so confirm it does not return a plain SaneTsv (InvalidCastException).
        return (ExtraTsv)tsv;
    }
}
|
14
ExtraTsv/ExtraTsv.csproj
Normal file
14
ExtraTsv/ExtraTsv.csproj
Normal file
@ -0,0 +1,14 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Compiler and framework settings for the ExtraTsv library. -->
  <PropertyGroup>
    <TargetFramework>net6.0</TargetFramework>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
    <RootNamespace>NathanMcRae</RootNamespace>
  </PropertyGroup>

  <!-- ExtraTsv builds on the SaneTsv project located one directory up. -->
  <ItemGroup>
    <ProjectReference Include="..\SaneTsv.csproj" />
  </ItemGroup>

</Project>
|
44
ExtraTsv/readme.md
Normal file
44
ExtraTsv/readme.md
Normal file
@ -0,0 +1,44 @@
|
||||
Extra TSV adds many convenience types to Sane TSV:
|
||||
|
||||
- Timestamps
|
||||
Only one format is supported for now: `yyyy-MM-ddTHH:mm:ss.ffff`
|
||||
- Timespans
|
||||
- Time durations
|
||||
- Multiformats
|
||||
- Multihashes
|
||||
- Multiprotocols
|
||||
- ...
|
||||
- Physical units
|
||||
To start with, only the SI base units and the named SI derived units listed below are accepted:
|
||||
- Base units
|
||||
- m
|
||||
- s
|
||||
- A
|
||||
- K
|
||||
- cd
|
||||
- mol
|
||||
- kg
|
||||
- Derived units
|
||||
- Hz
|
||||
- rad
|
||||
- sr
|
||||
- N
|
||||
- Pa
|
||||
- J
|
||||
- W
|
||||
- C
|
||||
- V
|
||||
- F
|
||||
- Ω
|
||||
- S
|
||||
- Wb
|
||||
- T
|
||||
- H
|
||||
- °C
|
||||
- lm
|
||||
- lx
|
||||
- Bq
|
||||
- Gy
|
||||
- Sv
|
||||
- kat
|
||||
Open question: how should derived units be handled?
|
Reference in New Issue
Block a user