Add ExtraTSV
This commit is contained in:
		
							
								
								
									
										125
									
								
								ExtraTsv/ExtraTsv.cs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								ExtraTsv/ExtraTsv.cs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,125 @@
 | 
			
		||||
 | 
			
		||||
using System.Globalization;
 | 
			
		||||
using System.Text.RegularExpressions;
 | 
			
		||||
 | 
			
		||||
namespace NathanMcRae;
 | 
			
		||||
 | 
			
		||||
/// <summary>
/// Parser for ExtraTSV files: a commented SaneTSV file whose file comment starts with an
/// <c>ExtraTSV Vx.y.z</c> version marker and whose column names may carry an extra trailing
/// annotation (<c>:iso8601</c> or <c>:units</c>).
/// </summary>
public class ExtraTsv : SaneTsv
{
  /// <summary>Column type assigned to string columns annotated with <c>:iso8601</c>.</summary>
  public class Iso8601Type : ColumnType { }

  /// <summary>
  /// Column type carrying a physical unit. The parser below does not assign it to columns yet
  /// (see the TODO in <see cref="ParseExtraTsv"/>).
  /// </summary>
  public class PhysicalUnitsType : ColumnType
  {
    /// <summary>The unit symbol this column type was constructed with.</summary>
    public string Units { get; }

    /// <summary>Creates a units column type for the given unit symbol.</summary>
    /// <param name="Units">Unit symbol to store in <see cref="Units"/>.</param>
    public PhysicalUnitsType(string Units)
    {
      // The original constructor body was empty, so the property was never populated.
      this.Units = Units;
    }
  }

  /// <summary>Unit symbols accepted in front of a <c>:units</c> column annotation.</summary>
  public static readonly string[] ValidUnits =
  {
    "m",
    "s",
    "A",
    "K",
    "cd",
    "mol",
    "kg",
    "Hz",
    "rad",
    "sr",
    "N",
    "Pa",
    "J",
    "W",
    "C",
    "V",
    "F",
    "Ω",
    "S",
    "Wb",
    "T",
    "H",
    "°C",
    "lm",
    "lx",
    "Bq",
    "Gy",
    "Sv",
    "kat"
  };

  /// <summary>Major version of the ExtraTSV format understood by this parser.</summary>
  public static readonly int MajorVersion = 0;
  /// <summary>Minor version of the ExtraTSV format understood by this parser.</summary>
  public static readonly int MinorVersion = 0;
  /// <summary>Patch version of the ExtraTSV format understood by this parser.</summary>
  public static readonly int PatchVersion = 1;

  /// <summary>
  /// Matches the version marker at the start of the file comment and captures the major, minor
  /// and patch numbers in groups 1-3.
  /// </summary>
  public static Regex VersionRegex = new Regex(@"^ ExtraTSV V(\d+)\.(\d+)\.(\d+)");

  /// <summary>
  /// Parses <paramref name="inputBuffer"/> as commented TSV, validates the ExtraTSV version
  /// marker, resolves the <c>:iso8601</c> and <c>:units</c> column annotations and converts
  /// every ISO 8601 field to a <see cref="DateTime"/>.
  /// </summary>
  /// <param name="inputBuffer">Raw bytes of the file to parse.</param>
  /// <returns>The parsed table.</returns>
  /// <exception cref="Exception">
  /// The version marker is missing, malformed or has a different major version; a
  /// <c>:units</c> annotation names an unknown unit; a record has a comment; or a timestamp
  /// field does not match <c>yyyy-MM-ddTHH:mm:ss.ffff</c>.
  /// </exception>
  public static ExtraTsv ParseExtraTsv(byte[] inputBuffer)
  {
    SaneTsv tsv = ParseCommentedTsv(inputBuffer);

    // Built once; every malformed-header path reports the same message.
    string versionHeaderError = $"ExtraTSV expects the file to start with '# ExtraTSV Vx.y.z' where x.y.z is a version compatible with {MajorVersion}.{MinorVersion}.{PatchVersion}";

    if (tsv.FileComment == null)
    {
      throw new Exception(versionHeaderError);
    }

    Match match = VersionRegex.Match(tsv.FileComment);
    if (!match.Success)
    {
      throw new Exception(versionHeaderError);
    }

    // \d+ accepts arbitrarily long runs and non-ASCII digits, either of which would make
    // int.Parse throw OverflowException/FormatException; report them as a bad header instead.
    if (!int.TryParse(match.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int fileMajorVersion))
    {
      throw new Exception(versionHeaderError);
    }

    if (fileMajorVersion != MajorVersion)
    {
      // NOTE(review): "newer" is only accurate while MajorVersion is 0; revisit the wording
      // when the major version is bumped.
      throw new Exception($"File has major version ({fileMajorVersion}) which is newer than this parser's version {MajorVersion}");
    }

    for (int i = 0; i < tsv.ColumnNames.Count(); i++)
    {
      string[] typeParts = tsv.ColumnNames[i].Split(":");

      // A name without any ':' has no annotation to strip. Previously a column named exactly
      // "iso8601" or "units" crashed with an out-of-range error further down.
      if (typeParts.Length < 2)
      {
        continue;
      }

      string annotation = typeParts[typeParts.Length - 1];

      if (annotation == "iso8601" && tsv.ColumnTypes[i] == typeof(StringType))
      {
        // Drop the trailing ":iso8601" so the column is addressed by its plain name.
        string columnName = tsv.ColumnNames[i].Substring(0, tsv.ColumnNames[i].Length - ":iso8601".Length);
        tsv.ColumnNames[i] = columnName;
        tsv.ColumnTypes[i] = typeof(Iso8601Type);
      }
      // TODO: ISO8601 time spans
      // TODO: ISO8601 time durations
      else if (annotation == "units" && (tsv.ColumnTypes[i] == typeof(Float64Type) || tsv.ColumnTypes[i] == typeof(Float32Type)))
      {
        string units = typeParts[typeParts.Length - 2];
        if (!ValidUnits.Contains(units))
        {
          throw new Exception($"Invalid units type '{units}' for column {i}");
        }

        // TODO: How to store type information since the ColumnTypes is of type Type?
      }
    }

    CultureInfo provider = CultureInfo.InvariantCulture;

    for (int i = 0; i < tsv.Records.Count; i++)
    {
      if (tsv.Records[i].Comment != null)
      {
        throw new Exception($"Line {tsv.Records[i].Line} has comment above it which is not allowed");
      }

      for (int j = 0; j < tsv.ColumnNames.Count(); j++)
      {
        if (tsv.ColumnTypes[j] == typeof(Iso8601Type))
        {
          if (!DateTime.TryParseExact((string)tsv.Records[i][j], "yyyy-MM-ddTHH:mm:ss.ffff", provider, DateTimeStyles.None, out DateTime parsed))
          {
            throw new Exception($"ISO 8601 timestamp format error on line {tsv.Records[i].Line}, field {j}");
          }

          // Replace the raw string with the parsed timestamp.
          tsv.Records[i].Fields[j] = parsed;
        }
      }
    }

    // NOTE(review): this downcast only succeeds if ParseCommentedTsv actually returns an
    // ExtraTsv instance; confirm against SaneTsv, otherwise this throws InvalidCastException.
    return (ExtraTsv)tsv;
  }
}
 | 
			
		||||
							
								
								
									
										14
									
								
								ExtraTsv/ExtraTsv.csproj
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								ExtraTsv/ExtraTsv.csproj
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,14 @@
 | 
			
		||||
<Project Sdk="Microsoft.NET.Sdk">
 | 
			
		||||
 | 
			
		||||
  <PropertyGroup>
 | 
			
		||||
    <TargetFramework>net6.0</TargetFramework>
 | 
			
		||||
    <ImplicitUsings>enable</ImplicitUsings>
 | 
			
		||||
    <Nullable>enable</Nullable>
 | 
			
		||||
    <RootNamespace>NathanMcRae</RootNamespace>
 | 
			
		||||
  </PropertyGroup>
 | 
			
		||||
 | 
			
		||||
  <ItemGroup>
 | 
			
		||||
    <ProjectReference Include="..\SaneTsv.csproj" />
 | 
			
		||||
  </ItemGroup>
 | 
			
		||||
 | 
			
		||||
</Project>
 | 
			
		||||
							
								
								
									
										44
									
								
								ExtraTsv/readme.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								ExtraTsv/readme.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,44 @@
 | 
			
		||||
Extra TSV adds many convenience types to Sane TSV:
 | 
			
		||||
 | 
			
		||||
- Timestamps
 | 
			
		||||
  Only this format is supported for now: `yyyy-MM-ddTHH:mm:ss.ffff`
 | 
			
		||||
- Timespans
 | 
			
		||||
- Time durations
 | 
			
		||||
- Multiformats
 | 
			
		||||
  - Multihashes
 | 
			
		||||
  - Multiprotocols
 | 
			
		||||
  - ...
 | 
			
		||||
- Physical units
 | 
			
		||||
  To start with, just use SI base and derived units
 | 
			
		||||
  - Base units
 | 
			
		||||
    - m
 | 
			
		||||
    - s
 | 
			
		||||
    - A
 | 
			
		||||
    - K
 | 
			
		||||
    - cd
 | 
			
		||||
    - mol
 | 
			
		||||
    - kg
 | 
			
		||||
  - Derived units
 | 
			
		||||
    - Hz
 | 
			
		||||
    - rad
 | 
			
		||||
    - sr
 | 
			
		||||
    - N
 | 
			
		||||
    - Pa
 | 
			
		||||
    - J
 | 
			
		||||
    - W
 | 
			
		||||
    - C
 | 
			
		||||
    - V
 | 
			
		||||
    - F
 | 
			
		||||
    - Ω
 | 
			
		||||
    - S
 | 
			
		||||
    - Wb
 | 
			
		||||
    - T
 | 
			
		||||
    - H
 | 
			
		||||
    - °C
 | 
			
		||||
    - lm
 | 
			
		||||
    - lx
 | 
			
		||||
    - Bq
 | 
			
		||||
    - Gy
 | 
			
		||||
    - Sv
 | 
			
		||||
    - kat
 | 
			
		||||
  Open question: how should derived units be handled?
 | 
			
		||||
							
								
								
									
										14
									
								
								ExtraTsvTest/ExtraTsvTest.csproj
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								ExtraTsvTest/ExtraTsvTest.csproj
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,14 @@
 | 
			
		||||
<Project Sdk="Microsoft.NET.Sdk">
 | 
			
		||||
 | 
			
		||||
  <PropertyGroup>
 | 
			
		||||
    <OutputType>Exe</OutputType>
 | 
			
		||||
    <TargetFramework>net6.0</TargetFramework>
 | 
			
		||||
    <ImplicitUsings>enable</ImplicitUsings>
 | 
			
		||||
    <Nullable>enable</Nullable>
 | 
			
		||||
  </PropertyGroup>
 | 
			
		||||
 | 
			
		||||
  <ItemGroup>
 | 
			
		||||
    <ProjectReference Include="..\ExtraTsv\ExtraTsv.csproj" />
 | 
			
		||||
  </ItemGroup>
 | 
			
		||||
 | 
			
		||||
</Project>
 | 
			
		||||
							
								
								
									
										20
									
								
								ExtraTsvTest/Program.cs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								ExtraTsvTest/Program.cs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,20 @@
 | 
			
		||||
using NathanMcRae;
 | 
			
		||||
using System.Text;
 | 
			
		||||
 | 
			
		||||
{
  // Smoke test: parse a small ExtraTSV document (boolean, binary and iso8601 columns, two
  // records) and check that the boolean field of the first record reads back as true.
  string testName = "Bool test";

  byte[] input = Encoding.UTF8.GetBytes(
    "# ExtraTSV V0.0.1\n" +
    "column1:ty\\#pe:boolean\tcolumn2:binary\tcolumnthree\\nyep:iso8601:string" +
    "\nTRUE\tvalue\\\\t\0woo\t2024-02-15T18:03:30.0000" +
    "\nFALSE\tnother\t2024-02-15T18:03:39.0001");

  ExtraTsv parsed = ExtraTsv.ParseExtraTsv(input);

  // The column is looked up by its unescaped name, without the type suffix.
  bool passed = parsed.Records[0]["column1:ty#pe"] is true;
  Console.WriteLine(passed ? $"Passed {testName}" : $"Failed {testName}");
}
 | 
			
		||||
@@ -10,8 +10,14 @@
 | 
			
		||||
  </PropertyGroup>
 | 
			
		||||
 | 
			
		||||
  <ItemGroup>
 | 
			
		||||
    <Compile Remove="ExtraTsvTest\**" />
 | 
			
		||||
    <Compile Remove="ExtraTsv\**" />
 | 
			
		||||
    <Compile Remove="SaneTsvTest\**" />
 | 
			
		||||
    <EmbeddedResource Remove="ExtraTsvTest\**" />
 | 
			
		||||
    <EmbeddedResource Remove="ExtraTsv\**" />
 | 
			
		||||
    <EmbeddedResource Remove="SaneTsvTest\**" />
 | 
			
		||||
    <None Remove="ExtraTsvTest\**" />
 | 
			
		||||
    <None Remove="ExtraTsv\**" />
 | 
			
		||||
    <None Remove="SaneTsvTest\**" />
 | 
			
		||||
  </ItemGroup>
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										14
									
								
								SaneTsv.sln
									
									
									
									
									
								
							
							
						
						
									
										14
									
								
								SaneTsv.sln
									
									
									
									
									
								
							@@ -5,7 +5,11 @@ VisualStudioVersion = 17.7.34024.191
 | 
			
		||||
MinimumVisualStudioVersion = 10.0.40219.1
 | 
			
		||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsv", "SaneTsv.csproj", "{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}"
 | 
			
		||||
EndProject
 | 
			
		||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SaneTsvTest", "SaneTsvTest\SaneTsvTest.csproj", "{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}"
 | 
			
		||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsvTest", "SaneTsvTest\SaneTsvTest.csproj", "{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}"
 | 
			
		||||
EndProject
 | 
			
		||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ExtraTsv", "ExtraTsv\ExtraTsv.csproj", "{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}"
 | 
			
		||||
EndProject
 | 
			
		||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ExtraTsvTest", "ExtraTsvTest\ExtraTsvTest.csproj", "{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}"
 | 
			
		||||
EndProject
 | 
			
		||||
Global
 | 
			
		||||
	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 | 
			
		||||
@@ -21,6 +25,14 @@ Global
 | 
			
		||||
		{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Debug|Any CPU.Build.0 = Debug|Any CPU
 | 
			
		||||
		{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.ActiveCfg = Release|Any CPU
 | 
			
		||||
		{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.Build.0 = Release|Any CPU
 | 
			
		||||
		{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 | 
			
		||||
		{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Debug|Any CPU.Build.0 = Debug|Any CPU
 | 
			
		||||
		{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Release|Any CPU.ActiveCfg = Release|Any CPU
 | 
			
		||||
		{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Release|Any CPU.Build.0 = Release|Any CPU
 | 
			
		||||
		{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
 | 
			
		||||
		{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Debug|Any CPU.Build.0 = Debug|Any CPU
 | 
			
		||||
		{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Release|Any CPU.ActiveCfg = Release|Any CPU
 | 
			
		||||
		{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Release|Any CPU.Build.0 = Release|Any CPU
 | 
			
		||||
	EndGlobalSection
 | 
			
		||||
	GlobalSection(SolutionProperties) = preSolution
 | 
			
		||||
		HideSolutionNode = FALSE
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user