Add ExtraTSV
This commit is contained in:
		
							
								
								
									
										125
									
								
								SaneTsv/ExtraTsv/ExtraTsv.cs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										125
									
								
								SaneTsv/ExtraTsv/ExtraTsv.cs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,125 @@ | ||||
|  | ||||
| using System.Globalization; | ||||
| using System.Text.RegularExpressions; | ||||
|  | ||||
| namespace NathanMcRae; | ||||
|  | ||||
| /// <summary> | ||||
| /// Parser for ExtraTSV: reads a commented Sane TSV file whose file comment carries an | ||||
| /// ExtraTSV version line and upgrades columns whose names end in an ExtraTSV annotation | ||||
| /// (currently <c>:iso8601</c>) to richer types. | ||||
| /// </summary> | ||||
| public class ExtraTsv : SaneTsv | ||||
| { | ||||
|   /// <summary>Column type assigned to string columns annotated with <c>:iso8601</c>.</summary> | ||||
|   public class Iso8601Type : ColumnType { } | ||||
|  | ||||
|   /// <summary>Column type describing a column annotated with a physical unit.</summary> | ||||
|   public class PhysicalUnitsType : ColumnType | ||||
|   { | ||||
|     /// <summary>The unit symbol passed to the constructor.</summary> | ||||
|     public string Units { get; } | ||||
|  | ||||
|     /// <summary>Creates a units column type for the given unit symbol.</summary> | ||||
|     /// <param name="Units">Unit symbol to expose through the <c>Units</c> property.</param> | ||||
|     public PhysicalUnitsType(string Units) | ||||
|     { | ||||
|       // Previously the argument was discarded, leaving the property unset. | ||||
|       this.Units = Units; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   /// <summary>Unit symbols accepted in front of a <c>:units</c> column annotation.</summary> | ||||
|   public static readonly string[] ValidUnits = | ||||
|   { | ||||
|     "m", | ||||
|     "s", | ||||
|     "A", | ||||
|     "K", | ||||
|     "cd", | ||||
|     "mol", | ||||
|     "kg", | ||||
|     "Hz", | ||||
|     "rad", | ||||
|     "sr", | ||||
|     "N", | ||||
|     "Pa", | ||||
|     "J", | ||||
|     "W", | ||||
|     "C", | ||||
|     "V", | ||||
|     "F", | ||||
|     "Ω", | ||||
|     "S", | ||||
|     "Wb", | ||||
|     "T", | ||||
|     "H", | ||||
|     "°C", | ||||
|     "lm", | ||||
|     "lx", | ||||
|     "Bq", | ||||
|     "Gy", | ||||
|     "Sv", | ||||
|     "kat" | ||||
|   }; | ||||
|  | ||||
|   // Version of this parser; only the major component is compared against the file's version. | ||||
|   public static readonly int MajorVersion = 0; | ||||
|   public static readonly int MinorVersion = 0; | ||||
|   public static readonly int PatchVersion = 1; | ||||
|  | ||||
|   // Matches the version line inside the file comment and captures major, minor and patch. | ||||
|   // NOTE(review): unlike the fields above this is not readonly; left as-is so existing callers keep compiling. | ||||
|   public static Regex VersionRegex = new Regex(@"^ ExtraTSV V(\d+)\.(\d+)\.(\d+)"); | ||||
|  | ||||
|   /// <summary> | ||||
|   /// Parses <paramref name="inputBuffer"/> as commented TSV, validates the ExtraTSV version | ||||
|   /// line, retypes <c>:iso8601</c> string columns and converts their fields to <see cref="DateTime"/>. | ||||
|   /// </summary> | ||||
|   /// <param name="inputBuffer">Raw file contents, handed to <c>ParseCommentedTsv</c>.</param> | ||||
|   /// <returns>The parsed table cast to <see cref="ExtraTsv"/>.</returns> | ||||
|   /// <exception cref="Exception"> | ||||
|   /// The version line is missing or malformed, the major version differs from | ||||
|   /// <see cref="MajorVersion"/>, a <c>:units</c> annotation has a missing or unknown unit, | ||||
|   /// a record has a comment above it, or a timestamp field does not match the expected format. | ||||
|   /// </exception> | ||||
|   public static ExtraTsv ParseExtraTsv(byte[] inputBuffer) | ||||
|   { | ||||
|     SaneTsv tsv = ParseCommentedTsv(inputBuffer); | ||||
|  | ||||
|     string versionError = $"ExtraTSV expects the file to start with '# ExtraTSV Vx.y.z' where x.y.z is a version compatible with {MajorVersion}.{MinorVersion}.{PatchVersion}"; | ||||
|  | ||||
|     if (tsv.FileComment == null) | ||||
|     { | ||||
|       throw new Exception(versionError); | ||||
|     } | ||||
|  | ||||
|     Match match = VersionRegex.Match(tsv.FileComment); | ||||
|  | ||||
|     // \d+ can capture non-ASCII digits or a run too long for an int; treat both as a | ||||
|     // malformed version line instead of leaking FormatException/OverflowException. | ||||
|     if (!match.Success | ||||
|       || !int.TryParse(match.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int fileMajorVersion)) | ||||
|     { | ||||
|       throw new Exception(versionError); | ||||
|     } | ||||
|  | ||||
|     if (fileMajorVersion != MajorVersion) | ||||
|     { | ||||
|       // The check rejects any mismatch, so report the direction that actually applies. | ||||
|       string relation = fileMajorVersion > MajorVersion ? "newer" : "older"; | ||||
|       throw new Exception($"File has major version ({fileMajorVersion}) which is {relation} than this parser's version {MajorVersion}"); | ||||
|     } | ||||
|  | ||||
|     const string iso8601Suffix = ":iso8601"; | ||||
|  | ||||
|     for (int i = 0; i < tsv.ColumnNames.Count(); i++) | ||||
|     { | ||||
|       string[] typeParts = tsv.ColumnNames[i].Split(":"); | ||||
|       string annotation = typeParts[^1]; | ||||
|  | ||||
|       // Require a real ':' separated suffix: a column named exactly "iso8601" has nothing | ||||
|       // to strip (it used to hit Substring with a negative length) and stays a string column. | ||||
|       if (typeParts.Length > 1 && annotation == "iso8601" && tsv.ColumnTypes[i] == typeof(StringType)) | ||||
|       { | ||||
|         string columnName = tsv.ColumnNames[i].Substring(0, tsv.ColumnNames[i].Length - iso8601Suffix.Length); | ||||
|         tsv.ColumnNames[i] = columnName; | ||||
|         tsv.ColumnTypes[i] = typeof(Iso8601Type); | ||||
|       } | ||||
|       // TODO: ISO8601 time spans | ||||
|       // TODO: ISO8601 time durations | ||||
|       else if (annotation == "units" && (tsv.ColumnTypes[i] == typeof(Float64Type) || tsv.ColumnTypes[i] == typeof(Float32Type))) | ||||
|       { | ||||
|         // With no part in front of "units" there is no unit to validate or to quote in the | ||||
|         // error (indexing it used to throw IndexOutOfRangeException). | ||||
|         if (typeParts.Length < 2) | ||||
|         { | ||||
|           throw new Exception($"Missing units type for column {i}"); | ||||
|         } | ||||
|  | ||||
|         string units = typeParts[^2]; | ||||
|         if (!ValidUnits.Contains(units)) | ||||
|         { | ||||
|           throw new Exception($"Invalid units type '{units}' for column {i}"); | ||||
|         } | ||||
|  | ||||
|         // TODO: How to store type information since the ColumnTypes is of type Type? | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     CultureInfo provider = CultureInfo.InvariantCulture; | ||||
|  | ||||
|     for (int i = 0; i < tsv.Records.Count; i++) | ||||
|     { | ||||
|       if (tsv.Records[i].Comment != null) | ||||
|       { | ||||
|         throw new Exception($"Line {tsv.Records[i].Line} has comment above it which is not allowed"); | ||||
|       } | ||||
|  | ||||
|       for (int j = 0; j < tsv.ColumnNames.Count(); j++) | ||||
|       { | ||||
|         if (tsv.ColumnTypes[j] == typeof(Iso8601Type)) | ||||
|         { | ||||
|           // Only this exact timestamp layout is accepted for now. | ||||
|           if (!DateTime.TryParseExact((string)tsv.Records[i][j], "yyyy-MM-ddTHH:mm:ss.ffff", provider, DateTimeStyles.None, out DateTime parsed)) | ||||
|           { | ||||
|             throw new Exception($"ISO 8601 timestamp format error on line {tsv.Records[i].Line}, field {j}"); | ||||
|           } | ||||
|  | ||||
|           // Replace the raw string field with the parsed value. | ||||
|           tsv.Records[i].Fields[j] = parsed; | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // NOTE(review): this downcast only succeeds if ParseCommentedTsv returns an object whose | ||||
|     // runtime type is ExtraTsv; otherwise it throws InvalidCastException. Confirm against SaneTsv. | ||||
|     return (ExtraTsv)tsv; | ||||
|   } | ||||
| } | ||||
							
								
								
									
										14
									
								
								SaneTsv/ExtraTsv/ExtraTsv.csproj
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								SaneTsv/ExtraTsv/ExtraTsv.csproj
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,14 @@ | ||||
| <Project Sdk="Microsoft.NET.Sdk"> | ||||
|  | ||||
|   <PropertyGroup> | ||||
|     <TargetFramework>net6.0</TargetFramework> | ||||
|     <ImplicitUsings>enable</ImplicitUsings> | ||||
|     <Nullable>enable</Nullable> | ||||
|     <RootNamespace>NathanMcRae</RootNamespace> | ||||
|   </PropertyGroup> | ||||
|  | ||||
|   <ItemGroup> | ||||
|     <ProjectReference Include="..\SaneTsv.csproj" /> | ||||
|   </ItemGroup> | ||||
|  | ||||
| </Project> | ||||
							
								
								
									
										44
									
								
								SaneTsv/ExtraTsv/readme.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								SaneTsv/ExtraTsv/readme.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,44 @@ | ||||
| Extra TSV adds many convenience types to Sane TSV: | ||||
|  | ||||
| - Timestamps | ||||
|   Just this format for now: yyyy-MM-ddTHH:mm:ss.ffff | ||||
| - Timespans | ||||
| - Time durations | ||||
| - Multiformats | ||||
|   - Multihashes | ||||
|   - Multiprotocols | ||||
|   - ... | ||||
| - Physical units | ||||
|   To start with, just use SI base and derived units | ||||
|   - Base units | ||||
|     - m | ||||
|     - s | ||||
|     - A | ||||
|     - K | ||||
|     - cd | ||||
|     - mol | ||||
|     - kg | ||||
|   - Derived units | ||||
|     - Hz | ||||
|     - rad | ||||
|     - sr | ||||
|     - N | ||||
|     - Pa | ||||
|     - J | ||||
|     - W | ||||
|     - C | ||||
|     - V | ||||
|     - F | ||||
|     - Ω | ||||
|     - S | ||||
|     - Wb | ||||
|     - T | ||||
|     - H | ||||
|     - °C | ||||
|     - lm | ||||
|     - lx | ||||
|     - Bq | ||||
|     - Gy | ||||
|     - Sv | ||||
|     - kat | ||||
|   How to handle derived units? | ||||
							
								
								
									
										14
									
								
								SaneTsv/ExtraTsvTest/ExtraTsvTest.csproj
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								SaneTsv/ExtraTsvTest/ExtraTsvTest.csproj
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,14 @@ | ||||
| <Project Sdk="Microsoft.NET.Sdk"> | ||||
|  | ||||
|   <PropertyGroup> | ||||
|     <OutputType>Exe</OutputType> | ||||
|     <TargetFramework>net6.0</TargetFramework> | ||||
|     <ImplicitUsings>enable</ImplicitUsings> | ||||
|     <Nullable>enable</Nullable> | ||||
|   </PropertyGroup> | ||||
|  | ||||
|   <ItemGroup> | ||||
|     <ProjectReference Include="..\ExtraTsv\ExtraTsv.csproj" /> | ||||
|   </ItemGroup> | ||||
|  | ||||
| </Project> | ||||
							
								
								
									
										20
									
								
								SaneTsv/ExtraTsvTest/Program.cs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								SaneTsv/ExtraTsvTest/Program.cs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,20 @@ | ||||
| using NathanMcRae; | ||||
| using System.Text; | ||||
|  | ||||
| { | ||||
|   // Smoke test: parse a small ExtraTSV document containing a boolean column, a binary | ||||
|   // column and an iso8601 column, then check the first record's boolean field is true. | ||||
|   string testName = "Bool test"; | ||||
|   string document = "# ExtraTSV V0.0.1\n" + | ||||
|     "column1:ty\\#pe:boolean\tcolumn2:binary\tcolumnthree\\nyep:iso8601:string" + | ||||
|     "\nTRUE\tvalue\\\\t\0woo\t2024-02-15T18:03:30.0000" + | ||||
|     "\nFALSE\tnother\t2024-02-15T18:03:39.0001"; | ||||
|  | ||||
|   byte[] documentBytes = Encoding.UTF8.GetBytes(document); | ||||
|   ExtraTsv table = ExtraTsv.ParseExtraTsv(documentBytes); | ||||
|  | ||||
|   // Passes only when the field is a bool and that bool is true. | ||||
|   bool passed = table.Records[0]["column1:ty#pe"] is bool flag && flag; | ||||
|   Console.WriteLine(passed ? $"Passed {testName}" : $"Failed {testName}"); | ||||
| } | ||||
| @@ -10,8 +10,14 @@ | ||||
|   </PropertyGroup> | ||||
|  | ||||
|   <ItemGroup> | ||||
|     <Compile Remove="ExtraTsvTest\**" /> | ||||
|     <Compile Remove="ExtraTsv\**" /> | ||||
|     <Compile Remove="SaneTsvTest\**" /> | ||||
|     <EmbeddedResource Remove="ExtraTsvTest\**" /> | ||||
|     <EmbeddedResource Remove="ExtraTsv\**" /> | ||||
|     <EmbeddedResource Remove="SaneTsvTest\**" /> | ||||
|     <None Remove="ExtraTsvTest\**" /> | ||||
|     <None Remove="ExtraTsv\**" /> | ||||
|     <None Remove="SaneTsvTest\**" /> | ||||
|   </ItemGroup> | ||||
|  | ||||
|   | ||||
| @@ -5,7 +5,11 @@ VisualStudioVersion = 17.7.34024.191 | ||||
| MinimumVisualStudioVersion = 10.0.40219.1 | ||||
| Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsv", "SaneTsv.csproj", "{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}" | ||||
| EndProject | ||||
| Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SaneTsvTest", "SaneTsvTest\SaneTsvTest.csproj", "{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}" | ||||
| Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsvTest", "SaneTsvTest\SaneTsvTest.csproj", "{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}" | ||||
| EndProject | ||||
| Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ExtraTsv", "ExtraTsv\ExtraTsv.csproj", "{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}" | ||||
| EndProject | ||||
| Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ExtraTsvTest", "ExtraTsvTest\ExtraTsvTest.csproj", "{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}" | ||||
| EndProject | ||||
| Global | ||||
| 	GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||||
| @@ -21,6 +25,14 @@ Global | ||||
| 		{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||||
| 		{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||||
| 		{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.Build.0 = Release|Any CPU | ||||
| 		{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||||
| 		{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||||
| 		{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||||
| 		{D9F2E9C8-4F52-4BB7-9BBD-AE9A0C6168E7}.Release|Any CPU.Build.0 = Release|Any CPU | ||||
| 		{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||||
| 		{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||||
| 		{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||||
| 		{A545B0DB-F799-43E2-9DFA-C18BDF3535F1}.Release|Any CPU.Build.0 = Release|Any CPU | ||||
| 	EndGlobalSection | ||||
| 	GlobalSection(SolutionProperties) = preSolution | ||||
| 		HideSolutionNode = FALSE | ||||
|   | ||||
		Reference in New Issue
	
	Block a user