Move .NET implementation to SaneTsv

This commit is contained in:
Nathan McRae 2024-02-13 19:15:07 -08:00
commit 695f98b67c
6 changed files with 487 additions and 0 deletions

243
.editorconfig Normal file
View File

@ -0,0 +1,243 @@
# Remove the line below if you want to inherit .editorconfig settings from higher directories
root = true
# C# files
[*.cs]
#### Core EditorConfig Options ####
# Indentation and spacing
indent_size = 2
indent_style = space
tab_width = 2
# New line preferences
end_of_line = crlf
insert_final_newline = false
#### .NET Coding Conventions ####
# Organize usings
dotnet_separate_import_directive_groups = false
dotnet_sort_system_directives_first = false
file_header_template = unset
# this. and Me. preferences
dotnet_style_qualification_for_event = false
dotnet_style_qualification_for_field = false
dotnet_style_qualification_for_method = false
dotnet_style_qualification_for_property = false
# Language keywords vs BCL types preferences
dotnet_style_predefined_type_for_locals_parameters_members = true
dotnet_style_predefined_type_for_member_access = true
# Parentheses preferences
dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity
dotnet_style_parentheses_in_other_binary_operators = always_for_clarity
dotnet_style_parentheses_in_other_operators = never_if_unnecessary
dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity
# Modifier preferences
dotnet_style_require_accessibility_modifiers = for_non_interface_members
# Expression-level preferences
dotnet_style_coalesce_expression = true
dotnet_style_collection_initializer = true
dotnet_style_explicit_tuple_names = true
dotnet_style_namespace_match_folder = true
dotnet_style_null_propagation = true
dotnet_style_object_initializer = true
dotnet_style_operator_placement_when_wrapping = beginning_of_line
dotnet_style_prefer_auto_properties = true
dotnet_style_prefer_compound_assignment = true
dotnet_style_prefer_conditional_expression_over_assignment = true
dotnet_style_prefer_conditional_expression_over_return = true
dotnet_style_prefer_foreach_explicit_cast_in_source = when_strongly_typed
dotnet_style_prefer_inferred_anonymous_type_member_names = true
dotnet_style_prefer_inferred_tuple_names = true
dotnet_style_prefer_is_null_check_over_reference_equality_method = true
dotnet_style_prefer_simplified_boolean_expressions = true
dotnet_style_prefer_simplified_interpolation = true
# Field preferences
dotnet_style_readonly_field = true
# Parameter preferences
dotnet_code_quality_unused_parameters = all
# Suppression preferences
dotnet_remove_unnecessary_suppression_exclusions = none
# New line preferences
dotnet_style_allow_multiple_blank_lines_experimental = true:warning
dotnet_style_allow_statement_immediately_after_block_experimental = true
#### C# Coding Conventions ####
# var preferences
csharp_style_var_elsewhere = false
csharp_style_var_for_built_in_types = false
csharp_style_var_when_type_is_apparent = false
# Expression-bodied members
csharp_style_expression_bodied_accessors = true
csharp_style_expression_bodied_constructors = false
csharp_style_expression_bodied_indexers = true
csharp_style_expression_bodied_lambdas = true
csharp_style_expression_bodied_local_functions = false
csharp_style_expression_bodied_methods = false
csharp_style_expression_bodied_operators = false
csharp_style_expression_bodied_properties = true
# Pattern matching preferences
csharp_style_pattern_matching_over_as_with_null_check = true
csharp_style_pattern_matching_over_is_with_cast_check = true
csharp_style_prefer_extended_property_pattern = true
csharp_style_prefer_not_pattern = true
csharp_style_prefer_pattern_matching = true
csharp_style_prefer_switch_expression = true
# Null-checking preferences
csharp_style_conditional_delegate_call = true
# Modifier preferences
csharp_prefer_static_local_function = true
csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async
csharp_style_prefer_readonly_struct = true
csharp_style_prefer_readonly_struct_member = true
# Code-block preferences
csharp_prefer_braces = true
csharp_prefer_simple_using_statement = true
csharp_style_namespace_declarations = block_scoped
csharp_style_prefer_method_group_conversion = true
csharp_style_prefer_primary_constructors = true
csharp_style_prefer_top_level_statements = true
# Expression-level preferences
csharp_prefer_simple_default_expression = true
csharp_style_deconstructed_variable_declaration = true
csharp_style_implicit_object_creation_when_type_is_apparent = true
csharp_style_inlined_variable_declaration = true
csharp_style_prefer_index_operator = true
csharp_style_prefer_local_over_anonymous_function = true
csharp_style_prefer_null_check_over_type_check = true
csharp_style_prefer_range_operator = true
csharp_style_prefer_tuple_swap = true
csharp_style_prefer_utf8_string_literals = true
csharp_style_throw_expression = true
csharp_style_unused_value_assignment_preference = discard_variable:warning
csharp_style_unused_value_expression_statement_preference = discard_variable
# 'using' directive preferences
csharp_using_directive_placement = outside_namespace
# New line preferences
csharp_style_allow_blank_line_after_colon_in_constructor_initializer_experimental = true
csharp_style_allow_blank_line_after_token_in_arrow_expression_clause_experimental = true
csharp_style_allow_blank_line_after_token_in_conditional_expression_experimental = true
csharp_style_allow_blank_lines_between_consecutive_braces_experimental = true:warning
csharp_style_allow_embedded_statements_on_same_line_experimental = true
#### C# Formatting Rules ####
# New line preferences
csharp_new_line_before_catch = true
csharp_new_line_before_else = true
csharp_new_line_before_finally = true
csharp_new_line_before_members_in_anonymous_types = true
csharp_new_line_before_members_in_object_initializers = true
csharp_new_line_before_open_brace = all
csharp_new_line_between_query_expression_clauses = true
# Indentation preferences
csharp_indent_block_contents = true
csharp_indent_braces = false
csharp_indent_case_contents = true
csharp_indent_case_contents_when_block = true
csharp_indent_labels = one_less_than_current
csharp_indent_switch_labels = true
# Space preferences
csharp_space_after_cast = false
csharp_space_after_colon_in_inheritance_clause = true
csharp_space_after_comma = true
csharp_space_after_dot = false
csharp_space_after_keywords_in_control_flow_statements = true
csharp_space_after_semicolon_in_for_statement = true
csharp_space_around_binary_operators = before_and_after
csharp_space_around_declaration_statements = false
csharp_space_before_colon_in_inheritance_clause = true
csharp_space_before_comma = false
csharp_space_before_dot = false
csharp_space_before_open_square_brackets = false
csharp_space_before_semicolon_in_for_statement = false
csharp_space_between_empty_square_brackets = false
csharp_space_between_method_call_empty_parameter_list_parentheses = false
csharp_space_between_method_call_name_and_opening_parenthesis = false
csharp_space_between_method_call_parameter_list_parentheses = false
csharp_space_between_method_declaration_empty_parameter_list_parentheses = false
csharp_space_between_method_declaration_name_and_open_parenthesis = false
csharp_space_between_method_declaration_parameter_list_parentheses = false
csharp_space_between_parentheses = false
csharp_space_between_square_brackets = false
# Wrapping preferences
csharp_preserve_single_line_blocks = true
csharp_preserve_single_line_statements = true
#### Naming styles ####
# Naming rules
dotnet_naming_rule.interface_should_be_begins_with_i.severity = suggestion
dotnet_naming_rule.interface_should_be_begins_with_i.symbols = interface
dotnet_naming_rule.interface_should_be_begins_with_i.style = begins_with_i
dotnet_naming_rule.types_should_be_pascal_case.severity = suggestion
dotnet_naming_rule.types_should_be_pascal_case.symbols = types
dotnet_naming_rule.types_should_be_pascal_case.style = pascal_case
dotnet_naming_rule.non_field_members_should_be_pascal_case.severity = suggestion
dotnet_naming_rule.non_field_members_should_be_pascal_case.symbols = non_field_members
dotnet_naming_rule.non_field_members_should_be_pascal_case.style = pascal_case
dotnet_naming_rule.private_or_internal_field_should_be_begins_with_underscore.severity = suggestion
dotnet_naming_rule.private_or_internal_field_should_be_begins_with_underscore.symbols = private_or_internal_field
dotnet_naming_rule.private_or_internal_field_should_be_begins_with_underscore.style = begins_with_underscore
# Symbol specifications
dotnet_naming_symbols.interface.applicable_kinds = interface
dotnet_naming_symbols.interface.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
dotnet_naming_symbols.interface.required_modifiers =
dotnet_naming_symbols.private_or_internal_field.applicable_kinds = field
dotnet_naming_symbols.private_or_internal_field.applicable_accessibilities = internal, private, private_protected
dotnet_naming_symbols.private_or_internal_field.required_modifiers =
dotnet_naming_symbols.types.applicable_kinds = class, struct, interface, enum
dotnet_naming_symbols.types.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
dotnet_naming_symbols.types.required_modifiers =
dotnet_naming_symbols.non_field_members.applicable_kinds = property, event, method
dotnet_naming_symbols.non_field_members.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
dotnet_naming_symbols.non_field_members.required_modifiers =
# Naming styles
dotnet_naming_style.pascal_case.required_prefix =
dotnet_naming_style.pascal_case.required_suffix =
dotnet_naming_style.pascal_case.word_separator =
dotnet_naming_style.pascal_case.capitalization = pascal_case
dotnet_naming_style.begins_with_i.required_prefix = I
dotnet_naming_style.begins_with_i.required_suffix =
dotnet_naming_style.begins_with_i.word_separator =
dotnet_naming_style.begins_with_i.capitalization = pascal_case
dotnet_naming_style.begins_with_underscore.required_prefix = _
dotnet_naming_style.begins_with_underscore.required_suffix =
dotnet_naming_style.begins_with_underscore.word_separator =
dotnet_naming_style.begins_with_underscore.capitalization = pascal_case

174
SaneTsv.cs Normal file
View File

@ -0,0 +1,174 @@
using System.Text;
namespace NathanMcRae;
/// <summary>
/// Sane Tab-Separated Values
/// </summary>
public class SaneTsv
{
// TODO: We need to be able to update all these in tandem somehow
public string[] ColumnNames { get; protected set; }
public Dictionary<string, List<string>> Columns { get; protected set; }
public List<SaneTsvRecord> Records { get; protected set; }
public static SaneTsv Parse(byte[] inputBuffer)
{
var parsed = new SaneTsv();
parsed.Columns = new Dictionary<string, List<string>>();
parsed.ColumnNames = new string[] { };
parsed.Records = new List<SaneTsvRecord>();
var fieldBytes = new List<byte>();
var fields = new List<string>();
int numFields = -1;
for (int i = 0; i < inputBuffer.Count(); i++)
{
if (inputBuffer[i] == '\\')
{
if (i + 1 == inputBuffer.Count())
{
throw new Exception($"Found '\\' at end of input");
}
if (inputBuffer[i + 1] == 'n')
{
fieldBytes.Add((byte)'\n');
i++;
}
else if (inputBuffer[i + 1] == '\\')
{
fieldBytes.Add((byte)'\\');
i++;
}
else if (inputBuffer[i + 1] == 't')
{
fieldBytes.Add((byte)'\t');
i++;
}
else
{
throw new Exception($"Expected 'n', 't', or '\\' after '\\' at {i}");
}
}
else if (inputBuffer[i] == '\t')
{
// end of field
try
{
fields.Add(Encoding.UTF8.GetString(fieldBytes.ToArray()));
}
catch (Exception e)
{
throw new Exception($"Field {fields.Count} on line {parsed.Records.Count + 1} is not valid UTF-8", e);
}
fieldBytes.Clear();
}
else if (inputBuffer[i] == '\n')
{
try
{
fields.Add(Encoding.UTF8.GetString(fieldBytes.ToArray()));
}
catch (Exception e)
{
throw new Exception($"Field {fields.Count} on line {parsed.Records.Count + 1} is not valid UTF-8", e);
}
fieldBytes.Clear();
if (numFields < 0)
{
// This is the header
numFields = fields.Count;
parsed.ColumnNames = new string[numFields];
for (int j = 0; j < fields.Count; j++)
{
string columnName = fields[j];
try
{
parsed.Columns.Add(columnName, new List<string>());
}
catch (Exception e)
{
throw new Exception($"Column name {columnName} is not unique", e);
}
parsed.ColumnNames[j] = columnName;
}
fields.Clear();
}
else if (numFields != fields.Count)
{
throw new Exception($"Expected {numFields} fields on line {parsed.Records.Count + 2}, but found {fields.Count}");
}
else
{
for (int j = 0; j < fields.Count; j++)
{
parsed.Columns[parsed.ColumnNames[j]].Add(fields[j]);
}
parsed.Records.Add(new SaneTsvRecord(parsed, fields.ToArray()));
fields.Clear();
}
}
else
{
fieldBytes.Add(inputBuffer[i]);
}
}
try
{
fields.Add(Encoding.UTF8.GetString(fieldBytes.ToArray()));
}
catch (Exception e)
{
throw new Exception($"Field {fields.Count} on line {parsed.Records.Count + 1} is not valid UTF-8", e);
}
if (numFields != fields.Count)
{
throw new Exception($"Expected {numFields} fields on line {parsed.Records.Count + 2}, but found {fields.Count}");
}
else
{
for (int j = 0; j < fields.Count; j++)
{
try
{
parsed.Columns[parsed.ColumnNames[j]].Add(fields[j]);
}
catch (Exception e)
{
throw new Exception($"Field {j} on line {parsed.Records.Count + 1} is not valid UTF-8", e);
}
}
parsed.Records.Add(new SaneTsvRecord(parsed, fields.ToArray()));
fields.Clear();
}
return parsed;
}
public SaneTsvRecord this[int i] => Records[i];
public class SaneTsvRecord
{
public SaneTsv Parent { get; }
public string[] Fields { get; }
public string this[string columnName] => Fields[Array.IndexOf(Parent.ColumnNames, columnName)];
public SaneTsvRecord(SaneTsv parent, string[] fields)
{
Parent = parent;
Fields = fields;
}
}
}

17
SaneTsv.csproj Normal file
View File

@ -0,0 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>disable</Nullable>
<GenerateAssemblyInfo>false</GenerateAssemblyInfo>
<GenerateTargetFrameworkAttribute>false</GenerateTargetFrameworkAttribute>
</PropertyGroup>
<ItemGroup>
<Compile Remove="SaneTsvTest\**" />
<EmbeddedResource Remove="SaneTsvTest\**" />
<None Remove="SaneTsvTest\**" />
</ItemGroup>
</Project>

31
SaneTsv.sln Normal file
View File

@ -0,0 +1,31 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.7.34024.191
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsv", "SaneTsv.csproj", "{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SaneTsvTest", "SaneTsvTest\SaneTsvTest.csproj", "{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}.Release|Any CPU.Build.0 = Release|Any CPU
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {FF70BD8A-F4D7-4A49-9DBC-4009465D5706}
EndGlobalSection
EndGlobal

8
SaneTsvTest/Program.cs Normal file
View File

@ -0,0 +1,8 @@
using NathanMcRae;
using System.Text;
string testString1 = "column1\tcolumn2\tcolumnthree\\nyep\nvalue1\tvalue\\\\twoo\tvaluetrhee\nthis\\nis\\na\\nvalue\tnother\tno\\ther";
SaneTsv parsed = SaneTsv.Parse(Encoding.UTF8.GetBytes(testString1));
// See https://aka.ms/new-console-template for more information
Console.WriteLine("Hello, World!");

View File

@ -0,0 +1,14 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<ProjectReference Include="..\SaneTsv.csproj" />
</ItemGroup>
</Project>