Move .NET implementation to SaneTsv
This commit is contained in:
commit
695f98b67c
243
.editorconfig
Normal file
243
.editorconfig
Normal file
@ -0,0 +1,243 @@
|
||||
# Remove the line below if you want to inherit .editorconfig settings from higher directories
|
||||
root = true
|
||||
|
||||
# C# files
|
||||
[*.cs]
|
||||
|
||||
#### Core EditorConfig Options ####
|
||||
|
||||
# Indentation and spacing
|
||||
indent_size = 2
|
||||
indent_style = space
|
||||
tab_width = 2
|
||||
|
||||
# New line preferences
|
||||
end_of_line = crlf
|
||||
insert_final_newline = false
|
||||
|
||||
#### .NET Coding Conventions ####
|
||||
|
||||
# Organize usings
|
||||
dotnet_separate_import_directive_groups = false
|
||||
dotnet_sort_system_directives_first = false
|
||||
file_header_template = unset
|
||||
|
||||
# this. and Me. preferences
|
||||
dotnet_style_qualification_for_event = false
|
||||
dotnet_style_qualification_for_field = false
|
||||
dotnet_style_qualification_for_method = false
|
||||
dotnet_style_qualification_for_property = false
|
||||
|
||||
# Language keywords vs BCL types preferences
|
||||
dotnet_style_predefined_type_for_locals_parameters_members = true
|
||||
dotnet_style_predefined_type_for_member_access = true
|
||||
|
||||
# Parentheses preferences
|
||||
dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity
|
||||
dotnet_style_parentheses_in_other_binary_operators = always_for_clarity
|
||||
dotnet_style_parentheses_in_other_operators = never_if_unnecessary
|
||||
dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity
|
||||
|
||||
# Modifier preferences
|
||||
dotnet_style_require_accessibility_modifiers = for_non_interface_members
|
||||
|
||||
# Expression-level preferences
|
||||
dotnet_style_coalesce_expression = true
|
||||
dotnet_style_collection_initializer = true
|
||||
dotnet_style_explicit_tuple_names = true
|
||||
dotnet_style_namespace_match_folder = true
|
||||
dotnet_style_null_propagation = true
|
||||
dotnet_style_object_initializer = true
|
||||
dotnet_style_operator_placement_when_wrapping = beginning_of_line
|
||||
dotnet_style_prefer_auto_properties = true
|
||||
dotnet_style_prefer_compound_assignment = true
|
||||
dotnet_style_prefer_conditional_expression_over_assignment = true
|
||||
dotnet_style_prefer_conditional_expression_over_return = true
|
||||
dotnet_style_prefer_foreach_explicit_cast_in_source = when_strongly_typed
|
||||
dotnet_style_prefer_inferred_anonymous_type_member_names = true
|
||||
dotnet_style_prefer_inferred_tuple_names = true
|
||||
dotnet_style_prefer_is_null_check_over_reference_equality_method = true
|
||||
dotnet_style_prefer_simplified_boolean_expressions = true
|
||||
dotnet_style_prefer_simplified_interpolation = true
|
||||
|
||||
# Field preferences
|
||||
dotnet_style_readonly_field = true
|
||||
|
||||
# Parameter preferences
|
||||
dotnet_code_quality_unused_parameters = all
|
||||
|
||||
# Suppression preferences
|
||||
dotnet_remove_unnecessary_suppression_exclusions = none
|
||||
|
||||
# New line preferences
|
||||
dotnet_style_allow_multiple_blank_lines_experimental = true:warning
|
||||
dotnet_style_allow_statement_immediately_after_block_experimental = true
|
||||
|
||||
#### C# Coding Conventions ####
|
||||
|
||||
# var preferences
|
||||
csharp_style_var_elsewhere = false
|
||||
csharp_style_var_for_built_in_types = false
|
||||
csharp_style_var_when_type_is_apparent = false
|
||||
|
||||
# Expression-bodied members
|
||||
csharp_style_expression_bodied_accessors = true
|
||||
csharp_style_expression_bodied_constructors = false
|
||||
csharp_style_expression_bodied_indexers = true
|
||||
csharp_style_expression_bodied_lambdas = true
|
||||
csharp_style_expression_bodied_local_functions = false
|
||||
csharp_style_expression_bodied_methods = false
|
||||
csharp_style_expression_bodied_operators = false
|
||||
csharp_style_expression_bodied_properties = true
|
||||
|
||||
# Pattern matching preferences
|
||||
csharp_style_pattern_matching_over_as_with_null_check = true
|
||||
csharp_style_pattern_matching_over_is_with_cast_check = true
|
||||
csharp_style_prefer_extended_property_pattern = true
|
||||
csharp_style_prefer_not_pattern = true
|
||||
csharp_style_prefer_pattern_matching = true
|
||||
csharp_style_prefer_switch_expression = true
|
||||
|
||||
# Null-checking preferences
|
||||
csharp_style_conditional_delegate_call = true
|
||||
|
||||
# Modifier preferences
|
||||
csharp_prefer_static_local_function = true
|
||||
csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async
|
||||
csharp_style_prefer_readonly_struct = true
|
||||
csharp_style_prefer_readonly_struct_member = true
|
||||
|
||||
# Code-block preferences
|
||||
csharp_prefer_braces = true
|
||||
csharp_prefer_simple_using_statement = true
|
||||
csharp_style_namespace_declarations = block_scoped
|
||||
csharp_style_prefer_method_group_conversion = true
|
||||
csharp_style_prefer_primary_constructors = true
|
||||
csharp_style_prefer_top_level_statements = true
|
||||
|
||||
# Expression-level preferences
|
||||
csharp_prefer_simple_default_expression = true
|
||||
csharp_style_deconstructed_variable_declaration = true
|
||||
csharp_style_implicit_object_creation_when_type_is_apparent = true
|
||||
csharp_style_inlined_variable_declaration = true
|
||||
csharp_style_prefer_index_operator = true
|
||||
csharp_style_prefer_local_over_anonymous_function = true
|
||||
csharp_style_prefer_null_check_over_type_check = true
|
||||
csharp_style_prefer_range_operator = true
|
||||
csharp_style_prefer_tuple_swap = true
|
||||
csharp_style_prefer_utf8_string_literals = true
|
||||
csharp_style_throw_expression = true
|
||||
csharp_style_unused_value_assignment_preference = discard_variable:warning
|
||||
csharp_style_unused_value_expression_statement_preference = discard_variable
|
||||
|
||||
# 'using' directive preferences
|
||||
csharp_using_directive_placement = outside_namespace
|
||||
|
||||
# New line preferences
|
||||
csharp_style_allow_blank_line_after_colon_in_constructor_initializer_experimental = true
|
||||
csharp_style_allow_blank_line_after_token_in_arrow_expression_clause_experimental = true
|
||||
csharp_style_allow_blank_line_after_token_in_conditional_expression_experimental = true
|
||||
csharp_style_allow_blank_lines_between_consecutive_braces_experimental = true:warning
|
||||
csharp_style_allow_embedded_statements_on_same_line_experimental = true
|
||||
|
||||
#### C# Formatting Rules ####
|
||||
|
||||
# New line preferences
|
||||
csharp_new_line_before_catch = true
|
||||
csharp_new_line_before_else = true
|
||||
csharp_new_line_before_finally = true
|
||||
csharp_new_line_before_members_in_anonymous_types = true
|
||||
csharp_new_line_before_members_in_object_initializers = true
|
||||
csharp_new_line_before_open_brace = all
|
||||
csharp_new_line_between_query_expression_clauses = true
|
||||
|
||||
# Indentation preferences
|
||||
csharp_indent_block_contents = true
|
||||
csharp_indent_braces = false
|
||||
csharp_indent_case_contents = true
|
||||
csharp_indent_case_contents_when_block = true
|
||||
csharp_indent_labels = one_less_than_current
|
||||
csharp_indent_switch_labels = true
|
||||
|
||||
# Space preferences
|
||||
csharp_space_after_cast = false
|
||||
csharp_space_after_colon_in_inheritance_clause = true
|
||||
csharp_space_after_comma = true
|
||||
csharp_space_after_dot = false
|
||||
csharp_space_after_keywords_in_control_flow_statements = true
|
||||
csharp_space_after_semicolon_in_for_statement = true
|
||||
csharp_space_around_binary_operators = before_and_after
|
||||
csharp_space_around_declaration_statements = false
|
||||
csharp_space_before_colon_in_inheritance_clause = true
|
||||
csharp_space_before_comma = false
|
||||
csharp_space_before_dot = false
|
||||
csharp_space_before_open_square_brackets = false
|
||||
csharp_space_before_semicolon_in_for_statement = false
|
||||
csharp_space_between_empty_square_brackets = false
|
||||
csharp_space_between_method_call_empty_parameter_list_parentheses = false
|
||||
csharp_space_between_method_call_name_and_opening_parenthesis = false
|
||||
csharp_space_between_method_call_parameter_list_parentheses = false
|
||||
csharp_space_between_method_declaration_empty_parameter_list_parentheses = false
|
||||
csharp_space_between_method_declaration_name_and_open_parenthesis = false
|
||||
csharp_space_between_method_declaration_parameter_list_parentheses = false
|
||||
csharp_space_between_parentheses = false
|
||||
csharp_space_between_square_brackets = false
|
||||
|
||||
# Wrapping preferences
|
||||
csharp_preserve_single_line_blocks = true
|
||||
csharp_preserve_single_line_statements = true
|
||||
|
||||
#### Naming styles ####
|
||||
|
||||
# Naming rules
|
||||
|
||||
dotnet_naming_rule.interface_should_be_begins_with_i.severity = suggestion
|
||||
dotnet_naming_rule.interface_should_be_begins_with_i.symbols = interface
|
||||
dotnet_naming_rule.interface_should_be_begins_with_i.style = begins_with_i
|
||||
|
||||
dotnet_naming_rule.types_should_be_pascal_case.severity = suggestion
|
||||
dotnet_naming_rule.types_should_be_pascal_case.symbols = types
|
||||
dotnet_naming_rule.types_should_be_pascal_case.style = pascal_case
|
||||
|
||||
dotnet_naming_rule.non_field_members_should_be_pascal_case.severity = suggestion
|
||||
dotnet_naming_rule.non_field_members_should_be_pascal_case.symbols = non_field_members
|
||||
dotnet_naming_rule.non_field_members_should_be_pascal_case.style = pascal_case
|
||||
|
||||
dotnet_naming_rule.private_or_internal_field_should_be_begins_with_underscore.severity = suggestion
|
||||
dotnet_naming_rule.private_or_internal_field_should_be_begins_with_underscore.symbols = private_or_internal_field
|
||||
dotnet_naming_rule.private_or_internal_field_should_be_begins_with_underscore.style = begins_with_underscore
|
||||
|
||||
# Symbol specifications
|
||||
|
||||
dotnet_naming_symbols.interface.applicable_kinds = interface
|
||||
dotnet_naming_symbols.interface.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
|
||||
dotnet_naming_symbols.interface.required_modifiers =
|
||||
|
||||
dotnet_naming_symbols.private_or_internal_field.applicable_kinds = field
|
||||
dotnet_naming_symbols.private_or_internal_field.applicable_accessibilities = internal, private, private_protected
|
||||
dotnet_naming_symbols.private_or_internal_field.required_modifiers =
|
||||
|
||||
dotnet_naming_symbols.types.applicable_kinds = class, struct, interface, enum
|
||||
dotnet_naming_symbols.types.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
|
||||
dotnet_naming_symbols.types.required_modifiers =
|
||||
|
||||
dotnet_naming_symbols.non_field_members.applicable_kinds = property, event, method
|
||||
dotnet_naming_symbols.non_field_members.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
|
||||
dotnet_naming_symbols.non_field_members.required_modifiers =
|
||||
|
||||
# Naming styles
|
||||
|
||||
dotnet_naming_style.pascal_case.required_prefix =
|
||||
dotnet_naming_style.pascal_case.required_suffix =
|
||||
dotnet_naming_style.pascal_case.word_separator =
|
||||
dotnet_naming_style.pascal_case.capitalization = pascal_case
|
||||
|
||||
dotnet_naming_style.begins_with_i.required_prefix = I
|
||||
dotnet_naming_style.begins_with_i.required_suffix =
|
||||
dotnet_naming_style.begins_with_i.word_separator =
|
||||
dotnet_naming_style.begins_with_i.capitalization = pascal_case
|
||||
|
||||
dotnet_naming_style.begins_with_underscore.required_prefix = _
|
||||
dotnet_naming_style.begins_with_underscore.required_suffix =
|
||||
dotnet_naming_style.begins_with_underscore.word_separator =
|
||||
dotnet_naming_style.begins_with_underscore.capitalization = pascal_case
|
174
SaneTsv.cs
Normal file
174
SaneTsv.cs
Normal file
@ -0,0 +1,174 @@
|
||||
using System.Text;
|
||||
|
||||
namespace NathanMcRae;
|
||||
|
||||
/// <summary>
/// Sane Tab-Separated Values.
/// <para>
/// A document is a sequence of lines separated by a raw line feed (0x0A); each line is a
/// sequence of fields separated by a raw tab (0x09). Inside a field the escapes <c>\n</c>,
/// <c>\t</c> and <c>\\</c> stand for a line feed, a tab and a backslash respectively. The
/// first line is the header and supplies the column names; every later line is a record and
/// must have exactly as many fields as the header.
/// </para>
/// </summary>
public class SaneTsv
{
  // TODO: We need to be able to update all these in tandem somehow

  /// <summary>Column names in header order.</summary>
  public string[] ColumnNames { get; protected set; }

  /// <summary>Column name mapped to that column's values, one entry per record, in record order.</summary>
  public Dictionary<string, List<string>> Columns { get; protected set; }

  /// <summary>The parsed records (every line after the header), in input order.</summary>
  public List<SaneTsvRecord> Records { get; protected set; }

  /// <summary>
  /// Parses a complete Sane TSV document from its raw bytes.
  /// </summary>
  /// <remarks>
  /// The last line is terminated by the end of the input, not by a line feed. A trailing line
  /// feed therefore starts one more line consisting of a single empty field, which is rejected
  /// unless the document has exactly one column. Consistently, a document with no line feed at
  /// all (including empty input) is treated as a header with no records.
  /// </remarks>
  /// <param name="inputBuffer">The UTF-8 encoded document.</param>
  /// <returns>The parsed document, with <see cref="ColumnNames"/>, <see cref="Columns"/> and <see cref="Records"/> populated.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="inputBuffer"/> is null.</exception>
  /// <exception cref="FormatException">
  /// The input contains an unknown or truncated escape sequence, a field that is not valid
  /// UTF-8, a duplicate column name, or a record whose field count differs from the header's.
  /// </exception>
  public static SaneTsv Parse(byte[] inputBuffer)
  {
    if (inputBuffer == null)
    {
      throw new ArgumentNullException(nameof(inputBuffer));
    }

    var parsed = new SaneTsv();
    parsed.Columns = new Dictionary<string, List<string>>();
    parsed.ColumnNames = new string[] { };
    parsed.Records = new List<SaneTsvRecord>();

    // Encoding.UTF8 silently replaces malformed sequences with U+FFFD, so it can never report
    // invalid input. This instance throws DecoderFallbackException instead.
    var strictUtf8 = new UTF8Encoding(false, true);

    var fieldBytes = new List<byte>();
    var fields = new List<string>();

    // 1-based physical line number; line 1 is the header. Only raw line feeds advance it,
    // escaped "\n" sequences do not.
    int line = 1;

    for (int i = 0; i < inputBuffer.Length; i++)
    {
      byte current = inputBuffer[i];

      if (current == '\\')
      {
        if (i + 1 == inputBuffer.Length)
        {
          throw new FormatException("Found '\\' at end of input");
        }

        switch (inputBuffer[i + 1])
        {
          case (byte)'n':
            fieldBytes.Add((byte)'\n');
            break;
          case (byte)'\\':
            fieldBytes.Add((byte)'\\');
            break;
          case (byte)'t':
            fieldBytes.Add((byte)'\t');
            break;
          default:
            throw new FormatException($"Expected 'n', 't', or '\\' after '\\' at {i}");
        }

        // Skip the escaped character that was just consumed.
        i++;
      }
      else if (current == '\t')
      {
        // end of field
        fields.Add(DecodeField(strictUtf8, fieldBytes, fields.Count, line));
        fieldBytes.Clear();
      }
      else if (current == '\n')
      {
        // end of field and end of line
        fields.Add(DecodeField(strictUtf8, fieldBytes, fields.Count, line));
        fieldBytes.Clear();

        parsed.AddLine(fields, line);
        fields.Clear();
        line++;
      }
      else
      {
        fieldBytes.Add(current);
      }
    }

    // The final line has no terminating line feed; it ends at end of input.
    fields.Add(DecodeField(strictUtf8, fieldBytes, fields.Count, line));
    parsed.AddLine(fields, line);

    return parsed;
  }

  /// <summary>Decodes one field's bytes as strict UTF-8, reporting its position on failure.</summary>
  private static string DecodeField(Encoding strictUtf8, List<byte> fieldBytes, int fieldIndex, int line)
  {
    try
    {
      return strictUtf8.GetString(fieldBytes.ToArray());
    }
    catch (DecoderFallbackException e)
    {
      throw new FormatException($"Field {fieldIndex} on line {line} is not valid UTF-8", e);
    }
  }

  /// <summary>Stores a completed line: line 1 defines the columns, any later line becomes a record.</summary>
  private void AddLine(List<string> fields, int line)
  {
    if (line == 1)
    {
      // This is the header
      ColumnNames = new string[fields.Count];

      for (int j = 0; j < fields.Count; j++)
      {
        string columnName = fields[j];

        if (!Columns.TryAdd(columnName, new List<string>()))
        {
          throw new FormatException($"Column name {columnName} is not unique");
        }

        ColumnNames[j] = columnName;
      }

      return;
    }

    if (ColumnNames.Length != fields.Count)
    {
      throw new FormatException($"Expected {ColumnNames.Length} fields on line {line}, but found {fields.Count}");
    }

    for (int j = 0; j < fields.Count; j++)
    {
      Columns[ColumnNames[j]].Add(fields[j]);
    }

    // ToArray copies, so the caller may clear and reuse the list for the next line.
    Records.Add(new SaneTsvRecord(this, fields.ToArray()));
  }

  /// <summary>Gets the record at zero-based index <paramref name="i"/> (the header is not a record).</summary>
  public SaneTsvRecord this[int i] => Records[i];

  /// <summary>
  /// One record (non-header line) of a <see cref="SaneTsv"/> document.
  /// </summary>
  public class SaneTsvRecord
  {
    /// <summary>The document this record belongs to; its column names are used for lookup by name.</summary>
    public SaneTsv Parent { get; }

    /// <summary>The record's field values, in column order.</summary>
    public string[] Fields { get; }

    /// <summary>Gets the value of this record's field in the column named <paramref name="columnName"/>.</summary>
    /// <exception cref="IndexOutOfRangeException">The parent document has no column with that name.</exception>
    public string this[string columnName] => Fields[Array.IndexOf(Parent.ColumnNames, columnName)];

    public SaneTsvRecord(SaneTsv parent, string[] fields)
    {
      Parent = parent;
      Fields = fields;
    }
  }
}
|
17
SaneTsv.csproj
Normal file
17
SaneTsv.csproj
Normal file
@ -0,0 +1,17 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>disable</Nullable>
|
||||
<GenerateAssemblyInfo>false</GenerateAssemblyInfo>
|
||||
<GenerateTargetFrameworkAttribute>false</GenerateTargetFrameworkAttribute>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<Compile Remove="SaneTsvTest\**" />
|
||||
<EmbeddedResource Remove="SaneTsvTest\**" />
|
||||
<None Remove="SaneTsvTest\**" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
31
SaneTsv.sln
Normal file
31
SaneTsv.sln
Normal file
@ -0,0 +1,31 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 17
|
||||
VisualStudioVersion = 17.7.34024.191
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SaneTsv", "SaneTsv.csproj", "{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SaneTsvTest", "SaneTsvTest\SaneTsvTest.csproj", "{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{DBC5CE44-361C-4387-B1E2-409C1CAE2B4C}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{43B1B09C-19BD-4B45-B41B-7C00DB3F7E9C}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {FF70BD8A-F4D7-4A49-9DBC-4009465D5706}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
8
SaneTsvTest/Program.cs
Normal file
8
SaneTsvTest/Program.cs
Normal file
@ -0,0 +1,8 @@
|
||||
using NathanMcRae;
|
||||
using System.Text;
|
||||
|
||||
// Smoke run for the parser. The sample has a three-column header followed by two records and
// exercises every escape: "\n" (in a column name and in values), "\\" and "\t". The last
// line deliberately has no trailing newline.
byte[] sampleBytes = Encoding.UTF8.GetBytes(
  "column1\tcolumn2\tcolumnthree\\nyep\nvalue1\tvalue\\\\twoo\tvaluetrhee\nthis\\nis\\na\\nvalue\tnother\tno\\ther");

// Parsing throws on malformed input, so reaching the final line means the sample was accepted.
SaneTsv document = SaneTsv.Parse(sampleBytes);

Console.WriteLine("Hello, World!");
|
14
SaneTsvTest/SaneTsvTest.csproj
Normal file
14
SaneTsvTest/SaneTsvTest.csproj
Normal file
@ -0,0 +1,14 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\SaneTsv.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
Loading…
Reference in New Issue
Block a user