commit 4dd3a4878ebae90cd1a5d1a8725388f859c80cf9 Author: Nathan McRae Date: Tue Feb 13 18:56:20 2024 -0800 Add initial files diff --git a/Stsv/.editorconfig b/Stsv/.editorconfig new file mode 100644 index 0000000..2e8263a --- /dev/null +++ b/Stsv/.editorconfig @@ -0,0 +1,243 @@ +# Remove the line below if you want to inherit .editorconfig settings from higher directories +root = true + +# C# files +[*.cs] + +#### Core EditorConfig Options #### + +# Indentation and spacing +indent_size = 2 +indent_style = space +tab_width = 2 + +# New line preferences +end_of_line = crlf +insert_final_newline = false + +#### .NET Coding Conventions #### + +# Organize usings +dotnet_separate_import_directive_groups = false +dotnet_sort_system_directives_first = false +file_header_template = unset + +# this. and Me. preferences +dotnet_style_qualification_for_event = false +dotnet_style_qualification_for_field = false +dotnet_style_qualification_for_method = false +dotnet_style_qualification_for_property = false + +# Language keywords vs BCL types preferences +dotnet_style_predefined_type_for_locals_parameters_members = true +dotnet_style_predefined_type_for_member_access = true + +# Parentheses preferences +dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity +dotnet_style_parentheses_in_other_binary_operators = always_for_clarity +dotnet_style_parentheses_in_other_operators = never_if_unnecessary +dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity + +# Modifier preferences +dotnet_style_require_accessibility_modifiers = for_non_interface_members + +# Expression-level preferences +dotnet_style_coalesce_expression = true +dotnet_style_collection_initializer = true +dotnet_style_explicit_tuple_names = true +dotnet_style_namespace_match_folder = true +dotnet_style_null_propagation = true +dotnet_style_object_initializer = true +dotnet_style_operator_placement_when_wrapping = beginning_of_line +dotnet_style_prefer_auto_properties = true 
+dotnet_style_prefer_compound_assignment = true +dotnet_style_prefer_conditional_expression_over_assignment = true +dotnet_style_prefer_conditional_expression_over_return = true +dotnet_style_prefer_foreach_explicit_cast_in_source = when_strongly_typed +dotnet_style_prefer_inferred_anonymous_type_member_names = true +dotnet_style_prefer_inferred_tuple_names = true +dotnet_style_prefer_is_null_check_over_reference_equality_method = true +dotnet_style_prefer_simplified_boolean_expressions = true +dotnet_style_prefer_simplified_interpolation = true + +# Field preferences +dotnet_style_readonly_field = true + +# Parameter preferences +dotnet_code_quality_unused_parameters = all + +# Suppression preferences +dotnet_remove_unnecessary_suppression_exclusions = none + +# New line preferences +dotnet_style_allow_multiple_blank_lines_experimental = true:warning +dotnet_style_allow_statement_immediately_after_block_experimental = true + +#### C# Coding Conventions #### + +# var preferences +csharp_style_var_elsewhere = false +csharp_style_var_for_built_in_types = false +csharp_style_var_when_type_is_apparent = false + +# Expression-bodied members +csharp_style_expression_bodied_accessors = true +csharp_style_expression_bodied_constructors = false +csharp_style_expression_bodied_indexers = true +csharp_style_expression_bodied_lambdas = true +csharp_style_expression_bodied_local_functions = false +csharp_style_expression_bodied_methods = false +csharp_style_expression_bodied_operators = false +csharp_style_expression_bodied_properties = true + +# Pattern matching preferences +csharp_style_pattern_matching_over_as_with_null_check = true +csharp_style_pattern_matching_over_is_with_cast_check = true +csharp_style_prefer_extended_property_pattern = true +csharp_style_prefer_not_pattern = true +csharp_style_prefer_pattern_matching = true +csharp_style_prefer_switch_expression = true + +# Null-checking preferences +csharp_style_conditional_delegate_call = true + +# Modifier 
preferences +csharp_prefer_static_local_function = true +csharp_preferred_modifier_order = public,private,protected,internal,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,volatile,async +csharp_style_prefer_readonly_struct = true +csharp_style_prefer_readonly_struct_member = true + +# Code-block preferences +csharp_prefer_braces = true +csharp_prefer_simple_using_statement = true +csharp_style_namespace_declarations = block_scoped +csharp_style_prefer_method_group_conversion = true +csharp_style_prefer_primary_constructors = true +csharp_style_prefer_top_level_statements = true + +# Expression-level preferences +csharp_prefer_simple_default_expression = true +csharp_style_deconstructed_variable_declaration = true +csharp_style_implicit_object_creation_when_type_is_apparent = true +csharp_style_inlined_variable_declaration = true +csharp_style_prefer_index_operator = true +csharp_style_prefer_local_over_anonymous_function = true +csharp_style_prefer_null_check_over_type_check = true +csharp_style_prefer_range_operator = true +csharp_style_prefer_tuple_swap = true +csharp_style_prefer_utf8_string_literals = true +csharp_style_throw_expression = true +csharp_style_unused_value_assignment_preference = discard_variable:warning +csharp_style_unused_value_expression_statement_preference = discard_variable + +# 'using' directive preferences +csharp_using_directive_placement = outside_namespace + +# New line preferences +csharp_style_allow_blank_line_after_colon_in_constructor_initializer_experimental = true +csharp_style_allow_blank_line_after_token_in_arrow_expression_clause_experimental = true +csharp_style_allow_blank_line_after_token_in_conditional_expression_experimental = true +csharp_style_allow_blank_lines_between_consecutive_braces_experimental = true:warning +csharp_style_allow_embedded_statements_on_same_line_experimental = true + +#### C# Formatting Rules #### + +# New line preferences +csharp_new_line_before_catch = true 
+csharp_new_line_before_else = true +csharp_new_line_before_finally = true +csharp_new_line_before_members_in_anonymous_types = true +csharp_new_line_before_members_in_object_initializers = true +csharp_new_line_before_open_brace = all +csharp_new_line_between_query_expression_clauses = true + +# Indentation preferences +csharp_indent_block_contents = true +csharp_indent_braces = false +csharp_indent_case_contents = true +csharp_indent_case_contents_when_block = true +csharp_indent_labels = one_less_than_current +csharp_indent_switch_labels = true + +# Space preferences +csharp_space_after_cast = false +csharp_space_after_colon_in_inheritance_clause = true +csharp_space_after_comma = true +csharp_space_after_dot = false +csharp_space_after_keywords_in_control_flow_statements = true +csharp_space_after_semicolon_in_for_statement = true +csharp_space_around_binary_operators = before_and_after +csharp_space_around_declaration_statements = false +csharp_space_before_colon_in_inheritance_clause = true +csharp_space_before_comma = false +csharp_space_before_dot = false +csharp_space_before_open_square_brackets = false +csharp_space_before_semicolon_in_for_statement = false +csharp_space_between_empty_square_brackets = false +csharp_space_between_method_call_empty_parameter_list_parentheses = false +csharp_space_between_method_call_name_and_opening_parenthesis = false +csharp_space_between_method_call_parameter_list_parentheses = false +csharp_space_between_method_declaration_empty_parameter_list_parentheses = false +csharp_space_between_method_declaration_name_and_open_parenthesis = false +csharp_space_between_method_declaration_parameter_list_parentheses = false +csharp_space_between_parentheses = false +csharp_space_between_square_brackets = false + +# Wrapping preferences +csharp_preserve_single_line_blocks = true +csharp_preserve_single_line_statements = true + +#### Naming styles #### + +# Naming rules + +dotnet_naming_rule.interface_should_be_begins_with_i.severity 
= suggestion +dotnet_naming_rule.interface_should_be_begins_with_i.symbols = interface +dotnet_naming_rule.interface_should_be_begins_with_i.style = begins_with_i + +dotnet_naming_rule.types_should_be_pascal_case.severity = suggestion +dotnet_naming_rule.types_should_be_pascal_case.symbols = types +dotnet_naming_rule.types_should_be_pascal_case.style = pascal_case + +dotnet_naming_rule.non_field_members_should_be_pascal_case.severity = suggestion +dotnet_naming_rule.non_field_members_should_be_pascal_case.symbols = non_field_members +dotnet_naming_rule.non_field_members_should_be_pascal_case.style = pascal_case + +dotnet_naming_rule.private_or_internal_field_should_be_begins_with_underscore.severity = suggestion +dotnet_naming_rule.private_or_internal_field_should_be_begins_with_underscore.symbols = private_or_internal_field +dotnet_naming_rule.private_or_internal_field_should_be_begins_with_underscore.style = begins_with_underscore + +# Symbol specifications + +dotnet_naming_symbols.interface.applicable_kinds = interface +dotnet_naming_symbols.interface.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected +dotnet_naming_symbols.interface.required_modifiers = + +dotnet_naming_symbols.private_or_internal_field.applicable_kinds = field +dotnet_naming_symbols.private_or_internal_field.applicable_accessibilities = internal, private, private_protected +dotnet_naming_symbols.private_or_internal_field.required_modifiers = + +dotnet_naming_symbols.types.applicable_kinds = class, struct, interface, enum +dotnet_naming_symbols.types.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected +dotnet_naming_symbols.types.required_modifiers = + +dotnet_naming_symbols.non_field_members.applicable_kinds = property, event, method +dotnet_naming_symbols.non_field_members.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected 
using System.Text;

namespace NathanMcRae;

/// <summary>
/// Sane Tab-Separated Values: a table whose first line holds the column names and whose
/// following lines each hold one record. Fields are separated by a tab byte and lines by a
/// newline byte. Inside a field, a tab, newline or backslash is written as the two-byte
/// escape sequences <c>\t</c>, <c>\n</c> and <c>\\</c>.
/// </summary>
public class Stsv
{
  // TODO: We need to be able to update all these in tandem somehow

  /// <summary>Column names in the order they appear on the header line.</summary>
  public string[] ColumnNames { get; protected set; }

  /// <summary>Maps each column name to that column's values, one entry per record, in record order.</summary>
  public Dictionary<string, List<string>> Columns { get; protected set; }

  /// <summary>The parsed records, in input order. The header line is not a record.</summary>
  public List<StsvRecord> Records { get; protected set; }

  /// <summary>
  /// Parses a complete STSV document.
  /// </summary>
  /// <param name="inputBuffer">The raw bytes of the document; every field must be valid UTF-8.</param>
  /// <returns>
  /// The parsed table. Empty input yields a table with no columns and no records. A newline at
  /// the very end of the input terminates the last line; it does not start an additional empty one.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="inputBuffer"/> is null.</exception>
  /// <exception cref="FormatException">
  /// The input contains an unknown or truncated escape sequence, a field that is not valid UTF-8,
  /// a repeated column name, or a record whose field count differs from the header's.
  /// </exception>
  public static Stsv Parse(byte[] inputBuffer)
  {
    if (inputBuffer is null)
    {
      throw new ArgumentNullException(nameof(inputBuffer));
    }

    const byte Backslash = (byte)'\\';
    const byte Tab = (byte)'\t';
    const byte Newline = (byte)'\n';

    Stsv parsed = new()
    {
      Columns = new Dictionary<string, List<string>>(),
      ColumnNames = Array.Empty<string>(),
      Records = new List<StsvRecord>(),
    };

    // Encoding.UTF8 silently replaces malformed sequences with U+FFFD; a strict decoder is
    // required for invalid input to be reported as an error.
    UTF8Encoding strictUtf8 = new(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true);

    List<byte> fieldBytes = new();
    List<string> fields = new();

    // 1-based number of the physical line being read; line 1 is the header.
    int lineNumber = 1;

    for (int i = 0; i < inputBuffer.Length; i++)
    {
      byte current = inputBuffer[i];
      switch (current)
      {
        case Backslash:
          if (i + 1 == inputBuffer.Length)
          {
            throw new FormatException("Found '\\' at end of input");
          }
          fieldBytes.Add(Unescape(inputBuffer[i + 1], i));
          // Skip the escape character that was just consumed.
          i++;
          break;

        case Tab:
          fields.Add(DecodeField(strictUtf8, fieldBytes, fields.Count, lineNumber));
          fieldBytes.Clear();
          break;

        case Newline:
          fields.Add(DecodeField(strictUtf8, fieldBytes, fields.Count, lineNumber));
          fieldBytes.Clear();
          AddLine(parsed, fields, lineNumber);
          fields.Clear();
          lineNumber++;
          break;

        default:
          fieldBytes.Add(current);
          break;
      }
    }

    // A last line without a terminating newline still has to be committed. When nothing is
    // pending (empty input, or input that ends with a newline) there is no further line.
    bool hasPendingLine = fieldBytes.Count > 0 || fields.Count > 0;
    if (hasPendingLine)
    {
      fields.Add(DecodeField(strictUtf8, fieldBytes, fields.Count, lineNumber));
      AddLine(parsed, fields, lineNumber);
    }

    return parsed;
  }

  /// <summary>Gets the record at the given zero-based index.</summary>
  public StsvRecord this[int i] => Records[i];

  /// <summary>Translates the byte following a backslash into the byte it stands for.</summary>
  private static byte Unescape(byte escaped, int backslashIndex)
  {
    switch (escaped)
    {
      case (byte)'n':
        return (byte)'\n';
      case (byte)'t':
        return (byte)'\t';
      case (byte)'\\':
        return (byte)'\\';
      default:
        throw new FormatException($"Expected 'n', 't', or '\\' after '\\' at {backslashIndex}");
    }
  }

  /// <summary>Decodes one field's bytes as strict UTF-8, reporting its position on failure.</summary>
  private static string DecodeField(UTF8Encoding utf8, List<byte> fieldBytes, int fieldIndex, int lineNumber)
  {
    try
    {
      return utf8.GetString(fieldBytes.ToArray());
    }
    catch (DecoderFallbackException e)
    {
      throw new FormatException($"Field {fieldIndex} on line {lineNumber} is not valid UTF-8", e);
    }
  }

  /// <summary>Stores a completed line: line 1 defines the columns, every later line is a record.</summary>
  private static void AddLine(Stsv table, List<string> fields, int lineNumber)
  {
    if (lineNumber == 1)
    {
      string[] names = fields.ToArray();
      foreach (string name in names)
      {
        if (!table.Columns.TryAdd(name, new List<string>()))
        {
          throw new FormatException($"Column name {name} is not unique");
        }
      }

      table.ColumnNames = names;
      return;
    }

    if (fields.Count != table.ColumnNames.Length)
    {
      throw new FormatException($"Expected {table.ColumnNames.Length} fields on line {lineNumber}, but found {fields.Count}");
    }

    for (int j = 0; j < fields.Count; j++)
    {
      table.Columns[table.ColumnNames[j]].Add(fields[j]);
    }

    // The record keeps its own copy because the caller reuses the field list for the next line.
    table.Records.Add(new StsvRecord(table, fields.ToArray()));
  }

  /// <summary>One record (data line) of an <see cref="Stsv"/> table.</summary>
  public class StsvRecord
  {
    /// <summary>The table this record belongs to; its column names resolve the string indexer.</summary>
    public Stsv Parent { get; }

    /// <summary>The record's field values, in column order.</summary>
    public string[] Fields { get; }

    /// <summary>Gets the value of the field in the named column.</summary>
    /// <exception cref="IndexOutOfRangeException">
    /// <paramref name="columnName"/> is not one of the parent's column names.
    /// </exception>
    public string this[string columnName] => Fields[Array.IndexOf(Parent.ColumnNames, columnName)];

    public StsvRecord(Stsv parent, string[] fields)
    {
      Parent = parent;
      Fields = fields;
    }
  }
}
using NathanMcRae;
using System.Text;

// Sample STSV document: a header line followed by two records. The doubled backslashes in
// this literal are single backslashes in the input, i.e. STSV escape sequences.
string sampleDocument = "column1\tcolumn2\tcolumnthree\\nyep\nvalue1\tvalue\\\\twoo\tvaluetrhee\nthis\\nis\\na\\nvalue\tnother\tno\\ther";
byte[] sampleBytes = Encoding.UTF8.GetBytes(sampleDocument);

// Smoke test: the parsed table is discarded, so only an exception from Parse would be noticed.
_ = Stsv.Parse(sampleBytes);

Console.WriteLine("Hello, World!");