From 6a04eaaa355b322c2473b3eec8286895064933b8 Mon Sep 17 00:00:00 2001
From: Henk <henk.van.der.veen@gmail.com>
Date: Mon, 17 Jan 2022 11:30:49 +0100
Subject: [PATCH] Make sort_by_alphanum consistent for non-alphanumeric input as well

---
 .../core_ext/enumerable/sort_by_alphanum.rb   | 52 ++++++++++++-------
 .../enumerable/sort_by_alphanum_spec.rb       |  5 ++
 2 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/lib/roqua/core_ext/enumerable/sort_by_alphanum.rb b/lib/roqua/core_ext/enumerable/sort_by_alphanum.rb
index 2f1ee82..e25a85f 100644
--- a/lib/roqua/core_ext/enumerable/sort_by_alphanum.rb
+++ b/lib/roqua/core_ext/enumerable/sort_by_alphanum.rb
@@ -21,29 +21,45 @@ module Enumerable
 
   private
 
-  def grouped_compare(a, b)
-    loop {
-      a_chunk, a = extract_alpha_or_number_group(a)
-      b_chunk, b = extract_alpha_or_number_group(b)
+  ALL_NUM = /\d+/ # one or more digits
+  ALL_ALPHA = /[A-Za-z]+/ # one or more ASCII letters
+  NON_ALPHANUM = /[^A-Za-z0-9]+/ # one or more of anything else, e.g. punctuation or non-ASCII characters
 
-      ret = if a_chunk =~ /\d/ && b_chunk =~ /\d/ && a_chunk.to_i != b_chunk.to_i
+  def grouped_compare(a, b)
+    a_scanner = StringScanner.new(a)
+    b_scanner = StringScanner.new(b)
+    # Chunk order: end < non-alphanum < num < alpha. Each pass scans one chunk off both scanners or returns non-zero.
+    loop do
+      ret = \
+        if a_scanner.eos?
+          -1 # a is a prefix of b; also returned when both are exhausted (equal strings)
+        elsif (a_chunk = a_scanner.scan(ALL_NUM))
+          if (b_chunk = b_scanner.scan(ALL_NUM))
+            if a_chunk.to_i != b_chunk.to_i
               a_chunk.to_i <=> b_chunk.to_i
-            else
+            else # 03 vs 3
               a_chunk <=> b_chunk
             end
-
-      return -1 if a_chunk == ''
+          elsif b_scanner.scan(ALL_ALPHA)
+            -1
+          else # NON_ALPHANUM or end of b
+            1
+          end
+        elsif (a_chunk = a_scanner.scan(ALL_ALPHA))
+          if (b_chunk = b_scanner.scan(ALL_ALPHA))
+            a_chunk <=> b_chunk
+          else # ALL_NUM, NON_ALPHANUM or end of b
+            1
+          end
+        else # NON_ALPHANUM
+          a_chunk = a_scanner.scan(NON_ALPHANUM)
+          if (b_chunk = b_scanner.scan(NON_ALPHANUM))
+            a_chunk <=> b_chunk
+          else # ALL_NUM/ALL_ALPHA sort above non-alphanum; an exhausted b sorts below
+            b_scanner.eos? ? 1 : -1
+          end
+        end
       return ret if ret != 0
-    }
-  end
-
-  def extract_alpha_or_number_group(item)
-    matchdata = /([A-Za-z]+|[\d]+)/.match(item)
-
-    if matchdata.nil?
-      ["", ""]
-    else
-      [matchdata[0], item = item[matchdata.offset(0)[1] .. -1]]
     end
   end
 end
diff --git a/spec/roqua/core_ext/enumerable/sort_by_alphanum_spec.rb b/spec/roqua/core_ext/enumerable/sort_by_alphanum_spec.rb
index db112e7..8de7e5c 100644
--- a/spec/roqua/core_ext/enumerable/sort_by_alphanum_spec.rb
+++ b/spec/roqua/core_ext/enumerable/sort_by_alphanum_spec.rb
@@ -11,6 +11,11 @@ describe Enumerable do
       expect(input.sort_by_alphanum(&:reverse)).to eq ["004some10thing", "004some11thing", "3another"]
     end
 
+    it 'treats non-alphanum as lower than alpha and num' do
+      input = %w[b3a b{c bԘb]  # '{' sorts above the ASCII letters in byte order; Ԙ is multi-byte in UTF-8
+      expect(input.sort_by_alphanum).to eq %w[b{c bԘb b3a]
+    end
+
     it 'compares number chunks as integers' do
       expect(%w(004 3).sort_by_alphanum).to eq %w(3 004)
     end
-- 
GitLab