require 'strscan'

# Chunk patterns consumed by #grouped_compare. Together they partition every
# character: ASCII digits, ASCII letters, and everything else.
ALL_NUM = /\d+/
ALL_ALPHA = /[A-Za-z]+/
NON_ALPHANUM = /[^A-Za-z0-9]+/

# Three-way "natural order" comparison used by the alphanum sort helpers of the
# Enumerable extension (declared under `private` there).
#
# Both inputs are consumed left to right in chunks of digits, letters or
# other characters. The first pair of chunks that differs decides the result:
#
# * chunks of different kinds order as: non-alphanumeric < digits < letters
#   (e.g. "b{c" and "b\u0518b" both sort before "b3a");
# * two digit chunks compare by integer value ("3" < "004"); when the values
#   are equal the raw text breaks the tie ("03" < "3");
# * two letter chunks, or two non-alphanumeric chunks, compare as strings;
# * an input that runs out first sorts before the longer one ("a" < "a-").
#
# @param a [#to_s] left operand; coerced with +to_s+ so nil and symbols work
# @param b [#to_s] right operand; coerced with +to_s+ so nil and symbols work
# @return [Integer] -1, 0 or 1, following the <=> convention
def grouped_compare(a, b)
  a_scanner = StringScanner.new(a.to_s)
  b_scanner = StringScanner.new(b.to_s)

  # Every pass either returns or consumes exactly one chunk from each scanner,
  # so the loop always terminates.
  loop do
    # Handle exhaustion symmetrically: compare(x, y) must be the negation of
    # compare(y, x), and identical inputs must compare as 0.
    return(b_scanner.eos? ? 0 : -1) if a_scanner.eos?
    return 1 if b_scanner.eos?

    ret =
      if (a_chunk = a_scanner.scan(ALL_NUM))
        if (b_chunk = b_scanner.scan(ALL_NUM))
          by_value = a_chunk.to_i <=> b_chunk.to_i
          # Same integer value (03 vs 3): fall back to the textual order.
          by_value.zero? ? a_chunk <=> b_chunk : by_value
        elsif b_scanner.scan(ALL_ALPHA)
          -1 # digits sort before letters
        else
          1 # b starts with NON_ALPHANUM, which sorts first
        end
      elsif (a_chunk = a_scanner.scan(ALL_ALPHA))
        if (b_chunk = b_scanner.scan(ALL_ALPHA))
          a_chunk <=> b_chunk
        else
          1 # b starts with ALL_NUM or NON_ALPHANUM, both sort before letters
        end
      else
        a_chunk = a_scanner.scan(NON_ALPHANUM)
        if (b_chunk = b_scanner.scan(NON_ALPHANUM))
          a_chunk <=> b_chunk
        else
          -1 # b starts with digits or letters, which sort after NON_ALPHANUM
        end
      end

    return ret if ret != 0
  end
end