From cd65604073a2ab3b4175671f35a20fada7bdc6e1 Mon Sep 17 00:00:00 2001 From: Clemens Schwaighofer Date: Wed, 14 Jan 2026 10:36:09 +0900 Subject: [PATCH] Add parseCharacterRanges function to Strings.php and tests Parses character ranges like A-Z into individual characters and returns as an array --- 4dev/checking/phan.sh | 2 +- 4dev/checking/phpstan.sh | 2 +- 4dev/checking/phpunit.sh | 2 +- .../Convert/CoreLibsConvertStringsTest.php | 78 +++++++++++++++++++ www/admin/class_test.strings.php | 21 +++++ www/lib/CoreLibs/Convert/Strings.php | 45 +++++++++++ 6 files changed, 147 insertions(+), 3 deletions(-) diff --git a/4dev/checking/phan.sh b/4dev/checking/phan.sh index e4833205..06c6bd6e 100755 --- a/4dev/checking/phan.sh +++ b/4dev/checking/phan.sh @@ -6,7 +6,7 @@ function error() { usage() { cat < [ + ['a-z'], + implode('', range('a', 'z')), + null, + ], + 'simple A-Z' => [ + ['A-Z'], + implode('', range('A', 'Z')), + null, + ], + 'simple 0-9' => [ + ['0-9'], + implode('', range('0', '9')), + null, + ], + 'mixed ranges' => [ + ['a-c', 'X-Z', '3-5'], + 'abcXYZ345', + null, + ], + 'reverse ranges' => [ + ['z-a'], + 'abcdefghijklmnopqrstuvwxyz', + null, + ], + 'overlapping ranges' => [ + ['a-f', 'd-j'], + 'abcdefghij', + null, + ], + 'mixed valid and overlap ranges' => [ + ['a-f', 'z-a', '0-3'], + 'abcdefghijklmnopqrstuvwxyz0123', + null, + ], + 'invalid ranges' => [ + ['a-あ', 'A-あ', '0-あ'], + '', + \InvalidArgumentException::class, + ], + ]; + } + + /** + * Undocumented function + * + * @covers ::parseCharacterRanges + * @dataProvider parseCharacterRangesProvider + * @testdox parseCharacterRanges $input to $expected [$_dataName] + * + * @param array $input + * @param string $expected + * @param string|null $expected_exception + * @return void + */ + public function testParseCharacterRanges( + array $input, + string $expected, + ?string $expected_exception + ): void { + if ($expected_exception !== null) { + $this->expectException($expected_exception); + } + $this->assertEquals( + $expected, + implode('', \CoreLibs\Convert\Strings::parseCharacterRanges(implode('', $input))) + ); + } } // __END__ diff --git a/www/admin/class_test.strings.php b/www/admin/class_test.strings.php index 95e4457a..95fb3f69 100644 --- a/www/admin/class_test.strings.php +++ b/www/admin/class_test.strings.php @@ -140,6 +140,27 @@ $preg_error = Strings::isValidRegex($regex_string); print "[B] LAST PREGE ERROR: " . preg_last_error() . " -> " . Strings::getLastRegexErrorString() . " -> " . preg_last_error_msg() . "
"; +$base_strings = [ + 'A-Z', + 'a-z', + 'A-Za-z', + 'A-Df-g', + 'A-D0-9', + 'D-A7-0', + 'A-FB-G', + '0-9', + 'あ-お', + 'ア-オ', +]; +foreach ($base_strings as $string) { + try { + $parsed = Strings::parseCharacterRanges($string); + print "Parsed ranges for '$string': " . DgS::printAr($parsed) . "
"; + } catch (\InvalidArgumentException $e) { + print "Error parsing ranges for '$string': " . $e->getMessage() . "
"; + } +} + print ""; // __END__ diff --git a/www/lib/CoreLibs/Convert/Strings.php b/www/lib/CoreLibs/Convert/Strings.php index 30d4ad44..dd30b470 100644 --- a/www/lib/CoreLibs/Convert/Strings.php +++ b/www/lib/CoreLibs/Convert/Strings.php @@ -268,6 +268,51 @@ class Strings )); } + /** + * Split up character ranges in format A-Z, a-z, 0-9 + * + * @param string $input + * @return string[] + */ + public static function parseCharacterRanges(string $input): array + { + // if not alphanumeric, throw value error + if (!preg_match("/^[A-Za-z0-9\-\s]+$/u", $input)) { + throw new \InvalidArgumentException( + "The input string contains invalid characters, " + . "only alphanumeric, dash (-), space and 'or' are allowed: " + . $input + ); + } + // Remove all spaces + $input = str_replace(' ', '', $input); + $result = []; + // Find all patterns like "A-Z" (character-dash-character) + preg_match_all('/(.)-(.)/u', $input, $matches, PREG_SET_ORDER); + foreach ($matches as $match) { + $start = $match[1]; + $end = $match[2]; + // Get ASCII/Unicode values + $startOrd = ord($start[0]); + $endOrd = ord($end[0]); + // make sure start is before end + if ($startOrd > $endOrd) { + [$startOrd, $endOrd] = [$endOrd, $startOrd]; + } + + // Generate range of characters + for ($i = $startOrd; $i <= $endOrd; $i++) { + $char = chr($i); + if (!in_array($char, $result)) { + $result[] = $char; + } + } + } + // make the result unique + $result = array_unique($result); + return $result; + } + /** * Check if a regex is valid. Does not return the detail regex parser error *