Add parseCharacterRanges function to Strings.php and tests

Parses character ranges like A-Z into individual characters and returns as an array
This commit is contained in:
Clemens Schwaighofer
2026-01-14 10:36:09 +09:00
parent a3cf5f45f9
commit cd65604073
6 changed files with 147 additions and 3 deletions

View File

@@ -6,7 +6,7 @@ function error() {
usage() {
cat <<EOF
Usage: $(basename "${BASH_SOURCE[0]}") [-h] [-p VERSION]
Usage: $(basename "${BASH_SOURCE[0]}") [-h | --help] [-p | --php VERSION] [-c | --composer]
Runs phan static analyzer.

View File

@@ -6,7 +6,7 @@ function error() {
usage() {
cat <<EOF
Usage: $(basename "${BASH_SOURCE[0]}") [-h] [-p VERSION]
Usage: $(basename "${BASH_SOURCE[0]}") [-h] [-h | --help] [-p | --php VERSION] [-c | --composer]
Runs phan static analyzer.

View File

@@ -6,7 +6,7 @@ function error() {
usage() {
cat <<EOF
Usage: $(basename "${BASH_SOURCE[0]}") [-h] [-t] [-v] [-p VERSION]
Usage: $(basename "${BASH_SOURCE[0]}") [-h | --help] [-p | --php VERSION] [-c | --composer] [-t | --testdox] [-v | --verbose]
Runs all the PHP unit tests.

View File

@@ -698,6 +698,84 @@ final class CoreLibsConvertStringsTest extends TestCase
'Cannot match last preg error string'
);
}
/**
* Undocumented function
*
* @return array
*/
public function parseCharacterRangesProvider(): array
{
return [
'simple a-z' => [
['a-z'],
implode('', range('a', 'z')),
null,
],
'simple A-Z' => [
['A-Z'],
implode('', range('A', 'Z')),
null,
],
'simple 0-9' => [
['0-9'],
implode('', range('0', '9')),
null,
],
'mixed ranges' => [
['a-c', 'X-Z', '3-5'],
'abcXYZ345',
null,
],
'reverse ranges' => [
['z-a'],
'abcdefghijklmnopqrstuvwxyz',
null,
],
'overlapping ranges' => [
['a-f', 'd-j'],
'abcdefghij',
null,
],
'mixed valid and overlap ranges' => [
['a-f', 'z-a', '0-3'],
'abcdefghijklmnopqrstuvwxyz0123',
null,
],
'invalid ranges' => [
['a-あ', 'A-あ', '0-あ'],
'',
\InvalidArgumentException::class,
],
];
}
/**
* Undocumented function
*
* @covers ::parseCharacterRanges
* @dataProvider parseCharacterRangesProvider
* @testdox parseCharacterRanges $input to $expected [$_dataName]
*
* @param array $input
* @param string $expected
* @param string|null $expected_exception
* @return void
*/
public function testParseCharacterRanges(
array $input,
string $expected,
?string $expected_exception
): void {
if ($expected_exception !== null) {
$this->expectException($expected_exception);
}
$this->assertEquals(
$expected,
implode('', \CoreLibs\Convert\Strings::parseCharacterRanges(implode('', $input)))
);
}
}
// __END__

View File

@@ -140,6 +140,27 @@ $preg_error = Strings::isValidRegex($regex_string);
print "[B] LAST PREGE ERROR: " . preg_last_error() . " -> "
. Strings::getLastRegexErrorString() . " -> " . preg_last_error_msg() . "<br>";
$base_strings = [
'A-Z',
'a-z',
'A-Za-z',
'A-Df-g',
'A-D0-9',
'D-A7-0',
'A-FB-G',
'0-9',
'あ-お',
'ア-オ',
];
foreach ($base_strings as $string) {
try {
$parsed = Strings::parseCharacterRanges($string);
print "Parsed ranges for '$string': " . DgS::printAr($parsed) . "<br>";
} catch (\InvalidArgumentException $e) {
print "Error parsing ranges for '$string': " . $e->getMessage() . "<br>";
}
}
print "</body></html>";
// __END__

View File

@@ -268,6 +268,51 @@ class Strings
));
}
/**
* Split up character ranges in format A-Z, a-z, 0-9
*
* @param string $input
* @return string[]
*/
public static function parseCharacterRanges(string $input): array
{
// if not alphanumeric, throw value error
if (!preg_match("/^[A-Za-z0-9\-\s]+$/u", $input)) {
throw new \InvalidArgumentException(
"The input string contains invalid characters, "
. "only alphanumeric, dash (-), space and 'or' are allowed: "
. $input
);
}
// Remove all spaces
$input = str_replace(' ', '', $input);
$result = [];
// Find all patterns like "A-Z" (character-dash-character)
preg_match_all('/(.)-(.)/u', $input, $matches, PREG_SET_ORDER);
foreach ($matches as $match) {
$start = $match[1];
$end = $match[2];
// Get ASCII/Unicode values
$startOrd = ord($start[0]);
$endOrd = ord($end[0]);
// make sure start is before end
if ($startOrd > $endOrd) {
[$startOrd, $endOrd] = [$endOrd, $startOrd];
}
// Generate range of characters
for ($i = $startOrd; $i <= $endOrd; $i++) {
$char = chr($i);
if (!in_array($char, $result)) {
$result[] = $char;
}
}
}
// make the result unique
$result = array_unique($result);
return $result;
}
/**
* Check if a regex is valid. Does not return the detail regex parser error
*