From d4db235e5b3f3864bc4cd346747cccec44fca8b1 Mon Sep 17 00:00:00 2001 From: Clemens Schwaighofer Date: Wed, 4 Jun 2025 14:15:45 +0900 Subject: [PATCH] Add new split string, update split string format, add create string from array list, char in char list, remove duplicates NEW: - remove duplicates in string - check character string list in other character string list - build character string from array (or nested array) values - split string with fixed split length UPDATE: - split string with format * throw exceptions for wrong parameters * remove the "split characters", as they get extracted from the format string --- .../Convert/CoreLibsConvertStringsTest.php | 323 +++++++++++++++--- www/admin/class_test.strings.php | 75 +++- www/lib/CoreLibs/Convert/Strings.php | 148 ++++++-- 3 files changed, 478 insertions(+), 68 deletions(-) diff --git a/4dev/tests/Convert/CoreLibsConvertStringsTest.php b/4dev/tests/Convert/CoreLibsConvertStringsTest.php index c6c92251..1eb0cc29 100644 --- a/4dev/tests/Convert/CoreLibsConvertStringsTest.php +++ b/4dev/tests/Convert/CoreLibsConvertStringsTest.php @@ -24,117 +24,83 @@ final class CoreLibsConvertStringsTest extends TestCase { // 0: input // 1: format - // 2: split characters as string, null for default // 3: expected return [ 'all empty string' => [ '', '', - null, '' ], 'empty input string' => [ '', '2-2', - null, '' ], 'empty format string string' => [ '1234', '', - null, '1234' ], 'string format match' => [ '1234', '2-2', - null, '12-34' ], 'string format trailing match' => [ '1234', '2-2-', - null, '12-34' ], 'string format leading match' => [ '1234', '-2-2', - null, '12-34' ], 'string format double inside match' => [ '1234', '2--2', - null, '12--34', ], 'string format short first' => [ '1', '2-2', - null, '1' ], 'string format match first' => [ '12', '2-2', - null, '12' ], 'string format short second' => [ '123', '2-2', - null, '12-3' ], 'string format too long' => [ '1234567', '2-2', - null, '12-34-567' ], - 'string format 
invalid format string' => [ - '1234', - '2_2', - null, - '1234' - ], 'different split character' => [ '1234', '2_2', - '_', '12_34' ], 'mixed split characters' => [ '123456', '2-2_2', - '-_', '12-34_56' ], 'length mixed' => [ 'ABCD12345568ABC13', '2-4_5-2#4', - '-_#', 'AB-CD12_34556-8A#BC13' ], 'split with split chars in string' => [ '12-34', '2-2', - null, '12--3-4' ], - 'mutltibyte string' => [ - 'あいうえ', - '2-2', - null, - 'あいうえ' - ], - 'mutltibyte split string' => [ - '1234', - '2-2', - null, - '1234' - ], ]; } @@ -143,29 +109,132 @@ final class CoreLibsConvertStringsTest extends TestCase * * @covers ::splitFormatString * @dataProvider splitFormatStringProvider - * @testdox splitFormatString $input with format $format and splitters $split_characters will be $expected [$_dataName] + * @testdox splitFormatString $input with format $format will be $expected [$_dataName] * * @param string $input * @param string $format - * @param string|null $split_characters * @param string $expected * @return void */ public function testSplitFormatString( string $input, string $format, + string $expected + ): void { + $output = \CoreLibs\Convert\Strings::splitFormatString( + $input, + $format, + ); + $this->assertEquals( + $expected, + $output + ); + } + + /** check exceptions */ + public function splitFormatStringExceptionProvider(): array + { + return [ + 'invalid format string' => [ + '1234', + '2あ2', + ], + 'mutltibyte string' => [ + 'あいうえ', + '2-2', + ], + 'mutltibyte split string' => [ + '1234', + '2-2', + ], + ]; + } + + /** + * Undocumented function + * + * @covers ::splitFormatStringFixed + * @dataProvider splitFormatStringExceptionProvider + * @testdox splitFormatString Exception catch checks for $input with $format[$_dataName] + * + * @return void + */ + public function testSplitFormatStringExceptions(string $input, string $format): void + { + // catch exception + $this->expectException(\InvalidArgumentException::class); + 
\CoreLibs\Convert\Strings::splitFormatString($input, $format); + } + + /** + * test for split Format string fixed length + * + * @return array + */ + public function splitFormatStringFixedProvider(): array + { + return [ + 'normal split, default split char' => [ + 'abcdefg', + 4, + null, + 'abcd-efg' + ], + 'noraml split, other single split char' => [ + 'abcdefg', + 4, + "=", + 'abcd=efg' + ], + 'noraml split, other multiple split char' => [ + 'abcdefg', + 4, + "-=-", + 'abcd-=-efg' + ], + 'non ascii characters' => [ + 'あいうえお', + 2, + "-", + 'あい-うえ-お' + ], + 'empty string' => [ + '', + 4, + "-", + '' + ] + ]; + } + + /** + * Undocumented function + * + * @covers ::splitFormatStringFixed + * @dataProvider splitFormatStringFixedProvider + * @testdox splitFormatStringFixed $input with length $split_length and split chars $split_characters will be $expected [$_dataName] + * + * @param string $input + * @param int $split_length + * @param string|null $split_characters + * @param string $expected + * @return void + */ + public function testSplitFormatStringFixed( + string $input, + int $split_length, ?string $split_characters, string $expected ): void { if ($split_characters === null) { - $output = \CoreLibs\Convert\Strings::splitFormatString( + $output = \CoreLibs\Convert\Strings::splitFormatStringFixed( $input, - $format + $split_length ); } else { - $output = \CoreLibs\Convert\Strings::splitFormatString( + $output = \CoreLibs\Convert\Strings::splitFormatStringFixed( $input, - $format, + $split_length, $split_characters ); } @@ -175,6 +244,36 @@ final class CoreLibsConvertStringsTest extends TestCase ); } + public function splitFormatStringFixedExceptionProvider(): array + { + return [ + 'split length too short' => [ + 'abcdefg', + -1, + ], + 'split length longer than string' => [ + 'abcdefg', + 20, + ], + ]; + } + + /** + * Undocumented function + * + * @covers ::splitFormatStringFixed + * @dataProvider splitFormatStringFixedExceptionProvider + * @testdox 
splitFormatStringFixed Exception catch checks for $input with $length [$_dataName] + * + * @return void + */ + public function testSplitFormatStringFixedExceptions(string $input, int $length): void + { + // catch exception + $this->expectException(\InvalidArgumentException::class); + \CoreLibs\Convert\Strings::splitFormatStringFixed($input, $length); + } + /** * Undocumented function * @@ -378,6 +477,150 @@ final class CoreLibsConvertStringsTest extends TestCase \CoreLibs\Convert\Strings::stripUTF8BomBytes($file) ); } + + /** + * Undocumented function + * + * @return array + */ + public function allCharsInSetProvider(): array + { + return [ + 'find' => [ + 'abc', + 'abcdef', + true + ], + 'not found' => [ + 'abcz', + 'abcdef', + false + ] + ]; + } + + /** + * Undocumented function + * + * @covers ::allCharsInSet + * @dataProvider allCharsInSetProvider + * @testdox allCharsInSet $input in $haystack with expected $expected [$_dataName] + * + * @param string $needle + * @param string $haystack + * @param bool $expected + * @return void + */ + public function testAllCharsInSet(string $needle, string $haystack, bool $expected): void + { + $this->assertEquals( + $expected, + \CoreLibs\Convert\Strings::allCharsInSet($needle, $haystack) + ); + } + + public function buildCharStringFromListsProvider(): array + { + return [ + 'test a' => [ + 'abc', + ['a', 'b', 'c'], + ], + 'test b' => [ + 'abc123', + ['a', 'b', 'c'], + ['1', '2', '3'], + ], + 'test c: no params' => [ + '', + ], + 'test c: empty 1' => [ + '', + [] + ], + 'test nested' => [ + 'abc', + [['a'], ['b'], ['c']], + ], + ]; + } + + /** + * Undocumented function + * + * @covers ::buildCharStringFromLists + * @dataProvider buildCharStringFromListsProvider + * @testdox buildCharStringFromLists all $input convert to $expected [$_dataName] + * + * @param string $expected + * @param array ...$input + * @return void + */ + public function testBuildCharStringFromLists(string $expected, array ...$input): void + { + 
$this->assertEquals( + $expected, + \CoreLibs\Convert\Strings::buildCharStringFromLists(...$input) + ); + } + + /** + * Undocumented function + * + * @return array + */ + public function removeDuplicatesProvider(): array + { + return [ + 'test no change' => [ + 'ABCDEFG', + 'ABCDEFG', + ], + 'test simple' => [ + 'aa', + 'a' + ], + 'test keep lower and uppwer case' => [ + 'AaBbCc', + 'AaBbCc' + ], + 'test unqiue' => [ + 'aabbcc', + 'abc' + ], + 'test multibyte no change' => [ + 'あいうえお', + 'あいうえお', + ], + 'test multibyte' => [ + 'ああいいううええおお', + 'あいうえお', + ], + 'test multibyte special' => [ + 'あぁいぃうぅえぇおぉ', + 'あぁいぃうぅえぇおぉ', + ] + ]; + } + + /** + * Undocumented function + * + * @covers ::removeDuplicates + * @dataProvider removeDuplicatesProvider + * @testdox removeDuplicates make $input unqiue to $expected [$_dataName] + * + * @param string $input + * @param string $expected + * @return void + */ + public function testRemoveDuplicates(string $input, string $expected): void + { + $this->assertEquals( + $expected, + \CoreLibs\Convert\Strings::removeDuplicates($input) + ); + } } // __END__ diff --git a/www/admin/class_test.strings.php b/www/admin/class_test.strings.php index f1ffb689..47f3ed82 100644 --- a/www/admin/class_test.strings.php +++ b/www/admin/class_test.strings.php @@ -14,6 +14,9 @@ require 'config.php'; $LOG_FILE_ID = 'classTest-string'; ob_end_flush(); +use CoreLibs\Convert\Strings; +use CoreLibs\Debug\Support as DgS; + $log = new CoreLibs\Logging\Logging([ 'log_folder' => BASE . LOG, 'log_file_id' => $LOG_FILE_ID, @@ -29,6 +32,7 @@ print '
Class Test Master
'; print '

' . $PAGE_NAME . '

'; $split = '4-4-4'; +$split_length = 4; $test_strings = [ '13', '1234', @@ -40,20 +44,59 @@ $test_strings = [ ]; foreach ($test_strings as $string) { - print "Convert: $string with $split to: " - . \CoreLibs\Convert\Strings::splitFormatString($string, $split) + print "A) Convert: $string with $split to: " + . Strings::splitFormatString($string, $split) . "
"; + try { + print "B) Convert: $string with $split_length to: " + . Strings::splitFormatStringFixed($string, $split_length) + . "
"; + } catch (Exception $e) { + print "Split not possible: " . $e->getMessage() . "
"; + } } $split = '2_2'; +$split_length = 2; $string = '1234'; -print "Convert: $string with $split to: " - . \CoreLibs\Convert\Strings::splitFormatString($string, $split) +print "A) Convert: $string with $split to: " + . Strings::splitFormatString($string, $split) + . "
"; +print "B) Convert: $string with $split_length to: " + . Strings::splitFormatStringFixed($string, $split_length, "_") . "
"; $split = '2-2'; $string = 'あいうえ'; -print "Convert: $string with $split to: " - . \CoreLibs\Convert\Strings::splitFormatString($string, $split) +try { + print "Convert: $string with $split to: " + . Strings::splitFormatString($string, $split) + . "
"; +} catch (\Exception $e) { + print "Cannot split string: " . $e->getMessage() . "
"; +} +print "B) Convert: $string with $split_length to: " + . Strings::splitFormatStringFixed($string, $split_length, "-") + . "
"; + +$string = 'ABCD12345568ABC13'; +$format = '2-4_5-2#4'; +$output = 'AB-CD12_34556-8A#BC13'; +print "A) Convert: $string with $format to: " + . Strings::splitFormatString($string, $format) + . "
"; + +// try other split calls +$string = "ABCDE"; +$split_length = 2; +$split_char = "-=-"; +print "Convert: $string with $split_length / $split_char to: " + . Strings::splitFormatStringFixed($string, $split_length, $split_char) + . "
"; +$string = "あいうえお"; +$split_length = 2; +$split_char = "-=-"; +print "Convert: $string with $split_length / $split_char to: " + . Strings::splitFormatStringFixed($string, $split_length, $split_char) . "
"; $test_splits = [ @@ -63,9 +106,27 @@ $test_splits = [ '2-3-4', ]; foreach ($test_splits as $split) { - print "$split with count: " . \CoreLibs\Convert\Strings::countSplitParts($split) . "
"; + print "$split with count: " . Strings::countSplitParts($split) . "
"; } +// check char list in list +$needle = "abc"; +$haystack = "abcdefg"; +print "Needle: " . $needle . ", Haysteck: " . $haystack . ": " + . DgS::prBl(Strings::allCharsInSet($needle, $haystack)) . "
"; +$needle = "abcz"; +print "Needle: " . $needle . ", Haysteck: " . $haystack . ": " + . DgS::prBl(Strings::allCharsInSet($needle, $haystack)) . "
"; + +print "Combined strings A: " + . Strings::buildCharStringFromLists(['A', 'B', 'C'], ['0', '1', '2']) . "
"; +print "Combined strings B: " + . Strings::buildCharStringFromLists([['F'], ['G'], 'H'], [['5', ['6']], ['0'], '1', '2']) . "
"; + +$input_string = "AaBbCc"; +print "Unique: " . Strings::removeDuplicates($input_string) . "
"; +print "Unique: " . Strings::removeDuplicates(strtolower($input_string)) . "
"; + print ""; // __END__ diff --git a/www/lib/CoreLibs/Convert/Strings.php b/www/lib/CoreLibs/Convert/Strings.php index 81632392..ddff7a4e 100644 --- a/www/lib/CoreLibs/Convert/Strings.php +++ b/www/lib/CoreLibs/Convert/Strings.php @@ -8,6 +8,8 @@ declare(strict_types=1); namespace CoreLibs\Convert; +use CoreLibs\Combined\ArrayHandler; + class Strings { /** @@ -52,29 +54,37 @@ class Strings * Note a string LONGER then the maxium will be attached with the LAST * split character. In above exmaple * ABCD1234EFGHTOOLONG will be ABCD-1234-EFGH-TOOLONG + * If the characters are NOT ASCII it will return the string as is * - * @param string $value string value to split + * @param string $string string value to split * @param string $split_format split format - * @param string $split_characters list of charcters with which we split - * if not set uses dash ('-') * @return string split formatted string or original value if not chnaged + * @throws \InvalidArgumentException for empty split format, invalid values, split characters or split format */ public static function splitFormatString( - string $value, + string $string, string $split_format, - string $split_characters = '-' ): string { - if ( - // abort if split format is empty - empty($split_format) || - // if not in the valid ASCII character range for any of the strings - preg_match('/[^\x20-\x7e]/', $value) || - // preg_match('/[^\x20-\x7e]/', $split_format) || - preg_match('/[^\x20-\x7e]/', $split_characters) || - // only numbers and split characters in split_format - !preg_match("/[0-9" . $split_characters . "]/", $split_format) - ) { - return $value; + // skip if string or split format is empty is empty + if (empty($string) || empty($split_format)) { + return $string; + } + if (preg_match('/[^\x20-\x7e]/', $string)) { + throw new \InvalidArgumentException( + "The string to split can only be ascii characters: " . 
$string + ); + } + // get the split characters that are not numerical and check they are ascii + $split_characters = self::removeDuplicates(preg_replace('/[0-9]/', '', $split_format)); + if (preg_match('/[^\x20-\x7e]/', $split_characters)) { + throw new \InvalidArgumentException( + "The split character has to be a valid ascii character: " . $split_characters + ); + } + if (!preg_match("/^[0-9" . $split_characters . "]+$/", $split_format)) { + throw new \InvalidArgumentException( + "The split format can only be numbers and the split characters: " . $split_format + ); } // split format list $split_list = preg_split( @@ -86,14 +96,14 @@ class Strings ); // if this is false, or only one array, abort split if (!is_array($split_list) || count($split_list) == 1) { - return $value; + return $string; } $out = ''; $pos = 0; $last_split = ''; foreach ($split_list as $offset) { if (is_numeric($offset)) { - $_part = substr($value, $pos, (int)$offset); + $_part = substr($string, $pos, (int)$offset); if (empty($_part)) { break; } @@ -104,8 +114,8 @@ class Strings $last_split = $offset; } } - if (!empty($out) && $pos < strlen($value)) { - $out .= $last_split . substr($value, $pos); + if (!empty($out) && $pos < strlen($string)) { + $out .= $last_split . 
substr($string, $pos); } // if last is not alphanumeric remove, remove if (!strcspn(substr($out, -1, 1), $split_characters)) { @@ -115,10 +125,49 @@ class Strings if (!empty($out)) { return $out; } else { - return $value; + return $string; } } + /** + * Split a string into n-length blocks with a split character inbetween + * This is simplified version from splitFormatString that uses + * fixed split length with a characters, this evenly splits the string out into the + * given length + * This works with non ASCII characters too + * + * @param string $string string to split + * @param int $split_length split length, must be smaller than string and larger than 0 + * @param string $split_characters [default=-] the character to split, can be more than one + * @return string + * @throws \InvalidArgumentException Thrown if split length style is invalid + */ + public static function splitFormatStringFixed( + string $string, + int $split_length, + string $split_characters = '-' + ): string { + // if empty string or if split lenght is 0 or empty split characters + // then we skip any splitting + if (empty($string) || $split_length == 0 || empty($split_characters)) { + return $string; + } + $return_string = ''; + $string_length = mb_strlen($string); + // check that the length is not too short + if ($split_length < 1 || $split_length >= $string_length) { + throw new \InvalidArgumentException( + "The split length must be at least 1 character and less than the string length to split. " + . "Split length: " . $split_length . ", string length: " . $string_length + ); + } + for ($i = 0; $i < $string_length; $i += $split_length) { + $return_string .= mb_substr($string, $i, $split_length) . 
$split_characters; + } + // remove last trailing character which is always the split char length + return mb_substr($return_string, 0, -1 * mb_strlen($split_characters)); + } + /** * Strip any duplicated slahes from a path * eg: //foo///bar/foo.inc -> /foo/bar/foo.inc @@ -146,6 +195,63 @@ class Strings { return trim($text, pack('H*', 'EFBBBF')); } + + /** + * Make as string of characters unique + * + * @param string $string + * @return string + */ + public static function removeDuplicates(string $string): string + { + // combine again + $result = implode( + '', + // unique list + array_unique( + // split into array + mb_str_split($string) + ) + ); + + return $result; + } + + /** + * check if all characters are in set + * + * @param string $needle Needle to search + * @param string $haystack Haystack to search in + * @return bool True on found, False if not in haystack + */ + public static function allCharsInSet(string $needle, string $haystack): bool + { + $input_length = strlen($needle); + + for ($i = 0; $i < $input_length; $i++) { + if (strpos($haystack, $needle[$i]) === false) { + return false; + } + } + + return true; + } + + /** + * converts a list of arrays of strings into a string of unique entries + * input arrays can be nested, only values are used + * + * @param array ...$char_lists + * @return string + */ + public static function buildCharStringFromLists(array ...$char_lists): string + { + return implode('', array_unique( + ArrayHandler::flattenArray( + array_merge(...$char_lists) + ) + )); + } } // __END__