Add new split string, update split string format, add create string from array list, char in char list, remove duplicates

NEW:
- remove duplicates in string
- check character string list in other character string list
- build character string from array (or nested array) values
- split string with fixed split length

UPDATE:
- split string with format
* throw exceptions for wrong paramters
* remove the "split chracters", as they get extracted from the format string
This commit is contained in:
Clemens Schwaighofer
2025-06-04 14:15:45 +09:00
parent c70cdf457f
commit d4db235e5b
3 changed files with 478 additions and 68 deletions

View File

@@ -24,117 +24,83 @@ final class CoreLibsConvertStringsTest extends TestCase
{
// 0: input
// 1: format
// 2: split characters as string, null for default
// 3: expected
return [
'all empty string' => [
'',
'',
null,
''
],
'empty input string' => [
'',
'2-2',
null,
''
],
'empty format string string' => [
'1234',
'',
null,
'1234'
],
'string format match' => [
'1234',
'2-2',
null,
'12-34'
],
'string format trailing match' => [
'1234',
'2-2-',
null,
'12-34'
],
'string format leading match' => [
'1234',
'-2-2',
null,
'12-34'
],
'string format double inside match' => [
'1234',
'2--2',
null,
'12--34',
],
'string format short first' => [
'1',
'2-2',
null,
'1'
],
'string format match first' => [
'12',
'2-2',
null,
'12'
],
'string format short second' => [
'123',
'2-2',
null,
'12-3'
],
'string format too long' => [
'1234567',
'2-2',
null,
'12-34-567'
],
'string format invalid format string' => [
'1234',
'2_2',
null,
'1234'
],
'different split character' => [
'1234',
'2_2',
'_',
'12_34'
],
'mixed split characters' => [
'123456',
'2-2_2',
'-_',
'12-34_56'
],
'length mixed' => [
'ABCD12345568ABC13',
'2-4_5-2#4',
'-_#',
'AB-CD12_34556-8A#BC13'
],
'split with split chars in string' => [
'12-34',
'2-2',
null,
'12--3-4'
],
'mutltibyte string' => [
'あいうえ',
'2-2',
null,
'あいうえ'
],
'mutltibyte split string' => [
'1234',
'-',
null,
'1234'
],
];
}
@@ -143,29 +109,132 @@ final class CoreLibsConvertStringsTest extends TestCase
*
* @covers ::splitFormatString
* @dataProvider splitFormatStringProvider
* @testdox splitFormatString $input with format $format and splitters $split_characters will be $expected [$_dataName]
* @testdox splitFormatString $input with format $format will be $expected [$_dataName]
*
* @param string $input
* @param string $format
* @param string|null $split_characters
* @param string $expected
* @return void
*/
public function testSplitFormatString(
string $input,
string $format,
string $expected
): void {
$output = \CoreLibs\Convert\Strings::splitFormatString(
$input,
$format,
);
$this->assertEquals(
$expected,
$output
);
}
/** check exceptions */
public function splitFormatStringExceptionProvider(): array
{
return [
'invalid format string' => [
'1234',
'2あ2',
],
'mutltibyte string' => [
'あいうえ',
'2-2',
],
'mutltibyte split string' => [
'1234',
'-',
],
];
}
/**
* Undocumented function
*
* @covers ::splitFormatStringFixed
* @dataProvider splitFormatStringExceptionProvider
* @testdox splitFormatString Exception catch checks for $input with $format[$_dataName]
*
* @return void
*/
public function testSplitFormatStringExceptions(string $input, string $format): void
{
// catch exception
$this->expectException(\InvalidArgumentException::class);
\CoreLibs\Convert\Strings::splitFormatString($input, $format);
}
/**
* test for split Format string fixed length
*
* @return array
*/
public function splitFormatStringFixedProvider(): array
{
return [
'normal split, default split char' => [
'abcdefg',
4,
null,
'abcd-efg'
],
'noraml split, other single split char' => [
'abcdefg',
4,
"=",
'abcd=efg'
],
'noraml split, other multiple split char' => [
'abcdefg',
4,
"-=-",
'abcd-=-efg'
],
'non ascii characters' => [
'あいうえお',
2,
"-",
'あい-うえ-お'
],
'empty string' => [
'',
4,
"-",
''
]
];
}
/**
* Undocumented function
*
* @covers ::splitFormatStringFixed
* @dataProvider splitFormatStringFixedProvider
* @testdox splitFormatStringFixed $input with length $split_length and split chars $split_characters will be $expected [$_dataName]
*
* @param string $input
* @param int $split_length
* @param string|null $split_characters
* @param string $expected
* @return void
*/
public function testSplitFormatStringFixed(
string $input,
int $split_length,
?string $split_characters,
string $expected
): void {
if ($split_characters === null) {
$output = \CoreLibs\Convert\Strings::splitFormatString(
$output = \CoreLibs\Convert\Strings::splitFormatStringFixed(
$input,
$format
$split_length
);
} else {
$output = \CoreLibs\Convert\Strings::splitFormatString(
$output = \CoreLibs\Convert\Strings::splitFormatStringFixed(
$input,
$format,
$split_length,
$split_characters
);
}
@@ -175,6 +244,36 @@ final class CoreLibsConvertStringsTest extends TestCase
);
}
public function splitFormatStringFixedExceptionProvider(): array
{
return [
'split length too short' => [
'abcdefg',
-1,
],
'split length longer than string' => [
'abcdefg',
20,
],
];
}
/**
* Undocumented function
*
* @covers ::splitFormatStringFixed
* @dataProvider splitFormatStringFixedExceptionProvider
* @testdox splitFormatStringFixed Exception catch checks for $input with $length [$_dataName]
*
* @return void
*/
public function testSplitFormatStringFixedExceptions(string $input, int $length): void
{
// catch exception
$this->expectException(\InvalidArgumentException::class);
\CoreLibs\Convert\Strings::splitFormatStringFixed($input, $length);
}
/**
* Undocumented function
*
@@ -378,6 +477,150 @@ final class CoreLibsConvertStringsTest extends TestCase
\CoreLibs\Convert\Strings::stripUTF8BomBytes($file)
);
}
/**
* Undocumented function
*
* @return array
*/
public function allCharsInSetProvider(): array
{
return [
'find' => [
'abc',
'abcdef',
true
],
'not found' => [
'abcz',
'abcdef',
false
]
];
}
/**
* Undocumented function
*
* @covers ::allCharsInSet
* @dataProvider allCharsInSetProvider
* @testdox allCharsInSet $input in $haystack with expected $expected [$_dataName]
*
* @param string $needle
* @param string $haystack
* @param bool $expected
* @return void
*/
public function testAllCharsInSet(string $needle, string $haystack, bool $expected): void
{
$this->assertEquals(
$expected,
\CoreLibs\Convert\Strings::allCharsInSet($needle, $haystack)
);
}
public function buildCharStringFromListsProvider(): array
{
return [
'test a' => [
'abc',
['a', 'b', 'c'],
],
'test b' => [
'abc123',
['a', 'b', 'c'],
['1', '2', '3'],
],
'test c: no params' => [
'',
],
'test c: empty 1' => [
'',
[]
],
'test nested' => [
'abc',
[['a'], ['b'], ['c']],
],
];
}
/**
* Undocumented function
*
* @covers ::buildCharStringFromLists
* @dataProvider buildCharStringFromListsProvider
* @testdox buildCharStringFromLists all $input convert to $expected [$_dataName]
*
* @param string $expected
* @param array ...$input
* @return void
*/
public function testBuildCharStringFromLists(string $expected, array ...$input): void
{
$this->assertEquals(
$expected,
\CoreLibs\Convert\Strings::buildCharStringFromLists(...$input)
);
}
/**
* Undocumented function
*
* @return array
*/
public function removeDuplicatesProvider(): array
{
return [
'test no change' => [
'ABCDEFG',
'ABCDEFG',
],
'test simple' => [
'aa',
'a'
],
'test keep lower and uppwer case' => [
'AaBbCc',
'AaBbCc'
],
'test unqiue' => [
'aabbcc',
'abc'
],
'test multibyte no change' => [
'あいうえお',
'あいうえお',
],
'test multibyte' => [
'ああいいううええおお',
'あいうえお',
],
'test multibyte special' => [
'あぁいぃうぅえぇおぉ',
'あぁいぃうぅえぇおぉ',
]
];
}
/**
* Undocumented function
*
* @covers ::removeDuplicates
* @dataProvider removeDuplicatesProvider
* @testdox removeDuplicates make $input unqiue to $expected [$_dataName]
*
* @param string $input
* @param string $expected
* @return void
*/
public function testRemoveDuplicates(string $input, string $expected): void
{
$this->assertEquals(
$expected,
\CoreLibs\Convert\Strings::removeDuplicates($input)
);
}
}
// __END__

View File

@@ -14,6 +14,9 @@ require 'config.php';
$LOG_FILE_ID = 'classTest-string';
ob_end_flush();
use CoreLibs\Convert\Strings;
use CoreLibs\Debug\Support as DgS;
$log = new CoreLibs\Logging\Logging([
'log_folder' => BASE . LOG,
'log_file_id' => $LOG_FILE_ID,
@@ -29,6 +32,7 @@ print '<div><a href="class_test.php">Class Test Master</a></div>';
print '<div><h1>' . $PAGE_NAME . '</h1></div>';
$split = '4-4-4';
$split_length = 4;
$test_strings = [
'13',
'1234',
@@ -40,20 +44,59 @@ $test_strings = [
];
foreach ($test_strings as $string) {
print "Convert: $string with $split to: "
. \CoreLibs\Convert\Strings::splitFormatString($string, $split)
print "A) Convert: $string with $split to: "
. Strings::splitFormatString($string, $split)
. "<br>";
try {
print "B) Convert: $string with $split_length to: "
. Strings::splitFormatStringFixed($string, $split_length)
. "<br>";
} catch (Exception $e) {
print "Split not possible: " . $e->getMessage() . "<br>";
}
}
$split = '2_2';
$split_length = 2;
$string = '1234';
print "Convert: $string with $split to: "
. \CoreLibs\Convert\Strings::splitFormatString($string, $split)
print "A) Convert: $string with $split to: "
. Strings::splitFormatString($string, $split)
. "<br>";
print "B) Convert: $string with $split_length to: "
. Strings::splitFormatStringFixed($string, $split_length, "_")
. "<br>";
$split = '2-2';
$string = 'あいうえ';
print "Convert: $string with $split to: "
. \CoreLibs\Convert\Strings::splitFormatString($string, $split)
try {
print "Convert: $string with $split to: "
. Strings::splitFormatString($string, $split)
. "<br>";
} catch (\Exception $e) {
print "Cannot split string: " . $e->getMessage() . "<br>";
}
print "B) Convert: $string with $split_length to: "
. Strings::splitFormatStringFixed($string, $split_length, "-")
. "<br>";
$string = 'ABCD12345568ABC13';
$format = '2-4_5-2#4';
$output = 'AB-CD12_34556-8A#BC13';
print "A) Convert: $string with $format to: "
. Strings::splitFormatString($string, $format)
. "<br>";
// try other split calls
$string = "ABCDE";
$split_length = 2;
$split_char = "-=-";
print "Convert: $string with $split_length / $split_char to: "
. Strings::splitFormatStringFixed($string, $split_length, $split_char)
. "<br>";
$string = "あいうえお";
$split_length = 2;
$split_char = "-=-";
print "Convert: $string with $split_length / $split_char to: "
. Strings::splitFormatStringFixed($string, $split_length, $split_char)
. "<br>";
$test_splits = [
@@ -63,9 +106,27 @@ $test_splits = [
'2-3-4',
];
foreach ($test_splits as $split) {
print "$split with count: " . \CoreLibs\Convert\Strings::countSplitParts($split) . "<br>";
print "$split with count: " . Strings::countSplitParts($split) . "<br>";
}
// check char list in list
$needle = "abc";
$haystack = "abcdefg";
print "Needle: " . $needle . ", Haysteck: " . $haystack . ": "
. DgS::prBl(Strings::allCharsInSet($needle, $haystack)) . "<br>";
$needle = "abcz";
print "Needle: " . $needle . ", Haysteck: " . $haystack . ": "
. DgS::prBl(Strings::allCharsInSet($needle, $haystack)) . "<br>";
print "Combined strings A: "
. Strings::buildCharStringFromLists(['A', 'B', 'C'], ['0', '1', '2']) . "<br>";
print "Combined strings B: "
. Strings::buildCharStringFromLists([['F'], ['G'], 'H'], [['5', ['6']], ['0'], '1', '2']) . "<br>";
$input_string = "AaBbCc";
print "Unique: " . Strings::removeDuplicates($input_string) . "<br>";
print "Unique: " . Strings::removeDuplicates(strtolower($input_string)) . "<br>";
print "</body></html>";
// __END__

View File

@@ -8,6 +8,8 @@ declare(strict_types=1);
namespace CoreLibs\Convert;
use CoreLibs\Combined\ArrayHandler;
class Strings
{
/**
@@ -52,29 +54,37 @@ class Strings
* Note a string LONGER then the maxium will be attached with the LAST
* split character. In above exmaple
* ABCD1234EFGHTOOLONG will be ABCD-1234-EFGH-TOOLONG
* If the characters are NOT ASCII it will return the string as is
*
* @param string $value string value to split
* @param string $string string value to split
* @param string $split_format split format
* @param string $split_characters list of charcters with which we split
* if not set uses dash ('-')
* @return string split formatted string or original value if not chnaged
* @throws \InvalidArgumentException for empty split format, invalid values, split characters or split format
*/
public static function splitFormatString(
string $value,
string $string,
string $split_format,
string $split_characters = '-'
): string {
if (
// abort if split format is empty
empty($split_format) ||
// if not in the valid ASCII character range for any of the strings
preg_match('/[^\x20-\x7e]/', $value) ||
// preg_match('/[^\x20-\x7e]/', $split_format) ||
preg_match('/[^\x20-\x7e]/', $split_characters) ||
// only numbers and split characters in split_format
!preg_match("/[0-9" . $split_characters . "]/", $split_format)
) {
return $value;
// skip if string or split format is empty is empty
if (empty($string) || empty($split_format)) {
return $string;
}
if (preg_match('/[^\x20-\x7e]/', $string)) {
throw new \InvalidArgumentException(
"The string to split can only be ascii characters: " . $string
);
}
// get the split characters that are not numerical and check they are ascii
$split_characters = self::removeDuplicates(preg_replace('/[0-9]/', '', $split_format));
if (preg_match('/[^\x20-\x7e]/', $split_characters)) {
throw new \InvalidArgumentException(
"The split character has to be a valid ascii character: " . $split_characters
);
}
if (!preg_match("/^[0-9" . $split_characters . "]+$/", $split_format)) {
throw new \InvalidArgumentException(
"The split format can only be numbers and the split characters: " . $split_format
);
}
// split format list
$split_list = preg_split(
@@ -86,14 +96,14 @@ class Strings
);
// if this is false, or only one array, abort split
if (!is_array($split_list) || count($split_list) == 1) {
return $value;
return $string;
}
$out = '';
$pos = 0;
$last_split = '';
foreach ($split_list as $offset) {
if (is_numeric($offset)) {
$_part = substr($value, $pos, (int)$offset);
$_part = substr($string, $pos, (int)$offset);
if (empty($_part)) {
break;
}
@@ -104,8 +114,8 @@ class Strings
$last_split = $offset;
}
}
if (!empty($out) && $pos < strlen($value)) {
$out .= $last_split . substr($value, $pos);
if (!empty($out) && $pos < strlen($string)) {
$out .= $last_split . substr($string, $pos);
}
// if last is not alphanumeric remove, remove
if (!strcspn(substr($out, -1, 1), $split_characters)) {
@@ -115,10 +125,49 @@ class Strings
if (!empty($out)) {
return $out;
} else {
return $value;
return $string;
}
}
/**
* Split a string into n-length blocks with a split character inbetween
* This is simplified version from splitFormatString that uses
* fixed split length with a characters, this evenly splits the string out into the
* given length
* This works with non ASCII characters too
*
* @param string $string string to split
* @param int $split_length split length, must be smaller than string and larger than 0
* @param string $split_characters [default=-] the character to split, can be more than one
* @return string
* @throws \InvalidArgumentException Thrown if split length style is invalid
*/
public static function splitFormatStringFixed(
string $string,
int $split_length,
string $split_characters = '-'
): string {
// if empty string or if split lenght is 0 or empty split characters
// then we skip any splitting
if (empty($string) || $split_length == 0 || empty($split_characters)) {
return $string;
}
$return_string = '';
$string_length = mb_strlen($string);
// check that the length is not too short
if ($split_length < 1 || $split_length >= $string_length) {
throw new \InvalidArgumentException(
"The split length must be at least 1 character and less than the string length to split. "
. "Split length: " . $split_length . ", string length: " . $string_length
);
}
for ($i = 0; $i < $string_length; $i += $split_length) {
$return_string .= mb_substr($string, $i, $split_length) . $split_characters;
}
// remove last trailing character which is always the split char length
return mb_substr($return_string, 0, -1 * mb_strlen($split_characters));
}
/**
* Strip any duplicated slahes from a path
* eg: //foo///bar/foo.inc -> /foo/bar/foo.inc
@@ -146,6 +195,63 @@ class Strings
{
return trim($text, pack('H*', 'EFBBBF'));
}
/**
* Make as string of characters unique
*
* @param string $string
* @return string
*/
public static function removeDuplicates(string $string): string
{
// combine again
$result = implode(
'',
// unique list
array_unique(
// split into array
mb_str_split($string)
)
);
return $result;
}
/**
* check if all characters are in set
*
* @param string $needle Needle to search
* @param string $haystack Haystack to search in
* @return bool True on found, False if not in haystack
*/
public static function allCharsInSet(string $needle, string $haystack): bool
{
$input_length = strlen($needle);
for ($i = 0; $i < $input_length; $i++) {
if (strpos($haystack, $needle[$i]) === false) {
return false;
}
}
return true;
}
/**
* converts a list of arrays of strings into a string of unique entries
* input arrays can be nested, only values are used
*
* @param array<mixed> ...$char_lists
* @return string
*/
public static function buildCharStringFromLists(array ...$char_lists): string
{
return implode('', array_unique(
ArrayHandler::flattenArray(
array_merge(...$char_lists)
)
));
}
}
// __END__