all the preg error messages */
    public const array PREG_ERROR_MESSAGES = [
        PREG_NO_ERROR => 'No error',
        PREG_INTERNAL_ERROR => 'Internal PCRE error',
        PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit exhausted',
        PREG_RECURSION_LIMIT_ERROR => 'Recursion limit exhausted',
        PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
        PREG_BAD_UTF8_OFFSET_ERROR => 'Bad UTF-8 offset',
        PREG_JIT_STACKLIMIT_ERROR => 'JIT stack limit exhausted',
    ];

    /**
     * Return the number of non-empty elements in a split format string
     * 0 if the format is empty or invalid (anything other than digits and
     *   split characters, or split characters that are empty / not printable ASCII)
     * 1 if no split character is found
     * n for the number of parts in the split list
     *
     * Note: parts that are empty or "0" are not counted (array_filter).
     *
     * @param  string $split_format     split format, eg "4-4-4"
     * @param  string $split_characters [default=-] allowed split characters
     * @return int
     */
    public static function countSplitParts(
        string $split_format,
        string $split_characters = '-'
    ): int {
        if (
            empty($split_format) ||
            $split_characters === '' ||
            // split characters must be printable ASCII, check before they are used in a regex
            preg_match('/[^\x20-\x7e]/', $split_characters)
        ) {
            return 0;
        }
        // quote the split characters so "/", "]", "\", "^" or "-" cannot break the pattern
        $quoted_characters = preg_quote($split_characters, '/');
        // the whole format may only consist of digits and split characters
        if (!preg_match('/^[0-9' . $quoted_characters . ']+$/', $split_format)) {
            return 0;
        }
        $split_list = preg_split('/[' . $quoted_characters . ']/', $split_format);
        if (!is_array($split_list)) {
            return 0;
        }
        return count(array_filter($split_list));
    }

    /**
     * Format a string based on a split format string
     * A split format string is eg
     * 4-4-4 that means 4 characters DASH 4 characters DASH 4 characters
     * So a string in the format of
     * ABCD1234EFGH will be ABCD-1234-EFGH
     * Note: a string LONGER than the maximum will get the rest attached with the LAST
     * split character. In the above example
     * ABCD1234EFGHTOOLONG will be ABCD-1234-EFGH-TOOLONG
     * An empty string or empty split format returns the string as is.
     *
     * @param  string $string       string value to split
     * @param  string $split_format split format
     * @return string               split formatted string or original value if not changed
     * @throws \InvalidArgumentException for a non ASCII string, a split format without
     *                                   split characters, non ASCII split characters
     *                                   or an invalid split format
     */
    public static function splitFormatString(
        string $string,
        string $split_format,
    ): string {
        // nothing to do if string or split format is empty
        if (empty($string) || empty($split_format)) {
            return $string;
        }
        if (preg_match('/[^\x20-\x7e]/', $string)) {
            throw new \InvalidArgumentException(
                "The string to split can only be ascii characters: " . $string
            );
        }
        // everything in the format that is not a digit is a split character
        $split_characters = self::removeDuplicates(preg_replace('/[0-9]/', '', $split_format) ?: '');
        if ($split_characters === '') {
            throw new \InvalidArgumentException(
                "A split character must exist in the format string: " . $split_format
            );
        }
        if (preg_match('/[^\x20-\x7e]/', $split_characters)) {
            throw new \InvalidArgumentException(
                "The split character has to be a valid ascii character: " . $split_characters
            );
        }
        // quote for use inside a character class, otherwise split characters like
        // "/" or "]" produce an invalid regex and a misleading exception below
        $quoted_characters = preg_quote($split_characters, '/');
        if (!preg_match('/^[0-9' . $quoted_characters . ']+$/', $split_format)) {
            throw new \InvalidArgumentException(
                "The split format can only be numbers and the split characters: " . $split_format
            );
        }
        // split format list: lengths and the split characters between them
        $split_list = preg_split(
            '/([' . $quoted_characters . '])/',
            $split_format,
            -1,
            PREG_SPLIT_DELIM_CAPTURE
        );
        // if this is false, or only one entry, abort split
        if (!is_array($split_list) || count($split_list) == 1) {
            return $string;
        }
        $out = '';
        $pos = 0;
        $last_split = '';
        foreach ($split_list as $token) {
            if (is_numeric($token)) {
                $part = substr($string, $pos, (int)$token);
                // strict check: a part that is exactly "0" is valid data, empty() would drop it
                if ($part === '') {
                    break;
                }
                $out .= $part;
                $pos += (int)$token;
            } elseif ($pos) {
                // a split character before any data has been written is skipped
                $out .= $token;
                $last_split = $token;
            }
        }
        // attach the rest of a too long string with the last split character
        if ($out !== '' && $pos < strlen($string)) {
            $out .= $last_split . substr($string, $pos);
        }
        // remove a trailing split character
        if ($out !== '' && str_contains($split_characters, substr($out, -1))) {
            $out = substr($out, 0, -1);
        }
        // overwrite only if out is set
        return $out !== '' ? $out : $string;
    }

    /**
     * Split a string into n-length blocks with split characters in between
     * This is a simplified version of splitFormatString that uses a fixed
     * split length, the string is evenly split into blocks of the given length
     * This works with non ASCII characters too
     *
     * @param  string $string           string to split
     * @param  int    $split_length     split length, must be smaller than the string length
     *                                  and larger than 0
     * @param  string $split_characters [default=-] the character to split with,
     *                                  can be more than one
     * @return string
     * @throws \InvalidArgumentException Thrown if the split length is negative or not
     *                                   smaller than the string length
     */
    public static function splitFormatStringFixed(
        string $string,
        int $split_length,
        string $split_characters = '-'
    ): string {
        // if empty string or if split length is 0 or no split characters
        // then we skip any splitting
        // note: "0" is a valid split character, so compare against the empty string
        if (empty($string) || $split_length == 0 || $split_characters === '') {
            return $string;
        }
        $string_length = mb_strlen($string);
        // check that the length is not too short or too long
        if ($split_length < 1 || $split_length >= $string_length) {
            throw new \InvalidArgumentException(
                "The split length must be at least 1 character and less than the string length to split. "
                . "Split length: " . $split_length . ", string length: " . $string_length
            );
        }
        // cut into blocks of split length and glue them with the split characters
        return implode($split_characters, mb_str_split($string, $split_length));
    }

    /**
     * Strip any duplicated slashes from a path
     * eg: //foo///bar/foo.inc -> /foo/bar/foo.inc
     *
     * @param  string $path Path to strip slashes from
     * @return string       Clean path, on error returns original path
     */
    public static function stripMultiplePathSlashes(string $path): string
    {
        return preg_replace('#/+#', '/', $path) ?? $path;
    }

    /**
     * Remove UTF8 BOM byte sequences from the start and end of a line
     * Note: this is often found in CSV files exported from Excel at the first row, first element
     *
     * Only complete BOM sequences (EF BB BF) are removed. trim() must not be used here:
     * it takes a character list and would strip single EF/BB/BF bytes, which breaks
     * multi byte characters that end in one of these bytes.
     *
     * @param  string $text
     * @return string       Text without BOM, on regex error the original text
     */
    public static function stripUTF8BomBytes(string $text): string
    {
        return preg_replace('/^(?:\xEF\xBB\xBF)+|(?:\xEF\xBB\xBF)+\z/', '', $text) ?? $text;
    }

    /**
     * Make a string of characters unique, the first occurrence of each character is kept
     *
     * @param  string $string
     * @return string
     */
    public static function removeDuplicates(string $string): string
    {
        // split into characters, drop duplicates, combine again
        return implode('', array_unique(mb_str_split($string)));
    }

    /**
     * Check if all characters are in set
     * Note: the comparison is done byte by byte, an empty needle is always in the set
     *
     * @param  string $needle   Needle to search
     * @param  string $haystack Haystack to search in
     * @return bool             True if all are found, False if one is not in haystack
     */
    public static function allCharsInSet(string $needle, string $haystack): bool
    {
        $input_length = strlen($needle);
        for ($i = 0; $i < $input_length; $i++) {
            if (!str_contains($haystack, $needle[$i])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Converts a list of arrays of strings into a string of unique entries
     * input arrays can be nested, only values are used
     *
     * @param  array ...$char_lists
     * @return string
     */
    public static function buildCharStringFromLists(array ...$char_lists): string
    {
        return implode('', array_unique(
            ArrayHandler::flattenArray(
                array_merge(...$char_lists)
            )
        ));
    }

    /**
     * Split up character ranges in format A-Z, a-z, 0-9
     * Without any dash in the input the unique characters of the input are returned
     * (keys are the original character positions). With a dash only the
     * "character-dash-character" ranges are expanded.
     *
     * @param  string $input
     * @return string[]
     * @throws \InvalidArgumentException if the input holds anything but
     *                                   alphanumeric characters, dash and white space
     */
    public static function parseCharacterRanges(string $input): array
    {
        // if not alphanumeric, throw value error
        if (!preg_match("/^[A-Za-z0-9\-\s]+$/u", $input)) {
            throw new \InvalidArgumentException(
                "The input string contains invalid characters, "
                . "only alphanumeric, dash (-), space and 'or' are allowed: " . $input
            );
        }
        // Remove all spaces
        $input = str_replace(' ', '', $input);
        // if there is no - inside, return unique characters as array
        if (!str_contains($input, '-')) {
            return array_unique(mb_str_split($input));
        }
        $result = [];
        // Find all patterns like "A-Z" (character-dash-character)
        preg_match_all('/(.)-(.)/u', $input, $matches, PREG_SET_ORDER);
        foreach ($matches as $match) {
            // input is validated as ASCII above, so byte values are sufficient
            $start_ord = ord($match[1][0]);
            $end_ord = ord($match[2][0]);
            // make sure start is before end
            if ($start_ord > $end_ord) {
                [$start_ord, $end_ord] = [$end_ord, $start_ord];
            }
            // Generate range of characters, each character only once
            for ($i = $start_ord; $i <= $end_ord; $i++) {
                $char = chr($i);
                if (!in_array($char, $result, true)) {
                    $result[] = $char;
                }
            }
        }
        return $result;
    }

    /**
     * Check if a regex is valid. Does not return the detail regex parser error
     *
     * @param  string $pattern Any regex string
     * @return bool            False on invalid regex
     */
    public static function isValidRegex(string $pattern): bool
    {
        try {
            // preg_match returns false if the pattern cannot be compiled,
            // the warning it emits for this is suppressed on purpose
            return @preg_match($pattern, '') !== false;
        } catch (\Error $e) {
            return false;
        }
    }

    /**
     * Returns the last preg error message as string
     * all messages are defined in PREG_ERROR_MESSAGES
     *
     * @return string
     */
    public static function getLastRegexErrorString(): string
    {
        return self::PREG_ERROR_MESSAGES[preg_last_error()] ?? 'Unknown error';
    }

    /**
     * Check if a regex is valid, returns array with flag and error strings
     *
     * @param  string $pattern
     * @return array{valid:bool,preg_error:int,error:null|string,pcre_error:null|string}
     */
    public static function validateRegex(string $pattern): array
    {
        // the compile warning is suppressed on purpose, the error is returned instead
        if (@preg_match($pattern, '') === false) {
            $error = preg_last_error();
            return [
                'valid' => false,
                'preg_error' => $error,
                'error' => self::PREG_ERROR_MESSAGES[$error] ?? 'Unknown error',
                'pcre_error' => preg_last_error_msg(),
            ];
        }
        return ['valid' => true, 'preg_error' => PREG_NO_ERROR, 'error' => null, 'pcre_error' => null];
    }
}

// __END__