|int<1, max>|string */
    private static int|string $mb_error_char = '';

    /**
     * Set the substitute ("error") character that mbstring inserts for
     * characters which cannot be represented in the target encoding.
     *
     * The value is passed on to mb_substitute_character() and is also kept,
     * in character form, in self::$mb_error_char. The internal value is only
     * updated after mb_substitute_character() has accepted the new setting,
     * so both always describe the same character.
     *
     * @param  string|int|null $string The character to use to represent
     *                                 error chars:
     *                                 "none", "long" or "entity" keyword,
     *                                 a single character like "∴",
     *                                 or a valid code point as int
     *                                 like 0x2234 (8756, ∴).
     *                                 The PHP default character is ? (63).
     *                                 null, an empty string or 0 are
     *                                 handled as "none"
     * @return void
     * @throws \ValueError if $string is neither a keyword, a single
     *                     character nor a valid code point
     */
    public static function setErrorChar(string|int|null $string): void
    {
        // null, '' and 0 mean "no substitute character". The string '0' is
        // a valid character and must not be folded into this case.
        if ($string === null || $string === '' || $string === 0) {
            $string = 'none';
        }
        if (in_array($string, ['none', 'long', 'entity'], true)) {
            // keyword: stored and passed on unchanged
            $char = $string;
            $substitute = $string;
        } else {
            // internally always keep the character ...
            $char = \IntlChar::chr($string);
            // ... while mb_substitute_character() needs the code point
            $substitute = is_string($string) ? \IntlChar::ord($string) : $string;
            // IntlChar returns null for multi character strings and for
            // code points outside of the Unicode range
            if ($char === null || $substitute === null) {
                throw new \ValueError(sprintf(
                    'Error character "%s" must be "none", "long", "entity", '
                    . 'a single character or a valid code point',
                    $string
                ));
            }
        }
        // set in mbstring first, so the internal value is never changed
        // when the new setting is rejected
        mb_substitute_character($substitute);
        self::$mb_error_char = $char;
    }

    /**
     * Get the currently set error character.
     *
     * @param  bool $return_substitute_func If set to true, return what
     *                                      mb_substitute_character() reports
     *                                      (keyword or code point) instead of
     *                                      the internally stored value
     * @return string|int Set error character
     */
    public static function getErrorChar(bool $return_substitute_func = false): string|int
    {
        if (!$return_substitute_func) {
            return self::$mb_error_char;
        }
        $current = mb_substitute_character();
        // fall back to the internal value if the lookup reports a failure
        if ($current === false) {
            return self::$mb_error_char;
        }
        return $current;
    }

    /**
     * test if a string can be safely converted between encodings.
* mostly utf8 to shift jis
     *
     * The string is converted to the target encoding and back again. If the
     * round trip result differs, both strings are compared character by
     * character (same position) and the differing source characters are
     * collected.
     *
     * The default compare has a possibility of failure, especially with
     * Windows. It is recommended to set a rarely used substitute character
     * before calling this method, for example three dots (∴) when checking
     * against Shift JIS, ISO-2022-JP or ISO-2022-JP-MS:
     *   self::setErrorChar(0x2234);
     * With a single substitute character the positions in both strings stay
     * aligned and only characters that were replaced by it are reported.
     * If no error character is set, or it is one of the keywords "none",
     * "long" or "entity", every differing position is reported.
     *
     * Characters whose first byte is 194 (0xC2) are never reported. This is
     * a hack for the IE7/IE8 bug with line break and illegal character.
     *
     * @param  string $string        string to test
     * @param  string $from_encoding encoding of string to test
     * @param  string $to_encoding   target encoding
     * @return array|false false if the round trip is lossless or a
     *                     conversion returned false, else the list of
     *                     failed characters (strings, can be empty)
     */
    public static function checkConvertEncoding(
        string $string,
        string $from_encoding,
        string $to_encoding
    ): array|false {
        // convert to target encoding and convert back
        $temp = mb_convert_encoding($string, $to_encoding, $from_encoding);
        if ($temp === false) {
            return false;
        }
        $compare = mb_convert_encoding($temp, $from_encoding, $to_encoding);
        if ($compare === false) {
            return false;
        }
        // byte identical after the round trip, nothing got lost
        if ($string === $compare) {
            return false;
        }
        $error_char = self::$mb_error_char;
        // without a dedicated single error character any difference counts
        $report_any_mismatch = $error_char === ''
            || in_array($error_char, ['none', 'long', 'entity'], true);
        // split once instead of calling mb_substr for every position
        $chars = mb_str_split($string, 1, $from_encoding);
        $r_chars = mb_str_split($compare, 1, $from_encoding);
        $failed = [];
        // go through each character and find the ones that do not match
        foreach ($chars as $pos => $char) {
            // a shorter round trip string yields '' like mb_substr did
            $r_char = $r_chars[$pos] ?? '';
            if ($char === $r_char) {
                continue;
            }
            // with an error character set, only report replaced characters
            if (!$report_any_mismatch && $r_char !== $error_char) {
                continue;
            }
            // the ord 194 is a hack to fix the IE7/IE8
            // bug with line break and illegal character
            if (ord($char) === 194) {
                continue;
            }
            $failed[] = $char;
        }
        return $failed;
    }
}

// __END__