Files
development/www/lib/CoreLibs/Check/Encoding.php
Clemens Schwaighofer c5fed66237 PHP 8.5 fixes and updates
All tested with PHP 8.4 and PHP 8.3 too

Major changes:
- cube root Math (cbrt) now throws InvalidArgumentException if NAN is returned instead of returning NAN
- Byte convert from string to int will throw errors if value is too large (\LengthException)
- new flag for returning string type but for this bcmath must be installed (\RuntimeException if no bcmath)
- Updated curl class and remove close handler as not needed and deprecated as of PHP 8.5
- Curl phpunit tests: convert string to JSON convert flow for return content check (to avoid per PHP version check)
- image close handler for ImageMagick removed as not needed and deprecated as of PHP 8.5
- updated all check calls too use phive tools if possible (except phpunit) and all scripts can have dynamic php version set
2026-01-06 15:55:47 +09:00

127 lines
3.9 KiB
PHP

<?php
/*
* check if string is valid in target encoding
*/
declare(strict_types=1);
namespace CoreLibs\Check;
class Encoding
{
/** @var int<min, -1>|int<1, max>|string */
private static int|string $mb_error_char = '';
/**
* set error char
*
* @param string|int|null $string The character to use to represent
* error chars
* "long" for long, "none" for none
* or a valid code point in int
* like 0x2234 (8756, ∴)
* default character is ? (63)
* if null is set then "none"
* @return void
*/
public static function setErrorChar(string|int|null $string): void
{
if (empty($string)) {
$string = 'none';
}
// if not special string or char but code point
if (in_array($string, ['none', 'long', 'entity'])) {
self::$mb_error_char = $string;
} else {
// always convert to char for internal use
self::$mb_error_char = \IntlChar::chr($string);
// if string convert to code point
if (is_string($string)) {
$string = \IntlChar::ord($string);
}
}
mb_substitute_character($string);
}
/**
* get the current set error character
*
* @param bool $return_substitute_func if set to true return the set
* character from the php function
* directly
* @return string|int Set error character
*/
public static function getErrorChar(bool $return_substitute_func = false): string|int
{
// return mb_substitute_character();
if ($return_substitute_func === true) {
// if false abort with error
if (($return = mb_substitute_character()) === false) {
return self::$mb_error_char;
}
return $return;
} else {
return self::$mb_error_char;
}
}
/**
* test if a string can be safely convert between encodings.
* mostly utf8 to shift jis
* the default compare has a possibility of failure, especially with windows
* it is recommended to the following in the script which uses this method:
* mb_substitute_character(0x2234);
* $class->mb_error_char = '∴';
* if check to Shift JIS
* if check to ISO-2022-JP
* if check to ISO-2022-JP-MS
* set three dots (∴) as wrong character for correct convert error detect
* (this char is used, because it is one of the least used ones)
*
* @param string $string string to test
* @param string $from_encoding encoding of string to test
* @param string $to_encoding target encoding
* @return array<string>|false false if no error or
* array with failed characters
*/
public static function checkConvertEncoding(
string $string,
string $from_encoding,
string $to_encoding
): array|false {
// convert to target encoding and convert back
$temp = mb_convert_encoding($string, $to_encoding, $from_encoding);
if ($temp === false) {
return false;
}
$compare = mb_convert_encoding($temp, $from_encoding, $to_encoding);
if ($compare === false) {
return false;
}
// if string does not match anymore we have a convert problem
if ($string == $compare) {
return false;
}
$failed = [];
// go through each character and find the ones that do not match
for ($i = 0, $iMax = mb_strlen($string, $from_encoding); $i < $iMax; $i++) {
$char = mb_substr($string, $i, 1, $from_encoding);
$r_char = mb_substr($compare, $i, 1, $from_encoding);
// the ord 194 is a hack to fix the IE7/IE8
// bug with line break and illegal character
if (
(($char != $r_char && (!self::$mb_error_char ||
in_array(self::$mb_error_char, ['none', 'long', 'entity']))) ||
($char != $r_char && $r_char == self::$mb_error_char && self::$mb_error_char)) &&
ord($char[0]) != 194
) {
$failed[] = $char;
}
}
return $failed;
}
}
// __END__