Move Encoding class away from the Language namespace

Language\Encoding::__mbMimeEncode -> Convert\MimeEncode::__mbMimeEncode
Langauge\Encoding::checkConvertEncoding -> Check\Encoding::checkConvertEncoding
Langauge\Encoding::setErrorChar -> Check\Encoding::setErrorChar
Langauge\Encoding::getErrorChar -> Encoding::getErrorChar
Langauge\Encoding::convertEncoding -> Convert\Encoding::convertEncoding

Also fixed encoding check that not only a code point but a string can
also be used as a parameter.

Update phpunit tests and split them out for each class

Normal test page is still combined for all classes but updated to
correctly use each class
This commit is contained in:
Clemens Schwaighofer
2022-04-13 09:25:42 +09:00
parent a3c49e408a
commit 6f4c5e36e6
12 changed files with 618 additions and 401 deletions

View File

@@ -0,0 +1,117 @@
<?php
declare(strict_types=1);
namespace tests;
use PHPUnit\Framework\TestCase;
/**
* Test class for Check\Encoding
* @coversDefaultClass \CoreLibs\Check\Encoding
* @testdox \CoreLibs\Check\Encoding method tests
*/
final class CoreLibsCheckEncodingTest extends TestCase
{
/**
* Undocumented function
*
* @return array
*/
public function checkConvertEncodingProvider(): array
{
return [
// 0: string to test
// 1: source encoding
// 2: target encoding
// 3: substitue character
// 4: false for ok, array with error list
'valid test UTF-8 to SJIS (default)' => [
'日本語',
'UTF-8',
'SJIS',
null,
false
],
'invalid test UTF-8 to SJIS (dots as code point)' => [
'❶',
'UTF-8',
'SJIS',
0x2234,
['❶']
],
'invalid test UTF-8 to SJIS (dots as string)' => [
'❶',
'UTF-8',
'SJIS',
'∴',
['❶']
],
'invalid test UTF-8 to SJIS (none)' => [
'❶',
'UTF-8',
'SJIS',
'none',
['❶']
],
'invalid test UTF-8 to SJIS (long)' => [
'❶',
'UTF-8',
'SJIS',
'long',
['❶']
],
'invalid test UTF-8 to SJIS (entity)' => [
'❶',
'UTF-8',
'SJIS',
'entity',
['❶']
],
];
}
/**
* Undocumented function
*
* @covers ::checkConvertEncoding
* @dataProvider checkConvertEncodingProvider
* @testdox check encoding convert from $from_encoding to $to_encoding [$_dataName]
*
* @param string $input
* @param string $from_encoding
* @param string $to_encoding
* @param string|int|null $error_char
* @param array|bool $expected
* @return void
*/
public function testCheckConvertEncoding(
string $input,
string $from_encoding,
string $to_encoding,
$error_char,
$expected
): void {
if ($error_char !== null) {
\CoreLibs\Check\Encoding::setErrorChar($error_char);
if (!in_array($error_char, ['none', 'long', 'entity'])) {
$this->assertEquals(
\IntlChar::chr($error_char),
\CoreLibs\Check\Encoding::getErrorChar()
);
} else {
$this->assertEquals(
$error_char,
\CoreLibs\Check\Encoding::getErrorChar()
);
}
}
$return = \CoreLibs\Check\Encoding::checkConvertEncoding($input, $from_encoding, $to_encoding);
$this->assertEquals(
$expected,
$return
);
}
}
// __END__

View File

@@ -0,0 +1,102 @@
<?php
declare(strict_types=1);
namespace tests;
use PHPUnit\Framework\TestCase;
/**
* Test class for Convert\Encoding
* @coversDefaultClass \CoreLibs\Convert\Encoding
* @testdox \CoreLibs\Convert\Encoding method tests
*/
final class CoreLibsConvertEncodingTest extends TestCase
{
/**
* Undocumented function
*
* @return array
*/
public function convertEncodingProvider(): array
{
return [
// 0: original string
// 1: target encoding
// 2: optional source encoding
// 3: auto check (not used)
// 4: expected string
// 5: expected string encoding
'simple from UTF-8 to SJIS' => [
'input string',
'SJIS',
null,
null,
'input string',
'SJIS'
],
'kanji from UTF-8 to SJIS' => [
'日本語',
'SJIS',
null,
null,
'日本語',
'SJIS'
],
'kanji from UTF-8 to SJIS with source' => [
'日本語',
'SJIS',
'UTF-8',
null,
'日本語',
'SJIS'
],
];
}
/**
* Undocumented function
*
* @covers ::convertEncoding
* @dataProvider convertEncodingProvider
* @testdox convert encoding $target_encoding, source: $source_encoding, auto: $auto_check [$_dataName]
*
* @param string $input
* @param string $target_encoding
* @param string $source_encoding
* @param bool $auto_check
* @param string $expected
* @param string $expected_encoding
* @return void
*/
public function testConvertEncoding(
string $input,
string $target_encoding,
?string $source_encoding,
?bool $auto_check,
string $expected,
string $expected_encoding
): void {
if ($source_encoding === null and $auto_check === null) {
$string = \CoreLibs\Convert\Encoding::convertEncoding($input, $target_encoding);
} elseif ($auto_check === null) {
$string = \CoreLibs\Convert\Encoding::convertEncoding($input, $target_encoding, $source_encoding);
} else {
$string = \CoreLibs\Convert\Encoding::convertEncoding(
$input,
$target_encoding,
$source_encoding,
$auto_check
);
}
// because we can't store encoding in here anyway
$target = mb_convert_encoding($expected, $expected_encoding, 'UTF-8');
// print "IN: $input, $target_encoding\n";
$this->assertEquals(
$target,
$string
);
}
}
// __END__

View File

@@ -0,0 +1,101 @@
<?php
declare(strict_types=1);
namespace tests;
use PHPUnit\Framework\TestCase;
/**
* Test class for Convert\MimeEncode
* @coversDefaultClass \CoreLibs\Convert\MimeEncode
* @testdox \CoreLibs\Convert\MimeEncode method tests
*/
final class CoreLibsConvertMimeEncodeTest extends TestCase
{
/**
* Undocumented function
*
* @return array
*/
public function mbMimeEncodeProvider(): array
{
return [
// 0: input string
// 1: encoding
// 2: expected
'standard UTF-8' => [
'Test string',
'UTF-8',
'Test string'
],
'long text UTF-8' => [
'The quick brown fox jumps over the lazy sheep that sleeps in the ravine '
. 'and has no idea what is going on here',
'UTF-8',
'The quick brown fox jumps over the lazy sheep that sleeps in the ravine '
. 'and has no idea what is going on here'
],
'standard with special chars UTF-8' => [
'This is ümläßtと漢字もカタカナ!^$%&',
'UTF-8',
'This is =?UTF-8?B?w7xtbMOkw59044Go5ryi5a2X44KC44Kr44K/44Kr44OK77yBIV4k?='
. "\r\n"
. ' =?UTF-8?B?JQ==?=&'
],
'35 chars and space at the end UTF-8' => [
'12345678901234567890123456789012345 '
. 'is there a space?',
'UTF-8',
'12345678901234567890123456789012345 '
. 'is there a =?UTF-8?B?c3BhY2U/?='
],
'36 chars and space at the end UTF-8' => [
'123456789012345678901234567890123456 '
. 'is there a space?',
'UTF-8',
'123456789012345678901234567890123456 '
. 'is there a =?UTF-8?B?c3BhY2U/?='
],
'36 kanji and space UTF-8' => [
'カタカナカタカナかなカタカナカタカナかなカタカナカタカナかなカタカナカタ '
. 'is there a space?',
'UTF-8',
"=?UTF-8?B?44Kr44K/44Kr44OK44Kr44K/44Kr44OK44GL44Gq44Kr44K/44Kr44OK44Kr?=\r\n"
. " =?UTF-8?B?44K/44Kr44OK?=\r\n"
. " =?UTF-8?B?44GL44Gq44Kr44K/44Kr44OK44Kr44K/44Kr44OK44GL44Gq44Kr44K/44Kr?=\r\n"
. " =?UTF-8?B?44OK44Kr44K/?= is there a =?UTF-8?B?c3BhY2U/?="
]
];
}
/**
* mb mime header encoding test
*
* @covers ::__mbMimeEncode
* @dataProvider mbMimeEncodeProvider
* @testdox mb encoding target $encoding [$_dataName]
*
* @return void
*/
public function testUuMbMimeEncode(string $input, string $encoding, string $expected): void
{
// encode string first
$encoded = \CoreLibs\Convert\MimeEncode::__mbMimeEncode($input, $encoding);
// print "MIME: -" . $encoded . "-\n";
$this->assertEquals(
$expected,
$encoded
);
$decoded = mb_decode_mimeheader($encoded);
// print "INPUT : " . $input . "\n";
// print "DECODED: " . $decoded . "\n";
// back compare decoded
$this->assertEquals(
$input,
$decoded
);
}
}
// __END__

View File

@@ -1,279 +0,0 @@
<?php
declare(strict_types=1);
namespace tests;
use PHPUnit\Framework\TestCase;
/**
* Test class for Language\Encoding
* @coversDefaultClass \CoreLibs\Language\Encoding
* @testdox \CoreLibs\Language\Encoding method tests
*/
final class CoreLibsLanguageEncodingTest extends TestCase
{
/**
* Undocumented function
*
* @return array
*/
public function mbMimeEncodeProvider(): array
{
return [
// 0: input string
// 1: encoding
// 2: expected
'standard UTF-8' => [
'Test string',
'UTF-8',
'Test string'
],
'long text UTF-8' => [
'The quick brown fox jumps over the lazy sheep that sleeps in the ravine '
. 'and has no idea what is going on here',
'UTF-8',
'The quick brown fox jumps over the lazy sheep that sleeps in the ravine '
. 'and has no idea what is going on here'
],
'standard with special chars UTF-8' => [
'This is ümläßtと漢字もカタカナ!^$%&',
'UTF-8',
'This is =?UTF-8?B?w7xtbMOkw59044Go5ryi5a2X44KC44Kr44K/44Kr44OK77yBIV4k?='
. "\r\n"
. ' =?UTF-8?B?JQ==?=&'
],
'35 chars and space at the end UTF-8' => [
'12345678901234567890123456789012345 '
. 'is there a space?',
'UTF-8',
'12345678901234567890123456789012345 '
. 'is there a =?UTF-8?B?c3BhY2U/?='
],
'36 chars and space at the end UTF-8' => [
'123456789012345678901234567890123456 '
. 'is there a space?',
'UTF-8',
'123456789012345678901234567890123456 '
. 'is there a =?UTF-8?B?c3BhY2U/?='
],
'36 kanji and space UTF-8' => [
'カタカナカタカナかなカタカナカタカナかなカタカナカタカナかなカタカナカタ '
. 'is there a space?',
'UTF-8',
"=?UTF-8?B?44Kr44K/44Kr44OK44Kr44K/44Kr44OK44GL44Gq44Kr44K/44Kr44OK44Kr?=\r\n"
. " =?UTF-8?B?44K/44Kr44OK?=\r\n"
. " =?UTF-8?B?44GL44Gq44Kr44K/44Kr44OK44Kr44K/44Kr44OK44GL44Gq44Kr44K/44Kr?=\r\n"
. " =?UTF-8?B?44OK44Kr44K/?= is there a =?UTF-8?B?c3BhY2U/?="
]
];
}
/**
* mb mime header encoding test
*
* @covers ::__mbMimeEncode
* @dataProvider mbMimeEncodeProvider
* @testdox mb encoding target $encoding [$_dataName]
*
* @return void
*/
public function testUuMbMimeEncode(string $input, string $encoding, string $expected): void
{
// encode string first
$encoded = \CoreLibs\Language\Encoding::__mbMimeEncode($input, $encoding);
// print "MIME: -" . $encoded . "-\n";
$this->assertEquals(
$expected,
$encoded
);
$decoded = mb_decode_mimeheader($encoded);
// print "INPUT : " . $input . "\n";
// print "DECODED: " . $decoded . "\n";
// back compare decoded
$this->assertEquals(
$input,
$decoded
);
}
/**
* Undocumented function
*
* @return array
*/
public function convertEncodingProvider(): array
{
return [
// 0: original string
// 1: target encoding
// 2: optional source encoding
// 3: auto check (not used)
// 4: expected string
// 5: expected string encoding
'simple from UTF-8 to SJIS' => [
'input string',
'SJIS',
null,
null,
'input string',
'SJIS'
],
'kanji from UTF-8 to SJIS' => [
'日本語',
'SJIS',
null,
null,
'日本語',
'SJIS'
],
'kanji from UTF-8 to SJIS with source' => [
'日本語',
'SJIS',
'UTF-8',
null,
'日本語',
'SJIS'
],
];
}
/**
* Undocumented function
*
* @covers ::convertEncoding
* @dataProvider convertEncodingProvider
* @testdox convert encoding $target_encoding, source: $source_encoding, auto: $auto_check [$_dataName]
*
* @param string $input
* @param string $target_encoding
* @param string $source_encoding
* @param bool $auto_check
* @param string $expected
* @param string $expected_encoding
* @return void
*/
public function testConvertEncoding(
string $input,
string $target_encoding,
?string $source_encoding,
?bool $auto_check,
string $expected,
string $expected_encoding
): void {
if ($source_encoding === null and $auto_check === null) {
$string = \CoreLibs\Language\Encoding::convertEncoding($input, $target_encoding);
} elseif ($auto_check === null) {
$string = \CoreLibs\Language\Encoding::convertEncoding($input, $target_encoding, $source_encoding);
} else {
$string = \CoreLibs\Language\Encoding::convertEncoding(
$input,
$target_encoding,
$source_encoding,
$auto_check
);
}
// because we can't store encoding in here anyway
$target = mb_convert_encoding($expected, $expected_encoding, 'UTF-8');
// print "IN: $input, $target_encoding\n";
$this->assertEquals(
$target,
$string
);
}
/**
* Undocumented function
*
* @return array
*/
public function checkConvertEncodingProvider(): array
{
return [
// 0: string to test
// 1: source encoding
// 2: target encoding
// 3: substitue character
// 4: false for ok, array with error list
'valid test UTF-8 to SJIS (default)' => [
'日本語',
'UTF-8',
'SJIS',
null,
false
],
'invalid test UTF-8 to SJIS (dots)' => [
'❶',
'UTF-8',
'SJIS',
0x2234,
['❶']
],
'invalid test UTF-8 to SJIS (none)' => [
'❶',
'UTF-8',
'SJIS',
'none',
['❶']
],
'invalid test UTF-8 to SJIS (long)' => [
'❶',
'UTF-8',
'SJIS',
'long',
['❶']
],
'invalid test UTF-8 to SJIS (entity)' => [
'❶',
'UTF-8',
'SJIS',
'entity',
['❶']
],
];
}
/**
* Undocumented function
*
* @covers ::checkConvertEncoding
* @dataProvider checkConvertEncodingProvider
* @testdox check encoding convert from $from_encoding to $to_encoding [$_dataName]
*
* @param string $input
* @param string $from_encoding
* @param string $to_encoding
* @param string|int|null $error_char
* @param array|bool $expected
* @return void
*/
public function testCheckConvertEncoding(
string $input,
string $from_encoding,
string $to_encoding,
$error_char,
$expected
): void {
if ($error_char !== null) {
\CoreLibs\Language\Encoding::setErrorChar($error_char);
if (!in_array($error_char, ['none', 'long', 'entity'])) {
$this->assertEquals(
\IntlChar::chr($error_char),
\CoreLibs\Language\Encoding::getErrorChar()
);
} else {
$this->assertEquals(
$error_char,
\CoreLibs\Language\Encoding::getErrorChar()
);
}
}
$return = \CoreLibs\Language\Encoding::checkConvertEncoding($input, $from_encoding, $to_encoding);
$this->assertEquals(
$expected,
$return
);
}
}
// __END__

View File

@@ -29,7 +29,9 @@ if (!defined('SET_SESSION_NAME')) {
$LOG_FILE_ID = 'classTest-encoding';
ob_end_flush();
use CoreLibs\Language\Encoding;
use CoreLibs\Convert\Encoding as ConEnc;
use CoreLibs\Check\Encoding as ChkEnc;
use CoreLibs\Convert\MimeEncode;
$log = new CoreLibs\Debug\Logging([
'log_folder' => BASE . LOG,
@@ -41,10 +43,12 @@ $log = new CoreLibs\Debug\Logging([
'echo_all' => $ECHO_ALL ?? false,
'print_all' => $PRINT_ALL ?? false,
]);
$_encoding = new CoreLibs\Language\Encoding();
$encoding_class = 'CoreLibs\Language\Encoding';
// class type
$_chk_enc = new CoreLibs\Check\Encoding();
$_con_enc = new CoreLibs\Convert\Encoding();
$chk_enc = 'CoreLibs\Check\Encoding';
print "<html><head><title>TEST CLASS: ENCODING</title><head>";
print "<html><head><title>TEST CLASS: ENCODING (CHECK/CONVERT/MIME)</title><head>";
print "<body>";
print '<div><a href="class_test.php">Class Test Master</a></div>';
@@ -58,7 +62,7 @@ $mime_encodes = [
['日本語ながい日本語ながい日本語ながい日本語ながい日本語ながい日本語ながい日本語ながい', 'ISO-2022-JP-MS'],
];
foreach ($mime_encodes as $mime_encode) {
print "__MBMIMEENCODE: $mime_encode[0]: " . Encoding::__mbMimeEncode($mime_encode[0], $mime_encode[1]) . "<br>";
print "__MBMIMEENCODE: $mime_encode[0]: " . MimeEncode::__mbMimeEncode($mime_encode[0], $mime_encode[1]) . "<br>";
}
$enc_strings = [
@@ -68,31 +72,33 @@ $enc_strings = [
''
];
// class
$_encoding->setErrorChar('∴');
$_chk_enc->setErrorChar(0x2234);
$_chk_enc->setErrorChar('∴');
print "ERROR CHAR: " . $_chk_enc->getErrorChar() . "<br>";
foreach ($enc_strings as $_string) {
$string = $_encoding->checkConvertEncoding($_string, 'UTF-8', 'ISO-2022-JP-MS');
$string = $_chk_enc->checkConvertEncoding($_string, 'UTF-8', 'ISO-2022-JP-MS');
print "ENC CHECK: $_string: " . ($string === false ? '-OK-' : $string) . "<br>";
print "CONV ENCODING: $_string: " . $_encoding->convertEncoding($_string, 'ISO-2022-JP') . "<br>";
print "CONV ENCODING (s): $_string: " . $_encoding->convertEncoding($_string, 'ISO-2022-JP', 'UTF-8') . "<br>";
print "CONV ENCODING: $_string: " . $_con_enc->convertEncoding($_string, 'ISO-2022-JP') . "<br>";
print "CONV ENCODING (s): $_string: " . $_con_enc->convertEncoding($_string, 'ISO-2022-JP', 'UTF-8') . "<br>";
print "CONV ENCODING (s,a-false): $_string: "
. $_encoding->convertEncoding($_string, 'ISO-2022-JP', 'UTF-8', false) . "<br>";
. $_con_enc->convertEncoding($_string, 'ISO-2022-JP', 'UTF-8', false) . "<br>";
}
print "ERROR CHAR: " . $_encoding->getErrorChar() . "<br>";
// static
$encoding_class::setErrorChar('∴');
// ChkEnc::setErrorChar('∴');
ChkEnc::setErrorChar(0x2234);
print "S::ERROR CHAR: " . ChkEnc::getErrorChar() . "<br>";
foreach ($enc_strings as $_string) {
$string = $encoding_class::checkConvertEncoding($_string, 'UTF-8', 'ISO-2022-JP-MS');
$string = ChkEnc::checkConvertEncoding($_string, 'UTF-8', 'ISO-2022-JP-MS');
print "S::ENC CHECK: $_string: " . ($string === false ? '-OK-' : $string) . "<br>";
print "S::CONV ENCODING: $_string: " . $encoding_class::convertEncoding($_string, 'ISO-2022-JP') . "<br>";
print "S::CONV ENCODING: $_string: " . ConEnc::convertEncoding($_string, 'ISO-2022-JP') . "<br>";
print "S::CONV ENCODING (s): $_string: "
. $encoding_class::convertEncoding($_string, 'ISO-2022-JP', 'UTF-8') . "<br>";
. ConEnc::convertEncoding($_string, 'ISO-2022-JP', 'UTF-8') . "<br>";
print "S::CONV ENCODING (s,a-false): $_string: "
. $encoding_class::convertEncoding($_string, 'ISO-2022-JP', 'UTF-8', false) . "<br>";
. ConEnc::convertEncoding($_string, 'ISO-2022-JP', 'UTF-8', false) . "<br>";
}
print "S::ERROR CHAR: " . $encoding_class::getErrorChar() . "<br>";
// static use
$_string = $enc_strings[1];
$string = Encoding::checkConvertEncoding($_string, 'UTF-8', 'ISO-2022-JP-MS');
$string = $chk_enc::checkConvertEncoding($_string, 'UTF-8', 'ISO-2022-JP-MS');
print "S::ENC CHECK: $_string: " . ($string === false ? '-OK-' : $string) . "<br>";
// error message

View File

@@ -60,7 +60,7 @@ print '<div><a href="class_test.email.php">Class Test: EMAIL</a></div>';
print '<div><a href="class_test.uids.php">Class Test: UIDS</a></div>';
print '<div><a href="class_test.phpv.php">Class Test: PHP VERSION</a></div>';
print '<div><a href="class_test.hash.php">Class Test: HASH</a></div>';
print '<div><a href="class_test.encoding.php">Class Test: ENCODING</a></div>';
print '<div><a href="class_test.encoding.php">Class Test: ENCODING (CHECK/CONVERT/MIME)</a></div>';
print '<div><a href="class_test.image.php">Class Test: IMAGE</a></div>';
print '<div><a href="class_test.byte.php">Class Test: BYTE CONVERT</a></div>';
print '<div><a href="class_test.datetime.php">Class Test: DATE/TIME</a></div>';

View File

@@ -0,0 +1,117 @@
<?php
/*
* check if string is valid in target encoding
*/
declare(strict_types=1);
namespace CoreLibs\Check;
class Encoding
{
/** @var int<min, -1>|int<1, max>|string */
private static $mb_error_char = '';
/**
* set error char
*
* @param string|int|null $string The character to use to represent
* error chars
* "long" for long, "none" for none
* or a valid code point in int
* like 0x2234 (8756, ∴)
* default character is ? (63)
* if null is set then "none"
* @return void
*/
public static function setErrorChar($string): void
{
if (empty($string)) {
$string = 'none';
}
// if not special string or char but code point
if (in_array($string, ['none', 'long', 'entity'])) {
self::$mb_error_char = $string;
} else {
// always convert to char for internal use
self::$mb_error_char = \IntlChar::chr($string);
// if string convert to code point
if (is_string($string)) {
$string = \IntlChar::ord($string);
}
}
mb_substitute_character($string);
}
/**
* get the current set error character
*
* @param bool $return_substitute_func if set to true return the set
* character from the php function
* directly
* @return string|int Set error character
*/
public static function getErrorChar(bool $return_substitute_func = false)
{
// return mb_substitute_character();
if ($return_substitute_func === true) {
return mb_substitute_character();
} else {
return self::$mb_error_char;
}
}
/**
* test if a string can be safely convert between encodings.
* mostly utf8 to shift jis
* the default compare has a possibility of failure, especially with windows
* it is recommended to the following in the script which uses this method:
* mb_substitute_character(0x2234);
* $class->mb_error_char = '∴';
* if check to Shift JIS
* if check to ISO-2022-JP
* if check to ISO-2022-JP-MS
* set three dots (∴) as wrong character for correct convert error detect
* (this char is used, because it is one of the least used ones)
*
* @param string $string string to test
* @param string $from_encoding encoding of string to test
* @param string $to_encoding target encoding
* @return bool|array<string> false if no error or
* array with failed characters
*/
public static function checkConvertEncoding(
string $string,
string $from_encoding,
string $to_encoding
) {
// convert to target encoding and convert back
$temp = mb_convert_encoding($string, $to_encoding, $from_encoding);
$compare = mb_convert_encoding($temp, $from_encoding, $to_encoding);
// if string does not match anymore we have a convert problem
if ($string != $compare) {
$failed = [];
// go through each character and find the ones that do not match
for ($i = 0, $iMax = mb_strlen($string, $from_encoding); $i < $iMax; $i++) {
$char = mb_substr($string, $i, 1, $from_encoding);
$r_char = mb_substr($compare, $i, 1, $from_encoding);
// the ord 194 is a hack to fix the IE7/IE8
// bug with line break and illegal character
if (
(($char != $r_char && (!self::$mb_error_char ||
in_array(self::$mb_error_char, ['none', 'long', 'entity']))) ||
($char != $r_char && $r_char == self::$mb_error_char && self::$mb_error_char)) &&
ord($char) != 194
) {
$failed[] = $char;
}
}
return $failed;
} else {
return false;
}
}
}
// __END__

View File

@@ -0,0 +1,60 @@
<?php
/*
* check if string is valid in target encoding
*/
declare(strict_types=1);
namespace CoreLibs\Convert;
class Encoding
{
/**
* detects the source encoding of the string and if doesn't match
* to the given target encoding it convert is
* if source encoding is set and auto check is true (default) a second
* check is done so that the source string encoding actually matches
* will be skipped if source encoding detection is ascii
*
* @param string $string string to convert
* @param string $to_encoding target encoding
* @param string $source_encoding optional source encoding, will try to auto detect
* @param bool $auto_check default true, if source encoding is set
* check that the source is actually matching
* to what we sav the source is
* @return string encoding converted string
*/
public static function convertEncoding(
string $string,
string $to_encoding,
string $source_encoding = '',
bool $auto_check = true
): string {
// set if not given
if (!$source_encoding) {
$source_encoding = mb_detect_encoding($string);
} else {
$_source_encoding = mb_detect_encoding($string);
}
if (
$auto_check === true &&
isset($_source_encoding) &&
$_source_encoding == $source_encoding
) {
// trigger check if we have override source encoding.
// if different (_source is all but not ascii) then trigger
// skip if matching
}
if ($source_encoding != $to_encoding) {
if ($source_encoding) {
$string = mb_convert_encoding($string, $to_encoding, $source_encoding);
} else {
$string = mb_convert_encoding($string, $to_encoding);
}
}
return $string;
}
}
// __END__

View File

@@ -0,0 +1,67 @@
<?php
/*
* alternate for header mime encode to void problems with long strings and
* spaces/strange encoding problems.
* Orignal issues during PHP5/7
*/
declare(strict_types=1);
namespace CoreLibs\Convert;
class MimeEncode
{
/**
* wrapper function for mb mime convert
* for correct conversion with long strings
*
* @param string $string string to encode
* @param string $encoding target encoding
* @param string $line_break default line break is \r\n
* @return string encoded string
*/
public static function __mbMimeEncode(
string $string,
string $encoding,
string $line_break = "\r\n"
): string {
// set internal encoding, so the mimeheader encode works correctly
mb_internal_encoding($encoding);
// if a subject, make a work around for the broken mb_mimencode
$pos = 0;
// after 36 single bytes characters,
// if then comes MB, it is broken
// has to 2 x 36 < 74 so the mb_encode_mimeheader
// 74 hardcoded split does not get triggered
$split = 36;
$_string = '';
while ($pos < mb_strlen($string, $encoding)) {
$output = mb_strimwidth($string, $pos, $split, "", $encoding);
$pos += mb_strlen($output, $encoding);
// if the strinlen is 0 here, get out of the loop
if (!mb_strlen($output, $encoding)) {
$pos += mb_strlen($string, $encoding);
}
$_string_encoded = mb_encode_mimeheader($output, $encoding);
// only make linebreaks if we have mime encoded code inside
// the space only belongs in the second line
if ($_string && preg_match("/^=\?/", $_string_encoded)) {
$_string .= $line_break . " ";
} elseif (
// hack for plain text with space at the end
mb_strlen($output, $encoding) == $split &&
mb_substr($output, -1, 1, $encoding) == " "
) {
// if output ends with space, add one more
$_string_encoded .= " ";
}
$_string .= $_string_encoded;
}
// strip out any spaces BEFORE a line break
$string = str_replace(" " . $line_break, $line_break, $_string);
return $string;
}
}
// __END__

View File

@@ -1,7 +1,12 @@
<?php
/*
* hash wrapper functions for old problem fixes
* deprecated function calls
* Language\Encoding::__mbMimeEncode -> Convert\MimeEncode::__mbMimeEncode
* Langauge\Encoding::checkConvertEncoding -> Check\Encoding::checkConvertEncoding
* Langauge\Encoding::setErrorChar -> Check\Encoding::setErrorChar
* Langauge\Encoding::getErrorChar -> Encoding::getErrorChar
* Langauge\Encoding::convertEncoding -> Convert\Encoding::convertEncoding
*/
declare(strict_types=1);
@@ -10,9 +15,6 @@ namespace CoreLibs\Language;
class Encoding
{
/** @var int<min, -1>|int<1, max>|string */
private static $mb_error_char = '';
/**
* wrapper function for mb mime convert
* for correct conversion with long strings
@@ -21,47 +23,14 @@ class Encoding
* @param string $encoding target encoding
* @param string $line_break default line break is \r\n
* @return string encoded string
* @deprecated Use \CoreLibs\Convert\MimeEncode::__mbMimeEncode();
*/
public static function __mbMimeEncode(
string $string,
string $encoding,
string $line_break = "\r\n"
): string {
// set internal encoding, so the mimeheader encode works correctly
mb_internal_encoding($encoding);
// if a subject, make a work around for the broken mb_mimencode
$pos = 0;
// after 36 single bytes characters,
// if then comes MB, it is broken
// has to 2 x 36 < 74 so the mb_encode_mimeheader
// 74 hardcoded split does not get triggered
$split = 36;
$_string = '';
while ($pos < mb_strlen($string, $encoding)) {
$output = mb_strimwidth($string, $pos, $split, "", $encoding);
$pos += mb_strlen($output, $encoding);
// if the strinlen is 0 here, get out of the loop
if (!mb_strlen($output, $encoding)) {
$pos += mb_strlen($string, $encoding);
}
$_string_encoded = mb_encode_mimeheader($output, $encoding);
// only make linebreaks if we have mime encoded code inside
// the space only belongs in the second line
if ($_string && preg_match("/^=\?/", $_string_encoded)) {
$_string .= $line_break . " ";
} elseif (
// hack for plain text with space at the end
mb_strlen($output, $encoding) == $split &&
mb_substr($output, -1, 1, $encoding) == " "
) {
// if output ends with space, add one more
$_string_encoded .= " ";
}
$_string .= $_string_encoded;
}
// strip out any spaces BEFORE a line break
$string = str_replace(" " . $line_break, $line_break, $_string);
return $string;
return \CoreLibs\Convert\MimeEncode::__mbMimeEncode($string, $encoding, $line_break);
}
/**
@@ -75,18 +44,11 @@ class Encoding
* default character is ? (63)
* if null is set then "none"
* @return void
* @deprecated Use \CoreLibs\Check\Encoding::setErrorChar();
*/
public static function setErrorChar($string): void
{
if (empty($string)) {
$string = 'none';
}
if (!in_array($string, ['none', 'long', 'entity'])) {
self::$mb_error_char = \IntlChar::chr($string);
} else {
self::$mb_error_char = $string;
}
mb_substitute_character($string);
\CoreLibs\Check\Encoding::setErrorChar($string);
}
/**
@@ -96,15 +58,11 @@ class Encoding
* character from the php function
* directly
* @return string|int Set error character
* @deprecated Use \CoreLibs\Check\Encoding::getErrorChar();
*/
public static function getErrorChar(bool $return_substitute_func = false)
{
// return mb_substitute_character();
if ($return_substitute_func === true) {
return mb_substitute_character();
} else {
return self::$mb_error_char;
}
return \CoreLibs\Check\Encoding::getErrorChar($return_substitute_func);
}
/**
@@ -123,38 +81,16 @@ class Encoding
* @param string $string string to test
* @param string $from_encoding encoding of string to test
* @param string $to_encoding target encoding
* @return bool|array<string> false if no error or array with failed characters
* @return bool|array<string> false if no error or
* array with failed characters
* @deprecated Use \CoreLibs\Check\Encoding::checkConvertEncoding();
*/
public static function checkConvertEncoding(
string $string,
string $from_encoding,
string $to_encoding
) {
// convert to target encoding and convert back
$temp = mb_convert_encoding($string, $to_encoding, $from_encoding);
$compare = mb_convert_encoding($temp, $from_encoding, $to_encoding);
// if string does not match anymore we have a convert problem
if ($string != $compare) {
$failed = [];
// go through each character and find the ones that do not match
for ($i = 0, $iMax = mb_strlen($string, $from_encoding); $i < $iMax; $i++) {
$char = mb_substr($string, $i, 1, $from_encoding);
$r_char = mb_substr($compare, $i, 1, $from_encoding);
// the ord 194 is a hack to fix the IE7/IE8
// bug with line break and illegal character
if (
(($char != $r_char && (!self::$mb_error_char ||
in_array(self::$mb_error_char, ['none', 'long', 'entity']))) ||
($char != $r_char && $r_char == self::$mb_error_char && self::$mb_error_char)) &&
ord($char) != 194
) {
$failed[] = $char;
}
}
return $failed;
} else {
return false;
}
return \CoreLibs\Check\Encoding::checkConvertEncoding($string, $from_encoding, $to_encoding);
}
/**
@@ -171,6 +107,7 @@ class Encoding
* check that the source is actually matching
* to what we sav the source is
* @return string encoding converted string
* @deprecated Use \CoreLibs\Convert\Encoding::convertEncoding();
*/
public static function convertEncoding(
string $string,
@@ -178,29 +115,12 @@ class Encoding
string $source_encoding = '',
bool $auto_check = true
): string {
// set if not given
if (!$source_encoding) {
$source_encoding = mb_detect_encoding($string);
} else {
$_source_encoding = mb_detect_encoding($string);
}
if (
$auto_check === true &&
isset($_source_encoding) &&
$_source_encoding == $source_encoding
) {
// trigger check if we have override source encoding.
// if different (_source is all but not ascii) then trigger
// skip if matching
}
if ($source_encoding != $to_encoding) {
if ($source_encoding) {
$string = mb_convert_encoding($string, $to_encoding, $source_encoding);
} else {
$string = mb_convert_encoding($string, $to_encoding);
}
}
return $string;
return \CoreLibs\Convert\Encoding::convertEncoding(
$string,
$to_encoding,
$source_encoding,
$auto_check
);
}
}

View File

@@ -12,6 +12,7 @@ return array(
'CoreLibs\\Admin\\Backend' => $baseDir . '/lib/CoreLibs/Admin/Backend.php',
'CoreLibs\\Basic' => $baseDir . '/lib/CoreLibs/Basic.php',
'CoreLibs\\Check\\Email' => $baseDir . '/lib/CoreLibs/Check/Email.php',
'CoreLibs\\Check\\Encoding' => $baseDir . '/lib/CoreLibs/Check/Encoding.php',
'CoreLibs\\Check\\File' => $baseDir . '/lib/CoreLibs/Check/File.php',
'CoreLibs\\Check\\Jason' => $baseDir . '/lib/CoreLibs/Check/Jason.php',
'CoreLibs\\Check\\Password' => $baseDir . '/lib/CoreLibs/Check/Password.php',
@@ -20,10 +21,12 @@ return array(
'CoreLibs\\Combined\\DateTime' => $baseDir . '/lib/CoreLibs/Combined/DateTime.php',
'CoreLibs\\Convert\\Byte' => $baseDir . '/lib/CoreLibs/Convert/Byte.php',
'CoreLibs\\Convert\\Colors' => $baseDir . '/lib/CoreLibs/Convert/Colors.php',
'CoreLibs\\Convert\\Encoding' => $baseDir . '/lib/CoreLibs/Convert/Encoding.php',
'CoreLibs\\Convert\\Html' => $baseDir . '/lib/CoreLibs/Convert/Html.php',
'CoreLibs\\Convert\\Json' => $baseDir . '/lib/CoreLibs/Convert/Json.php',
'CoreLibs\\Convert\\Math' => $baseDir . '/lib/CoreLibs/Convert/Math.php',
'CoreLibs\\Convert\\MimeAppName' => $baseDir . '/lib/CoreLibs/Convert/MimeAppName.php',
'CoreLibs\\Convert\\MimeEncode' => $baseDir . '/lib/CoreLibs/Convert/MimeEncode.php',
'CoreLibs\\Create\\Hash' => $baseDir . '/lib/CoreLibs/Create/Hash.php',
'CoreLibs\\Create\\RandomKey' => $baseDir . '/lib/CoreLibs/Create/RandomKey.php',
'CoreLibs\\Create\\Session' => $baseDir . '/lib/CoreLibs/Create/Session.php',

View File

@@ -77,6 +77,7 @@ class ComposerStaticInit10fe8fe2ec4017b8644d2b64bcf398b9
'CoreLibs\\Admin\\Backend' => __DIR__ . '/../..' . '/lib/CoreLibs/Admin/Backend.php',
'CoreLibs\\Basic' => __DIR__ . '/../..' . '/lib/CoreLibs/Basic.php',
'CoreLibs\\Check\\Email' => __DIR__ . '/../..' . '/lib/CoreLibs/Check/Email.php',
'CoreLibs\\Check\\Encoding' => __DIR__ . '/../..' . '/lib/CoreLibs/Check/Encoding.php',
'CoreLibs\\Check\\File' => __DIR__ . '/../..' . '/lib/CoreLibs/Check/File.php',
'CoreLibs\\Check\\Jason' => __DIR__ . '/../..' . '/lib/CoreLibs/Check/Jason.php',
'CoreLibs\\Check\\Password' => __DIR__ . '/../..' . '/lib/CoreLibs/Check/Password.php',
@@ -85,10 +86,12 @@ class ComposerStaticInit10fe8fe2ec4017b8644d2b64bcf398b9
'CoreLibs\\Combined\\DateTime' => __DIR__ . '/../..' . '/lib/CoreLibs/Combined/DateTime.php',
'CoreLibs\\Convert\\Byte' => __DIR__ . '/../..' . '/lib/CoreLibs/Convert/Byte.php',
'CoreLibs\\Convert\\Colors' => __DIR__ . '/../..' . '/lib/CoreLibs/Convert/Colors.php',
'CoreLibs\\Convert\\Encoding' => __DIR__ . '/../..' . '/lib/CoreLibs/Convert/Encoding.php',
'CoreLibs\\Convert\\Html' => __DIR__ . '/../..' . '/lib/CoreLibs/Convert/Html.php',
'CoreLibs\\Convert\\Json' => __DIR__ . '/../..' . '/lib/CoreLibs/Convert/Json.php',
'CoreLibs\\Convert\\Math' => __DIR__ . '/../..' . '/lib/CoreLibs/Convert/Math.php',
'CoreLibs\\Convert\\MimeAppName' => __DIR__ . '/../..' . '/lib/CoreLibs/Convert/MimeAppName.php',
'CoreLibs\\Convert\\MimeEncode' => __DIR__ . '/../..' . '/lib/CoreLibs/Convert/MimeEncode.php',
'CoreLibs\\Create\\Hash' => __DIR__ . '/../..' . '/lib/CoreLibs/Create/Hash.php',
'CoreLibs\\Create\\RandomKey' => __DIR__ . '/../..' . '/lib/CoreLibs/Create/RandomKey.php',
'CoreLibs\\Create\\Session' => __DIR__ . '/../..' . '/lib/CoreLibs/Create/Session.php',