From 8b15a0f9ff70f71bc69f6c09fc21ef45a86a27cf Mon Sep 17 00:00:00 2001 From: Hannes Papenberg Date: Wed, 4 Jun 2025 19:31:21 +0200 Subject: [PATCH 01/18] Replacing old phputf8 with symfony mbstring polyfill (#55) * Replacing old phputf8 with symfony mbstring polyfill * trigger notice for null value for strspn() --- composer.json | 25 +- src/StringHelper.php | 289 ++++++++++++++---- src/phputf8/LICENSE | 504 ------------------------------- src/phputf8/README | 82 ----- src/phputf8/mbstring/core.php | 139 --------- src/phputf8/native/core.php | 422 -------------------------- src/phputf8/ord.php | 94 ------ src/phputf8/str_ireplace.php | 62 ---- src/phputf8/str_pad.php | 54 ---- src/phputf8/str_split.php | 32 -- src/phputf8/strcasecmp.php | 24 -- src/phputf8/strcspn.php | 37 --- src/phputf8/stristr.php | 36 --- src/phputf8/strrev.php | 20 -- src/phputf8/strspn.php | 37 --- src/phputf8/substr_replace.php | 24 -- src/phputf8/trim.php | 73 ----- src/phputf8/ucfirst.php | 32 -- src/phputf8/ucwords.php | 42 --- src/phputf8/utf8.php | 82 ----- src/phputf8/utils/ascii.php | 230 -------------- src/phputf8/utils/bad.php | 400 ------------------------ src/phputf8/utils/patterns.php | 65 ---- src/phputf8/utils/position.php | 187 ------------ src/phputf8/utils/specials.php | 128 -------- src/phputf8/utils/unicode.php | 251 --------------- src/phputf8/utils/validation.php | 174 ----------- 27 files changed, 242 insertions(+), 3303 deletions(-) delete mode 100644 src/phputf8/LICENSE delete mode 100644 src/phputf8/README delete mode 100644 src/phputf8/mbstring/core.php delete mode 100644 src/phputf8/native/core.php delete mode 100644 src/phputf8/ord.php delete mode 100644 src/phputf8/str_ireplace.php delete mode 100644 src/phputf8/str_pad.php delete mode 100644 src/phputf8/str_split.php delete mode 100644 src/phputf8/strcasecmp.php delete mode 100644 src/phputf8/strcspn.php delete mode 100644 src/phputf8/stristr.php delete mode 100644 src/phputf8/strrev.php delete mode 100644 
src/phputf8/strspn.php delete mode 100644 src/phputf8/substr_replace.php delete mode 100644 src/phputf8/trim.php delete mode 100644 src/phputf8/ucfirst.php delete mode 100644 src/phputf8/ucwords.php delete mode 100644 src/phputf8/utf8.php delete mode 100644 src/phputf8/utils/ascii.php delete mode 100644 src/phputf8/utils/bad.php delete mode 100644 src/phputf8/utils/patterns.php delete mode 100644 src/phputf8/utils/position.php delete mode 100644 src/phputf8/utils/specials.php delete mode 100644 src/phputf8/utils/unicode.php delete mode 100644 src/phputf8/utils/validation.php diff --git a/composer.json b/composer.json index 3301d96e..b3093a4d 100644 --- a/composer.json +++ b/composer.json @@ -7,7 +7,8 @@ "license": "GPL-2.0-or-later", "require": { "php": "^8.1.0", - "symfony/deprecation-contracts": "^2|^3" + "symfony/deprecation-contracts": "^2|^3", + "symfony/polyfill-mbstring": "^1.31.0" }, "require-dev": { "doctrine/inflector": "^1.2", @@ -28,24 +29,7 @@ "autoload": { "psr-4": { "Joomla\\String\\": "src/" - }, - "files": [ - "src/phputf8/utf8.php", - "src/phputf8/ord.php", - "src/phputf8/str_ireplace.php", - "src/phputf8/str_pad.php", - "src/phputf8/str_split.php", - "src/phputf8/strcasecmp.php", - "src/phputf8/strcspn.php", - "src/phputf8/stristr.php", - "src/phputf8/strrev.php", - "src/phputf8/strspn.php", - "src/phputf8/trim.php", - "src/phputf8/ucfirst.php", - "src/phputf8/ucwords.php", - "src/phputf8/utils/ascii.php", - "src/phputf8/utils/validation.php" - ] + } }, "autoload-dev": { "psr-4": { @@ -56,7 +40,8 @@ "extra": { "branch-alias": { "dev-2.0-dev": "2.0-dev", - "dev-3.x-dev": "3.0-dev" + "dev-3.x-dev": "3.x-dev", + "dev-4.x-dev": "4.x-dev" } } } diff --git a/src/StringHelper.php b/src/StringHelper.php index 30b4727e..27f3caf2 100644 --- a/src/StringHelper.php +++ b/src/StringHelper.php @@ -110,7 +110,8 @@ public static function increment($string, $style = 'default', $n = 0) */ public static function is_ascii($str) { - return utf8_is_ascii($str); + // 
Search for any bytes which are outside the ASCII range... + return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1); } /** @@ -127,7 +128,7 @@ public static function is_ascii($str) */ public static function ord($chr) { - return utf8_ord($chr); + return mb_ord($chr); } /** @@ -147,10 +148,10 @@ public static function ord($chr) public static function strpos($str, $search, $offset = false) { if ($offset === false) { - return utf8_strpos($str, $search); + return mb_strpos($str, $search); } - return utf8_strpos($str, $search, $offset); + return mb_strpos($str, $search, $offset); } /** @@ -169,7 +170,7 @@ public static function strpos($str, $search, $offset = false) */ public static function strrpos($str, $search, $offset = 0) { - return utf8_strrpos($str, $search, $offset); + return mb_strrpos($str, $search, $offset); } /** @@ -189,10 +190,10 @@ public static function strrpos($str, $search, $offset = 0) public static function substr($str, $offset, $length = false) { if ($length === false) { - return utf8_substr($str, $offset); + return mb_substr($str, $offset); } - return utf8_substr($str, $offset, $length); + return mb_substr($str, $offset, $length); } /** @@ -212,7 +213,7 @@ public static function substr($str, $offset, $length = false) */ public static function strtolower($str) { - return utf8_strtolower($str); + return mb_strtolower($str); } /** @@ -232,7 +233,7 @@ public static function strtolower($str) */ public static function strtoupper($str) { - return utf8_strtoupper($str); + return mb_strtoupper($str); } /** @@ -249,7 +250,7 @@ public static function strtoupper($str) */ public static function strlen($str) { - return utf8_strlen($str); + return mb_strlen($str); } /** @@ -269,11 +270,45 @@ public static function strlen($str) */ public static function str_ireplace($search, $replace, $str, $count = null) { - if ($count === false) { - return utf8_ireplace($search, $replace, $str); - } + if (!is_array($search)) { + $slen = strlen($search); + if ($slen == 0) { + return 
$str; + } - return utf8_ireplace($search, $replace, $str, $count); + $lendif = strlen($replace) - strlen($search); + $search = mb_strtolower($search); + + $search = preg_quote($search, '/'); + $lstr = mb_strtolower($str); + $i = 0; + $matched = 0; + while (preg_match('/(.*)' . $search . '/Us', $lstr, $matches)) { + if ($i === $count) { + break; + } + $mlen = strlen($matches[0]); + $lstr = substr($lstr, $mlen); + $str = substr_replace($str, $replace, $matched + strlen($matches[1]), $slen); + $matched += $mlen + $lendif; + $i++; + } + return $str; + } else { + foreach (array_keys($search) as $k) { + if (is_array($replace)) { + if (array_key_exists($k, $replace)) { + $str = self::str_ireplace($search[$k], $replace[$k], $str, $count); + } else { + $str = self::str_ireplace($search[$k], '', $str, $count); + } + } else { + $str = self::str_ireplace($search[$k], $replace, $str, $count); + } + } + + return $str; + } } /** @@ -294,7 +329,7 @@ public static function str_ireplace($search, $replace, $str, $count = null) */ public static function str_pad($input, $length, $padStr = ' ', $type = STR_PAD_RIGHT) { - return utf8_str_pad($input, $length, $padStr, $type); + return mb_str_pad($input, $length, $padStr, $type); } /** @@ -312,7 +347,7 @@ public static function str_pad($input, $length, $padStr = ' ', $type = STR_PAD_R */ public static function str_split($str, $splitLen = 1) { - return utf8_str_split($str, $splitLen); + return mb_str_split($str, $splitLen); } /** @@ -334,7 +369,9 @@ public static function str_split($str, $splitLen = 1) public static function strcasecmp($str1, $str2, $locale = false) { if ($locale === false) { - return utf8_strcasecmp($str1, $str2); + $strX = mb_strtolower($str1); + $strY = mb_strtolower($str2); + return strcmp($strX, $strY); } // Get current locale @@ -355,12 +392,12 @@ public static function strcasecmp($str1, $str2, $locale = false) // If we successfully set encoding it to utf-8 or encoding is sth weird don't recode if ($encoding == 
'UTF-8' || $encoding == 'nonrecodable') { - return strcoll(utf8_strtolower($str1), utf8_strtolower($str2)); + return strcoll(mb_strtolower($str1), mb_strtolower($str2)); } return strcoll( - static::transcode(utf8_strtolower($str1), 'UTF-8', $encoding), - static::transcode(utf8_strtolower($str2), 'UTF-8', $encoding) + static::transcode(mb_strtolower($str1), 'UTF-8', $encoding), + static::transcode(mb_strtolower($str2), 'UTF-8', $encoding) ); } @@ -427,15 +464,23 @@ public static function strcmp($str1, $str2, $locale = false) */ public static function strcspn($str, $mask, $start = null, $length = null) { - if ($start === false && $length === false) { - return utf8_strcspn($str, $mask); + if (empty($mask) || strlen($mask) == 0) { + return 0; } - if ($length === false) { - return utf8_strcspn($str, $mask, $start); + $mask = preg_replace('!([\\\\\\-\\]\\[/^])!', '\\\${1}', $mask); + + if ($start != null || $length != null) { + $str = mb_substr($str, $start, $length); } - return utf8_strcspn($str, $mask, $start, $length); + preg_match('/^[^' . $mask . ']+/u', $str, $matches); + + if (isset($matches[0])) { + return mb_strlen($matches[0]); + } + + return 0; } /** @@ -454,7 +499,7 @@ public static function strcspn($str, $mask, $start = null, $length = null) */ public static function stristr($str, $search) { - return utf8_stristr($str, $search); + return mb_stristr($str, $search); } /** @@ -471,7 +516,8 @@ public static function stristr($str, $search) */ public static function strrev($str) { - return utf8_strrev($str); + preg_match_all('/./us', $str, $ar); + return join('', array_reverse($ar[0])); } /** @@ -479,10 +525,10 @@ public static function strrev($str) * * Find length of initial segment matching mask. 
* - * @param string $str The haystack - * @param string $mask The mask - * @param integer|null $start Start optional - * @param integer|null $length Length optional + * @param string $str The haystack + * @param string $mask The mask + * @param ?integer $start Start optional + * @param ?integer $length Length optional * * @return integer * @@ -491,15 +537,24 @@ public static function strrev($str) */ public static function strspn($str, $mask, $start = null, $length = null) { - if ($start === null && $length === null) { - return utf8_strspn($str, $mask); + $mask = preg_replace('!([\\\\\\-\\]\\[/^])!', '\\\${1}', $mask); + + if (is_int($start) && is_int($length)) { + $str = mb_substr($str, $start, $length); + } elseif (is_int($start) && !is_int($length)) { + $str = mb_substr($str, $start); + } elseif (!is_int($start) && is_int($length)) { + trigger_error('\Joomla\String\StringHelper::strspn(): Passing null to parameter #3 ($start) of type int is deprecated', E_USER_DEPRECATED); + $str = mb_substr($str, 0, $length); } - if ($length === null) { - return utf8_strspn($str, $mask, $start); + preg_match('/^[' . $mask . 
']+/u', $str, $matches); + + if (isset($matches[0])) { + return mb_strlen($matches[0]); } - return utf8_strspn($str, $mask, $start, $length); + return 0; } /** @@ -519,12 +574,13 @@ public static function strspn($str, $mask, $start = null, $length = null) */ public static function substr_replace($str, $repl, $start, $length = null) { - // Loaded by library loader - if ($length === false) { - return utf8_substr_replace($str, $repl, $start); + preg_match_all('/./us', $str, $ar); + preg_match_all('/./us', $repl, $rar); + if ($length === null || $length === false) { + $length = mb_strlen($str); } - - return utf8_substr_replace($str, $repl, $start, $length); + array_splice($ar[0], $start, $length, $rar[0]); + return join('', $ar[0]); } /** @@ -548,10 +604,10 @@ public static function ltrim($str, $charlist = false) } if ($charlist === false) { - return utf8_ltrim($str); + return mb_ltrim($str); } - return utf8_ltrim($str, $charlist); + return mb_ltrim($str, $charlist); } /** @@ -575,10 +631,10 @@ public static function rtrim($str, $charlist = false) } if ($charlist === false) { - return utf8_rtrim($str); + return mb_rtrim($str); } - return utf8_rtrim($str, $charlist); + return mb_rtrim($str, $charlist); } /** @@ -602,10 +658,10 @@ public static function trim($str, $charlist = false) } if ($charlist === false) { - return utf8_trim($str); + return mb_trim($str); } - return utf8_trim($str, $charlist); + return mb_trim($str, $charlist); } /** @@ -627,14 +683,14 @@ public static function trim($str, $charlist = false) public static function ucfirst($str, $delimiter = null, $newDelimiter = null) { if ($delimiter === null) { - return utf8_ucfirst($str); + return mb_ucfirst($str); } if ($newDelimiter === null) { $newDelimiter = $delimiter; } - return implode($newDelimiter, array_map('utf8_ucfirst', explode($delimiter, $str))); + return implode($newDelimiter, array_map('mb_ucfirst', explode($delimiter, $str))); } /** @@ -651,7 +707,17 @@ public static function ucfirst($str, 
$delimiter = null, $newDelimiter = null) */ public static function ucwords($str) { - return utf8_ucwords($str); + // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches; + // form feeds, horizontal tabs, vertical tabs, linefeeds and carriage returns + // This corresponds to the definition of a "word" defined at http://www.php.net/ucwords + $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u'; + + return preg_replace_callback($pattern, function ($matches) { + $leadingws = $matches[2]; + $ucfirst = mb_strtoupper($matches[3]); + $ucword = StringHelper::substr_replace(ltrim($matches[0]), $ucfirst, 0, 1); + return $leadingws . $ucword; + }, $str); } /** @@ -695,7 +761,119 @@ public static function transcode($source, $fromEncoding, $toEncoding) */ public static function valid($str) { - return utf8_is_valid($str); + $mState = 0; // cached expected number of octets after the current octet + // until the beginning of the next UTF8 character sequence + $mUcs4 = 0; // cached Unicode character + $mBytes = 1; // cached expected number of octets in the current sequence + + $len = strlen($str); + + for ($i = 0; $i < $len; $i++) { + /* + * Joomla modification - As of PHP 7.4, curly brace access has been deprecated. As a result the line below has + * been modified to use square brace syntax + * See https://github.com/php/php-src/commit/d574df63dc375f5fc9202ce5afde23f866b6450a + * for additional references + */ + $in = ord($str[$i]); + + if ($mState == 0) { + // When mState is zero we expect either a US-ASCII character or a + // multi-octet sequence. + if (0 == (0x80 & ($in))) { + // US-ASCII, pass straight through. 
+ $mBytes = 1; + } elseif (0xC0 == (0xE0 & ($in))) { + // First octet of 2 octet sequence + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x1F) << 6; + $mState = 1; + $mBytes = 2; + } elseif (0xE0 == (0xF0 & ($in))) { + // First octet of 3 octet sequence + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x0F) << 12; + $mState = 2; + $mBytes = 3; + } elseif (0xF0 == (0xF8 & ($in))) { + // First octet of 4 octet sequence + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x07) << 18; + $mState = 3; + $mBytes = 4; + } elseif (0xF8 == (0xFC & ($in))) { + /* First octet of 5 octet sequence. + * + * This is illegal because the encoded codepoint must be either + * (a) not the shortest form or + * (b) outside the Unicode range of 0-0x10FFFF. + * Rather than trying to resynchronize, we will carry on until the end + * of the sequence and let the later error handling code catch it. + */ + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x03) << 24; + $mState = 4; + $mBytes = 5; + } elseif (0xFC == (0xFE & ($in))) { + // First octet of 6 octet sequence, see comments for 5 octet sequence. + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 1) << 30; + $mState = 5; + $mBytes = 6; + } else { + /* Current octet is neither in the US-ASCII range nor a legal first + * octet of a multi-octet sequence. + */ + return false; + } + } else { + // When mState is non-zero, we expect a continuation of the multi-octet + // sequence + if (0x80 == (0xC0 & ($in))) { + // Legal continuation. + $shift = ($mState - 1) * 6; + $tmp = $in; + $tmp = ($tmp & 0x0000003F) << $shift; + $mUcs4 |= $tmp; + + /** + * End of the multi-octet sequence. mUcs4 now contains the final + * Unicode codepoint to be output + */ + if (0 == --$mState) { + /* + * Check for illegal sequences and codepoints. 
+ */ + // From Unicode 3.1, non-shortest form is illegal + if ( + ((2 == $mBytes) && ($mUcs4 < 0x0080)) || + ((3 == $mBytes) && ($mUcs4 < 0x0800)) || + ((4 == $mBytes) && ($mUcs4 < 0x10000)) || + (4 < $mBytes) || + // From Unicode 3.2, surrogate characters are illegal + (($mUcs4 & 0xFFFFF800) == 0xD800) || + // Codepoints outside the Unicode range are illegal + ($mUcs4 > 0x10FFFF) + ) { + return false; + } + + //initialize UTF8 cache + $mState = 0; + $mUcs4 = 0; + $mBytes = 1; + } + } else { + /** + *((0xC0 & (*in) != 0x80) && (mState != 0)) + * Incomplete multi-octet sequence. + */ + + return false; + } + } + } + return true; } /** @@ -716,7 +894,14 @@ public static function valid($str) */ public static function compliant($str) { - return utf8_compliant($str); + if (strlen($str) == 0) { + return true; + } + // If even just the first character can be matched, when the /u + // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow + // invalid, nothing at all will match, even if the string contains + // some valid sequences + return (preg_match('/^.{1}/us', $str, $ar) == 1); } /** diff --git a/src/phputf8/LICENSE b/src/phputf8/LICENSE deleted file mode 100644 index 28f18896..00000000 --- a/src/phputf8/LICENSE +++ /dev/null @@ -1,504 +0,0 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1, February 1999 - - Copyright (C) 1991, 1999 Free Software Foundation, Inc. - 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - -[This is the first released version of the Lesser GPL. It also counts - as the successor of the GNU Library Public License, version 2, hence - the version number 2.1.] - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. 
By contrast, the GNU General Public -Licenses are intended to guarantee your freedom to share and change -free software--to make sure the software is free for all its users. - - This license, the Lesser General Public License, applies to some -specially designated software packages--typically libraries--of the -Free Software Foundation and other authors who decide to use it. You -can use it too, but we suggest you first think carefully about whether -this license or the ordinary General Public License is the better -strategy to use in any particular case, based on the explanations below. - - When we speak of free software, we are referring to freedom of use, -not price. Our General Public Licenses are designed to make sure that -you have the freedom to distribute copies of free software (and charge -for this service if you wish); that you receive source code or can get -it if you want it; that you can change the software and use pieces of -it in new free programs; and that you are informed that you can do -these things. - - To protect your rights, we need to make restrictions that forbid -distributors to deny you these rights or to ask you to surrender these -rights. These restrictions translate to certain responsibilities for -you if you distribute copies of the library or if you modify it. - - For example, if you distribute copies of the library, whether gratis -or for a fee, you must give the recipients all the rights that we gave -you. You must make sure that they, too, receive or can get the source -code. If you link other code with the library, you must provide -complete object files to the recipients, so that they can relink them -with the library after making changes to the library and recompiling -it. And you must show them these terms so they know their rights. - - We protect your rights with a two-step method: (1) we copyright the -library, and (2) we offer you this license, which gives you legal -permission to copy, distribute and/or modify the library. 
- - To protect each distributor, we want to make it very clear that -there is no warranty for the free library. Also, if the library is -modified by someone else and passed on, the recipients should know -that what they have is not the original version, so that the original -author's reputation will not be affected by problems that might be -introduced by others. - - Finally, software patents pose a constant threat to the existence of -any free program. We wish to make sure that a company cannot -effectively restrict the users of a free program by obtaining a -restrictive license from a patent holder. Therefore, we insist that -any patent license obtained for a version of the library must be -consistent with the full freedom of use specified in this license. - - Most GNU software, including some libraries, is covered by the -ordinary GNU General Public License. This license, the GNU Lesser -General Public License, applies to certain designated libraries, and -is quite different from the ordinary General Public License. We use -this license for certain libraries in order to permit linking those -libraries into non-free programs. - - When a program is linked with a library, whether statically or using -a shared library, the combination of the two is legally speaking a -combined work, a derivative of the original library. The ordinary -General Public License therefore permits such linking only if the -entire combination fits its criteria of freedom. The Lesser General -Public License permits more lax criteria for linking other code with -the library. - - We call this license the "Lesser" General Public License because it -does Less to protect the user's freedom than the ordinary General -Public License. It also provides other free software developers Less -of an advantage over competing non-free programs. These disadvantages -are the reason we use the ordinary General Public License for many -libraries. 
However, the Lesser license provides advantages in certain -special circumstances. - - For example, on rare occasions, there may be a special need to -encourage the widest possible use of a certain library, so that it becomes -a de-facto standard. To achieve this, non-free programs must be -allowed to use the library. A more frequent case is that a free -library does the same job as widely used non-free libraries. In this -case, there is little to gain by limiting the free library to free -software only, so we use the Lesser General Public License. - - In other cases, permission to use a particular library in non-free -programs enables a greater number of people to use a large body of -free software. For example, permission to use the GNU C Library in -non-free programs enables many more people to use the whole GNU -operating system, as well as its variant, the GNU/Linux operating -system. - - Although the Lesser General Public License is Less protective of the -users' freedom, it does ensure that the user of a program that is -linked with the Library has the freedom and the wherewithal to run -that program using a modified version of the Library. - - The precise terms and conditions for copying, distribution and -modification follow. Pay close attention to the difference between a -"work based on the library" and a "work that uses the library". The -former contains code derived from the library, whereas the latter must -be combined with the library in order to run. - - GNU LESSER GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License Agreement applies to any software library or other -program which contains a notice placed by the copyright holder or -other authorized party saying it may be distributed under the terms of -this Lesser General Public License (also called "this License"). -Each licensee is addressed as "you". 
- - A "library" means a collection of software functions and/or data -prepared so as to be conveniently linked with application programs -(which use some of those functions and data) to form executables. - - The "Library", below, refers to any such software library or work -which has been distributed under these terms. A "work based on the -Library" means either the Library or any derivative work under -copyright law: that is to say, a work containing the Library or a -portion of it, either verbatim or with modifications and/or translated -straightforwardly into another language. (Hereinafter, translation is -included without limitation in the term "modification".) - - "Source code" for a work means the preferred form of the work for -making modifications to it. For a library, complete source code means -all the source code for all modules it contains, plus any associated -interface definition files, plus the scripts used to control compilation -and installation of the library. - - Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running a program using the Library is not restricted, and output from -such a program is covered only if its contents constitute a work based -on the Library (independent of the use of the Library in a tool for -writing it). Whether that is true depends on what the Library does -and what the program that uses the Library does. - - 1. You may copy and distribute verbatim copies of the Library's -complete source code as you receive it, in any medium, provided that -you conspicuously and appropriately publish on each copy an -appropriate copyright notice and disclaimer of warranty; keep intact -all the notices that refer to this License and to the absence of any -warranty; and distribute a copy of this License along with the -Library. 
- - You may charge a fee for the physical act of transferring a copy, -and you may at your option offer warranty protection in exchange for a -fee. - - 2. You may modify your copy or copies of the Library or any portion -of it, thus forming a work based on the Library, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) The modified work must itself be a software library. - - b) You must cause the files modified to carry prominent notices - stating that you changed the files and the date of any change. - - c) You must cause the whole of the work to be licensed at no - charge to all third parties under the terms of this License. - - d) If a facility in the modified Library refers to a function or a - table of data to be supplied by an application program that uses - the facility, other than as an argument passed when the facility - is invoked, then you must make a good faith effort to ensure that, - in the event an application does not supply such function or - table, the facility still operates, and performs whatever part of - its purpose remains meaningful. - - (For example, a function in a library to compute square roots has - a purpose that is entirely well-defined independent of the - application. Therefore, Subsection 2d requires that any - application-supplied function or table used by this function must - be optional: if the application does not supply it, the square - root function must still compute square roots.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Library, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. 
But when you -distribute the same sections as part of a whole which is a work based -on the Library, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote -it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Library. - -In addition, mere aggregation of another work not based on the Library -with the Library (or with a work based on the Library) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may opt to apply the terms of the ordinary GNU General Public -License instead of this License to a given copy of the Library. To do -this, you must alter all the notices that refer to this License, so -that they refer to the ordinary GNU General Public License, version 2, -instead of to this License. (If a newer version than version 2 of the -ordinary GNU General Public License has appeared, then you can specify -that version instead if you wish.) Do not make any other change in -these notices. - - Once this change is made in a given copy, it is irreversible for -that copy, so the ordinary GNU General Public License applies to all -subsequent copies and derivative works made from that copy. - - This option is useful when you wish to copy part of the code of -the Library into a program that is not a library. - - 4. 
You may copy and distribute the Library (or a portion or -derivative of it, under Section 2) in object code or executable form -under the terms of Sections 1 and 2 above provided that you accompany -it with the complete corresponding machine-readable source code, which -must be distributed under the terms of Sections 1 and 2 above on a -medium customarily used for software interchange. - - If distribution of object code is made by offering access to copy -from a designated place, then offering equivalent access to copy the -source code from the same place satisfies the requirement to -distribute the source code, even though third parties are not -compelled to copy the source along with the object code. - - 5. A program that contains no derivative of any portion of the -Library, but is designed to work with the Library by being compiled or -linked with it, is called a "work that uses the Library". Such a -work, in isolation, is not a derivative work of the Library, and -therefore falls outside the scope of this License. - - However, linking a "work that uses the Library" with the Library -creates an executable that is a derivative of the Library (because it -contains portions of the Library), rather than a "work that uses the -library". The executable is therefore covered by this License. -Section 6 states terms for distribution of such executables. - - When a "work that uses the Library" uses material from a header file -that is part of the Library, the object code for the work may be a -derivative work of the Library even though the source code is not. -Whether this is true is especially significant if the work can be -linked without the Library, or if the work is itself a library. The -threshold for this to be true is not precisely defined by law. 
- - If such an object file uses only numerical parameters, data -structure layouts and accessors, and small macros and small inline -functions (ten lines or less in length), then the use of the object -file is unrestricted, regardless of whether it is legally a derivative -work. (Executables containing this object code plus portions of the -Library will still fall under Section 6.) - - Otherwise, if the work is a derivative of the Library, you may -distribute the object code for the work under the terms of Section 6. -Any executables containing that work also fall under Section 6, -whether or not they are linked directly with the Library itself. - - 6. As an exception to the Sections above, you may also combine or -link a "work that uses the Library" with the Library to produce a -work containing portions of the Library, and distribute that work -under terms of your choice, provided that the terms permit -modification of the work for the customer's own use and reverse -engineering for debugging such modifications. - - You must give prominent notice with each copy of the work that the -Library is used in it and that the Library and its use are covered by -this License. You must supply a copy of this License. If the work -during execution displays copyright notices, you must include the -copyright notice for the Library among them, as well as a reference -directing the user to the copy of this License. Also, you must do one -of these things: - - a) Accompany the work with the complete corresponding - machine-readable source code for the Library including whatever - changes were used in the work (which must be distributed under - Sections 1 and 2 above); and, if the work is an executable linked - with the Library, with the complete machine-readable "work that - uses the Library", as object code and/or source code, so that the - user can modify the Library and then relink to produce a modified - executable containing the modified Library. 
(It is understood - that the user who changes the contents of definitions files in the - Library will not necessarily be able to recompile the application - to use the modified definitions.) - - b) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (1) uses at run time a - copy of the library already present on the user's computer system, - rather than copying library functions into the executable, and (2) - will operate properly with a modified version of the library, if - the user installs one, as long as the modified version is - interface-compatible with the version that the work was made with. - - c) Accompany the work with a written offer, valid for at - least three years, to give the same user the materials - specified in Subsection 6a, above, for a charge no more - than the cost of performing this distribution. - - d) If distribution of the work is made by offering access to copy - from a designated place, offer equivalent access to copy the above - specified materials from the same place. - - e) Verify that the user has already received a copy of these - materials or that you have already sent this user a copy. - - For an executable, the required form of the "work that uses the -Library" must include any data and utility programs needed for -reproducing the executable from it. However, as a special exception, -the materials to be distributed need not include anything that is -normally distributed (in either source or binary form) with the major -components (compiler, kernel, and so on) of the operating system on -which the executable runs, unless that component itself accompanies -the executable. - - It may happen that this requirement contradicts the license -restrictions of other proprietary libraries that do not normally -accompany the operating system. Such a contradiction means you cannot -use both them and the Library together in an executable that you -distribute. - - 7. 
You may place library facilities that are a work based on the -Library side-by-side in a single library together with other library -facilities not covered by this License, and distribute such a combined -library, provided that the separate distribution of the work based on -the Library and of the other library facilities is otherwise -permitted, and provided that you do these two things: - - a) Accompany the combined library with a copy of the same work - based on the Library, uncombined with any other library - facilities. This must be distributed under the terms of the - Sections above. - - b) Give prominent notice with the combined library of the fact - that part of it is a work based on the Library, and explaining - where to find the accompanying uncombined form of the same work. - - 8. You may not copy, modify, sublicense, link with, or distribute -the Library except as expressly provided under this License. Any -attempt otherwise to copy, modify, sublicense, link with, or -distribute the Library is void, and will automatically terminate your -rights under this License. However, parties who have received copies, -or rights, from you under this License will not have their licenses -terminated so long as such parties remain in full compliance. - - 9. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Library or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Library (or any work based on the -Library), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Library or works based on it. - - 10. 
Each time you redistribute the Library (or any work based on the -Library), the recipient automatically receives a license from the -original licensor to copy, distribute, link with or modify the Library -subject to these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties with -this License. - - 11. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Library at all. For example, if a patent -license would not permit royalty-free redistribution of the Library by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Library. - -If any portion of this section is held invalid or unenforceable under any -particular circumstance, the balance of the section is intended to apply, -and the section as a whole is intended to apply in other circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system which is -implemented by public license practices. 
Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 12. If the distribution and/or use of the Library is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Library under this License may add -an explicit geographical distribution limitation excluding those countries, -so that distribution is permitted only in or among countries not thus -excluded. In such case, this License incorporates the limitation as if -written in the body of this License. - - 13. The Free Software Foundation may publish revised and/or new -versions of the Lesser General Public License from time to time. -Such new versions will be similar in spirit to the present version, -but may differ in detail to address new problems or concerns. - -Each version is given a distinguishing version number. If the Library -specifies a version number of this License which applies to it and -"any later version", you have the option of following the terms and -conditions either of that version or of any later version published by -the Free Software Foundation. If the Library does not specify a -license version number, you may choose any version ever published by -the Free Software Foundation. - - 14. If you wish to incorporate parts of the Library into other free -programs whose distribution conditions are incompatible with these, -write to the author to ask for permission. For software which is -copyrighted by the Free Software Foundation, write to the Free -Software Foundation; we sometimes make exceptions for this. 
Our -decision will be guided by the two goals of preserving the free status -of all derivatives of our free software and of promoting the sharing -and reuse of software generally. - - NO WARRANTY - - 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO -WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. -EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR -OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY -KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE -LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME -THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN -WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY -AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU -FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR -CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE -LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING -RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A -FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF -SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH -DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Libraries - - If you develop a new library, and you want it to be of the greatest -possible use to the public, we recommend making it free software that -everyone can redistribute and change. You can do so by permitting -redistribution under these terms (or, alternatively, under the terms of the -ordinary General Public License). - - To apply these terms, attach the following notices to the library. 
It is -safest to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least the -"copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -Also add information on how to contact you by electronic and paper mail. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the library, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the - library `Frob' (a library for tweaking knobs) written by James Random Hacker. - - , 1 April 1990 - Ty Coon, President of Vice - -That's all there is to it! - - diff --git a/src/phputf8/README b/src/phputf8/README deleted file mode 100644 index 6c309054..00000000 --- a/src/phputf8/README +++ /dev/null @@ -1,82 +0,0 @@ -++PHP UTF-8++ - -Version 0.5 - -++DOCUMENTATION++ - -Documentation in progress in ./docs dir - -http://www.phpwact.org/php/i18n/charsets -http://www.phpwact.org/php/i18n/utf-8 - -Important Note: DO NOT use these functions without understanding WHY -you are using them. 
In particular, do not blindly replace all use of PHP's -string functions which functions found here - most of the time you will -not need to, and you will be introducing a significant performance -overhead to your application. You can get a good idea of when to use what -from reading: http://www.phpwact.org/php/i18n/utf-8 - -Important Note: For sake of performance most of the functions here are -not "defensive" (e.g. there is not extensive parameter checking, well -formed UTF-8 is assumed). This is particularily relevant when is comes to -catching badly formed UTF-8 - you should screen input on the "outer -perimeter" with help from functions in the utf8_validation.php and -utf8_bad.php files. - -Important Note: this library treats ALL ASCII characters as valid, including ASCII control characters. But if you use some ASCII control characters in XML, it will render the XML ill-formed. Don't be a bozo: http://hsivonen.iki.fi/producing-xml/#controlchar - -++BUGS / SUPPORT / FEATURE REQUESTS ++ - -Please report bugs to: -http://sourceforge.net/tracker/?group_id=142846&atid=753842 -- if you are able, please submit a failing unit test -(http://www.lastcraft.com/simple_test.php) with your bug report. - -For feature requests / faster implementation of functions found here, -please drop them in via the RFE tracker: http://sourceforge.net/tracker/?group_id=142846&atid=753845 -Particularily interested in faster implementations! - -For general support / help, use: -http://sourceforge.net/tracker/?group_id=142846&atid=753843 - -In the VERY WORST case, you can email me: hfuecks gmail com - I tend to be slow to respond though so be warned. - -Important Note: when reporting bugs, please provide the following -information; - -PHP version, whether the iconv extension is loaded (in PHP5 it's -there by default), whether the mbstring extension is loaded. 
The -following PHP script can be used to determine this information; - -"; -if ( extension_loaded('mbstring') ) { - print "mbstring available
"; -} else { - print "mbstring not available
"; -} -if ( extension_loaded('iconv') ) { - print "iconv available
"; -} else { - print "iconv not available
"; -} -?> - -++LICENSING++ - -Parts of the code in this library come from other places, under different -licenses. -The authors involved have been contacted (see below). Attribution for -which code came from elsewhere can be found in the source code itself. - -+Andreas Gohr / Chris Smith - Dokuwiki -There is a fair degree of collaboration / exchange of ideas and code -beteen Dokuwiki's UTF-8 library; -http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php -and phputf8. Although Dokuwiki is released under GPL, its UTF-8 -library is released under LGPL, hence no conflict with phputf8 - -+Henri Sivonen (http://hsivonen.iki.fi/php-utf8/ / -http://hsivonen.iki.fi/php-utf8/) has also given permission for his -code to be released under the terms of the LGPL. He ported a Unicode / UTF-8 -converter from the Mozilla codebase to PHP, which is re-used in phputf8 diff --git a/src/phputf8/mbstring/core.php b/src/phputf8/mbstring/core.php deleted file mode 100644 index 559d3e7f..00000000 --- a/src/phputf8/mbstring/core.php +++ /dev/null @@ -1,139 +0,0 @@ - -* @link http://www.php.net/manual/en/function.strlen.php -* @link http://www.php.net/manual/en/function.utf8-decode.php -* @param string UTF-8 string -* @return int number of UTF-8 characters in string -* @package utf8 -*/ -function utf8_strlen($str) -{ - return strlen(utf8_decode($str)); -} - - -//-------------------------------------------------------------------- -/** -* UTF-8 aware alternative to strpos -* Find position of first occurrence of a string -* Note: This will get alot slower if offset is used -* Note: requires utf8_strlen amd utf8_substr to be loaded -* @param string haystack -* @param string needle (you should validate this with utf8_is_valid) -* @param integer offset in characters (from left) -* @return mixed integer position or FALSE on failure -* @see http://www.php.net/strpos -* @see utf8_strlen -* @see utf8_substr -* @package utf8 -*/ -function utf8_strpos($str, $needle, $offset = null) -{ - if 
(is_null($offset)) { - $ar = explode($needle, $str, 2); - if (count($ar) > 1) { - return utf8_strlen($ar[0]); - } - return false; - } else { - if (!is_int($offset)) { - trigger_error('utf8_strpos: Offset must be an integer', E_USER_ERROR); - return false; - } - - $str = utf8_substr($str, $offset); - - if (false !== ($pos = utf8_strpos($str, $needle))) { - return $pos + $offset; - } - - return false; - } -} - -//-------------------------------------------------------------------- -/** -* UTF-8 aware alternative to strrpos -* Find position of last occurrence of a char in a string -* Note: This will get alot slower if offset is used -* Note: requires utf8_substr and utf8_strlen to be loaded -* @param string haystack -* @param string needle (you should validate this with utf8_is_valid) -* @param integer (optional) offset (from left) -* @return mixed integer position or FALSE on failure -* @see http://www.php.net/strrpos -* @see utf8_substr -* @see utf8_strlen -* @package utf8 -*/ -function utf8_strrpos($str, $needle, $offset = null) -{ - if (is_null($offset)) { - $ar = explode($needle, $str); - - if (count($ar) > 1) { - // Pop off the end of the string where the last match was made - array_pop($ar); - $str = join($needle, $ar); - return utf8_strlen($str); - } - return false; - } else { - if (!is_int($offset)) { - trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_WARNING); - return false; - } - - $str = utf8_substr($str, $offset); - - if (false !== ($pos = utf8_strrpos($str, $needle))) { - return $pos + $offset; - } - - return false; - } -} - -//-------------------------------------------------------------------- -/** -* UTF-8 aware alternative to substr -* Return part of a string given character offset (and optionally length) -* -* Note arguments: comparied to substr - if offset or length are -* not integers, this version will not complain but rather massages them -* into an integer. 
-* -* Note on returned values: substr documentation states false can be -* returned in some cases (e.g. offset > string length) -* mb_substr never returns false, it will return an empty string instead. -* This adopts the mb_substr approach -* -* Note on implementation: PCRE only supports repetitions of less than -* 65536, in order to accept up to MAXINT values for offset and length, -* we'll repeat a group of 65535 characters when needed. -* -* Note on implementation: calculating the number of characters in the -* string is a relatively expensive operation, so we only carry it out when -* necessary. It isn't necessary for +ve offsets and no specified length -* -* @author Chris Smith -* @param string -* @param integer number of UTF-8 characters offset (from left) -* @param integer (optional) length in UTF-8 characters from offset -* @return mixed string or FALSE if failure -* @package utf8 -*/ -function utf8_substr($str, $offset, $length = null) -{ - // generates E_NOTICE - // for PHP4 objects, but not PHP5 objects - $str = (string)$str; - $offset = (int)$offset; - if (!is_null($length)) { - $length = (int)$length; - } - - // handle trivial cases - if ($length === 0) { - return ''; - } - if ($offset < 0 && $length < 0 && $length < $offset) { - return ''; - } - - // normalise negative offsets (we could use a tail - // anchored pattern, but they are horribly slow!) - if ($offset < 0) { - // see notes - $strlen = strlen(utf8_decode($str)); - $offset = $strlen + $offset; - if ($offset < 0) { - $offset = 0; - } - } - - $Op = ''; - $Lp = ''; - - // establish a pattern for offset, a - // non-captured group equal in length to offset - if ($offset > 0) { - $Ox = (int)($offset / 65535); - $Oy = $offset % 65535; - - if ($Ox) { - $Op = '(?:.{65535}){' . $Ox . '}'; - } - - $Op = '^(?:' . $Op . '.{' . $Oy . 
'})'; - } else { - // offset == 0; just anchor the pattern - $Op = '^'; - } - - // establish a pattern for length - if (is_null($length)) { - // the rest of the string - $Lp = '(.*)$'; - } else { - if (!isset($strlen)) { - // see notes - $strlen = strlen(utf8_decode($str)); - } - - // another trivial case - if ($offset > $strlen) { - return ''; - } - - if ($length > 0) { - // reduce any length that would - // go passed the end of the string - $length = min($strlen - $offset, $length); - - $Lx = (int)($length / 65535); - $Ly = $length % 65535; - - // negative length requires a captured group - // of length characters - if ($Lx) { - $Lp = '(?:.{65535}){' . $Lx . '}'; - } - $Lp = '(' . $Lp . '.{' . $Ly . '})'; - } elseif ($length < 0) { - if ($length < ($offset - $strlen)) { - return ''; - } - - $Lx = (int)((-$length) / 65535); - $Ly = (-$length) % 65535; - - // negative length requires ... capture everything - // except a group of -length characters - // anchored at the tail-end of the string - if ($Lx) { - $Lp = '(?:.{65535}){' . $Lx . '}'; - } - $Lp = '(.*)(?:' . $Lp . '.{' . $Ly . '})$'; - } - } - - if (!preg_match('#' . $Op . $Lp . '#us', $str, $match)) { - return ''; - } - - return $match[1]; -} - -//--------------------------------------------------------------- -/** -* UTF-8 aware alternative to strtolower -* Make a string lowercase -* Note: The concept of a characters "case" only exists is some alphabets -* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does -* not exist in the Chinese alphabet, for example. 
See Unicode Standard -* Annex #21: Case Mappings -* Note: requires utf8_to_unicode and utf8_from_unicode -* @author Andreas Gohr -* @param string -* @return mixed either string in lowercase or FALSE is UTF-8 invalid -* @see http://www.php.net/strtolower -* @see utf8_to_unicode -* @see utf8_from_unicode -* @see http://www.unicode.org/reports/tr21/tr21-5.html -* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php -* @package utf8 -*/ -function utf8_strtolower($string) -{ - static $UTF8_UPPER_TO_LOWER = null; - - if (is_null($UTF8_UPPER_TO_LOWER)) { - $UTF8_UPPER_TO_LOWER = [ - 0x0041 => 0x0061, 0x03A6 => 0x03C6, 0x0162 => 0x0163, 0x00C5 => 0x00E5, 0x0042 => 0x0062, - 0x0139 => 0x013A, 0x00C1 => 0x00E1, 0x0141 => 0x0142, 0x038E => 0x03CD, 0x0100 => 0x0101, - 0x0490 => 0x0491, 0x0394 => 0x03B4, 0x015A => 0x015B, 0x0044 => 0x0064, 0x0393 => 0x03B3, - 0x00D4 => 0x00F4, 0x042A => 0x044A, 0x0419 => 0x0439, 0x0112 => 0x0113, 0x041C => 0x043C, - 0x015E => 0x015F, 0x0143 => 0x0144, 0x00CE => 0x00EE, 0x040E => 0x045E, 0x042F => 0x044F, - 0x039A => 0x03BA, 0x0154 => 0x0155, 0x0049 => 0x0069, 0x0053 => 0x0073, 0x1E1E => 0x1E1F, - 0x0134 => 0x0135, 0x0427 => 0x0447, 0x03A0 => 0x03C0, 0x0418 => 0x0438, 0x00D3 => 0x00F3, - 0x0420 => 0x0440, 0x0404 => 0x0454, 0x0415 => 0x0435, 0x0429 => 0x0449, 0x014A => 0x014B, - 0x0411 => 0x0431, 0x0409 => 0x0459, 0x1E02 => 0x1E03, 0x00D6 => 0x00F6, 0x00D9 => 0x00F9, - 0x004E => 0x006E, 0x0401 => 0x0451, 0x03A4 => 0x03C4, 0x0423 => 0x0443, 0x015C => 0x015D, - 0x0403 => 0x0453, 0x03A8 => 0x03C8, 0x0158 => 0x0159, 0x0047 => 0x0067, 0x00C4 => 0x00E4, - 0x0386 => 0x03AC, 0x0389 => 0x03AE, 0x0166 => 0x0167, 0x039E => 0x03BE, 0x0164 => 0x0165, - 0x0116 => 0x0117, 0x0108 => 0x0109, 0x0056 => 0x0076, 0x00DE => 0x00FE, 0x0156 => 0x0157, - 0x00DA => 0x00FA, 0x1E60 => 0x1E61, 0x1E82 => 0x1E83, 0x00C2 => 0x00E2, 0x0118 => 0x0119, - 0x0145 => 0x0146, 0x0050 => 0x0070, 0x0150 => 0x0151, 0x042E => 0x044E, 0x0128 => 0x0129, - 0x03A7 => 0x03C7, 0x013D 
=> 0x013E, 0x0422 => 0x0442, 0x005A => 0x007A, 0x0428 => 0x0448, - 0x03A1 => 0x03C1, 0x1E80 => 0x1E81, 0x016C => 0x016D, 0x00D5 => 0x00F5, 0x0055 => 0x0075, - 0x0176 => 0x0177, 0x00DC => 0x00FC, 0x1E56 => 0x1E57, 0x03A3 => 0x03C3, 0x041A => 0x043A, - 0x004D => 0x006D, 0x016A => 0x016B, 0x0170 => 0x0171, 0x0424 => 0x0444, 0x00CC => 0x00EC, - 0x0168 => 0x0169, 0x039F => 0x03BF, 0x004B => 0x006B, 0x00D2 => 0x00F2, 0x00C0 => 0x00E0, - 0x0414 => 0x0434, 0x03A9 => 0x03C9, 0x1E6A => 0x1E6B, 0x00C3 => 0x00E3, 0x042D => 0x044D, - 0x0416 => 0x0436, 0x01A0 => 0x01A1, 0x010C => 0x010D, 0x011C => 0x011D, 0x00D0 => 0x00F0, - 0x013B => 0x013C, 0x040F => 0x045F, 0x040A => 0x045A, 0x00C8 => 0x00E8, 0x03A5 => 0x03C5, - 0x0046 => 0x0066, 0x00DD => 0x00FD, 0x0043 => 0x0063, 0x021A => 0x021B, 0x00CA => 0x00EA, - 0x0399 => 0x03B9, 0x0179 => 0x017A, 0x00CF => 0x00EF, 0x01AF => 0x01B0, 0x0045 => 0x0065, - 0x039B => 0x03BB, 0x0398 => 0x03B8, 0x039C => 0x03BC, 0x040C => 0x045C, 0x041F => 0x043F, - 0x042C => 0x044C, 0x00DE => 0x00FE, 0x00D0 => 0x00F0, 0x1EF2 => 0x1EF3, 0x0048 => 0x0068, - 0x00CB => 0x00EB, 0x0110 => 0x0111, 0x0413 => 0x0433, 0x012E => 0x012F, 0x00C6 => 0x00E6, - 0x0058 => 0x0078, 0x0160 => 0x0161, 0x016E => 0x016F, 0x0391 => 0x03B1, 0x0407 => 0x0457, - 0x0172 => 0x0173, 0x0178 => 0x00FF, 0x004F => 0x006F, 0x041B => 0x043B, 0x0395 => 0x03B5, - 0x0425 => 0x0445, 0x0120 => 0x0121, 0x017D => 0x017E, 0x017B => 0x017C, 0x0396 => 0x03B6, - 0x0392 => 0x03B2, 0x0388 => 0x03AD, 0x1E84 => 0x1E85, 0x0174 => 0x0175, 0x0051 => 0x0071, - 0x0417 => 0x0437, 0x1E0A => 0x1E0B, 0x0147 => 0x0148, 0x0104 => 0x0105, 0x0408 => 0x0458, - 0x014C => 0x014D, 0x00CD => 0x00ED, 0x0059 => 0x0079, 0x010A => 0x010B, 0x038F => 0x03CE, - 0x0052 => 0x0072, 0x0410 => 0x0430, 0x0405 => 0x0455, 0x0402 => 0x0452, 0x0126 => 0x0127, - 0x0136 => 0x0137, 0x012A => 0x012B, 0x038A => 0x03AF, 0x042B => 0x044B, 0x004C => 0x006C, - 0x0397 => 0x03B7, 0x0124 => 0x0125, 0x0218 => 0x0219, 0x00DB => 0x00FB, 0x011E => 0x011F, - 
0x041E => 0x043E, 0x1E40 => 0x1E41, 0x039D => 0x03BD, 0x0106 => 0x0107, 0x03AB => 0x03CB, - 0x0426 => 0x0446, 0x00DE => 0x00FE, 0x00C7 => 0x00E7, 0x03AA => 0x03CA, 0x0421 => 0x0441, - 0x0412 => 0x0432, 0x010E => 0x010F, 0x00D8 => 0x00F8, 0x0057 => 0x0077, 0x011A => 0x011B, - 0x0054 => 0x0074, 0x004A => 0x006A, 0x040B => 0x045B, 0x0406 => 0x0456, 0x0102 => 0x0103, - 0x039B => 0x03BB, 0x00D1 => 0x00F1, 0x041D => 0x043D, 0x038C => 0x03CC, 0x00C9 => 0x00E9, - 0x00D0 => 0x00F0, 0x0407 => 0x0457, 0x0122 => 0x0123, - ]; - } - - $uni = utf8_to_unicode($string); - - if (!$uni) { - return false; - } - - $cnt = count($uni); - for ($i = 0; $i < $cnt; $i++) { - if (isset($UTF8_UPPER_TO_LOWER[$uni[$i]])) { - $uni[$i] = $UTF8_UPPER_TO_LOWER[$uni[$i]]; - } - } - - return utf8_from_unicode($uni); -} - -//--------------------------------------------------------------- -/** -* UTF-8 aware alternative to strtoupper -* Make a string uppercase -* Note: The concept of a characters "case" only exists is some alphabets -* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does -* not exist in the Chinese alphabet, for example. 
See Unicode Standard -* Annex #21: Case Mappings -* Note: requires utf8_to_unicode and utf8_from_unicode -* @author Andreas Gohr -* @param string -* @return mixed either string in lowercase or FALSE is UTF-8 invalid -* @see http://www.php.net/strtoupper -* @see utf8_to_unicode -* @see utf8_from_unicode -* @see http://www.unicode.org/reports/tr21/tr21-5.html -* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php -* @package utf8 -*/ -function utf8_strtoupper($string) -{ - static $UTF8_LOWER_TO_UPPER = null; - - if (is_null($UTF8_LOWER_TO_UPPER)) { - $UTF8_LOWER_TO_UPPER = [ - 0x0061 => 0x0041, 0x03C6 => 0x03A6, 0x0163 => 0x0162, 0x00E5 => 0x00C5, 0x0062 => 0x0042, - 0x013A => 0x0139, 0x00E1 => 0x00C1, 0x0142 => 0x0141, 0x03CD => 0x038E, 0x0101 => 0x0100, - 0x0491 => 0x0490, 0x03B4 => 0x0394, 0x015B => 0x015A, 0x0064 => 0x0044, 0x03B3 => 0x0393, - 0x00F4 => 0x00D4, 0x044A => 0x042A, 0x0439 => 0x0419, 0x0113 => 0x0112, 0x043C => 0x041C, - 0x015F => 0x015E, 0x0144 => 0x0143, 0x00EE => 0x00CE, 0x045E => 0x040E, 0x044F => 0x042F, - 0x03BA => 0x039A, 0x0155 => 0x0154, 0x0069 => 0x0049, 0x0073 => 0x0053, 0x1E1F => 0x1E1E, - 0x0135 => 0x0134, 0x0447 => 0x0427, 0x03C0 => 0x03A0, 0x0438 => 0x0418, 0x00F3 => 0x00D3, - 0x0440 => 0x0420, 0x0454 => 0x0404, 0x0435 => 0x0415, 0x0449 => 0x0429, 0x014B => 0x014A, - 0x0431 => 0x0411, 0x0459 => 0x0409, 0x1E03 => 0x1E02, 0x00F6 => 0x00D6, 0x00F9 => 0x00D9, - 0x006E => 0x004E, 0x0451 => 0x0401, 0x03C4 => 0x03A4, 0x0443 => 0x0423, 0x015D => 0x015C, - 0x0453 => 0x0403, 0x03C8 => 0x03A8, 0x0159 => 0x0158, 0x0067 => 0x0047, 0x00E4 => 0x00C4, - 0x03AC => 0x0386, 0x03AE => 0x0389, 0x0167 => 0x0166, 0x03BE => 0x039E, 0x0165 => 0x0164, - 0x0117 => 0x0116, 0x0109 => 0x0108, 0x0076 => 0x0056, 0x00FE => 0x00DE, 0x0157 => 0x0156, - 0x00FA => 0x00DA, 0x1E61 => 0x1E60, 0x1E83 => 0x1E82, 0x00E2 => 0x00C2, 0x0119 => 0x0118, - 0x0146 => 0x0145, 0x0070 => 0x0050, 0x0151 => 0x0150, 0x044E => 0x042E, 0x0129 => 0x0128, - 0x03C7 => 0x03A7, 0x013E 
=> 0x013D, 0x0442 => 0x0422, 0x007A => 0x005A, 0x0448 => 0x0428, - 0x03C1 => 0x03A1, 0x1E81 => 0x1E80, 0x016D => 0x016C, 0x00F5 => 0x00D5, 0x0075 => 0x0055, - 0x0177 => 0x0176, 0x00FC => 0x00DC, 0x1E57 => 0x1E56, 0x03C3 => 0x03A3, 0x043A => 0x041A, - 0x006D => 0x004D, 0x016B => 0x016A, 0x0171 => 0x0170, 0x0444 => 0x0424, 0x00EC => 0x00CC, - 0x0169 => 0x0168, 0x03BF => 0x039F, 0x006B => 0x004B, 0x00F2 => 0x00D2, 0x00E0 => 0x00C0, - 0x0434 => 0x0414, 0x03C9 => 0x03A9, 0x1E6B => 0x1E6A, 0x00E3 => 0x00C3, 0x044D => 0x042D, - 0x0436 => 0x0416, 0x01A1 => 0x01A0, 0x010D => 0x010C, 0x011D => 0x011C, 0x00F0 => 0x00D0, - 0x013C => 0x013B, 0x045F => 0x040F, 0x045A => 0x040A, 0x00E8 => 0x00C8, 0x03C5 => 0x03A5, - 0x0066 => 0x0046, 0x00FD => 0x00DD, 0x0063 => 0x0043, 0x021B => 0x021A, 0x00EA => 0x00CA, - 0x03B9 => 0x0399, 0x017A => 0x0179, 0x00EF => 0x00CF, 0x01B0 => 0x01AF, 0x0065 => 0x0045, - 0x03BB => 0x039B, 0x03B8 => 0x0398, 0x03BC => 0x039C, 0x045C => 0x040C, 0x043F => 0x041F, - 0x044C => 0x042C, 0x00FE => 0x00DE, 0x00F0 => 0x00D0, 0x1EF3 => 0x1EF2, 0x0068 => 0x0048, - 0x00EB => 0x00CB, 0x0111 => 0x0110, 0x0433 => 0x0413, 0x012F => 0x012E, 0x00E6 => 0x00C6, - 0x0078 => 0x0058, 0x0161 => 0x0160, 0x016F => 0x016E, 0x03B1 => 0x0391, 0x0457 => 0x0407, - 0x0173 => 0x0172, 0x00FF => 0x0178, 0x006F => 0x004F, 0x043B => 0x041B, 0x03B5 => 0x0395, - 0x0445 => 0x0425, 0x0121 => 0x0120, 0x017E => 0x017D, 0x017C => 0x017B, 0x03B6 => 0x0396, - 0x03B2 => 0x0392, 0x03AD => 0x0388, 0x1E85 => 0x1E84, 0x0175 => 0x0174, 0x0071 => 0x0051, - 0x0437 => 0x0417, 0x1E0B => 0x1E0A, 0x0148 => 0x0147, 0x0105 => 0x0104, 0x0458 => 0x0408, - 0x014D => 0x014C, 0x00ED => 0x00CD, 0x0079 => 0x0059, 0x010B => 0x010A, 0x03CE => 0x038F, - 0x0072 => 0x0052, 0x0430 => 0x0410, 0x0455 => 0x0405, 0x0452 => 0x0402, 0x0127 => 0x0126, - 0x0137 => 0x0136, 0x012B => 0x012A, 0x03AF => 0x038A, 0x044B => 0x042B, 0x006C => 0x004C, - 0x03B7 => 0x0397, 0x0125 => 0x0124, 0x0219 => 0x0218, 0x00FB => 0x00DB, 0x011F => 0x011E, - 
0x043E => 0x041E, 0x1E41 => 0x1E40, 0x03BD => 0x039D, 0x0107 => 0x0106, 0x03CB => 0x03AB, - 0x0446 => 0x0426, 0x00FE => 0x00DE, 0x00E7 => 0x00C7, 0x03CA => 0x03AA, 0x0441 => 0x0421, - 0x0432 => 0x0412, 0x010F => 0x010E, 0x00F8 => 0x00D8, 0x0077 => 0x0057, 0x011B => 0x011A, - 0x0074 => 0x0054, 0x006A => 0x004A, 0x045B => 0x040B, 0x0456 => 0x0406, 0x0103 => 0x0102, - 0x03BB => 0x039B, 0x00F1 => 0x00D1, 0x043D => 0x041D, 0x03CC => 0x038C, 0x00E9 => 0x00C9, - 0x00F0 => 0x00D0, 0x0457 => 0x0407, 0x0123 => 0x0122, - ]; - } - - $uni = utf8_to_unicode($string); - - if (!$uni) { - return false; - } - - $cnt = count($uni); - for ($i = 0; $i < $cnt; $i++) { - if (isset($UTF8_LOWER_TO_UPPER[$uni[$i]])) { - $uni[$i] = $UTF8_LOWER_TO_UPPER[$uni[$i]]; - } - } - - return utf8_from_unicode($uni); -} diff --git a/src/phputf8/ord.php b/src/phputf8/ord.php deleted file mode 100644 index f35afdb6..00000000 --- a/src/phputf8/ord.php +++ /dev/null @@ -1,94 +0,0 @@ -= 0 && $ord0 <= 127) { - return $ord0; - } - - if (!isset($chr[1])) { - trigger_error('Short sequence - at least 2 bytes expected, only 1 seen'); - return false; - } - - $ord1 = ord($chr[1]); - if ($ord0 >= 192 && $ord0 <= 223) { - return ($ord0 - 192) * 64 - + ($ord1 - 128); - } - - if (!isset($chr[2])) { - trigger_error('Short sequence - at least 3 bytes expected, only 2 seen'); - return false; - } - $ord2 = ord($chr[2]); - if ($ord0 >= 224 && $ord0 <= 239) { - return ($ord0 - 224) * 4096 - + ($ord1 - 128) * 64 - + ($ord2 - 128); - } - - if (!isset($chr[3])) { - trigger_error('Short sequence - at least 4 bytes expected, only 3 seen'); - return false; - } - $ord3 = ord($chr[3]); - if ($ord0 >= 240 && $ord0 <= 247) { - return ($ord0 - 240) * 262144 - + ($ord1 - 128) * 4096 - + ($ord2 - 128) * 64 - + ($ord3 - 128); - } - - if (!isset($chr[4])) { - trigger_error('Short sequence - at least 5 bytes expected, only 4 seen'); - return false; - } - $ord4 = ord($chr[4]); - if ($ord0 >= 248 && $ord0 <= 251) { - return ($ord0 - 248) * 
16777216 - + ($ord1 - 128) * 262144 - + ($ord2 - 128) * 4096 - + ($ord3 - 128) * 64 - + ($ord4 - 128); - } - - if (!isset($chr[5])) { - trigger_error('Short sequence - at least 6 bytes expected, only 5 seen'); - return false; - } - if ($ord0 >= 252 && $ord0 <= 253) { - return ($ord0 - 252) * 1073741824 - + ($ord1 - 128) * 16777216 - + ($ord2 - 128) * 262144 - + ($ord3 - 128) * 4096 - + ($ord4 - 128) * 64 - + (ord($chr[5]) - 128); - } - - if ($ord0 >= 254 && $ord0 <= 255) { - trigger_error('Invalid UTF-8 with surrogate ordinal ' . $ord0); - return false; - } -} diff --git a/src/phputf8/str_ireplace.php b/src/phputf8/str_ireplace.php deleted file mode 100644 index 2ed3db02..00000000 --- a/src/phputf8/str_ireplace.php +++ /dev/null @@ -1,62 +0,0 @@ - -* @param string $input -* @param int $length -* @param string $padStr -* @param int $type ( same constants as str_pad ) -* @return string -* @see http://www.php.net/str_pad -* @see utf8_substr -* @package utf8 -*/ -function utf8_str_pad($input, $length, $padStr = ' ', $type = STR_PAD_RIGHT) -{ - $inputLen = utf8_strlen($input); - if ($length <= $inputLen) { - return $input; - } - - $padStrLen = utf8_strlen($padStr); - $padLen = $length - $inputLen; - - if ($type == STR_PAD_RIGHT) { - $repeatTimes = ceil($padLen / $padStrLen); - return utf8_substr($input . str_repeat($padStr, $repeatTimes), 0, $length); - } - - if ($type == STR_PAD_LEFT) { - $repeatTimes = ceil($padLen / $padStrLen); - return utf8_substr(str_repeat($padStr, $repeatTimes), 0, floor($padLen)) . $input; - } - - if ($type == STR_PAD_BOTH) { - $padLen /= 2; - $padAmountLeft = floor($padLen); - $padAmountRight = ceil($padLen); - $repeatTimesLeft = ceil($padAmountLeft / $padStrLen); - $repeatTimesRight = ceil($padAmountRight / $padStrLen); - - $paddingLeft = utf8_substr(str_repeat($padStr, $repeatTimesLeft), 0, $padAmountLeft); - $paddingRight = utf8_substr(str_repeat($padStr, $repeatTimesRight), 0, $padAmountLeft); - return $paddingLeft . $input . 
$paddingRight; - } - - trigger_error('utf8_str_pad: Unknown padding type (' . $type . ')', E_USER_ERROR); -} diff --git a/src/phputf8/str_split.php b/src/phputf8/str_split.php deleted file mode 100644 index ba94f6b2..00000000 --- a/src/phputf8/str_split.php +++ /dev/null @@ -1,32 +0,0 @@ - -* @see http://www.php.net/ltrim -* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php -* @return string -* @package utf8 -*/ -function utf8_ltrim($str, $charlist = false) -{ - if ($charlist === false) { - return ltrim($str); - } - - //quote charlist for use in a characterclass - $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!', '\\\${1}', $charlist); - - return preg_replace('/^[' . $charlist . ']+/u', '', $str); -} - -//--------------------------------------------------------------- -/** -* UTF-8 aware replacement for rtrim() -* Note: you only need to use this if you are supplying the charlist -* optional arg and it contains UTF-8 characters. Otherwise rtrim will -* work normally on a UTF-8 string -* @author Andreas Gohr -* @see http://www.php.net/rtrim -* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php -* @return string -* @package utf8 -*/ -function utf8_rtrim($str, $charlist = false) -{ - if ($charlist === false) { - return rtrim($str); - } - - //quote charlist for use in a characterclass - $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!', '\\\${1}', $charlist); - - return preg_replace('/[' . $charlist . ']+$/u', '', $str); -} - -//--------------------------------------------------------------- -/** -* UTF-8 aware replacement for trim() -* Note: you only need to use this if you are supplying the charlist -* optional arg and it contains UTF-8 characters. 
Otherwise trim will -* work normally on a UTF-8 string -* @author Andreas Gohr -* @see http://www.php.net/trim -* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php -* @return string -* @package utf8 -*/ -function utf8_trim($str, $charlist = false) -{ - if ($charlist === false) { - return trim($str); - } - return utf8_ltrim(utf8_rtrim($str, $charlist), $charlist); -} diff --git a/src/phputf8/ucfirst.php b/src/phputf8/ucfirst.php deleted file mode 100644 index 4ccfd1d0..00000000 --- a/src/phputf8/ucfirst.php +++ /dev/null @@ -1,32 +0,0 @@ - -* if ( utf8_is_ascii($someString) ) { -* // It's just ASCII - use the native PHP version -* $someString = strtolower($someString); -* } else { -* $someString = utf8_strtolower($someString); -* } -* -* -* @param string -* @return boolean TRUE if it's all ASCII -* @package utf8 -* @see utf8_is_ascii_ctrl -*/ -function utf8_is_ascii($str) -{ - // Search for any bytes which are outside the ASCII range... - return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1); -} - -//-------------------------------------------------------------------- -/** -* Tests whether a string contains only 7bit ASCII bytes with device -* control codes omitted. The device control codes can be found on the -* second table here: http://www.w3schools.com/tags/ref_ascii.asp -* -* @param string -* @return boolean TRUE if it's all ASCII without device control codes -* @package utf8 -* @see utf8_is_ascii -*/ -function utf8_is_ascii_ctrl($str) -{ - if (strlen($str) > 0) { - // Search for any bytes which are outside the ASCII range, - // or are device control codes - return (preg_match('/[^\x09\x0A\x0D\x20-\x7E]/', $str) !== 1); - } - return false; -} - -//-------------------------------------------------------------------- -/** -* Strip out all non-7bit ASCII bytes -* If you need to transmit a string to system which you know can only -* support 7bit ASCII, you could use this function. 
-* @param string -* @return string with non ASCII bytes removed -* @package utf8 -* @see utf8_strip_non_ascii_ctrl -*/ -function utf8_strip_non_ascii($str) -{ - ob_start(); - while ( - preg_match( - '/^([\x00-\x7F]+)|([^\x00-\x7F]+)/S', - $str, - $matches - ) - ) { - if (!isset($matches[2])) { - echo $matches[0]; - } - $str = substr($str, strlen($matches[0])); - } - $result = ob_get_contents(); - ob_end_clean(); - return $result; -} - -//-------------------------------------------------------------------- -/** -* Strip out device control codes in the ASCII range -* which are not permitted in XML. Note that this leaves -* multi-byte characters untouched - it only removes device -* control codes -* @see http://hsivonen.iki.fi/producing-xml/#controlchar -* @param string -* @return string control codes removed -*/ -function utf8_strip_ascii_ctrl($str) -{ - ob_start(); - while ( - preg_match( - '/^([^\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+)|([\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+)/S', - $str, - $matches - ) - ) { - if (!isset($matches[2])) { - echo $matches[0]; - } - $str = substr($str, strlen($matches[0])); - } - $result = ob_get_contents(); - ob_end_clean(); - return $result; -} - -//-------------------------------------------------------------------- -/** -* Strip out all non 7bit ASCII bytes and ASCII device control codes. 
-* For a list of ASCII device control codes see the 2nd table here: -* http://www.w3schools.com/tags/ref_ascii.asp -* -* @param string -* @return boolean TRUE if it's all ASCII -* @package utf8 -*/ -function utf8_strip_non_ascii_ctrl($str) -{ - ob_start(); - while ( - preg_match( - '/^([\x09\x0A\x0D\x20-\x7E]+)|([^\x09\x0A\x0D\x20-\x7E]+)/S', - $str, - $matches - ) - ) { - if (!isset($matches[2])) { - echo $matches[0]; - } - $str = substr($str, strlen($matches[0])); - } - $result = ob_get_contents(); - ob_end_clean(); - return $result; -} - -//--------------------------------------------------------------- -/** -* Replace accented UTF-8 characters by unaccented ASCII-7 "equivalents". -* The purpose of this function is to replace characters commonly found in Latin -* alphabets with something more or less equivalent from the ASCII range. This can -* be useful for converting a UTF-8 to something ready for a filename, for example. -* Following the use of this function, you would probably also pass the string -* through utf8_strip_non_ascii to clean out any other non-ASCII chars -* Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1) -* letters. 
Default is to deaccent both cases ($case = 0) -* -* For a more complete implementation of transliteration, see the utf8_to_ascii package -* available from the phputf8 project downloads: -* http://prdownloads.sourceforge.net/phputf8 -* -* @param string UTF-8 string -* @param int (optional) -1 lowercase only, +1 uppercase only, 1 both cases -* @param string UTF-8 with accented characters replaced by ASCII chars -* @return string accented chars replaced with ascii equivalents -* @author Andreas Gohr -* @package utf8 -*/ -function utf8_accents_to_ascii($str, $case = 0) -{ - static $UTF8_LOWER_ACCENTS = null; - static $UTF8_UPPER_ACCENTS = null; - - if ($case <= 0) { - if (is_null($UTF8_LOWER_ACCENTS)) { - $UTF8_LOWER_ACCENTS = [ - 'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o', - 'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k', - 'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o', - 'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o', - 'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c', - 'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't', - 'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l', - 'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z', - 'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't', - 'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o', - 'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j', - 'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o', - 'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g', - 'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a', - 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' 
=> 'u', 'ĕ' => 'e', - ]; - } - - $str = str_replace( - array_keys($UTF8_LOWER_ACCENTS), - array_values($UTF8_LOWER_ACCENTS), - $str - ); - } - - if ($case >= 0) { - if (is_null($UTF8_UPPER_ACCENTS)) { - $UTF8_UPPER_ACCENTS = [ - 'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O', - 'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K', - 'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O', - 'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O', - 'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C', - 'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T', - 'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L', - 'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z', - 'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T', - 'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O', - 'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J', - 'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O', - 'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G', - 'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A', - 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E', - ]; - } - $str = str_replace( - array_keys($UTF8_UPPER_ACCENTS), - array_values($UTF8_UPPER_ACCENTS), - $str - ); - } - - return $str; -} diff --git a/src/phputf8/utils/bad.php b/src/phputf8/utils/bad.php deleted file mode 100644 index 143504c2..00000000 --- a/src/phputf8/utils/bad.php +++ /dev/null @@ -1,400 +0,0 @@ - 0) { - return $badList; - } - return false; -} - -//-------------------------------------------------------------------- -/** -* Strips out any bad bytes from a UTF-8 string and returns the 
rest -* PCRE Pattern to locate bad bytes in a UTF-8 string -* Comes from W3 FAQ: Multilingual Forms -* Note: modified to include full ASCII range including control chars -* @see http://www.w3.org/International/questions/qa-forms-utf-8 -* @param string -* @return string -* @package utf8 -*/ -function utf8_bad_strip($str) -{ - $UTF8_BAD = - '([\x00-\x7F]' . # ASCII (including control chars) - '|[\xC2-\xDF][\x80-\xBF]' . # non-overlong 2-byte - '|\xE0[\xA0-\xBF][\x80-\xBF]' . # excluding overlongs - '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' . # straight 3-byte - '|\xED[\x80-\x9F][\x80-\xBF]' . # excluding surrogates - '|\xF0[\x90-\xBF][\x80-\xBF]{2}' . # planes 1-3 - '|[\xF1-\xF3][\x80-\xBF]{3}' . # planes 4-15 - '|\xF4[\x80-\x8F][\x80-\xBF]{2}' . # plane 16 - '|(.{1}))'; # invalid byte - ob_start(); - while (preg_match('/' . $UTF8_BAD . '/S', $str, $matches)) { - if (!isset($matches[2])) { - echo $matches[0]; - } - $str = substr($str, strlen($matches[0])); - } - $result = ob_get_contents(); - ob_end_clean(); - return $result; -} - -//-------------------------------------------------------------------- -/** -* Replace bad bytes with an alternative character - ASCII character -* recommended is replacement char -* PCRE Pattern to locate bad bytes in a UTF-8 string -* Comes from W3 FAQ: Multilingual Forms -* Note: modified to include full ASCII range including control chars -* @see http://www.w3.org/International/questions/qa-forms-utf-8 -* @param string to search -* @param string to replace bad bytes with (defaults to '?') - use ASCII -* @return string -* @package utf8 -*/ -function utf8_bad_replace($str, $replace = '?') -{ - $UTF8_BAD = - '([\x00-\x7F]' . # ASCII (including control chars) - '|[\xC2-\xDF][\x80-\xBF]' . # non-overlong 2-byte - '|\xE0[\xA0-\xBF][\x80-\xBF]' . # excluding overlongs - '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' . # straight 3-byte - '|\xED[\x80-\x9F][\x80-\xBF]' . # excluding surrogates - '|\xF0[\x90-\xBF][\x80-\xBF]{2}' . 
# planes 1-3 - '|[\xF1-\xF3][\x80-\xBF]{3}' . # planes 4-15 - '|\xF4[\x80-\x8F][\x80-\xBF]{2}' . # plane 16 - '|(.{1}))'; # invalid byte - ob_start(); - while (preg_match('/' . $UTF8_BAD . '/S', $str, $matches)) { - if (!isset($matches[2])) { - echo $matches[0]; - } else { - echo $replace; - } - $str = substr($str, strlen($matches[0])); - } - $result = ob_get_contents(); - ob_end_clean(); - return $result; -} - -//-------------------------------------------------------------------- -/** -* Return code from utf8_bad_identify() when a five octet sequence is detected. -* Note: 5 octets sequences are valid UTF-8 but are not supported by Unicode so -* do not represent a useful character -* @see utf8_bad_identify -* @package utf8 -*/ -define('UTF8_BAD_5OCTET', 1); - -/** -* Return code from utf8_bad_identify() when a six octet sequence is detected. -* Note: 6 octets sequences are valid UTF-8 but are not supported by Unicode so -* do not represent a useful character -* @see utf8_bad_identify -* @package utf8 -*/ -define('UTF8_BAD_6OCTET', 2); - -/** -* Return code from utf8_bad_identify(). -* Invalid octet for use as start of multi-byte UTF-8 sequence -* @see utf8_bad_identify -* @package utf8 -*/ -define('UTF8_BAD_SEQID', 3); - -/** -* Return code from utf8_bad_identify(). -* From Unicode 3.1, non-shortest form is illegal -* @see utf8_bad_identify -* @package utf8 -*/ -define('UTF8_BAD_NONSHORT', 4); - -/** -* Return code from utf8_bad_identify(). -* From Unicode 3.2, surrogate characters are illegal -* @see utf8_bad_identify -* @package utf8 -*/ -define('UTF8_BAD_SURROGATE', 5); - -/** -* Return code from utf8_bad_identify(). -* Codepoints outside the Unicode range are illegal -* @see utf8_bad_identify -* @package utf8 -*/ -define('UTF8_BAD_UNIOUTRANGE', 6); - -/** -* Return code from utf8_bad_identify(). 
-* Incomplete multi-octet sequence -* Note: this is kind of a "catch-all" -* @see utf8_bad_identify -* @package utf8 -*/ -define('UTF8_BAD_SEQINCOMPLETE', 7); - -//-------------------------------------------------------------------- -/** -* Reports on the type of bad byte found in a UTF-8 string. Returns a -* status code on the first bad byte found -* -* Joomla modification - As of PHP 7.4, curly brace access has been deprecated. As a result this function has been -* modified to use square brace syntax -* See https://github.com/php/php-src/commit/d574df63dc375f5fc9202ce5afde23f866b6450a -* for additional references -* -* @author -* @param string UTF-8 encoded string -* @return mixed integer constant describing problem or FALSE if valid UTF-8 -* @see utf8_bad_explain -* @see http://hsivonen.iki.fi/php-utf8/ -* @package utf8 -*/ -function utf8_bad_identify($str, &$i) -{ - $mState = 0; // cached expected number of octets after the current octet - // until the beginning of the next UTF8 character sequence - $mUcs4 = 0; // cached Unicode character - $mBytes = 1; // cached expected number of octets in the current sequence - - $len = strlen($str); - - for ($i = 0; $i < $len; $i++) { - $in = ord($str[$i]); - - if ($mState == 0) { - // When mState is zero we expect either a US-ASCII character or a - // multi-octet sequence. - if (0 == (0x80 & ($in))) { - // US-ASCII, pass straight through. - $mBytes = 1; - } elseif (0xC0 == (0xE0 & ($in))) { - // First octet of 2 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x1F) << 6; - $mState = 1; - $mBytes = 2; - } elseif (0xE0 == (0xF0 & ($in))) { - // First octet of 3 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x0F) << 12; - $mState = 2; - $mBytes = 3; - } elseif (0xF0 == (0xF8 & ($in))) { - // First octet of 4 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x07) << 18; - $mState = 3; - $mBytes = 4; - } elseif (0xF8 == (0xFC & ($in))) { - /* First octet of 5 octet sequence. 
- * - * This is illegal because the encoded codepoint must be either - * (a) not the shortest form or - * (b) outside the Unicode range of 0-0x10FFFF. - */ - - return UTF8_BAD_5OCTET; - } elseif (0xFC == (0xFE & ($in))) { - // First octet of 6 octet sequence, see comments for 5 octet sequence. - return UTF8_BAD_6OCTET; - } else { - // Current octet is neither in the US-ASCII range nor a legal first - // octet of a multi-octet sequence. - return UTF8_BAD_SEQID; - } - } else { - // When mState is non-zero, we expect a continuation of the multi-octet - // sequence - if (0x80 == (0xC0 & ($in))) { - // Legal continuation. - $shift = ($mState - 1) * 6; - $tmp = $in; - $tmp = ($tmp & 0x0000003F) << $shift; - $mUcs4 |= $tmp; - - /** - * End of the multi-octet sequence. mUcs4 now contains the final - * Unicode codepoint to be output - */ - if (0 == --$mState) { - // From Unicode 3.1, non-shortest form is illegal - if ( - ((2 == $mBytes) && ($mUcs4 < 0x0080)) || - ((3 == $mBytes) && ($mUcs4 < 0x0800)) || - ((4 == $mBytes) && ($mUcs4 < 0x10000)) - ) { - return UTF8_BAD_NONSHORT; - - // From Unicode 3.2, surrogate characters are illegal - } elseif (($mUcs4 & 0xFFFFF800) == 0xD800) { - return UTF8_BAD_SURROGATE; - - // Codepoints outside the Unicode range are illegal - } elseif ($mUcs4 > 0x10FFFF) { - return UTF8_BAD_UNIOUTRANGE; - } - - //initialize UTF8 cache - $mState = 0; - $mUcs4 = 0; - $mBytes = 1; - } - } else { - // ((0xC0 & (*in) != 0x80) && (mState != 0)) - // Incomplete multi-octet sequence. - $i--; - return UTF8_BAD_SEQINCOMPLETE; - } - } - } - - if ($mState != 0) { - // Incomplete multi-octet sequence. - $i--; - return UTF8_BAD_SEQINCOMPLETE; - } - - // No bad octets found - $i = null; - return false; -} - -//-------------------------------------------------------------------- -/** -* Takes a return code from utf8_bad_identify() are returns a message -* (in English) explaining what the problem is. 
-* @param int return code from utf8_bad_identify -* @return mixed string message or FALSE if return code unknown -* @see utf8_bad_identify -* @package utf8 -*/ -function utf8_bad_explain($code) -{ - switch ($code) { - case UTF8_BAD_5OCTET: - return 'Five octet sequences are valid UTF-8 but are not supported by Unicode'; - break; - - case UTF8_BAD_6OCTET: - return 'Six octet sequences are valid UTF-8 but are not supported by Unicode'; - break; - - case UTF8_BAD_SEQID: - return 'Invalid octet for use as start of multi-byte UTF-8 sequence'; - break; - - case UTF8_BAD_NONSHORT: - return 'From Unicode 3.1, non-shortest form is illegal'; - break; - - case UTF8_BAD_SURROGATE: - return 'From Unicode 3.2, surrogate characters are illegal'; - break; - - case UTF8_BAD_UNIOUTRANGE: - return 'Codepoints outside the Unicode range are illegal'; - break; - - case UTF8_BAD_SEQINCOMPLETE: - return 'Incomplete multi-octet sequence'; - break; - } - - trigger_error('Unknown error code: ' . $code, E_USER_WARNING); - return false; -} diff --git a/src/phputf8/utils/patterns.php b/src/phputf8/utils/patterns.php deleted file mode 100644 index 46a5c38e..00000000 --- a/src/phputf8/utils/patterns.php +++ /dev/null @@ -1,65 +0,0 @@ - -* @param string string to locate index in -* @param int (n times) -* @return mixed - int if only one input int, array if more -* @return boolean TRUE if it's all ASCII -* @package utf8 -*/ -function utf8_byte_position() -{ - $args = func_get_args(); - $str =& array_shift($args); - if (!is_string($str)) { - return false; - } - - $result = []; - - // trivial byte index, character offset pair - $prev = [0,0]; - - // use a short piece of str to estimate bytes per character - // $i (& $j) -> byte indexes into $str - $i = utf8_locate_next_chr($str, 300); - - // $c -> character offset into $str - $c = strlen(utf8_decode(substr($str, 0, $i))); - - // deal with arguments from lowest to highest - sort($args); - - foreach ($args as $offset) { - // sanity checks FIXME - - // 
0 is an easy check - if ($offset == 0) { - $result[] = 0; - continue; - } - - // ensure no endless looping - $safety_valve = 50; - - do { - if (($c - $prev[1]) == 0) { - // Hack: gone past end of string - $error = 0; - $i = strlen($str); - break; - } - - $j = $i + (int)(($offset - $c) * ($i - $prev[0]) / ($c - $prev[1])); - - // correct to utf8 character boundary - $j = utf8_locate_next_chr($str, $j); - - // save the index, offset for use next iteration - $prev = [$i,$c]; - - if ($j > $i) { - // determine new character offset - $c += strlen(utf8_decode(substr($str, $i, $j - $i))); - } else { - // ditto - $c -= strlen(utf8_decode(substr($str, $j, $i - $j))); - } - - $error = abs($c - $offset); - - // ready for next time around - $i = $j; - - // from 7 it is faster to iterate over the string - } while (($error > 7) && --$safety_valve); - - if ($error && $error <= 7) { - if ($c < $offset) { - // move up - while ($error--) { - $i = utf8_locate_next_chr($str, ++$i); - } - } else { - // move down - while ($error--) { - $i = utf8_locate_current_chr($str, --$i); - } - } - - // ready for next arg - $c = $offset; - } - $result[] = $i; - } - - if (count($result) == 1) { - return $result[0]; - } - - return $result; -} - -//-------------------------------------------------------------------- -/** -* Given a string and any byte index, returns the byte index -* of the start of the current UTF-8 character, relative to supplied -* position. If the current character begins at the same place as the -* supplied byte index, that byte index will be returned. 
Otherwise -* this function will step backwards, looking for the index where -* current UTF-8 character begins -* @author Chris Smith -* @param string -* @param int byte index in the string -* @return int byte index of start of next UTF-8 character -* @package utf8 -*/ -function utf8_locate_current_chr(&$str, $idx) -{ - if ($idx <= 0) { - return 0; - } - - $limit = strlen($str); - if ($idx >= $limit) { - return $limit; - } - - // Binary value for any byte after the first in a multi-byte UTF-8 character - // will be like 10xxxxxx so & 0xC0 can be used to detect this kind - // of byte - assuming well formed UTF-8 - while ($idx && ((ord($str[$idx]) & 0xC0) == 0x80)) { - $idx--; - } - - return $idx; -} - -//-------------------------------------------------------------------- -/** -* Given a string and any byte index, returns the byte index -* of the start of the next UTF-8 character, relative to supplied -* position. If the next character begins at the same place as the -* supplied byte index, that byte index will be returned. 
-* @author Chris Smith -* @param string -* @param int byte index in the string -* @return int byte index of start of next UTF-8 character -* @package utf8 -*/ -function utf8_locate_next_chr(&$str, $idx) -{ - if ($idx <= 0) { - return 0; - } - - $limit = strlen($str); - if ($idx >= $limit) { - return $limit; - } - - // Binary value for any byte after the first in a multi-byte UTF-8 character - // will be like 10xxxxxx so & 0xC0 can be used to detect this kind - // of byte - assuming well formed UTF-8 - while (($idx < $limit) && ((ord($str[$idx]) & 0xC0) == 0x80)) { - $idx++; - } - - return $idx; -} diff --git a/src/phputf8/utils/specials.php b/src/phputf8/utils/specials.php deleted file mode 100644 index 0853a6e1..00000000 --- a/src/phputf8/utils/specials.php +++ /dev/null @@ -1,128 +0,0 @@ - -* @param string $string The UTF8 string to strip of special chars -* @param string (optional) $repl Replace special with this string -* @return string with common non-alphanumeric characters removed -* @see utf8_specials_pattern -*/ -function utf8_strip_specials($string, $repl = '') -{ - return preg_replace(utf8_specials_pattern(), $repl, $string); -} diff --git a/src/phputf8/utils/unicode.php b/src/phputf8/utils/unicode.php deleted file mode 100644 index 4dfad27f..00000000 --- a/src/phputf8/utils/unicode.php +++ /dev/null @@ -1,251 +0,0 @@ - 0xFFFF. Occurrances of the BOM are ignored. Surrogates -* are not allowed. -* Returns false if the input string isn't a valid UTF-8 octet sequence -* and raises a PHP error at level E_USER_WARNING -* Note: this function has been modified slightly in this library to -* trigger errors on encountering bad bytes -* -* Joomla modification - As of PHP 7.4, curly brace access has been deprecated. 
As a result this function has been -* modified to use square brace syntax -* See https://github.com/php/php-src/commit/d574df63dc375f5fc9202ce5afde23f866b6450a -* for additional references -* -* @author -* @param string UTF-8 encoded string -* @return mixed array of unicode code points or FALSE if UTF-8 invalid -* @see utf8_from_unicode -* @see http://hsivonen.iki.fi/php-utf8/ -* @package utf8 -*/ -function utf8_to_unicode($str) -{ - $mState = 0; // cached expected number of octets after the current octet - // until the beginning of the next UTF8 character sequence - $mUcs4 = 0; // cached Unicode character - $mBytes = 1; // cached expected number of octets in the current sequence - - $out = []; - - $len = strlen($str); - - for ($i = 0; $i < $len; $i++) { - $in = ord($str[$i]); - - if ($mState == 0) { - // When mState is zero we expect either a US-ASCII character or a - // multi-octet sequence. - if (0 == (0x80 & ($in))) { - // US-ASCII, pass straight through. - $out[] = $in; - $mBytes = 1; - } elseif (0xC0 == (0xE0 & ($in))) { - // First octet of 2 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x1F) << 6; - $mState = 1; - $mBytes = 2; - } elseif (0xE0 == (0xF0 & ($in))) { - // First octet of 3 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x0F) << 12; - $mState = 2; - $mBytes = 3; - } elseif (0xF0 == (0xF8 & ($in))) { - // First octet of 4 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x07) << 18; - $mState = 3; - $mBytes = 4; - } elseif (0xF8 == (0xFC & ($in))) { - /* First octet of 5 octet sequence. - * - * This is illegal because the encoded codepoint must be either - * (a) not the shortest form or - * (b) outside the Unicode range of 0-0x10FFFF. - * Rather than trying to resynchronize, we will carry on until the end - * of the sequence and let the later error handling code catch it. 
- */ - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x03) << 24; - $mState = 4; - $mBytes = 5; - } elseif (0xFC == (0xFE & ($in))) { - // First octet of 6 octet sequence, see comments for 5 octet sequence. - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 1) << 30; - $mState = 5; - $mBytes = 6; - } else { - /* Current octet is neither in the US-ASCII range nor a legal first - * octet of a multi-octet sequence. - */ - trigger_error( - 'utf8_to_unicode: Illegal sequence identifier ' . - 'in UTF-8 at byte ' . $i, - E_USER_WARNING - ); - return false; - } - } else { - // When mState is non-zero, we expect a continuation of the multi-octet - // sequence - if (0x80 == (0xC0 & ($in))) { - // Legal continuation. - $shift = ($mState - 1) * 6; - $tmp = $in; - $tmp = ($tmp & 0x0000003F) << $shift; - $mUcs4 |= $tmp; - - /** - * End of the multi-octet sequence. mUcs4 now contains the final - * Unicode codepoint to be output - */ - if (0 == --$mState) { - /* - * Check for illegal sequences and codepoints. - */ - // From Unicode 3.1, non-shortest form is illegal - if ( - ((2 == $mBytes) && ($mUcs4 < 0x0080)) || - ((3 == $mBytes) && ($mUcs4 < 0x0800)) || - ((4 == $mBytes) && ($mUcs4 < 0x10000)) || - (4 < $mBytes) || - // From Unicode 3.2, surrogate characters are illegal - (($mUcs4 & 0xFFFFF800) == 0xD800) || - // Codepoints outside the Unicode range are illegal - ($mUcs4 > 0x10FFFF) - ) { - trigger_error( - 'utf8_to_unicode: Illegal sequence or codepoint ' . - 'in UTF-8 at byte ' . $i, - E_USER_WARNING - ); - - return false; - } - - if (0xFEFF != $mUcs4) { - // BOM is legal but we don't want to output it - $out[] = $mUcs4; - } - - //initialize UTF8 cache - $mState = 0; - $mUcs4 = 0; - $mBytes = 1; - } - } else { - /** - *((0xC0 & (*in) != 0x80) && (mState != 0)) - * Incomplete multi-octet sequence. - */ - trigger_error( - 'utf8_to_unicode: Incomplete multi-octet ' . - ' sequence in UTF-8 at byte ' . 
$i, - E_USER_WARNING - ); - - return false; - } - } - } - return $out; -} - -//-------------------------------------------------------------------- -/** -* Takes an array of ints representing the Unicode characters and returns -* a UTF-8 string. Astral planes are supported ie. the ints in the -* input can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates -* are not allowed. -* Returns false if the input array contains ints that represent -* surrogates or are outside the Unicode range -* and raises a PHP error at level E_USER_WARNING -* Note: this function has been modified slightly in this library to use -* output buffering to concatenate the UTF-8 string (faster) as well as -* reference the array by it's keys -* @param array of unicode code points representing a string -* @return mixed UTF-8 string or FALSE if array contains invalid code points -* @author -* @see utf8_to_unicode -* @see http://hsivonen.iki.fi/php-utf8/ -* @package utf8 -*/ -function utf8_from_unicode($arr) -{ - ob_start(); - - foreach (array_keys($arr) as $k) { - # ASCII range (including control chars) - if (($arr[$k] >= 0) && ($arr[$k] <= 0x007f)) { - echo chr($arr[$k]); - - # 2 byte sequence - } elseif ($arr[$k] <= 0x07ff) { - echo chr(0xc0 | ($arr[$k] >> 6)); - echo chr(0x80 | ($arr[$k] & 0x003f)); - - # Byte order mark (skip) - } elseif ($arr[$k] == 0xFEFF) { - // nop -- zap the BOM - - # Test for illegal surrogates - } elseif ($arr[$k] >= 0xD800 && $arr[$k] <= 0xDFFF) { - // found a surrogate - trigger_error( - 'utf8_from_unicode: Illegal surrogate ' . - 'at index: ' . $k . ', value: ' . 
$arr[$k], - E_USER_WARNING - ); - - return false; - - # 3 byte sequence - } elseif ($arr[$k] <= 0xffff) { - echo chr(0xe0 | ($arr[$k] >> 12)); - echo chr(0x80 | (($arr[$k] >> 6) & 0x003f)); - echo chr(0x80 | ($arr[$k] & 0x003f)); - - # 4 byte sequence - } elseif ($arr[$k] <= 0x10ffff) { - echo chr(0xf0 | ($arr[$k] >> 18)); - echo chr(0x80 | (($arr[$k] >> 12) & 0x3f)); - echo chr(0x80 | (($arr[$k] >> 6) & 0x3f)); - echo chr(0x80 | ($arr[$k] & 0x3f)); - } else { - trigger_error( - 'utf8_from_unicode: Codepoint out of Unicode range ' . - 'at index: ' . $k . ', value: ' . $arr[$k], - E_USER_WARNING - ); - - // out of range - return false; - } - } - - $result = ob_get_contents(); - ob_end_clean(); - return $result; -} diff --git a/src/phputf8/utils/validation.php b/src/phputf8/utils/validation.php deleted file mode 100644 index 74fcdd23..00000000 --- a/src/phputf8/utils/validation.php +++ /dev/null @@ -1,174 +0,0 @@ - -* @param string UTF-8 encoded string -* @return boolean true if valid -* @see http://hsivonen.iki.fi/php-utf8/ -* @see utf8_compliant -* @package utf8 -*/ -function utf8_is_valid($str) -{ - $mState = 0; // cached expected number of octets after the current octet - // until the beginning of the next UTF8 character sequence - $mUcs4 = 0; // cached Unicode character - $mBytes = 1; // cached expected number of octets in the current sequence - - $len = strlen($str); - - for ($i = 0; $i < $len; $i++) { - /* - * Joomla modification - As of PHP 7.4, curly brace access has been deprecated. As a result the line below has - * been modified to use square brace syntax - * See https://github.com/php/php-src/commit/d574df63dc375f5fc9202ce5afde23f866b6450a - * for additional references - */ - $in = ord($str[$i]); - - if ($mState == 0) { - // When mState is zero we expect either a US-ASCII character or a - // multi-octet sequence. - if (0 == (0x80 & ($in))) { - // US-ASCII, pass straight through. 
- $mBytes = 1; - } elseif (0xC0 == (0xE0 & ($in))) { - // First octet of 2 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x1F) << 6; - $mState = 1; - $mBytes = 2; - } elseif (0xE0 == (0xF0 & ($in))) { - // First octet of 3 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x0F) << 12; - $mState = 2; - $mBytes = 3; - } elseif (0xF0 == (0xF8 & ($in))) { - // First octet of 4 octet sequence - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x07) << 18; - $mState = 3; - $mBytes = 4; - } elseif (0xF8 == (0xFC & ($in))) { - /* First octet of 5 octet sequence. - * - * This is illegal because the encoded codepoint must be either - * (a) not the shortest form or - * (b) outside the Unicode range of 0-0x10FFFF. - * Rather than trying to resynchronize, we will carry on until the end - * of the sequence and let the later error handling code catch it. - */ - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 0x03) << 24; - $mState = 4; - $mBytes = 5; - } elseif (0xFC == (0xFE & ($in))) { - // First octet of 6 octet sequence, see comments for 5 octet sequence. - $mUcs4 = ($in); - $mUcs4 = ($mUcs4 & 1) << 30; - $mState = 5; - $mBytes = 6; - } else { - /* Current octet is neither in the US-ASCII range nor a legal first - * octet of a multi-octet sequence. - */ - return false; - } - } else { - // When mState is non-zero, we expect a continuation of the multi-octet - // sequence - if (0x80 == (0xC0 & ($in))) { - // Legal continuation. - $shift = ($mState - 1) * 6; - $tmp = $in; - $tmp = ($tmp & 0x0000003F) << $shift; - $mUcs4 |= $tmp; - - /** - * End of the multi-octet sequence. mUcs4 now contains the final - * Unicode codepoint to be output - */ - if (0 == --$mState) { - /* - * Check for illegal sequences and codepoints. 
- */ - // From Unicode 3.1, non-shortest form is illegal - if ( - ((2 == $mBytes) && ($mUcs4 < 0x0080)) || - ((3 == $mBytes) && ($mUcs4 < 0x0800)) || - ((4 == $mBytes) && ($mUcs4 < 0x10000)) || - (4 < $mBytes) || - // From Unicode 3.2, surrogate characters are illegal - (($mUcs4 & 0xFFFFF800) == 0xD800) || - // Codepoints outside the Unicode range are illegal - ($mUcs4 > 0x10FFFF) - ) { - return false; - } - - //initialize UTF8 cache - $mState = 0; - $mUcs4 = 0; - $mBytes = 1; - } - } else { - /** - *((0xC0 & (*in) != 0x80) && (mState != 0)) - * Incomplete multi-octet sequence. - */ - - return false; - } - } - } - return true; -} - -//-------------------------------------------------------------------- -/** -* Tests whether a string complies as UTF-8. This will be much -* faster than utf8_is_valid but will pass five and six octet -* UTF-8 sequences, which are not supported by Unicode and -* so cannot be displayed correctly in a browser. In other words -* it is not as strict as utf8_is_valid but it's faster. If you use -* is to validate user input, you place yourself at the risk that -* attackers will be able to inject 5 and 6 byte sequences (which -* may or may not be a significant risk, depending on what you are -* are doing) -* @see utf8_is_valid -* @see http://www.php.net/manual/en/reference.pcre.pattern.modifiers.php#54805 -* @param string UTF-8 string to check -* @return boolean TRUE if string is valid UTF-8 -* @package utf8 -*/ -function utf8_compliant($str) -{ - if (strlen($str) == 0) { - return true; - } - // If even just the first character can be matched, when the /u - // modifier is used, then it's valid UTF-8. 
If the UTF-8 is somehow - // invalid, nothing at all will match, even if the string contains - // some valid sequences - return (preg_match('/^.{1}/us', $str, $ar) == 1); -} From 0a933eeb89fed17e1d46c96dd418cd226b59ffce Mon Sep 17 00:00:00 2001 From: Hannes Papenberg Date: Mon, 30 Jun 2025 09:17:49 +0200 Subject: [PATCH 02/18] Switch Github Actions to v4 workflow --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 09649838..5e0e34ee 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,4 +12,4 @@ concurrency: jobs: framework-ci: - uses: joomla-framework/.github/.github/workflows/workflow-v3.yml@main + uses: joomla-framework/.github/.github/workflows/workflow-v4.yml@main From 3499f48fbe23f24e0a4e23089bede18e66809dc9 Mon Sep 17 00:00:00 2001 From: Robert Deutz Date: Sun, 6 Jul 2025 13:09:50 +0200 Subject: [PATCH 03/18] switch to 4.x-dev --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 9794846c..6066559c 100644 --- a/composer.json +++ b/composer.json @@ -12,7 +12,7 @@ }, "require-dev": { "doctrine/inflector": "^1.2", - "joomla/test": "^3.0", + "joomla/test": "dev-4.x-dev", "phpunit/phpunit": "^9.5.28", "squizlabs/php_codesniffer": "^3.7.2", "phpstan/phpstan": "1.12.27", From c7e26cad49fcf554d4bc868aff716b2c24df64d4 Mon Sep 17 00:00:00 2001 From: Brian Teeman Date: Tue, 8 Jul 2025 13:36:19 +0100 Subject: [PATCH 04/18] [4.x] Typo (#58) reference --- src/StringHelper.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/StringHelper.php b/src/StringHelper.php index 27f3caf2..2b8a7d60 100644 --- a/src/StringHelper.php +++ b/src/StringHelper.php @@ -261,7 +261,7 @@ public static function strlen($str) * @param string|string[] $search String to search * @param string|string[] $replace Existing string to replace * @param string $str New string to replace with - * 
@param integer|null|boolean $count Optional count value to be passed by referene + * @param integer|null|boolean $count Optional count value to be passed by reference * * @return string UTF-8 String * From a55c7dcf3dcd2b351ef61e2f3a92b147bc2c3fe6 Mon Sep 17 00:00:00 2001 From: Hannes Papenberg Date: Fri, 11 Jul 2025 09:50:17 +0200 Subject: [PATCH 05/18] Raising requirements to PHP 8.3, updating dependencies --- composer.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/composer.json b/composer.json index 6066559c..d4f3dd18 100644 --- a/composer.json +++ b/composer.json @@ -6,17 +6,17 @@ "homepage": "https://github.com/joomla-framework/string", "license": "GPL-2.0-or-later", "require": { - "php": "^8.1.0", + "php": "^8.3.0", "symfony/deprecation-contracts": "^2|^3", "symfony/polyfill-mbstring": "^1.31.0" }, "require-dev": { "doctrine/inflector": "^1.2", "joomla/test": "dev-4.x-dev", - "phpunit/phpunit": "^9.5.28", + "phpunit/phpunit": "^12.2.6", "squizlabs/php_codesniffer": "^3.7.2", - "phpstan/phpstan": "1.12.27", - "phpstan/phpstan-deprecation-rules": "1.2.1" + "phpstan/phpstan": "2.1.17", + "phpstan/phpstan-deprecation-rules": "2.0.3" }, "conflict": { "doctrine/inflector": "<1.2" From b8ca9b7a7fa5819d02bcbb313ed8f420cb567691 Mon Sep 17 00:00:00 2001 From: Hannes Papenberg Date: Fri, 11 Jul 2025 09:51:10 +0200 Subject: [PATCH 06/18] Removing deprecated methods from Inflector class --- src/Inflector.php | 193 +--------------------------------------------- 1 file changed, 2 insertions(+), 191 deletions(-) diff --git a/src/Inflector.php b/src/Inflector.php index 1a54ad41..f7e64b1d 100644 --- a/src/Inflector.php +++ b/src/Inflector.php @@ -20,15 +20,6 @@ */ class Inflector extends DoctrineInflector { - /** - * The singleton instance. - * - * @var Inflector - * @since 1.0 - * @deprecated 3.0 - */ - private static $instance; - /** * The inflector rules for countability. 
* @@ -90,140 +81,6 @@ public function addCountableRule($data) return $this; } - /** - * Adds a specific singular-plural pair for a word. - * - * @param string $singular The singular form of the word. - * @param string $plural The plural form of the word. If omitted, it is assumed the singular and plural are identical. - * - * @return $this - * - * @since 1.0 - * @deprecated 3.0 Use Doctrine\Common\Inflector\Inflector::rules() instead. - */ - public function addWord($singular, $plural = '') - { - trigger_deprecation( - 'joomla/string', - '2.0.0', - '%s() is deprecated and will be removed in 3.0, use %s::rules() instead.', - __METHOD__, - DoctrineInflector::class - ); - - if ($plural !== '') { - static::rules( - 'plural', - [ - 'irregular' => [$plural => $singular], - ] - ); - - static::rules( - 'singular', - [ - 'irregular' => [$singular => $plural], - ] - ); - } else { - static::rules( - 'plural', - [ - 'uninflected' => [$singular], - ] - ); - - static::rules( - 'singular', - [ - 'uninflected' => [$singular], - ] - ); - } - - return $this; - } - - /** - * Adds a pluralisation rule. - * - * @param mixed $data A string or an array of regex rules to add. - * - * @return $this - * - * @since 1.0 - * @deprecated 3.0 Use Doctrine\Common\Inflector\Inflector::rules() instead. - */ - public function addPluraliseRule($data) - { - trigger_deprecation( - 'joomla/string', - '2.0.0', - '%s() is deprecated and will be removed in 3.0, use %s::rules() instead.', - __METHOD__, - DoctrineInflector::class - ); - - $this->addRule($data, 'plural'); - - return $this; - } - - /** - * Adds a singularisation rule. - * - * @param mixed $data A string or an array of regex rules to add. - * - * @return $this - * - * @since 1.0 - * @deprecated 3.0 Use Doctrine\Common\Inflector\Inflector::rules() instead. 
- */ - public function addSingulariseRule($data) - { - trigger_deprecation( - 'joomla/string', - '2.0.0', - '%s() is deprecated and will be removed in 3.0, use %s::rules() instead.', - __METHOD__, - DoctrineInflector::class - ); - - $this->addRule($data, 'singular'); - - return $this; - } - - /** - * Gets an instance of the Inflector singleton. - * - * @param boolean $new If true (default is false), returns a new instance regardless if one exists. This argument is mainly used for testing. - * - * @return static - * - * @since 1.0 - * @deprecated 3.0 Use static methods without a class instance instead. - */ - public static function getInstance($new = false) - { - trigger_deprecation( - 'joomla/string', - '2.0.0', - '%s() is deprecated and will be removed in 3.0.', - __METHOD__ - ); - - if ($new) { - return new static(); - } - - if (!\is_object(self::$instance)) { - self::$instance = new static(); - } - - return self::$instance; - } - /** * Checks if a word is countable. * @@ -249,7 +106,7 @@ public function isCountable($word) */ public function isPlural($word) { - return $this->toPlural($this->toSingular($word)) === $word; + return static::pluralize(static::singularize($word)) === $word; } /** @@ -263,52 +120,6 @@ public function isPlural($word) */ public function isSingular($word) { - return $this->toSingular($word) === $word; - } - - /** - * Converts a word into its plural form. - * - * @param string $word The singular word to pluralise. - * - * @return string The word in plural form. - * - * @since 1.0 - * @deprecated 3.0 Use Doctrine\Common\Inflector\Inflector::pluralize() instead. - */ - public function toPlural($word) - { - trigger_deprecation( - 'joomla/string', - '2.0.0', - '%s() is deprecated and will be removed in 3.0, use %s::pluralize() instead.', - __METHOD__, - DoctrineInflector::class - ); - - return static::pluralize($word); - } - - /** - * Converts a word into its singular form. - * - * @param string $word The plural word to singularise. 
- * - * @return string The word in singular form. - * - * @since 1.0 - * @deprecated 3.0 Use Doctrine\Common\Inflector\Inflector::singularize() instead. - */ - public function toSingular($word) - { - trigger_deprecation( - 'joomla/string', - '2.0.0', - '%s() is deprecated and will be removed in 3.0, use %s::singularize() instead.', - __METHOD__, - DoctrineInflector::class - ); - - return static::singularize($word); + return static::singularize($word) === $word; } } From 8d9bdb74258ff8dd077082a1c4410935d669c5c2 Mon Sep 17 00:00:00 2001 From: Hannes Papenberg Date: Fri, 11 Jul 2025 09:51:35 +0200 Subject: [PATCH 07/18] Updating unittests to phpunit 12 --- Tests/InflectorTest.php | 260 +++---------------- Tests/NormaliseTest.php | 189 +++++++------- Tests/StringHelperTest.php | 519 +++++++++++++++++++------------------ phpunit.xml.dist | 8 - 4 files changed, 410 insertions(+), 566 deletions(-) diff --git a/Tests/InflectorTest.php b/Tests/InflectorTest.php index 671e8353..66503fea 100644 --- a/Tests/InflectorTest.php +++ b/Tests/InflectorTest.php @@ -10,6 +10,7 @@ use Doctrine\Common\Inflector\Inflector as DoctrineInflector; use Joomla\String\Inflector; use Joomla\Test\TestHelper; +use PHPUnit\Framework\Attributes\DataProvider; use PHPUnit\Framework\TestCase; /** @@ -28,51 +29,55 @@ class InflectorTest extends TestCase /** * Method to seed data to testIsCountable. * - * @return \Generator + * @return array */ - public function seedIsCountable(): \Generator + public static function seedIsCountable(): array { - yield ['id', true]; - yield ['title', false]; + return [ + ['id', true], + ['title', false], + ]; } /** * Method to seed data to testToPlural. 
* - * @return \Generator + * @return array * * @since 1.0 */ - public function seedSinglePlural(): \Generator + public static function seedSinglePlural(): array { - // Regular plurals - yield ['bus', 'buses']; - yield ['notify', 'notifies']; - yield ['click', 'clicks']; + return [ + // Regular plurals + ['bus', 'buses'], + ['notify', 'notifies'], + ['click', 'clicks'], - // Almost regular plurals. - yield ['photo', 'photos']; - yield ['zero', 'zeros']; + // Almost regular plurals. + ['photo', 'photos'], + ['zero', 'zeros'], - // Irregular identicals - yield ['salmon', 'salmon']; + // Irregular identicals + ['salmon', 'salmon'], - // Irregular plurals - yield ['ox', 'oxen']; - yield ['quiz', 'quizzes']; - yield ['status', 'statuses']; - yield ['matrix', 'matrices']; - yield ['index', 'indices']; - yield ['vertex', 'vertices']; - yield ['hive', 'hives']; + // Irregular plurals + ['ox', 'oxen'], + ['quiz', 'quizzes'], + ['status', 'statuses'], + ['matrix', 'matrices'], + ['index', 'indices'], + ['vertex', 'vertices'], + ['hive', 'hives'], - // Ablaut plurals - yield ['foot', 'feet']; - yield ['louse', 'lice']; - yield ['man', 'men']; - yield ['mouse', 'mice']; - yield ['tooth', 'teeth']; - yield ['woman', 'women']; + // Ablaut plurals + ['foot', 'feet'], + ['louse', 'lice'], + ['man', 'men'], + ['mouse', 'mice'], + ['tooth', 'teeth'], + ['woman', 'women'], + ]; } /** @@ -86,7 +91,7 @@ protected function setUp(): void { parent::setUp(); - $this->inflector = Inflector::getInstance(true); + $this->inflector = new Inflector(); DoctrineInflector::reset(); } @@ -142,136 +147,13 @@ public function testAddCountableRule() ); } - /** - * @testdox A word can be added to the inflector without a plural form - */ - public function testAddWordWithoutPlural() - { - if (!$this->checkInflectorImplementation($this->inflector)) { - $this->markTestSkipped('This test depends on the library\'s implementation'); - } - - $this->assertSame( - $this->inflector, - 
$this->inflector->addWord('foo') - ); - - $plural = TestHelper::getValue(DoctrineInflector::class, 'plural'); - - $this->assertTrue( - in_array('foo', $plural['uninflected']) - ); - - $singular = TestHelper::getValue(DoctrineInflector::class, 'singular'); - - $this->assertTrue( - in_array('foo', $singular['uninflected']) - ); - } - - /** - * @testdox A word can be added to the inflector with a plural form - */ - public function testAddWordWithPlural() - { - if (!$this->checkInflectorImplementation($this->inflector)) { - $this->markTestSkipped('This test depends on the library\'s implementation'); - } - - $this->assertEquals( - $this->inflector, - $this->inflector->addWord('bar', 'foo') - ); - - $plural = TestHelper::getValue(DoctrineInflector::class, 'plural'); - - $this->assertArrayHasKey( - 'foo', - $plural['irregular'] - ); - - $singular = TestHelper::getValue(DoctrineInflector::class, 'singular'); - - $this->assertArrayHasKey( - 'bar', - $singular['irregular'] - ); - } - - /** - * @testdox A pluralisation rule can be added to the inflector - */ - public function testAddPluraliseRule() - { - if (!$this->checkInflectorImplementation($this->inflector)) { - $this->markTestSkipped('This test depends on the library\'s implementation'); - } - - $this->assertSame( - $this->inflector->addPluraliseRule(['/^(custom)$/i' => '\1izables']), - $this->inflector, - 'Checks chaining.' - ); - - $plural = TestHelper::getValue(DoctrineInflector::class, 'plural'); - - $this->assertArrayHasKey( - '/^(custom)$/i', - $plural['rules'], - 'Checks a pluralisation rule was added.' 
- ); - } - - /** - * @testdox A singularisation rule can be added to the inflector - */ - public function testAddSingulariseRule() - { - if (!$this->checkInflectorImplementation($this->inflector)) { - $this->markTestSkipped('This test depends on the library\'s implementation'); - } - - $this->assertSame( - $this->inflector->addSingulariseRule(['/^(inflec|contribu)tors$/i' => '\1ta']), - $this->inflector, - 'Checks chaining.' - ); - - $singular = TestHelper::getValue(DoctrineInflector::class, 'singular'); - - $this->assertArrayHasKey( - '/^(inflec|contribu)tors$/i', - $singular['rules'], - 'Checks a singularisation rule was added.' - ); - } - - /** - * @testdox The singleton instance of the inflector can be retrieved - */ - public function testGetInstance() - { - $this->assertInstanceOf( - Inflector::class, - Inflector::getInstance(), - 'Check getInstance returns the right class.' - ); - - $this->assertNotSame( - Inflector::getInstance(), - Inflector::getInstance(true), - 'getInstance with the new flag should not return the singleton instance' - ); - } - /** * @testdox A string is checked to determine if it a countable word * * @param string $input A string. * @param boolean $expected The expected result of the function call. - * - * @dataProvider seedIsCountable */ + #[DataProvider('seedIsCountable')] public function testIsCountable(string $input, bool $expected) { $this->assertEquals( @@ -285,9 +167,8 @@ public function testIsCountable(string $input, bool $expected) * * @param string $singular The singular form of a word. * @param string $plural The plural form of a word. - * - * @dataProvider seedSinglePlural */ + #[DataProvider('seedSinglePlural')] public function testIsPlural(string $singular, string $plural) { if ($singular === 'bus' && !$this->checkInflectorImplementation($this->inflector)) { @@ -312,9 +193,8 @@ public function testIsPlural(string $singular, string $plural) * * @param string $singular The singular form of a word.
* @param string $plural The plural form of a word. - * - * @dataProvider seedSinglePlural */ + #[DataProvider('seedSinglePlural')] public function testIsSingular(string $singular, string $plural) { if ($singular === 'bus' && !$this->checkInflectorImplementation($this->inflector)) { @@ -334,68 +214,6 @@ public function testIsSingular(string $singular, string $plural) } } - /** - * @testdox A string is converted to its plural form - * - * @param string $singular The singular form of a word. - * @param string $plural The plural form of a word. - * - * @dataProvider seedSinglePlural - */ - public function testToPlural(string $singular, string $plural) - { - $this->assertSame( - $plural, - $this->inflector->toPlural($singular), - "'$plural' should be the plural form of '$singular'" - ); - } - - /** - * @testdox A string that is already plural is returned in the same form - */ - public function testToPluralAlreadyPlural() - { - $this->assertSame( - 'buses', - $this->inflector->toPlural('buses'), - "'buses' should not be pluralised'" - ); - } - - /** - * @testdox A string is converted to its singular form - * - * @param string $singular The singular form of a word. - * @param string $plural The plural form of a word.
- * - * @dataProvider seedSinglePlural - */ - public function testToSingular(string $singular, string $plural) - { - $this->assertSame( - $singular, - $this->inflector->toSingular($plural), - "'$singular' should be the singular form of '$plural'" - ); - } - - /** - * @testdox A string that is already singular is returned in the same form - */ - public function testToSingularAlreadySingular() - { - if (!$this->checkInflectorImplementation($this->inflector)) { - $this->markTestSkipped('"bus/buses" is not known to the new implementation'); - } - - $this->assertSame( - 'bus', - $this->inflector->toSingular('bus'), - "'bus' should not be singularised'" - ); - } - private function checkInflectorImplementation(DoctrineInflector $inflector): bool { $reflectionClass = new \ReflectionClass($inflector); diff --git a/Tests/NormaliseTest.php b/Tests/NormaliseTest.php index aae8a0c9..719d6b7f 100644 --- a/Tests/NormaliseTest.php +++ b/Tests/NormaliseTest.php @@ -8,6 +8,7 @@ namespace Joomla\String\Tests; use Joomla\String\Normalise; +use PHPUnit\Framework\Attributes\DataProvider; use PHPUnit\Framework\TestCase; /** @@ -20,147 +21,163 @@ class NormaliseTest extends TestCase /** * Method to seed data to testFromCamelCase. 
* - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestFromCamelCase(): \Generator + public static function seedTestFromCamelCase(): array { - // Note: string, expected - yield ['FooBarABCDef', ['Foo', 'Bar', 'ABC', 'Def']]; - yield ['JFooBar', ['J', 'Foo', 'Bar']]; - yield ['J001FooBar002', ['J001', 'Foo', 'Bar002']]; - yield ['abcDef', ['abc', 'Def']]; - yield ['abc_defGhi_Jkl', ['abc_def', 'Ghi_Jkl']]; - yield ['ThisIsA_NASAAstronaut', ['This', 'Is', 'A_NASA', 'Astronaut']]; - yield ['JohnFitzgerald_Kennedy', ['John', 'Fitzgerald_Kennedy']]; + return [ + // Note: string, expected + ['FooBarABCDef', ['Foo', 'Bar', 'ABC', 'Def']], + ['JFooBar', ['J', 'Foo', 'Bar']], + ['J001FooBar002', ['J001', 'Foo', 'Bar002']], + ['abcDef', ['abc', 'Def']], + ['abc_defGhi_Jkl', ['abc_def', 'Ghi_Jkl']], + ['ThisIsA_NASAAstronaut', ['This', 'Is', 'A_NASA', 'Astronaut']], + ['JohnFitzgerald_Kennedy', ['John', 'Fitzgerald_Kennedy']], + ]; } /** * Method to seed data to testFromCamelCase. * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestFromCamelCase_nongrouped(): \Generator + public static function seedTestFromCamelCase_nongrouped(): array { - yield ['Foo Bar', 'FooBar']; - yield ['foo Bar', 'fooBar']; - yield ['Foobar', 'Foobar']; - yield ['foobar', 'foobar']; + return [ + ['Foo Bar', 'FooBar'], + ['foo Bar', 'fooBar'], + ['Foobar', 'Foobar'], + ['foobar', 'foobar'], + ]; } /** * Method to seed data to testToCamelCase. 
* - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestToCamelCase(): \Generator + public static function seedTestToCamelCase(): array { - yield ['FooBar', 'Foo Bar']; - yield ['FooBar', 'Foo-Bar']; - yield ['FooBar', 'Foo_Bar']; - yield ['FooBar', 'foo bar']; - yield ['FooBar', 'foo-bar']; - yield ['FooBar', 'foo_bar']; + return [ + ['FooBar', 'Foo Bar'], + ['FooBar', 'Foo-Bar'], + ['FooBar', 'Foo_Bar'], + ['FooBar', 'foo bar'], + ['FooBar', 'foo-bar'], + ['FooBar', 'foo_bar'], + ]; } /** * Method to seed data to testToDashSeparated. * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestToDashSeparated(): \Generator + public static function seedTestToDashSeparated(): array { - yield ['Foo-Bar', 'Foo Bar']; - yield ['Foo-Bar', 'Foo-Bar']; - yield ['Foo-Bar', 'Foo_Bar']; - yield ['foo-bar', 'foo bar']; - yield ['foo-bar', 'foo-bar']; - yield ['foo-bar', 'foo_bar']; - yield ['foo-bar', 'foo bar']; - yield ['foo-bar', 'foo---bar']; - yield ['foo-bar', 'foo___bar']; + return [ + ['Foo-Bar', 'Foo Bar'], + ['Foo-Bar', 'Foo-Bar'], + ['Foo-Bar', 'Foo_Bar'], + ['foo-bar', 'foo bar'], + ['foo-bar', 'foo-bar'], + ['foo-bar', 'foo_bar'], + ['foo-bar', 'foo bar'], + ['foo-bar', 'foo---bar'], + ['foo-bar', 'foo___bar'], + ]; } /** * Method to seed data to testToSpaceSeparated. 
* - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestToSpaceSeparated(): \Generator + public static function seedTestToSpaceSeparated(): array { - yield ['Foo Bar', 'Foo Bar']; - yield ['Foo Bar', 'Foo-Bar']; - yield ['Foo Bar', 'Foo_Bar']; - yield ['foo bar', 'foo bar']; - yield ['foo bar', 'foo-bar']; - yield ['foo bar', 'foo_bar']; - yield ['foo bar', 'foo bar']; - yield ['foo bar', 'foo---bar']; - yield ['foo bar', 'foo___bar']; + return [ + ['Foo Bar', 'Foo Bar'], + ['Foo Bar', 'Foo-Bar'], + ['Foo Bar', 'Foo_Bar'], + ['foo bar', 'foo bar'], + ['foo bar', 'foo-bar'], + ['foo bar', 'foo_bar'], + ['foo bar', 'foo bar'], + ['foo bar', 'foo---bar'], + ['foo bar', 'foo___bar'], + ]; } /** * Method to seed data to testToUnderscoreSeparated. * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestToUnderscoreSeparated(): \Generator + public static function seedTestToUnderscoreSeparated(): array { - yield ['Foo_Bar', 'Foo Bar']; - yield ['Foo_Bar', 'Foo-Bar']; - yield ['Foo_Bar', 'Foo_Bar']; - yield ['foo_bar', 'foo bar']; - yield ['foo_bar', 'foo-bar']; - yield ['foo_bar', 'foo_bar']; - yield ['foo_bar', 'foo bar']; - yield ['foo_bar', 'foo---bar']; - yield ['foo_bar', 'foo___bar']; + return [ + ['Foo_Bar', 'Foo Bar'], + ['Foo_Bar', 'Foo-Bar'], + ['Foo_Bar', 'Foo_Bar'], + ['foo_bar', 'foo bar'], + ['foo_bar', 'foo-bar'], + ['foo_bar', 'foo_bar'], + ['foo_bar', 'foo bar'], + ['foo_bar', 'foo---bar'], + ['foo_bar', 'foo___bar'], + ]; } /** * Method to seed data to testToVariable. 
* - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestToVariable(): \Generator + public static function seedTestToVariable(): array { - yield ['myFooBar', 'My Foo Bar']; - yield ['myFooBar', 'My Foo-Bar']; - yield ['myFooBar', 'My Foo_Bar']; - yield ['myFooBar', 'my foo bar']; - yield ['myFooBar', 'my foo-bar']; - yield ['myFooBar', 'my foo_bar']; - yield ['abc3def4', '1abc3def4']; + return [ + ['myFooBar', 'My Foo Bar'], + ['myFooBar', 'My Foo-Bar'], + ['myFooBar', 'My Foo_Bar'], + ['myFooBar', 'my foo bar'], + ['myFooBar', 'my foo-bar'], + ['myFooBar', 'my foo_bar'], + ['abc3def4', '1abc3def4'], + ]; } /** * Method to seed data to testToKey. * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestToKey(): \Generator + public static function seedTestToKey(): array { - yield ['foo_bar', 'Foo Bar']; - yield ['foo_bar', 'Foo-Bar']; - yield ['foo_bar', 'Foo_Bar']; - yield ['foo_bar', 'foo bar']; - yield ['foo_bar', 'foo-bar']; - yield ['foo_bar', 'foo_bar']; + return [ + ['foo_bar', 'Foo Bar'], + ['foo_bar', 'Foo-Bar'], + ['foo_bar', 'Foo_Bar'], + ['foo_bar', 'foo bar'], + ['foo_bar', 'foo-bar'], + ['foo_bar', 'foo_bar'], + ]; } /** @@ -168,9 +185,8 @@ public function seedTestToKey(): \Generator * * @param string $expected The expected value from the method. * @param string $input The input value for the method. - * - * @dataProvider seedTestFromCamelCase_nongrouped */ + #[DataProvider('seedTestFromCamelCase_nongrouped')] public function testFromCamelCase_nongrouped(string $expected, string $input) { $this->assertEquals($expected, Normalise::fromCamelcase($input)); @@ -181,9 +197,8 @@ public function testFromCamelCase_nongrouped(string $expected, string $input) * * @param string $input The input value for the method. * @param array|string $expected The expected value from the method. 
- * - * @dataProvider seedTestFromCamelCase */ + #[DataProvider('seedTestFromCamelCase')] public function testFromCamelCase_grouped(string $input, $expected) { $this->assertEquals($expected, Normalise::fromCamelcase($input, true)); @@ -194,9 +209,8 @@ public function testFromCamelCase_grouped(string $input, $expected) * * @param string $expected The expected value from the method. * @param string $input The input value for the method. - * - * @dataProvider seedTestToCamelCase */ + #[DataProvider('seedTestToCamelCase')] public function testToCamelCase(string $expected, string $input) { $this->assertEquals($expected, Normalise::toCamelcase($input)); @@ -207,9 +221,8 @@ public function testToCamelCase(string $expected, string $input) * * @param string $expected The expected value from the method. * @param string $input The input value for the method. - * - * @dataProvider seedTestToDashSeparated */ + #[DataProvider('seedTestToDashSeparated')] public function testToDashSeparated(string $expected, string $input) { $this->assertEquals($expected, Normalise::toDashSeparated($input)); @@ -220,9 +233,8 @@ public function testToDashSeparated(string $expected, string $input) * * @param string $expected The expected value from the method. * @param string $input The input value for the method. - * - * @dataProvider seedTestToSpaceSeparated */ + #[DataProvider('seedTestToSpaceSeparated')] public function testToSpaceSeparated(string $expected, string $input) { $this->assertEquals($expected, Normalise::toSpaceSeparated($input)); @@ -233,9 +245,8 @@ public function testToSpaceSeparated(string $expected, string $input) * * @param string $expected The expected value from the method. * @param string $input The input value for the method. 
- * - * @dataProvider seedTestToUnderscoreSeparated */ + #[DataProvider('seedTestToUnderscoreSeparated')] public function testToUnderscoreSeparated(string $expected, string $input) { $this->assertEquals($expected, Normalise::toUnderscoreSeparated($input)); @@ -246,9 +257,8 @@ public function testToUnderscoreSeparated(string $expected, string $input) * * @param string $expected The expected value from the method. * @param string $input The input value for the method. - * - * @dataProvider seedTestToVariable */ + #[DataProvider('seedTestToVariable')] public function testToVariable(string $expected, string $input) { $this->assertEquals($expected, Normalise::toVariable($input)); @@ -259,9 +269,8 @@ public function testToVariable(string $expected, string $input) * * @param string $expected The expected value from the method. * @param string $input The input value for the method. - * - * @dataProvider seedTestToKey */ + #[DataProvider('seedTestToKey')] public function testToKey(string $expected, string $input) { $this->assertEquals($expected, Normalise::toKey($input)); diff --git a/Tests/StringHelperTest.php b/Tests/StringHelperTest.php index e1fbcc44..8d6fd578 100644 --- a/Tests/StringHelperTest.php +++ b/Tests/StringHelperTest.php @@ -8,6 +8,7 @@ namespace Joomla\String\Tests; use Joomla\String\StringHelper; +use PHPUnit\Framework\Attributes\DataProvider; use PHPUnit\Framework\TestCase; /** @@ -18,406 +19,458 @@ class StringHelperTest extends TestCase /** * Data provider for testIncrement * - * @return \Generator + * @return array */ - public function seedTestIncrement(): \Generator + public static function seedTestIncrement(): array { - // Note: string, style, number, expected - yield 'First default increment' => ['title', null, 0, 'title (2)']; - yield 'Second default increment' => ['title(2)', null, 0, 'title(3)']; - yield 'First dash increment' => ['title', 'dash', 0, 'title-2']; - yield 'Second dash increment' => ['title-2', 'dash', 0, 'title-3']; - yield 'Set 
default increment' => ['title', null, 4, 'title (4)']; - yield 'Unknown style fallback to default' => ['title', 'foo', 0, 'title (2)']; + return [ + // Note: string, style, number, expected + 'First default increment' => ['title', null, 0, 'title (2)'], + 'Second default increment' => ['title(2)', null, 0, 'title(3)'], + 'First dash increment' => ['title', 'dash', 0, 'title-2'], + 'Second dash increment' => ['title-2', 'dash', 0, 'title-3'], + 'Set default increment' => ['title', null, 4, 'title (4)'], + 'Unknown style fallback to default' => ['title', 'foo', 0, 'title (2)'], + ]; } /** * Data provider for testIs_ascii * - * @return \Generator + * @return array */ - public function seedTestIs_ascii(): \Generator + public static function seedTestIs_ascii(): array { - yield ['ascii', true]; - yield ['1024', true]; - yield ['#$#@$%', true]; - yield ['áÑ', false]; - yield ['ÿ©', false]; - yield ['¡¾', false]; - yield ['÷™', false]; + return [ + ['ascii', true], + ['1024', true], + ['#$#@$%', true], + ['áÑ', false], + ['ÿ©', false], + ['¡¾', false], + ['÷™', false], + ]; } /** * Data provider for testStrpos * - * @return \Generator + * @return array */ - public function seedTestStrpos(): \Generator + public static function seedTestStrpos(): array { - yield [3, 'missing', 'sing', 0]; - yield [false, 'missing', 'sting', 0]; - yield [4, 'missing', 'ing', 0]; - yield [10, ' объектов на карте с', 'на карте', 0]; - yield [0, 'на карте с', 'на карте', 0, 0]; - yield [false, 'на карте с', 'на каррте', 0]; - yield [false, 'на карте с', 'на карте', 2]; - yield [3, 'missing', 'sing', false]; + return [ + [3, 'missing', 'sing', 0], + [false, 'missing', 'sting', 0], + [4, 'missing', 'ing', 0], + [10, ' объектов на карте с', 'на карте', 0], + [0, 'на карте с', 'на карте', 0], + [false, 'на карте с', 'на каррте', 0], + [false, 'на карте с', 'на карте', 2], + [3, 'missing', 'sing', false], + ]; } /** * Data provider for testStrrpos * - * @return \Generator + * @return array * * @since 
1.0 */ - public function seedTestStrrpos(): \Generator + public static function seedTestStrrpos(): array { - yield [3, 'missing', 'sing', 0]; - yield [false, 'missing', 'sting', 0]; - yield [4, 'missing', 'ing', 0]; - yield [10, ' объектов на карте с', 'на карте', 0]; - yield [0, 'на карте с', 'на карте', 0]; - yield [false, 'на карте с', 'на каррте', 0]; - yield [3, 'на карте с', 'карт', 2]; + return [ + [3, 'missing', 'sing', 0], + [false, 'missing', 'sting', 0], + [4, 'missing', 'ing', 0], + [10, ' объектов на карте с', 'на карте', 0], + [0, 'на карте с', 'на карте', 0], + [false, 'на карте с', 'на каррте', 0], + [3, 'на карте с', 'карт', 2], + ]; } /** * Data provider for testSubstr * - * @return \Generator + * @return array */ - public function seedTestSubstr(): \Generator + public static function seedTestSubstr(): array { - yield ['issauga', 'Mississauga', 4, false]; - yield ['на карте с', ' объектов на карте с', 10, false]; - yield ['на ка', ' объектов на карте с', 10, 5]; - yield ['те с', ' объектов на карте с', -4, false]; - yield [false, ' объектов на карте с', 99, false]; + return [ + ['issauga', 'Mississauga', 4, false], + ['на карте с', ' объектов на карте с', 10, false], + ['на ка', ' объектов на карте с', 10, 5], + ['те с', ' объектов на карте с', -4, false], + [false, ' объектов на карте с', 99, false], + ]; } /** * Data provider for testStrtolower * - * @return \Generator + * @return array */ - public function seedTestStrtolower(): \Generator + public static function seedTestStrtolower(): array { - yield ['Joomla! Rocks', 'joomla! rocks']; + return [ + ['Joomla! Rocks', 'joomla! rocks'], + ]; } /** * Data provider for testStrtoupper * - * @return \Generator + * @return array */ - public function seedTestStrtoupper(): \Generator + public static function seedTestStrtoupper(): array { - yield ['Joomla! Rocks', 'JOOMLA! ROCKS']; + return [ + ['Joomla! Rocks', 'JOOMLA! 
ROCKS'], + ]; } /** * Data provider for testStrlen * - * @return \Generator + * @return array */ - public function seedTestStrlen(): \Generator + public static function seedTestStrlen(): array { - yield ['Joomla! Rocks', 13]; + return [ + ['Joomla! Rocks', 13], + ]; } /** * Data provider for testStr_ireplace * - * @return \Generator + * @return array */ - public function seedTestStr_ireplace(): \Generator + public static function seedTestStr_ireplace(): array { - yield ['Pig', 'cow', 'the pig jumped', false, 'the cow jumped']; - yield ['Pig', 'cow', 'the pig jumped', true, 'the cow jumped']; - yield ['Pig', 'cow', 'the pig jumped over the cow', true, 'the cow jumped over the cow']; - yield [['PIG', 'JUMPED'], ['cow', 'hopped'], 'the pig jumped over the pig', true, 'the cow hopped over the cow']; - yield ['шил', 'биш', 'Би шил идэй чадна', true, 'Би биш идэй чадна']; - yield ['/', ':', '/test/slashes/', true, ':test:slashes:']; + return [ + ['Pig', 'cow', 'the pig jumped', false, 'the cow jumped'], + ['Pig', 'cow', 'the pig jumped', true, 'the cow jumped'], + ['Pig', 'cow', 'the pig jumped over the cow', true, 'the cow jumped over the cow'], + [['PIG', 'JUMPED'], ['cow', 'hopped'], 'the pig jumped over the pig', true, 'the cow hopped over the cow'], + ['шил', 'биш', 'Би шил идэй чадна', true, 'Би биш идэй чадна'], + ['/', ':', '/test/slashes/', true, ':test:slashes:'], + ]; } /** * Data provider for testStr_split * - * @return \Generator + * @return array */ - public function seedTestStr_split(): \Generator + public static function seedTestStr_split(): array { - yield ['string', 1, ['s', 't', 'r', 'i', 'n', 'g']]; - yield ['string', 2, ['st', 'ri', 'ng']]; - yield ['волн', 3, ['вол', 'н']]; - yield ['волн', 1, ['в', 'о', 'л', 'н']]; + return [ + ['string', 1, ['s', 't', 'r', 'i', 'n', 'g']], + ['string', 2, ['st', 'ri', 'ng']], + ['волн', 3, ['вол', 'н']], + ['волн', 1, ['в', 'о', 'л', 'н']], + ]; } /** * Data provider for testStrcasecmp * - * @return \Generator + * 
@return array */ - public function seedTestStrcasecmp(): \Generator + public static function seedTestStrcasecmp(): array { - yield ['THIS IS STRING1', 'this is string1', false, 0]; - yield ['this is string1', 'this is string2', false, -1]; - yield ['this is string2', 'this is string1', false, 1]; - yield ['бгдпт', 'бгдпт', false, 0]; - yield ['àbc', 'abc', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], 1]; - yield ['àbc', 'bcd', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1]; - yield ['é', 'è', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1]; - yield ['É', 'é', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], 0]; - yield ['œ', 'p', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1]; - yield ['œ', 'n', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], 1]; + return [ + ['THIS IS STRING1', 'this is string1', false, 0], + ['this is string1', 'this is string2', false, -1], + ['this is string2', 'this is string1', false, 1], + ['бгдпт', 'бгдпт', false, 0], + ['àbc', 'abc', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], 1], + ['àbc', 'bcd', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1], + ['é', 'è', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1], + ['É', 'é', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], 0], + ['œ', 'p', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1], + ['œ', 'n', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], 1], + ]; } /** * 
Data provider for testStrcmp * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestStrcmp(): \Generator + public static function seedTestStrcmp(): array { - yield ['THIS IS STRING1', 'this is string1', false, -1]; - yield ['this is string1', 'this is string2', false, -1]; - yield ['this is string2', 'this is string1', false, 1]; - yield ['a', 'B', false, 1]; - yield ['A', 'b', false, -1]; - yield ['Àbc', 'abc', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], 1]; - yield ['Àbc', 'bcd', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1]; - yield ['É', 'è', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1]; - yield ['é', 'È', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1]; - yield ['Œ', 'p', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1]; - yield ['Œ', 'n', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], 1]; - yield ['œ', 'N', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], 1]; - yield ['œ', 'P', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1]; + return [ + ['THIS IS STRING1', 'this is string1', false, -1], + ['this is string1', 'this is string2', false, -1], + ['this is string2', 'this is string1', false, 1], + ['a', 'B', false, 1], + ['A', 'b', false, -1], + ['Àbc', 'abc', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], 1], + ['Àbc', 'bcd', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1], + ['É', 'è', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1], + ['é', 
'È', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1], + ['Œ', 'p', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1], + ['Œ', 'n', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], 1], + ['œ', 'N', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], 1], + ['œ', 'P', ['fr_FR.utf8', 'fr_FR.UTF-8', 'fr_FR.UTF-8@euro', 'French_Standard', 'french', 'fr_FR', 'fre_FR'], -1], + ]; } /** * Data provider for testStrcspn * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestStrcspn(): \Generator + public static function seedTestStrcspn(): array { - yield ['subject string ', '<>', false, false, 8]; - yield ['Би шил {123} идэй {456} чадна', '}{', null, false, 7]; - yield ['Би шил {123} идэй {456} чадна', '}{', 13, 10, 5]; + return [ + ['subject string ', '<>', false, false, 8], + ['Би шил {123} идэй {456} чадна', '}{', null, false, 7], + ['Би шил {123} идэй {456} чадна', '}{', 13, 10, 5], + ]; } /** * Data provider for testStristr * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestStristr(): \Generator + public static function seedTestStristr(): array { - yield ['haystack', 'needle', false]; - yield ['before match, after match', 'match', 'match, after match']; - yield ['Би шил идэй чадна', 'шил', 'шил идэй чадна']; + return [ + ['haystack', 'needle', false], + ['before match, after match', 'match', 'match, after match'], + ['Би шил идэй чадна', 'шил', 'шил идэй чадна'], + ]; } /** * Data provider for testStrrev * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestStrrev(): \Generator + public static function seedTestStrrev(): array { - yield ['abc def', 'fed cba']; - yield ['Би шил', 'лиш иБ']; + return [ + ['abc def', 'fed cba'], + ['Би шил', 'лиш иБ'], + ]; } /** * Data 
provider for testStrspn * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestStrspn(): \Generator + public static function seedTestStrspn(): array { - yield ['A321 Main Street', '0123456789', 1, 2, 2]; - yield ['321 Main Street', '0123456789', null, 2, 2]; - yield ['A321 Main Street', '0123456789', null, 10, 0]; - yield ['321 Main Street', '0123456789', null, null, 3]; - yield ['Main Street 321', '0123456789', null, -3, 0]; - yield ['321 Main Street', '0123456789', null, -13, 2]; - yield ['321 Main Street', '0123456789', null, -12, 3]; - yield ['A321 Main Street', '0123456789', 0, null, 0]; - yield ['A321 Main Street', '0123456789', 1, 10, 3]; - yield ['A321 Main Street', '0123456789', 1, null, 3]; - yield ['Би шил идэй чадна', 'Би', null, null, 2]; - yield ['чадна Би шил идэй чадна', 'Би', null, null, 0]; + return [ + ['A321 Main Street', '0123456789', 1, 2, 2], + ['321 Main Street', '0123456789', 0, 2, 2], + ['A321 Main Street', '0123456789', 0, 10, 0], + ['321 Main Street', '0123456789', 0, null, 3], + ['Main Street 321', '0123456789', 0, -3, 0], + ['321 Main Street', '0123456789', 0, -13, 2], + ['321 Main Street', '0123456789', 0, -12, 3], + ['A321 Main Street', '0123456789', 0, null, 0], + ['A321 Main Street', '0123456789', 1, 10, 3], + ['A321 Main Street', '0123456789', 1, null, 3], + ['Би шил идэй чадна', 'Би', 0, null, 2], + ['чадна Би шил идэй чадна', 'Би', 0, null, 0], + ]; } /** * Data provider for testSubstr_replace * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestSubstr_replace(): \Generator + public static function seedTestSubstr_replace(): array { - yield ['321 Broadway Avenue', '321 Main Street', 'Broadway Avenue', 4, false]; - yield ['321 Broadway Street', '321 Main Street', 'Broadway', 4, 4]; - yield ['чадна 我能吞', 'чадна Би шил идэй чадна', '我能吞', 6, false]; - yield ['чадна 我能吞 шил идэй чадна', 'чадна Би шил идэй чадна', '我能吞', 6, 2]; + return [ + ['321 Broadway Avenue', '321 
Main Street', 'Broadway Avenue', 4, false], + ['321 Broadway Street', '321 Main Street', 'Broadway', 4, 4], + ['чадна 我能吞', 'чадна Би шил идэй чадна', '我能吞', 6, false], + ['чадна 我能吞 шил идэй чадна', 'чадна Би шил идэй чадна', '我能吞', 6, 2], + ]; } /** * Data provider for testLtrim * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestLtrim(): \Generator + public static function seedTestLtrim(): array { - yield [' abc def', false, 'abc def']; - yield [' abc def', '', ' abc def']; - yield [' Би шил', false, 'Би шил']; - yield ["\t\n\r\x0BБи шил", false, 'Би шил']; - yield ["\x0B\t\n\rБи шил", "\t\n\x0B", "\rБи шил"]; - yield ["\x09Би шил\x0A", "\x09\x0A", "Би шил\x0A"]; - yield ['1234abc', '0123456789', 'abc']; + return [ + [' abc def', false, 'abc def'], + [' abc def', '', ' abc def'], + [' Би шил', false, 'Би шил'], + ["\t\n\r\x0BБи шил", false, 'Би шил'], + ["\x0B\t\n\rБи шил", "\t\n\x0B", "\rБи шил"], + ["\x09Би шил\x0A", "\x09\x0A", "Би шил\x0A"], + ['1234abc', '0123456789', 'abc'], + ]; } /** * Data provider for testRtrim * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestRtrim(): \Generator + public static function seedTestRtrim(): array { - yield ['abc def ', false, 'abc def']; - yield ['abc def ', '', 'abc def ']; - yield ['Би шил ', false, 'Би шил']; - yield ["Би шил\t\n\r\x0B", false, 'Би шил']; - yield ["Би шил\r\x0B\t\n", "\t\n\x0B", "Би шил\r"]; - yield ["\x09Би шил\x0A", "\x09\x0A", "\x09Би шил"]; - yield ['1234abc', 'abc', '1234']; + return [ + ['abc def ', false, 'abc def'], + ['abc def ', '', 'abc def '], + ['Би шил ', false, 'Би шил'], + ["Би шил\t\n\r\x0B", false, 'Би шил'], + ["Би шил\r\x0B\t\n", "\t\n\x0B", "Би шил\r"], + ["\x09Би шил\x0A", "\x09\x0A", "\x09Би шил"], + ['1234abc', 'abc', '1234'], + ]; } /** * Data provider for testTrim * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestTrim(): \Generator + public static function 
seedTestTrim(): array { - yield [' abc def ', false, 'abc def']; - yield [' abc def ', '', ' abc def ']; - yield [' Би шил ', false, 'Би шил']; - yield ["\t\n\r\x0BБи шил\t\n\r\x0B", false, 'Би шил']; - yield ["\x0B\t\n\rБи шил\r\x0B\t\n", "\t\n\x0B", "\rБи шил\r"]; - yield ["\x09Би шил\x0A", "\x09\x0A", "Би шил"]; - yield ['1234abc56789', '0123456789', 'abc']; + return [ + [' abc def ', false, 'abc def'], + [' abc def ', '', ' abc def '], + [' Би шил ', false, 'Би шил'], + ["\t\n\r\x0BБи шил\t\n\r\x0B", false, 'Би шил'], + ["\x0B\t\n\rБи шил\r\x0B\t\n", "\t\n\x0B", "\rБи шил\r"], + ["\x09Би шил\x0A", "\x09\x0A", "Би шил"], + ['1234abc56789', '0123456789', 'abc'], + ]; } /** * Data provider for testUcfirst * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestUcfirst(): \Generator + public static function seedTestUcfirst(): array { - yield ['george', null, null, 'George']; - yield ['мога', null, null, 'Мога']; - yield ['ψυχοφθόρα', null, null, 'Ψυχοφθόρα']; - yield ['dr jekill and mister hyde', ' ', null, 'Dr Jekill And Mister Hyde']; - yield ['dr jekill and mister hyde', ' ', '_', 'Dr_Jekill_And_Mister_Hyde']; - yield ['dr jekill and mister hyde', ' ', '', 'DrJekillAndMisterHyde']; + return [ + ['george', null, null, 'George'], + ['мога', null, null, 'Мога'], + ['ψυχοφθόρα', null, null, 'Ψυχοφθόρα'], + ['dr jekill and mister hyde', ' ', null, 'Dr Jekill And Mister Hyde'], + ['dr jekill and mister hyde', ' ', '_', 'Dr_Jekill_And_Mister_Hyde'], + ['dr jekill and mister hyde', ' ', '', 'DrJekillAndMisterHyde'], + ]; } /** * Data provider for testUcwords * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestUcwords(): \Generator + public static function seedTestUcwords(): array { - yield ['george washington', 'George Washington']; - yield ["george\r\nwashington", "George\r\nWashington"]; - yield ['мога', 'Мога']; - yield ['αβγ δεζ', 'Αβγ Δεζ']; - yield ['åbc öde', 'Åbc Öde']; + return [ + ['george 
washington', 'George Washington'], + ["george\r\nwashington", "George\r\nWashington"], + ['мога', 'Мога'], + ['αβγ δεζ', 'Αβγ Δεζ'], + ['åbc öde', 'Åbc Öde'], + ]; } /** * Data provider for testTranscode * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedTestTranscode(): \Generator + public static function seedTestTranscode(): array { - yield ['Åbc Öde €100', 'UTF-8', 'ISO-8859-1', "\xc5bc \xd6de EUR100"]; + return [ + ['Åbc Öde €100', 'UTF-8', 'ISO-8859-1', "\xc5bc \xd6de EUR100"], + ]; } /** * Data provider for testing compliant strings * - * @return \Generator + * @return array * * @since 1.0 */ - public function seedCompliantStrings(): \Generator + public static function seedCompliantStrings(): array { - yield ["\xCF\xB0", true]; - yield ["\xFBa", false]; - yield ["\xFDa", false]; - yield ["foo\xF7bar", false]; - yield ['george Мога Ž Ψυχοφθόρα ฉันกินกระจกได้ 我能吞下玻璃而不伤身体 ', true]; - yield ["\xFF ABC", false]; - yield ["0xfffd ABC", true]; - yield ['', true]; + return [ + ["\xCF\xB0", true], + ["\xFBa", false], + ["\xFDa", false], + ["foo\xF7bar", false], + ['george Мога Ž Ψυχοφθόρα ฉันกินกระจกได้ 我能吞下玻璃而不伤身体 ', true], + ["\xFF ABC", false], + ["0xfffd ABC", true], + ['', true], + ]; } /** * Data provider for testUnicodeToUtf8 * - * @return \Generator + * @return array * * @since 1.2.0 */ - public function seedTestUnicodeToUtf8(): \Generator + public static function seedTestUnicodeToUtf8(): array { - yield ["\u0422\u0435\u0441\u0442 \u0441\u0438\u0441\u0442\u0435\u043c\u044b", "Тест системы"]; - yield ["\u00dcberpr\u00fcfung der Systemumstellung", "Überprüfung der Systemumstellung"]; + return [ + ["\u0422\u0435\u0441\u0442 \u0441\u0438\u0441\u0442\u0435\u043c\u044b", "Тест системы"], + ["\u00dcberpr\u00fcfung der Systemumstellung", "Überprüfung der Systemumstellung"], + ]; } /** * Data provider for testUnicodeToUtf16 * - * @return \Generator + * @return array * * @since 1.2.0 */ - public function seedTestUnicodeToUtf16(): 
\Generator + public static function seedTestUnicodeToUtf16(): array { - yield ["\u0422\u0435\u0441\u0442 \u0441\u0438\u0441\u0442\u0435\u043c\u044b", "Тест системы"]; - yield ["\u00dcberpr\u00fcfung der Systemumstellung", "Überprüfung der Systemumstellung"]; + return [ + ["\u0422\u0435\u0441\u0442 \u0441\u0438\u0441\u0442\u0435\u043c\u044b", "Тест системы"], + ["\u00dcberpr\u00fcfung der Systemumstellung", "Überprüfung der Systemumstellung"], + ]; } /** @@ -427,9 +480,8 @@ public function seedTestUnicodeToUtf16(): \Generator * @param string|null $style The the style (default|dash). * @param integer $number If supplied, this number is used for the copy, otherwise it is the 'next' number. * @param string $expected Expected result. - * - * @dataProvider seedTestIncrement */ + #[DataProvider('seedTestIncrement')] public function testIncrement(string $string, ?string $style, int $number, string $expected) { $this->assertEquals( @@ -443,9 +495,8 @@ public function testIncrement(string $string, ?string $style, int $number, strin * * @param string $string The string to test. * @param boolean $expected Expected result. 
- * - * @dataProvider seedTestIs_ascii */ + #[DataProvider('seedTestIs_ascii')] public function testIs_ascii(string $string, bool $expected) { $this->assertEquals( @@ -461,9 +512,8 @@ public function testIs_ascii(string $string, bool $expected) * @param string $haystack String being examined * @param string $needle String being searched for * @param integer|null|boolean $offset Optional, specifies the position from which the search should be performed - * - * @dataProvider seedTestStrpos */ + #[DataProvider('seedTestStrpos')] public function testStrpos($expected, string $haystack, string $needle, $offset = 0) { $this->assertEquals( @@ -479,9 +529,8 @@ public function testStrpos($expected, string $haystack, string $needle, $offset * @param string $haystack String being examined * @param string $needle String being searched for * @param integer|null|boolean $offset Optional, specifies the position from which the search should be performed - * - * @dataProvider seedTestStrrpos */ + #[DataProvider('seedTestStrrpos')] public function testStrrpos($expected, string $haystack, string $needle, int $offset = 0) { $this->assertEquals( @@ -497,9 +546,8 @@ public function testStrrpos($expected, string $haystack, string $needle, int $of * @param string $string String being processed * @param integer $offset Number of UTF-8 characters offset (from left) * @param integer|null|boolean $offset Optional, specifies the position from which the search should be performed - * - * @dataProvider seedTestSubstr */ + #[DataProvider('seedTestSubstr')] public function testSubstr($expected, string $string, int $start, $length = false) { $this->assertEquals( @@ -513,9 +561,8 @@ public function testSubstr($expected, string $string, int $start, $length = fals * * @param string $string String being processed * @param string|boolean $expected Expected result - * - * @dataProvider seedTestStrtolower */ + #[DataProvider('seedTestStrtolower')] public function testStrtolower(string $string, $expected) { 
$this->assertEquals( @@ -529,9 +576,8 @@ public function testStrtolower(string $string, $expected) * * @param string $string String being processed * @param string|boolean $expected Expected result - * - * @dataProvider seedTestStrtoupper */ + #[DataProvider('seedTestStrtoupper')] public function testStrtoupper($string, $expected) { $this->assertEquals( @@ -545,9 +591,8 @@ public function testStrtoupper($string, $expected) * * @param string $string String being processed * @param string|boolean $expected Expected result - * - * @dataProvider seedTestStrlen */ + #[DataProvider('seedTestStrlen')] public function testStrlen(string $string, $expected) { $this->assertEquals( @@ -564,11 +609,8 @@ public function testStrlen(string $string, $expected) * @param string $subject New string to replace with * @param integer|null|boolean $count Optional count value to be passed by reference * @param string $expected Expected result - * - * @return array - * - * @dataProvider seedTestStr_ireplace */ + #[DataProvider('seedTestStr_ireplace')] public function testStr_ireplace($search, $replace, $subject, $count, $expected) { $this->assertEquals( @@ -583,9 +625,8 @@ public function testStr_ireplace($search, $replace, $subject, $count, $expected) * @param string $string UTF-8 encoded string to process * @param integer $splitLen Number to characters to split string by * @param array|string|boolean $expected Expected result - * - * @dataProvider seedTestStr_split */ + #[DataProvider('seedTestStr_split')] public function testStr_split($string, $splitLen, $expected) { $this->assertEquals( @@ -601,9 +642,8 @@ public function testStr_split($string, $splitLen, $expected) * @param string $string2 String 2 to compare * @param array|string|boolean $locale The locale used by strcoll or false to use classical comparison * @param integer $expected Expected result - * - * @dataProvider seedTestStrcasecmp */ + #[DataProvider('seedTestStrcasecmp')] public function testStrcasecmp(string $string1, 
string $string2, $locale, int $expected) { // Convert the $locale param to a string if it is an array @@ -635,9 +675,8 @@ public function testStrcasecmp(string $string1, string $string2, $locale, int $e * @param string $string2 String 2 to compare * @param mixed $locale The locale used by strcoll or false to use classical comparison * @param integer $expected Expected result - * - * @dataProvider seedTestStrcmp */ + #[DataProvider('seedTestStrcmp')] public function testStrcmp(string $string1, string $string2, $locale, int $expected) { // Convert the $locale param to a string if it is an array @@ -671,9 +710,8 @@ public function testStrcmp(string $string1, string $string2, $locale, int $expec * @param integer|boolean $start Optional starting character position (in characters) * @param integer|boolean $len Optional length * @param integer $expected Expected result - * - * @dataProvider seedTestStrcspn */ + #[DataProvider('seedTestStrcspn')] public function testStrcspn(string $haystack, string $needles, $start, $len, int $expected) { $this->assertEquals( @@ -688,9 +726,8 @@ public function testStrcspn(string $haystack, string $needles, $start, $len, int * @param string $haystack The haystack * @param string $needle The needle * @param string|boolean $expect Expected result - * - * @dataProvider seedTestStristr */ + #[DataProvider('seedTestStristr')] public function testStristr(string $haystack, string $needle, $expected) { $this->assertEquals( @@ -704,9 +741,9 @@ public function testStristr(string $haystack, string $needle, $expected) * * @param string $string String to be reversed * @param string $expected Expected result - * - * @dataProvider seedTestStrrev + */ + #[DataProvider('seedTestStrrev')] public function testStrrev(string $string, string $expected) { $this->assertEquals( @@ -723,9 +760,8 @@ public function testStrrev(string $string, string $expected) * @param integer|null $start Start optional * @param integer|null $length Length optional * @param integer 
$expect Expected result - * - * @dataProvider seedTestStrspn */ + #[DataProvider('seedTestStrspn')] public function testStrspn(string $subject, string $mask, $start, $length, int $expected) { $this->assertEquals( @@ -742,9 +778,8 @@ public function testStrspn(string $subject, string $mask, $start, $length, int $ * @param string $replacement The replacement string * @param integer $start Start * @param integer|boolean|null $length Length (optional) - * - * @dataProvider seedTestSubstr_replace */ + #[DataProvider('seedTestSubstr_replace')] public function testSubstr_replace(string $expected, string $string, string $replacement, int $start, $length) { $this->assertEquals( @@ -759,9 +794,8 @@ public function testSubstr_replace(string $expected, string $string, string $rep * @param string $string The string to be trimmed * @param string|boolean $charlist The optional charlist of additional characters to trim * @param string $expected Expected result - * - * @dataProvider seedTestLtrim */ + #[DataProvider('seedTestLtrim')] public function testLtrim(string $string, $charlist, string $expected) { $this->assertEquals( @@ -776,9 +810,8 @@ public function testLtrim(string $string, $charlist, string $expected) * @param string $string The string to be trimmed * @param string|boolean $charlist The optional charlist of additional characters to trim * @param string $expected Expected result - * - * @dataProvider seedTestRtrim */ + #[DataProvider('seedTestRtrim')] public function testRtrim(string $string, $charlist, string $expected) { $this->assertEquals( @@ -793,9 +826,8 @@ public function testRtrim(string $string, $charlist, string $expected) * @param string $string The string to be trimmed * @param string|boolean $charlist The optional charlist of additional characters to trim * @param string $expected Expected result - * - * @dataProvider seedTestTrim */ + #[DataProvider('seedTestTrim')] public function testTrim(string $string, $charlist, string $expected) { 
$this->assertEquals( @@ -811,9 +843,8 @@ public function testTrim(string $string, $charlist, string $expected) * @param string|null $delimiter The words delimiter (null means do not split the string) * @param string|null $newDelimiter The new words delimiter (null means equal to $delimiter) * @param string $expected Expected result - * - * @dataProvider seedTestUcfirst */ + #[DataProvider('seedTestUcfirst')] public function testUcfirst(string $string, ?string $delimiter, ?string $newDelimiter, string $expected) { $this->assertEquals( @@ -827,9 +858,8 @@ public function testUcfirst(string $string, ?string $delimiter, ?string $newDeli * * @param string $string String to be processed * @param string $expected Expected result - * - * @dataProvider seedTestUcwords */ + #[DataProvider('seedTestUcwords')] public function testUcwords(string $string, string $expected) { $this->assertEquals( @@ -845,9 +875,8 @@ public function testUcwords(string $string, string $expected) * @param string $fromEncoding The source encoding. * @param string $toEncoding The target encoding. * @param string|null $expect Expected result. - * - * @dataProvider seedTestTranscode */ + #[DataProvider('seedTestTranscode')] public function testTranscode(string $source, string $fromEncoding, string $toEncoding, ?string $expected) { $this->assertEquals( @@ -861,9 +890,8 @@ public function testTranscode(string $source, string $fromEncoding, string $toEn * * @param string $string UTF-8 encoded string. * @param boolean $expected Expected result. 
- * - * @dataProvider seedCompliantStrings */ + #[DataProvider('seedCompliantStrings')] public function testValid(string $string, bool $expected) { $this->assertEquals( @@ -877,9 +905,8 @@ public function testValid(string $string, bool $expected) * * @param string $string Unicode string to convert * @param string $expected Expected result - * - * @dataProvider seedTestUnicodeToUtf8 */ + #[DataProvider('seedTestUnicodeToUtf8')] public function testUnicodeToUtf8(string $string, string $expected) { $this->assertEquals( @@ -893,9 +920,8 @@ public function testUnicodeToUtf8(string $string, string $expected) * * @param string $string Unicode string to convert * @param string $expected Expected result - * - * @dataProvider seedTestUnicodeToUtf16 */ + #[DataProvider('seedTestUnicodeToUtf16')] public function testUnicodeToUtf16(string $string, string $expected) { $this->assertEquals( @@ -909,9 +935,8 @@ public function testUnicodeToUtf16(string $string, string $expected) * * @param string $string UTF-8 string to check * @param boolean $expected Expected result - * - * @dataProvider seedCompliantStrings */ + #[DataProvider('seedCompliantStrings')] public function testCompliant(string $string, bool $expected) { $this->assertEquals( diff --git a/phpunit.xml.dist b/phpunit.xml.dist index 844a69c7..2278bfba 100644 --- a/phpunit.xml.dist +++ b/phpunit.xml.dist @@ -1,13 +1,5 @@ - - - src - - src/phputf8 - - - Tests From edb694c79c7b441a3bf6e88f5ff53268751c9998 Mon Sep 17 00:00:00 2001 From: Hannes Papenberg Date: Fri, 11 Jul 2025 09:52:10 +0200 Subject: [PATCH 08/18] Fixing notices from phpstan --- src/StringHelper.php | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/StringHelper.php b/src/StringHelper.php index 2b8a7d60..8f1a3c95 100644 --- a/src/StringHelper.php +++ b/src/StringHelper.php @@ -74,7 +74,7 @@ public static function increment($string, $style = 'default', $n = 0) // Check if we are incrementing an existing pattern, or appending 
a new one. if (preg_match($rxSearch, $string, $matches)) { - $n = empty($n) ? ($matches[1] + 1) : $n; + $n = empty($n) ? (1 + (int) $matches[1]) : $n; $string = preg_replace($rxReplace, sprintf($oldFormat, $n), $string); } else { $n = empty($n) ? 2 : $n; @@ -375,7 +375,7 @@ public static function strcasecmp($str1, $str2, $locale = false) } // Get current locale - $locale0 = setlocale(LC_COLLATE, 0); + $locale0 = setlocale(LC_COLLATE, null); if (!$locale = setlocale(LC_COLLATE, $locale)) { $locale = $locale0; @@ -421,7 +421,7 @@ public static function strcmp($str1, $str2, $locale = false) { if ($locale) { // Get current locale - $locale0 = setlocale(LC_COLLATE, 0); + $locale0 = setlocale(LC_COLLATE, null); if (!$locale = setlocale(LC_COLLATE, $locale)) { $locale = $locale0; @@ -462,9 +462,9 @@ public static function strcmp($str1, $str2, $locale = false) * @link https://www.php.net/strcspn * @since 1.3.0 */ - public static function strcspn($str, $mask, $start = null, $length = null) + public static function strcspn(string $str, string $mask, $start = null, $length = null) { - if (empty($mask) || strlen($mask) == 0) { + if (strlen($mask) == 0) { return 0; } @@ -535,15 +535,15 @@ public static function strrev($str) * @link https://www.php.net/strspn * @since 1.3.0 */ - public static function strspn($str, $mask, $start = null, $length = null) + public static function strspn(string $str, string $mask, ?int $start = null, ?int $length = null) { $mask = preg_replace('!([\\\\\\-\\]\\[/^])!', '\\\${1}', $mask); - if (is_int($start) && is_int($length)) { + if ($start && $length) { $str = mb_substr($str, $start, $length); - } elseif (is_int($start) && !is_int($length)) { + } elseif ($start) { $str = mb_substr($str, $start); - } elseif (!is_int($start) && is_int($length)) { + } elseif ($length) { trigger_error('\Joomla\String\StringHelper::strspn(): Passing null to parameter #3 ($start) of type int is deprecated', E_USER_DEPRECATED); $str = mb_substr($str, 0, $length); } From 
caef0ea7c51792ee5fb1fd0c7885bd8418bf3825 Mon Sep 17 00:00:00 2001 From: Hannes Papenberg Date: Fri, 11 Jul 2025 09:52:41 +0200 Subject: [PATCH 09/18] Updating documentation --- README.md | 6 +++--- SECURITY.md | 3 ++- docs/v2-to-v3-update.md | 5 +++++ docs/v3-to-v4-update.md | 16 ++++++++++++++++ 4 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 docs/v2-to-v3-update.md create mode 100644 docs/v3-to-v4-update.md diff --git a/README.md b/README.md index 0a1fda2b..6b6a00d7 100644 --- a/README.md +++ b/README.md @@ -7,12 +7,12 @@ ## Installation via Composer -Add `"joomla/string": "~3.0"` to the require block in your composer.json and then run `composer install`. +Add `"joomla/string": "~4.0"` to the require block in your composer.json and then run `composer install`. ```json { "require": { - "joomla/string": "~3.0" + "joomla/string": "~4.0" } } ``` @@ -20,5 +20,5 @@ Add `"joomla/string": "~3.0"` to the require block in your composer.json and the Alternatively, you can simply run the following from the command line: ```sh -composer require joomla/string "~3.0" +composer require joomla/string "~4.0" ``` diff --git a/SECURITY.md b/SECURITY.md index 643dd83d..00bb3f37 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -5,7 +5,8 @@ These versions are currently being supported with security updates: | Version | Supported | -| ------- | ------------------ | +|---------| ------------------ | +| 4.x.x | :white_check_mark: | | 3.x.x | :white_check_mark: | | 2.0.x | :white_check_mark: | | 1.4.x | :x: | diff --git a/docs/v2-to-v3-update.md b/docs/v2-to-v3-update.md new file mode 100644 index 00000000..bdb1f3e8 --- /dev/null +++ b/docs/v2-to-v3-update.md @@ -0,0 +1,5 @@ +## Updating from v2 to v3 + +### Minimum supported PHP version raised + +All Framework packages now require PHP 8.1 or newer. 
diff --git a/docs/v3-to-v4-update.md b/docs/v3-to-v4-update.md new file mode 100644 index 00000000..51fc1838 --- /dev/null +++ b/docs/v3-to-v4-update.md @@ -0,0 +1,16 @@ +## Updating from v3 to v4 + +### Minimum supported PHP version raised + +All Framework packages now require PHP 8.3 or newer. + +### Deprecated methods from `Inflector` class have been removed + +The following deprecated methods have been removed in this release: + +* `Inflector::addWord()`: Use `Doctrine\Common\Inflector\Inflector::rules()` +* `Inflector::addPluraliseRule()`: Use `Doctrine\Common\Inflector\Inflector::rules()` +* `Inflector::addSingulariseRule()`: Use `Doctrine\Common\Inflector\Inflector::rules()` +* `Inflector::getInstance()`: Use the static methods without an instance instead. +* `Inflector::toPlural()`: Use static `Doctrine\Common\Inflector\Inflector::pluralize()` +* `Inflector::toSingular()`: Use static `Doctrine\Common\Inflector\Inflector::singularize()` From ab37cf279fd5fb0cb858ed1219f4098d24cf181f Mon Sep 17 00:00:00 2001 From: Hannes Papenberg Date: Fri, 11 Jul 2025 09:53:15 +0200 Subject: [PATCH 10/18] Cleanup of .gitattributes --- .gitattributes | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitattributes b/.gitattributes index 44a57dcc..4ebfb831 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,9 +1,6 @@ .github/ export-ignore -.phan/ export-ignore docs/ export-ignore Tests/ export-ignore -.drone.jsonnet export-ignore -.drone.yml export-ignore .editorconfig export-ignore .gitattributes export-ignore .gitignore export-ignore From 844fea690578f9e01387f83a8115e31e6d3cc4ea Mon Sep 17 00:00:00 2001 From: Hannes Papenberg Date: Fri, 11 Jul 2025 09:55:50 +0200 Subject: [PATCH 11/18] Deprecating Inflector class --- docs/v3-to-v4-update.md | 4 ++++ src/Inflector.php | 1 + 2 files changed, 5 insertions(+) diff --git a/docs/v3-to-v4-update.md b/docs/v3-to-v4-update.md index 51fc1838..4ee255e5 100644 --- a/docs/v3-to-v4-update.md +++ b/docs/v3-to-v4-update.md @@ -14,3 
+14,7 @@ The following deprecated methods have been removed in this release: * `Inflector::getInstance()`: Use the static methods without an instance instead. * `Inflector::toPlural()`: Use static `Doctrine\Common\Inflector\Inflector::pluralize()` * `Inflector::toSingular()`: Use static `Doctrine\Common\Inflector\Inflector::singularize()` + +### The `Inflector` class has been deprecated + +The `Inflector` class in this package depends on the `doctrine/inflector` v1 package, which has been EOL for some time. The newer v2 version of that package does not allow our class to inherit from their `Inflector` class anymore. At the same time, the new `doctrine/inflector` package provides basically everything our `Inflector` class supplied, so our class is not necessary anymore. Thus the `Inflector` class in this package is deprecated and should not be used anymore. Use the `doctrine/inflector` package directly instead. This class will be removed in 5.0 of this package. diff --git a/src/Inflector.php b/src/Inflector.php index f7e64b1d..aa78c50b 100644 --- a/src/Inflector.php +++ b/src/Inflector.php @@ -17,6 +17,7 @@ * The Inflector transforms words * * @since 1.0 + * @deprecated 5.0 Use doctrine/inflector package as complete replacement instead. 
*/ class Inflector extends DoctrineInflector { From 81b3d9b5aff57d22c7c6799cc630b28aa3c1a416 Mon Sep 17 00:00:00 2001 From: Hannes Papenberg Date: Fri, 11 Jul 2025 12:03:32 +0200 Subject: [PATCH 12/18] Update doctrine/inflector to v2, add proxies for all former methods --- Tests/InflectorTest.php | 16 +-------- composer.json | 2 +- src/Inflector.php | 76 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 76 insertions(+), 18 deletions(-) diff --git a/Tests/InflectorTest.php b/Tests/InflectorTest.php index 66503fea..2409e08b 100644 --- a/Tests/InflectorTest.php +++ b/Tests/InflectorTest.php @@ -92,20 +92,6 @@ protected function setUp(): void parent::setUp(); $this->inflector = new Inflector(); - DoctrineInflector::reset(); - } - - /** - * Tears down the fixture, for example, close a network connection. - * This method is called after a test is executed. - * - * @return void - */ - protected function tearDown(): void - { - DoctrineInflector::reset(); - - parent::tearDown(); } /** @@ -214,7 +200,7 @@ public function testIsSingular(string $singular, string $plural) } } - private function checkInflectorImplementation(DoctrineInflector $inflector): bool + private function checkInflectorImplementation(Inflector $inflector): bool { $reflectionClass = new \ReflectionClass($inflector); diff --git a/composer.json b/composer.json index d4f3dd18..fa2f0abf 100644 --- a/composer.json +++ b/composer.json @@ -11,7 +11,7 @@ "symfony/polyfill-mbstring": "^1.31.0" }, "require-dev": { - "doctrine/inflector": "^1.2", + "doctrine/inflector": "^2.0.10", "joomla/test": "dev-4.x-dev", "phpunit/phpunit": "^12.2.6", "squizlabs/php_codesniffer": "^3.7.2", diff --git a/src/Inflector.php b/src/Inflector.php index aa78c50b..e49c8213 100644 --- a/src/Inflector.php +++ b/src/Inflector.php @@ -9,7 +9,7 @@ namespace Joomla\String; -use Doctrine\Common\Inflector\Inflector as DoctrineInflector; +use Doctrine\Inflector\InflectorFactory; /** * Joomla Framework String Inflector Class @@ -19,7 
+19,7 @@ * @since 1.0 * @deprecated 5.0 Use doctrine/inflector package as complete replacement instead. */ -class Inflector extends DoctrineInflector +class Inflector { /** * The inflector rules for countability. @@ -123,4 +123,76 @@ public function isSingular($word) { return static::singularize($word) === $word; } + + /** + * Proxy for Inflector::tableize() + */ + public static function tableize(string $word) : string + { + $inflector = InflectorFactory::create()->build(); + + return $inflector->tableize($word); + } + + /** + * Proxy for Inflector::classify() + */ + public static function classify(string $word) : string + { + $inflector = InflectorFactory::create()->build(); + + return $inflector->classify($word); + } + + /** + * Proxy for Inflector::camelize() + */ + public static function camelize(string $word) : string + { + $inflector = InflectorFactory::create()->build(); + + return $inflector->camelize($word); + } + + /** + * Proxy for Inflector::ucwords() + */ + public static function ucwords(string $string, string $delimiters = " \n\t\r\0\x0B-") : string + { + return ucwords($string, $delimiters); + } + + /** + * Empty method to suffice the former interface + */ + public static function reset() : void + { + } + + /** + * Empty method to suffice the former interface + */ + public static function rules(string $type, iterable $rules, bool $reset = false) : void + { + } + + /** + * Proxy for Inflector::pluralize() + */ + public static function pluralize(string $word) : string + { + $inflector = InflectorFactory::create()->build(); + + return $inflector->pluralize($word); + } + + /** + * Proxy for Inflector::singularize() + */ + public static function singularize(string $word) : string + { + $inflector = InflectorFactory::create()->build(); + + return $inflector->singularize($word); + } } From 7bbcb1614ffdb85a2148546f58278fb1c9c9cc6d Mon Sep 17 00:00:00 2001 From: Hannes Papenberg Date: Fri, 11 Jul 2025 12:05:54 +0200 Subject: [PATCH 13/18] Codestyle --- 
src/Inflector.php | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Inflector.php b/src/Inflector.php index e49c8213..4f99f0fb 100644 --- a/src/Inflector.php +++ b/src/Inflector.php @@ -127,7 +127,7 @@ public function isSingular($word) /** * Proxy for Inflector::tableize() */ - public static function tableize(string $word) : string + public static function tableize(string $word): string { $inflector = InflectorFactory::create()->build(); @@ -137,7 +137,7 @@ public static function tableize(string $word) : string /** * Proxy for Inflector::classify() */ - public static function classify(string $word) : string + public static function classify(string $word): string { $inflector = InflectorFactory::create()->build(); @@ -147,7 +147,7 @@ public static function classify(string $word) : string /** * Proxy for Inflector::camelize() */ - public static function camelize(string $word) : string + public static function camelize(string $word): string { $inflector = InflectorFactory::create()->build(); @@ -157,7 +157,7 @@ public static function camelize(string $word) : string /** * Proxy for Inflector::ucwords() */ - public static function ucwords(string $string, string $delimiters = " \n\t\r\0\x0B-") : string + public static function ucwords(string $string, string $delimiters = " \n\t\r\0\x0B-"): string { return ucwords($string, $delimiters); } @@ -165,21 +165,21 @@ public static function ucwords(string $string, string $delimiters = " \n\t\r\0\x /** * Empty method to suffice the former interface */ - public static function reset() : void + public static function reset(): void { } /** * Empty method to suffice the former interface */ - public static function rules(string $type, iterable $rules, bool $reset = false) : void + public static function rules(string $type, iterable $rules, bool $reset = false): void { } /** * Proxy for Inflector::pluralize() */ - public static function pluralize(string $word) : string + public static function 
pluralize(string $word): string { $inflector = InflectorFactory::create()->build(); @@ -189,7 +189,7 @@ public static function pluralize(string $word) : string /** * Proxy for Inflector::singularize() */ - public static function singularize(string $word) : string + public static function singularize(string $word): string { $inflector = InflectorFactory::create()->build(); From bd6d1cccaaf3e56b261417f514a606b67f116d74 Mon Sep 17 00:00:00 2001 From: Hannes Papenberg Date: Fri, 11 Jul 2025 14:09:45 +0200 Subject: [PATCH 14/18] Updating documentation --- docs/v3-to-v4-update.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/v3-to-v4-update.md b/docs/v3-to-v4-update.md index 4ee255e5..5abbd7bd 100644 --- a/docs/v3-to-v4-update.md +++ b/docs/v3-to-v4-update.md @@ -17,4 +17,4 @@ The following deprecated methods have been removed in this release: ### The `Inflector` class has been deprecated -The `Inflector` class in this package depends on the `doctrine/inflector` v1 package, which has been EOL for some time. The newer v2 version of that package does not allow our class to inherit from their `Inflector` class anymore. At the same time, the new `doctrine/inflector` package provides basically everything our `Inflector` class supplied, so our class is not necessary anymore. Thus the `Inflector` class in this package is deprecated and should not be used anymore. Use the `doctrine/inflector` package directly instead. This class will be removed in 5.0 of this package. +The `Inflector` class in this package does not provide benefits over the original `doctrine/inflector` package anymore and is currently only there to keep backwards compatibility with version 3. Use `doctrine/inflector` directly instead. This class will be removed in 5.0 of this package. 
From 5c9184fbe367bc417dbdf4e6c5e12109ba14d84f Mon Sep 17 00:00:00 2001 From: Richard Fath Date: Sat, 12 Jul 2025 13:07:35 +0200 Subject: [PATCH 15/18] Remove 3.x-dev phpstan baseline in 4.x-dev --- phpstan-baseline.neon | 40 ---------------------------------------- phpstan.neon | 1 - 2 files changed, 41 deletions(-) delete mode 100644 phpstan-baseline.neon diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon deleted file mode 100644 index d4ad014e..00000000 --- a/phpstan-baseline.neon +++ /dev/null @@ -1,40 +0,0 @@ -parameters: - excludePaths: - analyse: - - src/phputf8/* - ignoreErrors: - - - message: "#^Call to deprecated method rules\\(\\) of class Doctrine\\\\Common\\\\Inflector\\\\Inflector\\.$#" - count: 1 - path: src/Inflector.php - - - - message: """ - #^Call to deprecated method toPlural\\(\\) of class Joomla\\\\String\\\\Inflector\\: - 3\\.0 Use Doctrine\\\\Common\\\\Inflector\\\\Inflector\\:\\:pluralize\\(\\) instead\\.$# - """ - count: 1 - path: src/Inflector.php - - - - message: """ - #^Call to deprecated method toSingular\\(\\) of class Joomla\\\\String\\\\Inflector\\: - 3\\.0 Use Doctrine\\\\Common\\\\Inflector\\\\Inflector\\:\\:singularize\\(\\) instead\\.$# - """ - count: 2 - path: src/Inflector.php - - - - message: "#^Class Joomla\\\\String\\\\Inflector extends deprecated class Doctrine\\\\Common\\\\Inflector\\\\Inflector\\.$#" - count: 1 - path: src/Inflector.php - - - - message: "#^Method Joomla\\\\String\\\\Inflector\\:\\:getInstance\\(\\) should return static\\(Joomla\\\\String\\\\Inflector\\) but returns Joomla\\\\String\\\\Inflector\\.$#" - count: 1 - path: src/Inflector.php - - - - message: "#^Unsafe usage of new static\\(\\)\\.$#" - count: 2 - path: src/Inflector.php diff --git a/phpstan.neon b/phpstan.neon index 305d72f1..07d82270 100644 --- a/phpstan.neon +++ b/phpstan.neon @@ -1,7 +1,6 @@ includes: - vendor/phpstan/phpstan-deprecation-rules/rules.neon - - phpstan-baseline.neon parameters: level: 5 From 
63a7c2a41202238af3c2602195c3125edd8b553b Mon Sep 17 00:00:00 2001 From: Robert Deutz Date: Wed, 16 Jul 2025 16:42:14 +0200 Subject: [PATCH 16/18] revert signature change (#61) Co-authored-by: Robert Deutz --- src/StringHelper.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/StringHelper.php b/src/StringHelper.php index bb5e549f..dfc43063 100644 --- a/src/StringHelper.php +++ b/src/StringHelper.php @@ -462,7 +462,7 @@ public static function strcmp($str1, $str2, $locale = false) * @link https://www.php.net/strcspn * @since 1.3.0 */ - public static function strcspn(string $str, string $mask, $start = null, $length = null) + public static function strcspn($str, $mask, $start = null, $length = null) { if (strlen($mask) == 0) { return 0; @@ -535,7 +535,7 @@ public static function strrev($str) * @link https://www.php.net/strspn * @since 1.3.0 */ - public static function strspn(string $str, string $mask, ?int $start = null, ?int $length = null) + public static function strspn($str, $mask, $start = null, $length = null) { $mask = preg_replace('!([\\\\\\-\\]\\[/^])!', '\\\${1}', $mask); From b01329f38a9cb7b5930ee52b42cb84776147a382 Mon Sep 17 00:00:00 2001 From: Richard Fath Date: Sat, 19 Jul 2025 23:02:54 +0200 Subject: [PATCH 17/18] Update .gitattributes --- .gitattributes | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index 0f255e23..4ebfb831 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5,6 +5,5 @@ Tests/ export-ignore .gitattributes export-ignore .gitignore export-ignore phpstan.neon export-ignore -phpstan-baseline.neon export-ignore phpunit.xml.dist export-ignore ruleset.xml export-ignore From da2329e05f1f5fc98b709f8638f279513bcd1108 Mon Sep 17 00:00:00 2001 From: Hannes Papenberg Date: Wed, 23 Jul 2025 20:42:26 +0200 Subject: [PATCH 18/18] Joomla! 
Framework v4.0 --- composer.json | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/composer.json b/composer.json index fa2f0abf..2a4e782e 100644 --- a/composer.json +++ b/composer.json @@ -12,15 +12,12 @@ }, "require-dev": { "doctrine/inflector": "^2.0.10", - "joomla/test": "dev-4.x-dev", + "joomla/test": "^4.0", "phpunit/phpunit": "^12.2.6", "squizlabs/php_codesniffer": "^3.7.2", "phpstan/phpstan": "2.1.17", "phpstan/phpstan-deprecation-rules": "2.0.3" }, - "conflict": { - "doctrine/inflector": "<1.2" - }, "suggest": { "ext-mbstring": "For improved processing", "doctrine/inflector": "To use the string inflector" @@ -35,12 +32,5 @@ "Joomla\\String\\Tests\\": "Tests/" } }, - "minimum-stability": "dev", - "extra": { - "branch-alias": { - "dev-2.0-dev": "2.0-dev", - "dev-3.x-dev": "3.x-dev", - "dev-4.x-dev": "4.x-dev" - } - } + "minimum-stability": "dev" }