- A+
所属分类:PHP
PHP转编码函数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
<?php
/*
 * Byte-level encoding helpers: validators for UTF-8, ISO-8859-1 and
 * Windows-1252 text, a code-point-to-UTF-8 encoder, and a Windows-1252 to
 * UTF-8 converter. All functions operate on raw byte strings.
 */

/**
 * Checks whether a byte string is well-formed UTF-8 text.
 *
 * Accepted: tab, LF, CR, printable ASCII (0x20-0x7E) and the multi-byte
 * sequences for U+00A0..U+10FFFF, excluding overlong forms and surrogates.
 * Rejected: every other C0 control, DEL, and the C1 controls U+0080..U+009F.
 *
 * The group is non-capturing and possessive: the alternatives start with
 * disjoint bytes, so nothing can be gained by backtracking, and long inputs
 * do not accumulate backtracking state.
 *
 * @param string $str Bytes to validate.
 * @return int|false 1 when the whole string matches, 0 when it does not,
 *                   false if the PCRE engine reports an error.
 */
function isUTF8($str)
{
    $pattern = '/^(?:'
        . '[\x09\x0A\x0D\x20-\x7E]'               // ASCII text
        . '|\xC2[\xA0-\xBF]'                      // U+00A0..U+00BF
        . '|[\xC3-\xDF][\x80-\xBF]'               // U+00C0..U+07FF
        . '|\xE0[\xA0-\xBF][\x80-\xBF]'           // U+0800..U+0FFF (no overlongs)
        . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'    // U+1000..U+CFFF, U+E000..U+FFFF
        . '|\xED[\x80-\x9F][\x80-\xBF]'           // U+D000..U+D7FF (no surrogates)
        . '|\xF0[\x90-\xBF][\x80-\xBF]{2}'        // U+10000..U+3FFFF
        . '|[\xF1-\xF3][\x80-\xBF]{3}'            // U+40000..U+FFFFF
        . '|\xF4[\x80-\x8F][\x80-\xBF]{2}'        // U+100000..U+10FFFF
        . ')*+$/';

    return preg_match($pattern, $str);
}

/**
 * Checks whether a byte string consists only of tab, LF, CR, printable ASCII
 * and the bytes 0xA0-0xFF (the printable range of ISO-8859-1).
 *
 * @param string $str Bytes to validate.
 * @return int|false 1 on match, 0 otherwise, false on a PCRE error.
 */
function isISO88591($str)
{
    return preg_match('/^[\x09\x0A\x0D\x20-\x7E\xA0-\xFF]*+$/', $str);
}

/**
 * Checks whether a byte string consists only of tab, LF, CR, printable ASCII
 * and the bytes Windows-1252 assigns a character to; 0x81, 0x8D, 0x8F, 0x90
 * and 0x9D are rejected.
 *
 * @param string $str Bytes to validate.
 * @return int|false 1 on match, 0 otherwise, false on a PCRE error.
 */
function isCP1252($str)
{
    return preg_match('/^[\x09\x0A\x0D\x20-\x7E\x80\x82-\x8C\x8E\x91-\x9C\x9E-\xFF]*+$/', $str);
}

/**
 * Generates the UTF-8 byte sequence for a Unicode code point.
 *
 * @param int $codePoint Code point to encode.
 * @return string|false The encoded character; an empty string for U+FEFF
 *                      (the byte order mark is deliberately dropped); false
 *                      for negative values, surrogates (U+D800..U+DFFF) and
 *                      values above U+10FFFF.
 */
function utf8Char($codePoint)
{
    if ($codePoint < 0) {
        return false;
    }

    if ($codePoint <= 0x007F) {
        // One byte: plain ASCII.
        return chr($codePoint);
    }

    if ($codePoint <= 0x07FF) {
        // Two bytes: 110xxxxx 10xxxxxx.
        return chr(0xC0 | ($codePoint >> 6))
            . chr(0x80 | ($codePoint & 0x3F));
    }

    if ($codePoint == 0xFEFF) {
        // Zap the BOM: contribute nothing to the output.
        return '';
    }

    if ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) {
        // Surrogates are not encodable in UTF-8.
        return false;
    }

    if ($codePoint <= 0xFFFF) {
        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
        return chr(0xE0 | ($codePoint >> 12))
            . chr(0x80 | (($codePoint >> 6) & 0x3F))
            . chr(0x80 | ($codePoint & 0x3F));
    }

    if ($codePoint <= 0x10FFFF) {
        // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
        return chr(0xF0 | ($codePoint >> 18))
            . chr(0x80 | (($codePoint >> 12) & 0x3F))
            . chr(0x80 | (($codePoint >> 6) & 0x3F))
            . chr(0x80 | ($codePoint & 0x3F));
    }

    // Beyond the Unicode range.
    return false;
}

/**
 * Converts a single Windows-1252 byte to its UTF-8 encoding.
 *
 * Used as the per-byte callback of utf8FromCP1252(). Only the first byte of
 * $char is looked at.
 *
 * @param string $char A one-byte string.
 * @return string|false The UTF-8 character, or false when the byte is one of
 *                      the five values Windows-1252 leaves unassigned.
 */
function utf8FromCP1252Char($char)
{
    // Code points for the 0x80-0x9F bytes, the only range where Windows-1252
    // differs from ISO-8859-1. The unassigned bytes (0x81, 0x8D, 0x8F, 0x90,
    // 0x9D) are intentionally absent.
    static $codePoints = array(
        0x80 => 0x20AC, 0x82 => 0x201A, 0x83 => 0x0192, 0x84 => 0x201E,
        0x85 => 0x2026, 0x86 => 0x2020, 0x87 => 0x2021, 0x88 => 0x02C6,
        0x89 => 0x2030, 0x8A => 0x0160, 0x8B => 0x2039, 0x8C => 0x0152,
        0x8E => 0x017D, 0x91 => 0x2018, 0x92 => 0x2019, 0x93 => 0x201C,
        0x94 => 0x201D, 0x95 => 0x2022, 0x96 => 0x2013, 0x97 => 0x2014,
        0x98 => 0x02DC, 0x99 => 0x2122, 0x9A => 0x0161, 0x9B => 0x203A,
        0x9C => 0x0153, 0x9E => 0x017E, 0x9F => 0x0178,
    );

    $byte = ord($char);

    if ($byte < 0x80 || $byte > 0x9F) {
        // Outside 0x80-0x9F the byte value is the code point itself.
        return utf8Char($byte);
    }

    return isset($codePoints[$byte]) ? utf8Char($codePoints[$byte]) : false;
}

/**
 * Converts the encoding of a string from Windows-1252 to UTF-8.
 *
 * Every byte >= 0x80 is re-encoded in a single pass; ASCII bytes are left
 * untouched. The conversion does not rely on create_function() (removed in
 * PHP 8.0) or utf8_encode() (deprecated in PHP 8.2).
 *
 * @param string $string Windows-1252 encoded bytes.
 * @return string The UTF-8 encoded text, or an empty string when $string
 *                fails the isCP1252() check.
 */
function utf8FromCP1252($string)
{
    if (!isCP1252($string)) {
        return '';
    }

    return preg_replace_callback(
        '/[\x80-\xFF]/',
        function ($match) {
            return utf8FromCP1252Char($match[0]);
        },
        $string
    );
}
?>
转自http://lachy.id.au/dev/2005/11/encoding-functions-source

微信公众号
扫一扫关注运维生存时间公众号,获取最新技术文章~