PHP 转编码函数

作者：默北 | 分类：PHP | 标题：PHP 转编码函数 | 已关闭评论 | 7,729 | 字数 2288 | 阅读 7分37秒 | 阅读模式

PHP转编码函数

<?php

// Test whether $str consists solely of well-formed UTF-8 sequences drawn from
// a restricted repertoire: tab, LF, CR, printable ASCII (0x20-0x7E) and the
// multi-byte encodings of U+00A0 and above. NUL, the other C0 controls, DEL,
// the C1 controls U+0080-U+009F, overlong forms, surrogates and anything
// above U+10FFFF are rejected.
//
// Returns preg_match()'s result: 1 when the whole string matches, 0 when it
// does not, false if PCRE itself reports an error.
//
// The group is non-capturing and possessively quantified. Every alternative
// begins with a distinct lead byte, so giving up backtracking cannot change
// the outcome, but it keeps long inputs from exhausting PCRE's stack or
// backtrack limit (which would make preg_match() return false for valid text).
function isUTF8($str) {
	return preg_match('/^(?:[\x09\x0A\x0D\x20-\x7E]|\xC2[\xA0-\xBF]|[\xC3-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF][\x80-\xBF]{2}|[\xF1-\xF3][\x80-\xBF]{3}|\xF4[\x80-\x8F][\x80-\xBF]{2})*+$/', $str);
}

// Test whether every byte of $str is tab, LF, CR, printable ASCII (0x20-0x7E)
// or in the range 0xA0-0xFF, i.e. the byte values this file accepts as
// ISO-8859-1 text. Bytes 0x80-0x9F, DEL and the remaining C0 controls fail.
//
// Returns preg_match()'s result: 1 on match, 0 on mismatch, false on a PCRE
// error.
//
// A bare possessive character class replaces the original repeated capturing
// group: it accepts exactly the same strings but needs no per-byte group
// bookkeeping, so very long inputs cannot run PCRE out of stack.
function isISO88591($str) {
	return preg_match('/^[\x09\x0A\x0D\x20-\x7E\xA0-\xFF]*+$/', $str);
}

// Test whether every byte of $str is tab, LF, CR, printable ASCII (0x20-0x7E)
// or one of the high bytes this file accepts as Windows-1252: 0x80,
// 0x82-0x8C, 0x8E, 0x91-0x9C and 0x9E-0xFF. The bytes 0x81, 0x8D, 0x8F, 0x90
// and 0x9D are rejected, as are DEL and the remaining C0 controls.
//
// Returns preg_match()'s result: 1 on match, 0 on mismatch, false on a PCRE
// error.
//
// A bare possessive character class replaces the original repeated capturing
// group: it accepts exactly the same strings but needs no per-byte group
// bookkeeping, so very long inputs cannot run PCRE out of stack.
function isCP1252($str) {
	return preg_match('/^[\x09\x0A\x0D\x20-\x7E\x80\x82-\x8C\x8E\x91-\x9C\x9E-\xFF]*+$/', $str);
}

// Encode a single Unicode code point as a UTF-8 byte string.
//
// Returns the 1-4 byte encoding, an empty string for U+FEFF (the byte order
// mark is deliberately dropped), or false when the code point is negative,
// a surrogate (U+D800-U+DFFF) or greater than U+10FFFF.
function utf8Char($codePoint){
	// Anything below zero is not a code point.
	if ($codePoint < 0) {
		return false;
	}

	// One byte: U+0000 - U+007F.
	if ($codePoint <= 0x7F) {
		return chr($codePoint);
	}

	// Two bytes: U+0080 - U+07FF.
	if ($codePoint <= 0x7FF) {
		$lead = chr(0xC0 | ($codePoint >> 6));
		$tail = chr(0x80 | ($codePoint & 0x3F));
		return $lead . $tail;
	}

	// The BOM is swallowed rather than encoded.
	if ($codePoint == 0xFEFF) {
		return '';
	}

	// Surrogate halves have no UTF-8 form.
	if ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) {
		return false;
	}

	// Three bytes: the rest of the Basic Multilingual Plane.
	if ($codePoint <= 0xFFFF) {
		return chr(0xE0 | ($codePoint >> 12))
			. chr(0x80 | (($codePoint >> 6) & 0x3F))
			. chr(0x80 | ($codePoint & 0x3F));
	}

	// Four bytes: supplementary planes up to U+10FFFF.
	if ($codePoint <= 0x10FFFF) {
		return chr(0xF0 | ($codePoint >> 18))
			. chr(0x80 | (($codePoint >> 12) & 0x3F))
			. chr(0x80 | (($codePoint >> 6) & 0x3F))
			. chr(0x80 | ($codePoint & 0x3F));
	}

	// Beyond the Unicode range.
	return false;
}

// Callback function for utf8FromCP1252(): convert one Windows-1252 byte to
// its UTF-8 encoding.
//
// $char is a one-byte string; only its first byte is examined (via ord()).
//
// Returns the UTF-8 string produced by utf8Char() for the mapped code point,
// or false when the byte is one of the five values this table leaves
// unassigned (0x81, 0x8D, 0x8F, 0x90, 0x9D).
//
// Bytes outside 0x80-0x9F are encoded using the byte value itself as the code
// point. Previously such bytes indexed a missing array key, which raised a
// notice and produced a NUL byte; the unassigned bytes were stored as '' and
// yielded NUL or false depending on the PHP version.
function utf8FromCP1252Char($char) {
	// Windows-1252 bytes 0x80-0x9F => Unicode code points. Unassigned bytes
	// are simply absent from the table.
	static $c1CodePoints = array(
		0x80 => 0x20AC,
		0x82 => 0x201A,
		0x83 => 0x0192,
		0x84 => 0x201E,
		0x85 => 0x2026,
		0x86 => 0x2020,
		0x87 => 0x2021,
		0x88 => 0x02C6,
		0x89 => 0x2030,
		0x8A => 0x0160,
		0x8B => 0x2039,
		0x8C => 0x0152,
		0x8E => 0x017D,
		0x91 => 0x2018,
		0x92 => 0x2019,
		0x93 => 0x201C,
		0x94 => 0x201D,
		0x95 => 0x2022,
		0x96 => 0x2013,
		0x97 => 0x2014,
		0x98 => 0x02DC,
		0x99 => 0x2122,
		0x9A => 0x0161,
		0x9B => 0x203A,
		0x9C => 0x0153,
		0x9E => 0x017E,
		0x9F => 0x0178);

	$cp1252CodePoint = ord($char);

	// Outside the remapped window the byte value is already the code point.
	if ($cp1252CodePoint < 0x80 || $cp1252CodePoint > 0x9F) {
		return utf8Char($cp1252CodePoint);
	}

	// Unassigned byte: signal failure the same way utf8Char() does.
	if (!isset($c1CodePoints[$cp1252CodePoint])) {
		return false;
	}

	return utf8Char($c1CodePoints[$cp1252CodePoint]);
}

// Convert the encoding of a string from Windows-1252 to UTF-8.
//
// Returns the UTF-8 string, or an empty string when $string fails the
// isCP1252() check.
//
// Each byte >= 0x80 is transcoded individually: bytes 0x80-0x9F go through
// the Windows-1252 table in utf8FromCP1252Char(), bytes 0xA0-0xFF are encoded
// by utf8Char() with the byte value as the code point. This yields the same
// output as the former utf8_encode() + fix-up pass, without utf8_encode()
// (deprecated as of PHP 8.2) or create_function() (removed in PHP 8.0).
function utf8FromCP1252($string) {
	if (!isCP1252($string)) {
		return '';
	}

	// No /u modifier: the pattern must see raw bytes, not UTF-8 characters.
	return preg_replace_callback(
		'/[\x80-\xFF]/',
		function ($match) {
			$byte = ord($match[0]);
			if ($byte <= 0x9F) {
				return utf8FromCP1252Char($match[0]);
			}
			return utf8Char($byte);
		},
		$string
	);
}
?>

转自 http://lachy.id.au/dev/2005/11/encoding-functions-source

文章源自运维生存时间 - https://www.ttlsa.com/php/php-coding-function/

weixin
我的微信
微信公众号
扫一扫关注运维生存时间公众号,获取最新技术文章~
默北
  • 本文由 发表于 05/12/2013 17:54:54
  • 转载请务必保留本文链接:https://www.ttlsa.com/php/php-coding-function/
  • preg_match
  • 转编码