源码网商城,靠谱的源码在线交易网站 我的订单 购物车 帮助

源码网商城

php获取中文拼音首字母类和函数分享

  • 时间:2020-12-07 04:28 编辑: 来源: 阅读:
  • 扫一扫,手机访问
摘要:php获取中文拼音首字母类和函数分享
一、公司同事整理的类,挺实用的.相信拿出来分享下他不会介意的O(∩_∩)O.不过如果首字母是数字或英文会有些问题.
[u]复制代码[/u] 代码如下:
/**
 * Helper_Spell: derives upper-case pinyin initial letters for Chinese text.
 *
 * The lookup works on GB2312 byte pairs. Level-1 hanzi (0xB0A1-0xD7F9) are
 * stored in pinyin order, so the first character of every initial letter acts
 * as a range boundary. Characters outside that range (level-2 hanzi, GBK
 * extensions, symbols) have no mapping and are dropped from the result.
 *
 * @category Helper
 * @package  Helper_Spell
 * @author   Lancer <lancer.he@gmail.com>
 * @version  1.0
 * @see      Translation_Big2gb
 */
class Helper_Spell
{
    /**
     * Range boundaries: key = leadByte * 1000 + trailByte of the first GB2312
     * level-1 character whose pinyin starts with the given letter. The final
     * entry (0xD7F9) is the inclusive upper bound of the level-1 area.
     *
     * @var array
     * @access private
     */
    private $_pinyins = array(
        176161 => 'A',
        176197 => 'B',
        178193 => 'C',
        180238 => 'D',
        182234 => 'E',
        183162 => 'F',
        184193 => 'G',
        185254 => 'H',
        187247 => 'J',
        191166 => 'K',
        192172 => 'L',
        194232 => 'M',
        196195 => 'N',
        197182 => 'O',
        197190 => 'P',
        198218 => 'Q',
        200187 => 'R',
        200246 => 'S',
        203250 => 'T',
        205218 => 'W',
        206244 => 'X',
        209185 => 'Y',
        212209 => 'Z',
        215249 => 'Z',
    );

    /**
     * Encoding of the input strings ("utf-8" or a GB2312/GBK name).
     *
     * @var string
     * @access private
     */
    private $_charset = null;

    /**
     * Constructor.
     *
     * @param string $charset Input encoding; "utf-8" (default) is converted to
     *                        GBK before lookup, anything else is assumed to be
     *                        GB2312/GBK bytes already.
     */
    public function __construct( $charset = 'utf-8' )
    {
        $this->_charset = $charset;
    }

    /**
     * Returns the first A-Z letter of the initials of $str, or '*' when the
     * string yields no letter at all.
     *
     * @access public
     * @static
     * @param  string $str
     * @param  string $charset Input encoding, forwarded to getInitials().
     * @return string Single upper-case letter or '*'.
     * @example Helper_Spell::getInitialsFirst('我的爱'); => W
     */
    public static function getInitialsFirst( $str, $charset = 'utf-8' )
    {
        // The original dropped $charset here, so non-UTF-8 callers were
        // silently treated as UTF-8.
        $string = self::getInitials( $str, $charset );
        $length = strlen( $string );
        for ( $i = 0; $i < $length; $i++ ) {
            $byte = ord( $string[$i] );
            // Pure-ASCII input is returned unchanged by getInitials(), so
            // lower-case letters must be folded here.
            if ( $byte >= 97 && $byte <= 122 ) {
                $byte -= 32;
            }
            if ( $byte >= 65 && $byte <= 90 ) {
                return chr( $byte );
            }
        }
        return '*';
    }

    /**
     * Returns the pinyin initials of every character in $str.
     *
     * @access public
     * @static
     * @param  string $str
     * @param  string $charset Input encoding, see __construct().
     * @return string
     * @example Helper_Spell::getInitials('我的爱'); => WDA
     */
    public static function getInitials( $str, $charset = 'utf-8' )
    {
        $instance = new Helper_Spell( $charset );
        return $instance->_getInitials( $str );
    }

    /**
     * Builds the initials string.
     *
     *   - pure ASCII input is returned unchanged      e.g. abc123 => abc123
     *   - Chinese input yields upper-case initials    e.g. 王小明 => WXM
     *   - mixed input keeps ASCII and is upper-cased  e.g. 我i我j => WIWJ
     *
     * @access private
     * @param  string $str
     * @param  bool   $translation Convert traditional to simplified first,
     *                             when Translation_Big2gb is available.
     * @return string
     */
    private function _getInitials( $str, $translation = TRUE )
    {
        // Not empty(): the string "0" is valid input and must survive.
        if ( $str === null || $str === false || $str === '' ) {
            return '';
        }
        $str = (string) $str;

        if ( $this->_isAsciis( $str ) ) {
            return $str;
        }

        // Translation_Big2gb is a project class that may not be loaded;
        // skip the traditional->simplified step rather than abort.
        if ( $translation && class_exists( 'Translation_Big2gb' ) ) {
            $str = Translation_Big2gb::big2gb( $str );
        }

        if ( $this->_isUtf8Charset() ) {
            // //IGNORE lets iconv skip characters that have no GBK mapping.
            $converted = iconv( 'utf-8', 'gbk//IGNORE', $str );
            if ( $converted === false ) {
                return '';
            }
            $str = $converted;
        }

        $result = '';
        foreach ( $this->_cutWord( $str ) as $word ) {
            if ( strlen( $word ) === 1 ) {
                // Keep ASCII as is; a lone non-ASCII byte is a truncated
                // double-byte character and cannot be mapped.
                if ( $this->_isAscii( $word ) ) {
                    $result .= $word;
                }
                continue;
            }

            $lead  = ord( $word[0] );
            $trail = ord( $word[1] );
            // GB2312 trail bytes are 0xA1-0xFE; anything lower is a GBK
            // extension character, for which the boundary table is invalid.
            if ( $trail < 0xA1 ) {
                continue;
            }

            $key = $this->_search( $lead * 1000 + $trail );
            if ( $key !== -1 ) {
                $result .= $this->_pinyins[$key];
            }
        }

        return strtoupper( $result );
    }

    /**
     * Tells whether the configured charset names UTF-8 (case-insensitive,
     * with or without the dash).
     *
     * @access private
     * @return bool
     */
    private function _isUtf8Charset()
    {
        $name = strtolower( str_replace( array( '-', '_' ), '', (string) $this->_charset ) );
        return $name === 'utf8';
    }

    /**
     * Splits a GBK byte string into units of one ASCII byte or one
     * double-byte character.
     *
     * @access private
     * @param  string $str
     * @return array
     */
    private function _cutWord( $str )
    {
        $words  = array();
        $length = strlen( $str );
        $offset = 0;
        while ( $offset < $length ) {
            $size    = $this->_isAscii( $str[$offset] ) ? 1 : 2;
            // substr() yields a single byte when the string ends on a lead byte.
            $words[] = substr( $str, $offset, $size );
            $offset += $size;
        }
        return $words;
    }

    /**
     * Tells whether the first byte of $char is a 7-bit ASCII byte.
     *
     * @access private
     * @param  string $char
     * @return bool
     */
    private function _isAscii( $char )
    {
        // 0x80 and above are lead/continuation bytes in both UTF-8 and GBK.
        return ( ord( substr( $char, 0, 1 ) ) < 0x80 );
    }

    /**
     * Tells whether the whole string consists of 7-bit ASCII bytes.
     *
     * @access private
     * @param  string $str
     * @return bool
     */
    private function _isAsciis( $str )
    {
        return preg_match( '/[^\x00-\x7F]/', $str ) !== 1;
    }

    /**
     * Maps an ASCII code to a digit or an upper-case letter; any other code
     * yields '~'. Not called from within this class.
     *
     * @access private
     * @param  int $ascii
     * @return string
     */
    private function _getChar( $ascii )
    {
        if ( $ascii >= 48 && $ascii <= 57 ) {
            return chr( $ascii );       // 0-9
        }
        if ( $ascii >= 65 && $ascii <= 90 ) {
            return chr( $ascii );       // A-Z
        }
        if ( $ascii >= 97 && $ascii <= 122 ) {
            return chr( $ascii - 32 );  // a-z, folded to upper case
        }
        return '~';
    }

    /**
     * Binary search for the boundary that covers $code.
     *
     * @access private
     * @param  int $code leadByte * 1000 + trailByte of a GB2312 character.
     * @return int The matching key of $_pinyins, or -1 when $code lies
     *             outside the level-1 hanzi range.
     */
    private function _search( $code )
    {
        $data  = array_keys( $this->_pinyins );
        $lower = 0;
        $upper = count( $data ) - 1;

        if ( $code < $data[$lower] || $code > $data[$upper] ) {
            return -1;
        }

        // Narrow down to the greatest boundary that is <= $code.
        while ( $lower < $upper ) {
            $middle = ( $lower + $upper + 1 ) >> 1;
            if ( $data[$middle] <= $code ) {
                $lower = $middle;
            } else {
                $upper = $middle - 1;
            }
        }

        return $data[$lower];
    }
}
二、用来得到中文的首字母 这个是将中文转换为拼音的类:charset
[u]复制代码[/u] 代码如下:
<?php
/**
 * Maps GBK/GB2312-encoded Chinese text to the first letter of each
 * character's pinyin.
 *
 * The table lists every pinyin syllable together with the signed 16-bit
 * GB2312 code (lead * 256 + trail - 65536) of the first level-1 hanzi that is
 * pronounced that way. Level-1 hanzi are ordered by pinyin, so a character
 * belongs to the last syllable whose code is <= the character's code.
 */
class charset
{
    /**
     * Syllable boundaries in ascending code order: array(syllable, code).
     *
     * @var array
     */
    private $_code = array(
        array('a', -20319), array('ai', -20317), array('an', -20304), array('ang', -20295),
        array('ao', -20292), array('ba', -20283), array('bai', -20265), array('ban', -20257),
        array('bang', -20242), array('bao', -20230), array('bei', -20051), array('ben', -20036),
        array('beng', -20032), array('bi', -20026), array('bian', -20002), array('biao', -19990),
        array('bie', -19986), array('bin', -19982), array('bing', -19976), array('bo', -19805),
        array('bu', -19784), array('ca', -19775), array('cai', -19774), array('can', -19763),
        array('cang', -19756), array('cao', -19751), array('ce', -19746), array('ceng', -19741),
        array('cha', -19739), array('chai', -19728), array('chan', -19725), array('chang', -19715),
        array('chao', -19540), array('che', -19531), array('chen', -19525), array('cheng', -19515),
        array('chi', -19500), array('chong', -19484), array('chou', -19479), array('chu', -19467),
        array('chuai', -19289), array('chuan', -19288), array('chuang', -19281), array('chui', -19275),
        array('chun', -19270), array('chuo', -19263), array('ci', -19261), array('cong', -19249),
        array('cou', -19243), array('cu', -19242), array('cuan', -19238), array('cui', -19235),
        array('cun', -19227), array('cuo', -19224), array('da', -19218), array('dai', -19212),
        array('dan', -19038), array('dang', -19023), array('dao', -19018), array('de', -19006),
        array('deng', -19003), array('di', -18996), array('dian', -18977), array('diao', -18961),
        array('die', -18952), array('ding', -18783), array('diu', -18774), array('dong', -18773),
        array('dou', -18763), array('du', -18756), array('duan', -18741), array('dui', -18735),
        array('dun', -18731), array('duo', -18722), array('e', -18710), array('en', -18697),
        array('er', -18696), array('fa', -18526), array('fan', -18518), array('fang', -18501),
        array('fei', -18490), array('fen', -18478), array('feng', -18463), array('fo', -18448),
        array('fou', -18447), array('fu', -18446), array('ga', -18239), array('gai', -18237),
        array('gan', -18231), array('gang', -18220), array('gao', -18211), array('ge', -18201),
        array('gei', -18184), array('gen', -18183), array('geng', -18181), array('gong', -18012),
        array('gou', -17997), array('gu', -17988), array('gua', -17970), array('guai', -17964),
        array('guan', -17961), array('guang', -17950), array('gui', -17947), array('gun', -17931),
        array('guo', -17928), array('ha', -17922), array('hai', -17759), array('han', -17752),
        array('hang', -17733), array('hao', -17730), array('he', -17721), array('hei', -17703),
        array('hen', -17701), array('heng', -17697), array('hong', -17692), array('hou', -17683),
        array('hu', -17676), array('hua', -17496), array('huai', -17487), array('huan', -17482),
        array('huang', -17468), array('hui', -17454), array('hun', -17433), array('huo', -17427),
        array('ji', -17417), array('jia', -17202), array('jian', -17185), array('jiang', -16983),
        array('jiao', -16970), array('jie', -16942), array('jin', -16915), array('jing', -16733),
        array('jiong', -16708), array('jiu', -16706), array('ju', -16689), array('juan', -16664),
        array('jue', -16657), array('jun', -16647), array('ka', -16474), array('kai', -16470),
        array('kan', -16465), array('kang', -16459), array('kao', -16452), array('ke', -16448),
        array('ken', -16433), array('keng', -16429), array('kong', -16427), array('kou', -16423),
        array('ku', -16419), array('kua', -16412), array('kuai', -16407), array('kuan', -16403),
        array('kuang', -16401), array('kui', -16393), array('kun', -16220), array('kuo', -16216),
        array('la', -16212), array('lai', -16205), array('lan', -16202), array('lang', -16187),
        array('lao', -16180), array('le', -16171), array('lei', -16169), array('leng', -16158),
        array('li', -16155), array('lia', -15959), array('lian', -15958), array('liang', -15944),
        array('liao', -15933), array('lie', -15920), array('lin', -15915), array('ling', -15903),
        array('liu', -15889), array('long', -15878), array('lou', -15707), array('lu', -15701),
        array('lv', -15681), array('luan', -15667), array('lue', -15661), array('lun', -15659),
        array('luo', -15652), array('ma', -15640), array('mai', -15631), array('man', -15625),
        array('mang', -15454), array('mao', -15448), array('me', -15436), array('mei', -15435),
        array('men', -15419), array('meng', -15416), array('mi', -15408), array('mian', -15394),
        array('miao', -15385), array('mie', -15377), array('min', -15375), array('ming', -15369),
        array('miu', -15363), array('mo', -15362), array('mou', -15183), array('mu', -15180),
        array('na', -15165), array('nai', -15158), array('nan', -15153), array('nang', -15150),
        array('nao', -15149), array('ne', -15144), array('nei', -15143), array('nen', -15141),
        array('neng', -15140), array('ni', -15139), array('nian', -15128), array('niang', -15121),
        array('niao', -15119), array('nie', -15117), array('nin', -15110), array('ning', -15109),
        array('niu', -14941), array('nong', -14937), array('nu', -14933), array('nv', -14930),
        array('nuan', -14929), array('nue', -14928), array('nuo', -14926), array('o', -14922),
        array('ou', -14921), array('pa', -14914), array('pai', -14908), array('pan', -14902),
        array('pang', -14894), array('pao', -14889), array('pei', -14882), array('pen', -14873),
        array('peng', -14871), array('pi', -14857), array('pian', -14678), array('piao', -14674),
        array('pie', -14670), array('pin', -14668), array('ping', -14663), array('po', -14654),
        array('pu', -14645), array('qi', -14630), array('qia', -14594), array('qian', -14429),
        array('qiang', -14407), array('qiao', -14399), array('qie', -14384), array('qin', -14379),
        array('qing', -14368), array('qiong', -14355), array('qiu', -14353), array('qu', -14345),
        array('quan', -14170), array('que', -14159), array('qun', -14151), array('ran', -14149),
        array('rang', -14145), array('rao', -14140), array('re', -14137), array('ren', -14135),
        array('reng', -14125), array('ri', -14123), array('rong', -14122), array('rou', -14112),
        array('ru', -14109), array('ruan', -14099), array('rui', -14097), array('run', -14094),
        array('ruo', -14092), array('sa', -14090), array('sai', -14087), array('san', -14083),
        array('sang', -13917), array('sao', -13914), array('se', -13910), array('sen', -13907),
        array('seng', -13906), array('sha', -13905), array('shai', -13896), array('shan', -13894),
        array('shang', -13878), array('shao', -13870), array('she', -13859), array('shen', -13847),
        array('sheng', -13831), array('shi', -13658), array('shou', -13611), array('shu', -13601),
        array('shua', -13406), array('shuai', -13404), array('shuan', -13400), array('shuang', -13398),
        array('shui', -13395), array('shun', -13391), array('shuo', -13387), array('si', -13383),
        array('song', -13367), array('sou', -13359), array('su', -13356), array('suan', -13343),
        array('sui', -13340), array('sun', -13329), array('suo', -13326), array('ta', -13318),
        array('tai', -13147), array('tan', -13138), array('tang', -13120), array('tao', -13107),
        array('te', -13096), array('teng', -13095), array('ti', -13091), array('tian', -13076),
        array('tiao', -13068), array('tie', -13063), array('ting', -13060), array('tong', -12888),
        array('tou', -12875), array('tu', -12871), array('tuan', -12860), array('tui', -12858),
        array('tun', -12852), array('tuo', -12849), array('wa', -12838), array('wai', -12831),
        array('wan', -12829), array('wang', -12812), array('wei', -12802), array('wen', -12607),
        array('weng', -12597), array('wo', -12594), array('wu', -12585), array('xi', -12556),
        array('xia', -12359), array('xian', -12346), array('xiang', -12320), array('xiao', -12300),
        array('xie', -12120), array('xin', -12099), array('xing', -12089), array('xiong', -12074),
        array('xiu', -12067), array('xu', -12058), array('xuan', -12039), array('xue', -11867),
        array('xun', -11861), array('ya', -11847), array('yan', -11831), array('yang', -11798),
        array('yao', -11781), array('ye', -11604), array('yi', -11589), array('yin', -11536),
        array('ying', -11358), array('yo', -11340), array('yong', -11339), array('you', -11324),
        array('yu', -11303), array('yuan', -11097), array('yue', -11077), array('yun', -11067),
        array('za', -11055), array('zai', -11052), array('zan', -11045), array('zang', -11041),
        array('zao', -11038), array('ze', -11024), array('zei', -11020), array('zen', -11019),
        array('zeng', -11018), array('zha', -11014), array('zhai', -10838), array('zhan', -10832),
        array('zhang', -10815), array('zhao', -10800), array('zhe', -10790), array('zhen', -10780),
        array('zheng', -10764), array('zhi', -10587), array('zhong', -10544), array('zhou', -10533),
        array('zhu', -10519), array('zhua', -10331), array('zhuai', -10329), array('zhuan', -10328),
        array('zhuang', -10322), array('zhui', -10315), array('zhun', -10309), array('zhuo', -10307),
        array('zi', -10296), array('zong', -10281), array('zou', -10274), array('zu', -10270),
        array('zuan', -10262), array('zui', -10260), array('zun', -10256), array('zuo', -10254)
    );

    /**
     * Converts a GBK/GB2312 byte string to pinyin initials.
     *
     * Bytes up to 160 are handed to convert() one at a time; a byte above 160
     * is combined with the following byte into one double-byte character.
     *
     * @param  string $str GBK/GB2312-encoded text (not UTF-8).
     * @return string Lower-case initials for mapped hanzi, other output as
     *                produced by convert().
     */
    public function PinYin($str)
    {
        $ret    = '';
        $length = strlen($str);
        for ($i = 0; $i < $length; $i++) {
            $p = ord($str[$i]);
            if ($p > 160) {
                // Double-byte character: the trail byte must exist. A string
                // that ends on a lead byte is truncated; reading past the end
                // would fabricate a code (and a letter) out of thin air.
                if ($i + 1 >= $length) {
                    break;
                }
                $q = ord($str[++$i]);
                $p = $p * 256 + $q - 65536;
            }
            $ret .= $this->convert($p);
        }
        return $ret;
    }

    /**
     * Converts one character code to its output string.
     *
     * @param  int $num A byte value (1-159) or a signed GB2312 code as
     *                  computed by PinYin().
     * @return string The byte itself for 1-159, the first pinyin letter for a
     *                level-1 hanzi code, otherwise an empty string.
     */
    public function convert($num)
    {
        if ($num > 0 && $num < 160) {
            return chr($num);
        }
        // -20319 is the first level-1 hanzi, -10247 the last one.
        if ($num < -20319 || $num > -10247) {
            return '';
        }
        // Walk down from the end to the last syllable that starts at or
        // before $num; the range check above guarantees a hit.
        for ($i = count($this->_code) - 1; $i >= 0; $i--) {
            if ($this->_code[$i][1] <= $num) {
                break;
            }
        }
        return substr($this->_code[$i][0], 0, 1);
    }
}
    下面这个是用来测试代码的:
[u]复制代码[/u] 代码如下:
<?php
// Demo: print the upper-case pinyin initials of a few UTF-8 sample words.
include_once 'charset.class.php';

header('Content-type: text/html;charset=utf-8');

$charset = new charset();

foreach (array('武汉','中国','上海') as $val) {
    // The class works on GBK bytes, so convert in and back out again.
    echo iconv(
        'gbk',
        'utf-8//IGNORE',
        strtoupper(
            $charset->PinYin(
                mb_convert_encoding($val, 'gbk', 'utf-8')
            )
        )
    ), '<br/>';
}
三、这是网上找到的一个方法,经过测试可以正常使用,但对一些生僻字或者特殊字符会有问题。
[u]复制代码[/u] 代码如下:
/**
 * Returns the upper-case pinyin initial of the first character of $str.
 *
 * ASCII letters are returned upper-cased. For Chinese text the first
 * character is mapped through the GB2312 level-1 table; the input may be
 * UTF-8 or already GB2312/GBK-encoded.
 *
 * @param  string $str
 * @return string|null '' for empty input, 'A'-'Z' on success, null when the
 *                     first character has no mapping (digits, punctuation,
 *                     rare hanzi, truncated bytes).
 */
function getFirstCharter($str)
{
    if (empty($str)) {
        return '';
    }
    $str = (string) $str;

    $first = ord($str[0]);
    // Only real letters: the plain 'A'..'z' range also contains [ \ ] ^ _ `.
    if (($first >= 65 && $first <= 90) || ($first >= 97 && $first <= 122)) {
        return strtoupper($str[0]);
    }
    if ($first < 0x80) {
        return null; // remaining ASCII (digits, punctuation) has no initial
    }

    // Convert only the first character, so that a later character without a
    // GB2312 mapping cannot make the whole conversion fail. preg_match()
    // with /u fails on input that is not valid UTF-8, in which case the
    // bytes are taken to be GB2312/GBK already.
    $gb = $str;
    if (preg_match('/^./us', $str, $match) === 1) {
        // iconv() raises a notice and returns false when the character has
        // no GB2312 mapping; fall back to the raw bytes then.
        $converted = iconv('UTF-8', 'gb2312', $match[0]);
        if ($converted !== false && $converted !== '') {
            $gb = $converted;
        }
    }

    if (strlen($gb) < 2) {
        return null; // lone lead byte: not a complete double-byte character
    }

    // Signed 16-bit GB2312 code of the character.
    $asc = ord($gb[0]) * 256 + ord($gb[1]) - 65536;
    if ($asc < -20319 || $asc > -10247) {
        return null; // outside the pinyin-ordered level-1 hanzi area
    }

    // Code of the first hanzi for each initial, in ascending order.
    $bounds = array(
        'A' => -20319, 'B' => -20283, 'C' => -19775, 'D' => -19218,
        'E' => -18710, 'F' => -18526, 'G' => -18239, 'H' => -17922,
        'J' => -17417, 'K' => -16474, 'L' => -16212, 'M' => -15640,
        'N' => -15165, 'O' => -14922, 'P' => -14914, 'Q' => -14630,
        'R' => -14149, 'S' => -14090, 'T' => -13318, 'W' => -12838,
        'X' => -12556, 'Y' => -11847, 'Z' => -11055,
    );

    $letter = null;
    foreach ($bounds as $candidate => $lowest) {
        if ($asc < $lowest) {
            break;
        }
        $letter = $candidate;
    }
    return $letter;
}
  • 全部评论(0)
联系客服
客服电话:
400-000-3129
微信版

扫一扫进微信版
返回顶部