源码网商城,靠谱的源码在线交易网站 我的订单 购物车 帮助

源码网商城

PHP改进计算字符串相似度的函数similar_text()、levenshtein()

  • 时间:2021-06-30 05:36 编辑: 来源: 阅读:
  • 扫一扫,手机访问
摘要:本文给出支持中文(UTF-8 多字节字符)的 PHP 字符串相似度函数:similar_text() 与 levenshtein() 的中文改进版,以及最长公共子序列 LCS 的实现。
[b]similar_text()中文汉字版[/b]
[u]复制代码[/u] 代码如下:
     <?php       //拆分字符串       function split_str($str) {         preg_match_all("/./u", $str, $arr);         return $arr[0];       }              //相似度检测       function similar_text_cn($str1, $str2) {         $arr_1 = array_unique(split_str($str1));         $arr_2 = array_unique(split_str($str2));         $similarity = count($arr_2) - count(array_diff($arr_2, $arr_1));                  return $similarity;       }  
[b]levenshtein()中文汉字版[/b]
[u]复制代码[/u] 代码如下:
     <?php       //拆分字符串       function mbStringToArray($string, $encoding = 'UTF-8') {           $arrayResult = array();           while ($iLen = mb_strlen($string, $encoding)) {               array_push($arrayResult, mb_substr($string, 0, 1, $encoding));               $string = mb_substr($string, 1, $iLen, $encoding);           }           return $arrayResult;       }       //编辑距离       function levenshtein_cn($str1, $str2, $costReplace = 1, $encoding = 'UTF-8') {           $count_same_letter = 0;           $d = array();           $mb_len1 = mb_strlen($str1, $encoding);           $mb_len2 = mb_strlen($str2, $encoding);           $mb_str1 = mbStringToArray($str1, $encoding);           $mb_str2 = mbStringToArray($str2, $encoding);           for ($i1 = 0; $i1 <= $mb_len1; $i1++) {               $d[$i1] = array();               $d[$i1][0] = $i1;           }           for ($i2 = 0; $i2 <= $mb_len2; $i2++) {               $d[0][$i2] = $i2;           }           for ($i1 = 1; $i1 <= $mb_len1; $i1++) {               for ($i2 = 1; $i2 <= $mb_len2; $i2++) {                   // $cost = ($str1[$i1 - 1] == $str2[$i2 - 1]) ? 0 : 1;                   if ($mb_str1[$i1 - 1] === $mb_str2[$i2 - 1]) {                       $cost = 0;                       $count_same_letter++;                   } else {                       $cost = $costReplace; //替换                   }                   $d[$i1][$i2] = min($d[$i1 - 1][$i2] + 1, //插入                   $d[$i1][$i2 - 1] + 1, //删除                   $d[$i1 - 1][$i2 - 1] + $cost);               }           }           return $d[$mb_len1][$mb_len2];           //return array('distance' => $d[$mb_len1][$mb_len2], 'count_same_letter' => $count_same_letter);       }  
[b]最长公共子序列LCS()[/b]
[u]复制代码[/u] 代码如下:
         <?php           //最长公共子序列英文版           function LCS_en($str_1, $str_2) {             $len_1 = strlen($str_1);             $len_2 = strlen($str_2);             $len = $len_1 > $len_2 ? $len_1 : $len_2;             $dp = array();             for ($i = 0; $i <= $len; $i++) {               $dp[$i] = array();               $dp[$i][0] = 0;               $dp[0][$i] = 0;             }             for ($i = 1; $i <= $len_1; $i++) {               for ($j = 1; $j <= $len_2; $j++) {                 if ($str_1[$i - 1] == $str_2[$j - 1]) {                   $dp[$i][$j] = $dp[$i - 1][$j - 1] + 1;                 } else {                   $dp[$i][$j] = $dp[$i - 1][$j] > $dp[$i][$j - 1] ? $dp[$i - 1][$j] : $dp[$i][$j - 1];                 }               }             }             return $dp[$len_1][$len_2];           }           //拆分字符串           function mbStringToArray($string, $encoding = 'UTF-8') {             $arrayResult = array();             while ($iLen = mb_strlen($string, $encoding)) {               array_push($arrayResult, mb_substr($string, 0, 1, $encoding));               $string = mb_substr($string, 1, $iLen, $encoding);             }             return $arrayResult;           }           //最长公共子序列中文版           function LCS_cn($str1, $str2, $encoding = 'UTF-8') {             $mb_len1 = mb_strlen($str1, $encoding);             $mb_len2 = mb_strlen($str2, $encoding);             $mb_str1 = mbStringToArray($str1, $encoding);             $mb_str2 = mbStringToArray($str2, $encoding);             $len = $mb_len1 > $mb_len2 ? 
$mb_len1 : $mb_len2;             $dp = array();             for ($i = 0; $i <= $len; $i++) {               $dp[$i] = array();               $dp[$i][0] = 0;               $dp[0][$i] = 0;             }             for ($i = 1; $i <= $mb_len1; $i++) {               for ($j = 1; $j <= $mb_len2; $j++) {                 if ($mb_str1[$i - 1] == $mb_str2[$j - 1]) {                   $dp[$i][$j] = $dp[$i - 1][$j - 1] + 1;                 } else {                   $dp[$i][$j] = $dp[$i - 1][$j] > $dp[$i][$j - 1] ? $dp[$i - 1][$j] : $dp[$i][$j - 1];                 }               }             }             return $dp[$mb_len1][$mb_len2];           }
  • 全部评论(0)
联系客服
客服电话:
400-000-3129
微信版

扫一扫进微信版
返回顶部