微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

PHP 敏感词过滤类(不依赖扩展)

废话不多说,直接上代码:

<?PHP
/**
 * Banned-word ("black word") detector.
 *
 * Banned words are loaded in batches by id range, each batch is turned into a
 * character trie, and the input text is scanned against every batch.
 *
 * Relies on two classes that are not defined in this file:
 *  - Rds::factory() is used as a cache exposing get($key) / setex($key,$ttl,$value).
 *    NOTE(review): the code stores a PHP array through setex() and expects get()
 *    to return false on a miss -- presumably the wrapper serializes values; confirm.
 *  - Model_BlackWord_BlackWord::search($para) is used as the data source; it is
 *    given 'field' / 'where' entries and is expected to return a list of rows.
 */
class Logic_BlackWord
{
  const APP_FORUM = 1;
  const APP_BLOG  = 2;
  const APP_Vote  = 3;

  /**
   * Find every banned word contained in a piece of text.
   *
   * The text is normalized once (tags stripped, everything except ASCII
   * letters/digits and U+4E00..U+9FA5 removed) and then matched against each
   * batch of banned words.
   *
   * @param string $txt raw text to check
   * @return array map of word type => list of matched words (always strings)
   */
  public function getHitList($txt)
  {
    $hitList = array();
    $max = $this->getMax();
    if($max)
    {
      // Normalizing and splitting do not depend on the batch, so do them once.
      $chars = $this->splitChars($this->normalizeText($txt));
      if($chars)
      {
        $size = 1000;
        $last = (int)ceil($max/$size);
        for($page=1;$page<=$last;$page++)
        {
          // Plain assignment instead of array_merge(): an all-digit word such as
          // "110" becomes an integer key, and array_merge() would renumber it,
          // losing the matched word.
          foreach($this->getHitListByPage($chars,$page,$size) as $hit=>$type)
          {
            $hitList[$hit] = $type;
          }
        }
      }
    }
    $hitList2 = array();
    foreach($hitList as $hit=>$type)
    {
      // Cast back to string: numeric words come out of the array as int keys.
      $hitList2[$type][] = (string)$hit;
    }
    return $hitList2;
  }

  /**
   * Highest banned-word id, used to work out how many batches exist.
   *
   * The value is read from the cache key 'blackWord_max'; on a miss it is
   * queried with MAX(id) and cached for 300 seconds.
   *
   * @return int|string 0 when no id could be determined
   */
  private function getMax()
  {
    $redis = Rds::factory();
    $memKey = 'blackWord_max';
    $max = $redis->get($memKey);
    if($max===false)
    {
      $max = 0;
      $blackWord = new Model_BlackWord_BlackWord();
      $para = array();
      $para['field'] = "MAX(id) AS max";
      $result = $blackWord->search($para);
      if(isset($result[0]['max'])) $max = $result[0]['max'];
      $redis->setex($memKey,300,$max);
    }
    return $max;
  }

  /**
   * Strip markup and every character that is not an ASCII letter, a digit or
   * in the range U+4E00..U+9FA5.
   *
   * @param string $txt
   * @return string normalized text; '' when the regex fails (e.g. invalid UTF-8)
   */
  private function normalizeText($txt)
  {
    $txt = strip_tags((string)$txt);
    $clean = preg_replace('/[^a-zA-Z0-9\\x{4e00}-\\x{9fa5}]/iu','',$txt);
    // preg_replace() returns null on error; treat that as "nothing to scan".
    return is_string($clean) ? $clean : '';
  }

  /**
   * Split a UTF-8 string into a list of single characters.
   *
   * @param string $txt
   * @return array list of one-character strings (empty on failure)
   */
  private function splitChars($txt)
  {
    if($txt==='') return array();
    $chars = preg_split('//u',$txt,-1,PREG_SPLIT_NO_EMPTY);
    return $chars===false ? array() : $chars;
  }

  /**
   * Scan the text against one batch of banned words.
   *
   * @param array $chars normalized text as a list of characters
   * @param int $page 1-based batch number
   * @param int $size number of ids per batch
   * @return array map of matched word => type
   */
  private function getHitListByPage($chars,$page=1,$size=1000)
  {
    $hitList = array();
    $wordTree = $this->getWordTreeByPage($page,$size);
    if(!$wordTree) return $hitList;
    $len = count($chars);
    for($i=0;$i<$len;$i++)
    {
      // Only walk the trie when some banned word starts with this character.
      if(!isset($wordTree[$chars[$i]])) continue;
      foreach($this->getHitListByTree($chars,$i,$wordTree) as $hit=>$type)
      {
        $hitList[$hit] = $type;
      }
    }
    return $hitList;
  }

  /**
   * Walk the trie from one position and collect every banned word that starts
   * exactly there.
   *
   * At most 50 characters are examined, so longer words are never matched.
   *
   * @param array $chars normalized text as a list of characters
   * @param int $start index in $chars where matching begins
   * @param array $wordTree trie built by getWordTreeByPage()
   * @return array map of matched word => type
   */
  private function getHitListByTree($chars,$start,$wordTree)
  {
    $hitList = array();
    $node = $wordTree;
    $hit = '';
    $end = min(count($chars),$start+50);
    for($i=$start;$i<$end;$i++)
    {
      $char = $chars[$i];
      if(!isset($node[$char])) break;
      $hit .= $char;
      $node = $node[$char];
      if(isset($node['type']))// a complete banned word ends here
      {
        $hitList[$hit] = $node['type'];
      }
    }
    return $hitList;
  }

  /**
   * Build (or fetch from cache) the trie for one batch of banned words.
   *
   * A batch covers rows with status=1 and ($page-1)*$size < id <= $page*$size.
   * Each trie node is an array keyed by the next character; a node that ends a
   * word also carries a 'type' entry. The trie is cached for 300 seconds under
   * 'blackWord_tree_<page>_<size>'.
   *
   * @param int $page 1-based batch number
   * @param int $size number of ids per batch
   * @return array the trie (empty when the batch has no usable words)
   */
  private function getWordTreeByPage($page=1,$size=1000)
  {
    $redis = Rds::factory();
    $memKey = 'blackWord_tree_'.$page.'_'.$size;
    $wordTree = $redis->get($memKey);
    // Anything that is not an array (a miss, or a value the cache could not
    // round-trip) is unusable as a trie, so rebuild it.
    if(!is_array($wordTree))
    {
      $wordTree = array();
      $blackWord = new Model_BlackWord_BlackWord();
      $start = ((int)$page-1)*(int)$size;
      $end = $start + (int)$size;
      $para = array();
      $para['where'] = "status=1 AND id>".$start." AND id<=".$end;
      $result = $blackWord->search($para);
      if($result)
      {
        foreach($result as $value)
        {
          // Compare against '' rather than testing truthiness so the word "0"
          // is not silently dropped.
          if(!isset($value['word']) || (string)$value['word']==='') continue;
          $wordChars = preg_split('/(?<!^)(?!$)/u',(string)$value['word']);
          if($wordChars===false) continue;// not valid UTF-8
          $point = & $wordTree;
          foreach($wordChars as $char)
          {
            $point = & $point[$char];
          }
          $point['type'] = $value['type'];
          unset($point);// drop the reference before the next word
        }
      }
      $redis->setex($memKey,300,$wordTree);
    }
    return $wordTree;
  }
}

文章转载于:http://www.thinkphp.cn/extend/1121.html

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。

相关推荐