php实现专业获取网站SEO信息类实例


Posted in PHP on April 02, 2015

本文实例讲述了用 PHP 实现的专业网站 SEO 信息获取类,分享给大家供大家参考。具体如下:

这个seo类的功能包括:
- 检查指定的网站是否正常响应
- 从该网站主页获取语言和其他 meta 标签数据
- 从 Alexa 获取网站的流量排名和导入链接数
- 从谷歌获取网站被索引的网页数量和导入链接数
- 从 WOT 获取网站的信任评级
- 获取网站域名的年龄(自首次注册之日起计算)
- 获取网站页面在 Twitter 上被提及的次数
- 获取网站页面在 Facebook 上的分享数
- 获取网站谷歌网页速度等级
- 获取网站的谷歌网页排名

<?php
/**
 *
 * SEO report for different metrics
 *
 * @category SEO
 * @author Chema <chema@garridodiaz.com>
 * @copyright (c) 2009-2012 Open Classifieds Team
 * @license GPL v3
 * Based on seo report script http://www.phpeasycode.com && PHP class SEOstats
 *
 */
class seoreport{
  /**
   * Check whether a URL is online/alive.
   *
   * Issues a GET request and treats the HTTP status codes 200, 302 and 304
   * as "alive". The response body is captured and discarded so that probing
   * a site never writes its content to the output.
   *
   * @param string $url URL to probe.
   * @return bool TRUE when the status code is 200, 302 or 304, FALSE otherwise.
   */
  public static function is_alive($url)
  {
    $ch = curl_init();
    if ($ch === FALSE)
    {
      return FALSE;
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    // Without RETURNTRANSFER curl prints the fetched body straight to output.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    curl_setopt($ch, CURLOPT_FAILONERROR, 1);
    curl_exec($ch);
    $int_return_code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);
    return in_array($int_return_code, array(200, 302, 304), TRUE);
  }
  /**
   * HTTP GET request with curl.
   *
   * Follows up to two redirects and always verifies TLS certificates.
   *
   * @param string $url String, containing the URL to curl.
   * @return string|bool The response body, or FALSE when the request failed
   *                     or the body is empty.
   */
  protected static function get_html($url)
  {
    $ch = curl_init($url);
    if ($ch === FALSE)
    {
      return FALSE;
    }
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 2);
    // Set unconditionally: a plain http URL may still redirect to https.
    // VERIFYHOST must be 2; the value 1 is not a valid setting.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 1);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    $str = curl_exec($ch);
    curl_close($ch);
    return ($str) ? $str : FALSE;
  }
  /**
   * Fetch a URL and decode the response as JSON.
   *
   * @param string $url   URL to request.
   * @param bool   $assoc TRUE to decode objects as associative arrays.
   * @return mixed The decoded value, or NULL when the request failed or the
   *               body is not valid JSON.
   */
  private static function get_json($url, $assoc = FALSE)
  {
    $str = self::get_html($url);
    if ($str === FALSE)
    {
      return NULL;
    }
    return json_decode($str, $assoc);
  }
  /**
   * Run a regular expression and return one capture group of the first match.
   *
   * @param string      $pattern PCRE pattern.
   * @param string|bool $data    Subject text; FALSE/empty is treated as "no match".
   * @param int         $group   Index of the capture group to return.
   * @return string|null The captured text, or NULL when there is no match or
   *                     the group is empty.
   */
  private static function match_group($pattern, $data, $group)
  {
    if (!is_string($data) || $data === '')
    {
      return NULL;
    }
    if (preg_match($pattern, $data, $found) !== 1)
    {
      return NULL;
    }
    return (isset($found[$group]) && $found[$group] !== '') ? $found[$group] : NULL;
  }
  /**
   * Get the domain from any URL.
   *
   * Uses the host component when the URL has one, otherwise the path (which
   * is where parse_url() puts a bare "example.com"). A leading "www." is
   * removed; "www." occurring elsewhere in the name is left intact.
   *
   * @param string $url URL or bare domain.
   * @return string The domain without a leading "www.".
   */
  public static function domain_name($url)
  {
    $url = trim((string) $url);
    $parts = parse_url($url);
    if (is_array($parts) && !empty($parts['host']))
    {
      $domain = $parts['host'];
    }
    elseif (is_array($parts) && isset($parts['path']))
    {
      $domain = $parts['path'];
    }
    else
    {
      // parse_url() could not make sense of the input; fall back to it as-is.
      $domain = $url;
    }
    return (string) preg_replace('/^www\./i', '', $domain);
  }
  /**
   * Get the meta tags of a URL and the language of the page.
   *
   * @param string $url
   * @return array|bool Map of meta name => content, plus a 'language' entry
   *                    taken from the lang attribute of the html element;
   *                    FALSE when the page could not be fetched.
   */
  public static function meta_info($url)
  {
    $html_content = self::get_html($url);
    if (!$html_content)
    {
      return FALSE;
    }
    // Real-world HTML is rarely well-formed; collect libxml complaints
    // internally instead of emitting a PHP warning for each of them.
    $previous = libxml_use_internal_errors(TRUE);
    $html = new DOMDocument();
    $html->loadHTML($html_content);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $xpath = new DOMXPath($html);
    $url_info = array();
    foreach ($xpath->query('//html') as $lang)
    {
      $url_info['language'] = $lang->getAttribute('lang');
    }
    foreach ($xpath->query('//meta') as $meta)
    {
      $name = $meta->getAttribute('name');
      if ($name)
      {
        $url_info[$name] = $meta->getAttribute('content');
      }
    }
    return $url_info;
  }
  /**
   * Alexa rank.
   *
   * NOTE(review): relies on the data.alexa.com XML endpoint; confirm that it
   * is still available before depending on this value.
   *
   * @param string $url
   * @return string|null The rank as a string of digits, or NULL when unavailable.
   */
  public static function alexa_rank($url)
  {
    $domain  = self::domain_name($url);
    $request = "http://data.alexa.com/data?cli=10&dat=s&url=" . urlencode($domain);
    $data    = self::get_html($request);
    // Group 2 is the TEXT attribute, which carries the rank.
    return self::match_group('/<POPULARITY URL="(.*?)" TEXT="([\d]+)"\/>/si', $data, 2);
  }
  /**
   * Alexa inbound links.
   *
   * NOTE(review): relies on the data.alexa.com XML endpoint; confirm that it
   * is still available before depending on this value.
   *
   * @param string $url
   * @return string|null The link count as a string of digits, or NULL when unavailable.
   */
  public static function alexa_links($url)
  {
    $domain  = self::domain_name($url);
    $request = "http://data.alexa.com/data?cli=10&dat=s&url=" . urlencode($domain);
    $data    = self::get_html($request);
    return self::match_group('/<LINKSIN NUM="([\d]+)"\/>/si', $data, 1);
  }
  /**
   * Returns total amount of results for any Google search,
   * requesting the deprecated Websearch API.
   *
   * @param string $url String, containing the search query.
   * @return int The estimated result count, or 0 when it is not available.
   */
  public static function google_pages($url)
  {
    $data = self::get_json('http://ajax.googleapis.com/ajax/services/search/web?v=1.0&rsz=1&q='.urlencode($url));
    return isset($data->responseData->cursor->estimatedResultCount)
        ? intval($data->responseData->cursor->estimatedResultCount)
        : 0;
  }
  /**
   * Gets the inbound links of a site from a Google "link:" search.
   *
   * @param string $url
   * @return string|null The count as printed by Google (may contain
   *                     thousands separators), or NULL when not found.
   */
  public static function google_links($url)
  {
    $request = "http://www.google.com/search?q=" . urlencode("link:" . $url) . "&hl=en";
    $data    = self::get_html($request);
    return self::match_group('/<div id=resultStats>(About )?([\d,]+) result/si', $data, 2);
  }
  /**
   * Web of Trust rating.
   *
   * @param string $url
   * @return string|null The "r" attribute of the first application element
   *                     in the response, or NULL when unavailable.
   */
  public static function WOT_rating($url)
  {
    $domain  = self::domain_name($url);
    $request = "http://api.mywot.com/0.4/public_query2?target=" . urlencode($domain);
    $data    = self::get_html($request);
    $trustworthiness = self::match_group('/<application name="(\d+)" r="(\d+)" c="(\d+)"\/>/si', $data, 2);
    return (is_numeric($trustworthiness)) ? $trustworthiness : NULL;
  }

  /**
   * How old is the domain?
   *
   * Accepts a bare domain or a full URL; the input is reduced to its domain
   * before the whois lookup.
   *
   * @param string $domain
   * @return int|bool Unix timestamp of the creation date, or FALSE when the
   *                  date could not be found or parsed.
   */
  public static function domain_age($domain)
  {
    $request = "http://reports.internic.net/cgi/whois?whois_nic=" . urlencode(self::domain_name($domain)) . "&type=domain";
    $data    = self::get_html($request);
    $created = self::match_group('/Creation Date: ([a-z0-9-]+)/si', $data, 1);
    return ($created === NULL) ? FALSE : strtotime($created);
  }
  /**
   * Counts how many tweets mention the url.
   *
   * @param string $url
   * @return int|null The count reported by the endpoint, or NULL when unavailable.
   */
  public static function tweet_count($url)
  {
    $twitterEndpoint = "http://urls.api.twitter.com/1/urls/count.json?url=%s";
    $json = self::get_json(sprintf($twitterEndpoint, urlencode($url)), TRUE);
    return (is_array($json) && isset($json['count']) && is_numeric($json['count']))
        ? $json['count']
        : NULL;
  }
  /**
   * Returns the total amount of Facebook Shares for a single page.
   *
   * @link     https://graph.facebook.com/
   * @param    string   $q  The URL to check.
   * @return   int      The "shares" value of the response, or 0 when absent.
   */
  public static function facebook_shares($q)
  {
    $data = self::get_json('http://graph.facebook.com/?id='.urlencode($q));
    return isset($data->shares) ? $data->shares : 0;
  }
  /**
   * Get the pagespeed rank over 100.
   *
   * @param string $url
   * @return int The score, or 0 when it is not available.
   */
  public static function page_speed($url)
  {
    $data = self::get_json('https://developers.google.com/_apps/pagespeed/run_pagespeed?url='.urlencode($url).'&format=json');
    return isset($data->results->score) ? intval($data->results->score) : 0;
  }
  /**
   * Get google page rank.
   *
   * @param string $url
   * @return string|null The rank text following the "Rank_" marker, or NULL
   *                     when the response does not contain one.
   */
  public static function page_rank($url)
  {
    $query = "http://toolbarqueries.google.com/tbr?client=navclient-auto&ch=".self::CheckHash(self::HashURL($url))."&features=Rank&q=info:".urlencode($url)."&num=100&filter=0";
    $data = self::get_html($query);
    if ($data === FALSE)
    {
      return NULL;
    }
    $pos = strpos($data, "Rank_");
    if ($pos === FALSE)
    {
      return NULL;
    }
    // Skip "Rank_" and the four characters after it; presumably the response
    // looks like "Rank_1:1:N" - TODO confirm. Trailing whitespace is dropped.
    return trim((string) substr($data, $pos + 9));
  }
  // functions for google pagerank
  /**
   * Fold a string into a number (helper of the PageRank hash).
   *
   * For every byte: multiply the running value by $Magic, reduce it modulo
   * 2^32 once it reaches that bound, then add the byte value.
   *
   * @param string $Str   Input string.
   * @param int    $Check Initial value.
   * @param int    $Magic Multiplier.
   * @return int|float The folded value.
   */
  public static function StrToNum($Str, $Check, $Magic)
  {
    $Int32Unit = 4294967296; // 2^32
    $length = strlen($Str);
    for ($i = 0; $i < $length; $i++) {
      $Check *= $Magic;
      //If the float is beyond the boundaries of integer (usually +/- 2.15e+9 = 2^31),
      // the result of converting to integer is undefined
      // refer to http://www.php.net/manual/en/language.types.integer.php
      if ($Check >= $Int32Unit) {
        $Check = ($Check - $Int32Unit * (int) ($Check / $Int32Unit));
        //if the check less than -2^31
        $Check = ($Check < -2147483648) ? ($Check + $Int32Unit) : $Check;
      }
      // Square brackets: the curly-brace offset syntax is gone in PHP 8.
      $Check += ord($Str[$i]);
    }
    return $Check;
  }
  /**
   * Generate a hash for a url.
   *
   * Combines two StrToNum() passes over the string using fixed bit masks.
   *
   * @param string $String
   * @return int
   */
  public static function HashURL($String)
  {
    $Check1 = self::StrToNum($String, 0x1505, 0x21);
    $Check2 = self::StrToNum($String, 0, 0x1003F);
    $Check1 >>= 2;
    $Check1 = (($Check1 >> 4) & 0x3FFFFC0 ) | ($Check1 & 0x3F);
    $Check1 = (($Check1 >> 4) & 0x3FFC00 ) | ($Check1 & 0x3FF);
    $Check1 = (($Check1 >> 4) & 0x3C000 ) | ($Check1 & 0x3FFF);
    $T1 = (((($Check1 & 0x3C0) << 4) | ($Check1 & 0x3C)) <<2 ) | ($Check2 & 0xF0F );
    $T2 = (((($Check1 & 0xFFFFC000) << 4) | ($Check1 & 0x3C00)) << 0xA) | ($Check2 & 0xF0F0000 );
    return ($T1 | $T2);
  }
  /**
   * Generate a checksum for the hash string.
   *
   * Walks the decimal digits from right to left, doubling every second
   * digit and summing the digits of the doubled value, then derives a check
   * digit from the total.
   *
   * @param int $Hashnum
   * @return string '7', followed by the check digit and the unsigned decimal hash.
   */
  public static function CheckHash($Hashnum)
  {
    $CheckByte = 0;
    $Flag = 0;
    $HashStr = sprintf('%u', $Hashnum) ;
    $length = strlen($HashStr);
    for ($i = $length - 1; $i >= 0; $i --) {
      $Re = (int) $HashStr[$i];
      if (1 === ($Flag % 2)) {
        $Re += $Re;
        $Re = (int)($Re / 10) + ($Re % 10);
      }
      $CheckByte += $Re;
      $Flag ++;
    }
    $CheckByte %= 10;
    if (0 !== $CheckByte) {
      $CheckByte = 10 - $CheckByte;
      if (1 === ($Flag % 2) ) {
        if (1 === ($CheckByte % 2)) {
          $CheckByte += 9;
        }
        $CheckByte >>= 1;
      }
    }
    return '7'.$CheckByte.$HashStr;
  }
}

使用范例

<?php
include 'seoreport.php';
ini_set('max_execution_time', 180);

// Target URL: taken from the query string, falling back to a demo site.
$url = (isset($_GET['url']) && is_string($_GET['url'])) ? trim($_GET['url']) : 'http://phpclasses.org';

// The URL comes from the visitor, so only well-formed http(s) URLs are
// fetched. NOTE(review): this does not stop requests to internal hosts;
// restrict the allowed hosts before exposing this script publicly.
$scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
$is_valid_url = filter_var($url, FILTER_VALIDATE_URL) !== FALSE
    && in_array($scheme, array('http', 'https'), TRUE);

// meta_info() returns FALSE when the page cannot be fetched, which doubles
// as the "is the site online" check.
$meta_tags = $is_valid_url ? seoreport::meta_info($url) : FALSE;

if ($meta_tags !== FALSE)
{
  $stats = array();
  $stats['meta'] = $meta_tags;
  $stats['alexa']['rank'] = seoreport::alexa_rank($url);
  $stats['alexa']['links'] = seoreport::alexa_links($url);
  $stats['domain']['WOT_rating'] = seoreport::WOT_rating($url);
  // The whois lookup expects a domain, not a full URL.
  $stats['domain']['domain_age'] = seoreport::domain_age(seoreport::domain_name($url));
  $stats['social']['twitter'] = seoreport::tweet_count($url);
  $stats['social']['facebook'] = seoreport::facebook_shares($url);
  $stats['google']['page_rank'] = seoreport::page_rank($url);
  $stats['google']['page_speed'] = seoreport::page_speed($url);
  $stats['google']['pages'] = seoreport::google_pages($url);
  $stats['google']['links'] = seoreport::google_links($url);
  var_dump($stats);
}
else
{
  // Escape the URL: it is user input that ends up in the response.
  echo 'Site not online. '.htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
}

希望本文所述对大家的php程序设计有所帮助。

PHP 相关文章推荐
PHP5中MVC结构学习
Oct 09 PHP
php 编写安全的代码时容易犯的错误小结
May 20 PHP
用PHP书写安全的脚本代码
Feb 05 PHP
php截取后台登陆密码的代码
May 05 PHP
非常好用的Zend Framework分页类
Jun 25 PHP
Yii学习总结之数据访问对象 (DAO)
Feb 22 PHP
php.ini中的request_order推荐设置
May 10 PHP
PHP伪造来源HTTP_REFERER的方法实例详解
Jul 06 PHP
Zend Framework教程之Loader以及PluginLoader用法详解
Mar 09 PHP
PHP判断文件是否被引入的方法get_included_files用法示例
Nov 29 PHP
php反射学习之依赖注入示例
Jun 14 PHP
PHP实现财务审核通过后返现金额到客户的功能
Jul 04 PHP
php获得网站访问统计信息类Compete API用法实例
Apr 02 #PHP
php实现从上传文件创建缩略图的方法
Apr 02 #PHP
php调用KyotoTycoon简单实例
Apr 02 #PHP
PHP中数据类型转换的三种方式
Apr 02 #PHP
php在apache环境下实现gzip配置方法
Apr 02 #PHP
PHP中使用socket方式GET、POST数据实例
Apr 02 #PHP
php获取百度收录、百度热词及百度快照的方法
Apr 02 #PHP
You might like
php生成excel文件的简单方法
2014/02/08 PHP
mysql查找删除重复数据并只保留一条实例详解
2016/09/24 PHP
Laravel (Lumen) 解决JWT-Auth刷新token的问题
2019/10/24 PHP
PHP设计模式入门之迭代器模式原理与实现方法分析
2020/04/26 PHP
短信提示使用 特效
2007/01/19 Javascript
javascript 树形导航菜单实例代码
2013/08/13 Javascript
Jquery中children与find之间的区别详细解析
2013/11/29 Javascript
javascript删除字符串最后一个字符
2014/01/14 Javascript
JavaScript绑定事件监听函数的通用方法
2016/05/14 Javascript
浅析jQuery 3.0中的Data
2016/06/14 Javascript
Angularjs 实现分页功能及示例代码
2016/09/14 Javascript
理解javascript中的闭包
2017/01/11 Javascript
微信小程序实现图片压缩功能
2018/01/26 Javascript
详解Angular5路由传值方式及其相关问题
2018/04/28 Javascript
Angularjs Ng_repeat中实现复选框选中并显示不同的样式方法
2018/09/12 Javascript
JavaScript继承的特性与实践应用深入详解
2018/12/30 Javascript
JavaScript实现身份证验证代码实例
2019/08/26 Javascript
对layui数据表格动态cols(字段)动态变化详解
2019/10/25 Javascript
使用 Angular RouteReuseStrategy 缓存(路由)组件的实例代码
2019/11/01 Javascript
[03:17]2016完美“圣”典风云人物:冷冷专访
2016/12/08 DOTA
[56:42]VP vs RNG 2019国际邀请赛小组赛 BO2 第二场 8.15
2019/08/17 DOTA
python字典值排序并取出前n个key值的方法
2018/10/17 Python
如何利用Python分析出微信朋友男女统计图
2019/01/25 Python
Python爬虫:url中带字典列表参数的编码转换方法
2019/08/21 Python
Python3简单爬虫抓取网页图片代码实例
2019/08/26 Python
python异常处理try except过程解析
2020/02/03 Python
python通用读取vcf文件的类(复制粘贴即可用)
2020/02/29 Python
Windows下pycharm安装第三方库失败(通用解决方案)
2020/09/17 Python
Python 排序最长英文单词链(列表中前一个单词末字母是下一个单词的首字母)
2020/12/14 Python
html5中valid、invalid、required的定义
2014/02/21 HTML / CSS
Fanatics官网:运动服装、球衣、运动装备
2020/10/12 全球购物
大四学年自我鉴定
2013/11/13 职场文书
护士长竞聘书
2014/03/31 职场文书
党在我心中演讲稿
2014/09/02 职场文书
导游词之麻姑仙境
2019/11/18 职场文书
Python 数据可视化工具 Pyecharts 安装及应用
2022/04/20 Python