php实现专业获取网站SEO信息类实例


Posted in PHP on April 02, 2015

本文实例讲述了php实现专业获取网站SEO信息类。分享给大家供大家参考。具体如下:

这个seo类的功能包括:
- 检查指定的网站响应
- 获取该网站主页的语言和其他meta标签数据
- 从Alexa获取网站的流量排名和导入链接数
- 从谷歌获取网站的导入链接数和被索引的网页数量
- 从WOT获取网站的信任度评级
- 获取网站域名的年龄(自首次注册之日起计算)
- 获取网站页面在Twitter上被提及的次数
- 获取网站页面在Facebook上的分享数
- 获取网站谷歌网页速度等级
- 获取网站的谷歌网页排名

<?php
/**
 *
 * SEO report for different metrics
 *
 * Collection of static helpers that query third-party services (Alexa,
 * Google, WOT, InterNIC whois, Twitter, Facebook) about a given URL.
 * Every lookup is best effort: when the remote answer is missing or has an
 * unexpected format the method returns NULL / FALSE / 0 as documented on
 * each method instead of raising notices.
 *
 * NOTE(review): the remote endpoints hard-coded below date from 2012; verify
 * that each one is still available before relying on its result.
 *
 * @category SEO
 * @author Chema <chema@garridodiaz.com>
 * @copyright (c) 2009-2012 Open Classifieds Team
 * @license GPL v3
 * Based on seo report script http://www.phpeasycode.com && PHP class SEOstats
 *
 */
class seoreport{
  /**
   *
   * check if a url is online/alive
   *
   * The URL counts as alive when the final HTTP status is 200, 302 or 304.
   *
   * @param string $url
   * @return bool
   */
  public static function is_alive($url)
  {
    $ch = curl_init();
    if ($ch === FALSE)
    {
      return FALSE;
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    // Keep the response body in memory; without this curl_exec() prints the
    // whole page to the output of the calling script.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Same connect timeout as get_html() so a dead host cannot stall the report.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    curl_setopt($ch, CURLOPT_FAILONERROR, 1);
    // The header callback is not defined in this file; only register it when
    // the host application actually provides it.
    if (is_callable('curlHeaderCallback'))
    {
      curl_setopt($ch, CURLOPT_HEADERFUNCTION, 'curlHeaderCallback');
    }
    curl_exec($ch);
    $int_return_code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);
    return in_array($int_return_code, array(200, 302, 304), TRUE);
  }
  /**
   * HTTP GET request with curl.
   *
   * Follows at most 2 redirects and gives up connecting after 5 seconds.
   * For https URLs the peer certificate and host name are verified.
   *
   * @param string $url String, containing the URL to curl.
   * @return string|bool Returns the response body, or FALSE when the request
   *                     failed or the body is empty.
   *
   */
  protected static function get_html($url)
  {
    $ch = curl_init($url);
    if ($ch === FALSE)
    {
      return FALSE;
    }
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 2);
    if (strtolower((string) parse_url($url, PHP_URL_SCHEME)) == 'https')
    {
      curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 1);
      // 2 = certificate name must match the host; the value 1 is no longer
      // accepted by libcurl as a host check.
      curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
    }
    $str = curl_exec($ch);
    curl_close($ch);
    return (is_string($str) && $str !== '') ? $str : FALSE;
  }
  /**
   * Runs a regular expression and returns one captured group.
   *
   * @param string $pattern PCRE pattern
   * @param mixed $subject text to search (FALSE from a failed request is treated as empty)
   * @param integer $group index of the capture group to return
   * @return string|null the captured text, NULL when there is no match or the capture is empty/"0"
   */
  private static function first_capture($pattern, $subject, $group)
  {
    if (preg_match($pattern, (string) $subject, $found) && !empty($found[$group]))
    {
      return $found[$group];
    }
    return NULL;
  }
  /**
   *
   * get the domain from any URL
   *
   * Returns the host part with a leading "www." removed. Input without a
   * scheme ("www.example.com/page") is parsed as if it were an http URL.
   * When no host can be extracted the trimmed input is returned.
   *
   * @param string $url
   * @return string
   */
  public static function domain_name($url)
  {
    $url = trim((string) $url);
    $host = parse_url($url, PHP_URL_HOST);
    if (empty($host))
    {
      // Scheme-less input is seen by parse_url() as a path only; retry with
      // a scheme so the host can be isolated.
      $host = parse_url('http://' . ltrim($url, '/'), PHP_URL_HOST);
    }
    if (empty($host))
    {
      $host = $url;
    }
    // Only a leading "www." label is dropped ("awww.example.com" stays intact).
    return preg_replace('/^www\./i', '', $host);
  }
  /**
   *
   * get the metas from a url and the language of the site
   *
   * The result maps each <meta name="..."> to its content attribute and, when
   * an <html> element exists, 'language' to its lang attribute.
   *
   * @param string $url
   * @return array|bool FALSE when the page could not be downloaded
   */
  public static function meta_info($url)
  {
    //doesn't work at mediatemple
    /*$html = new DOMDocument();
    if(!$html->loadHtmlFile($url))
      return FALSE;*/
    $html_content = self::get_html($url);
    if (!$html_content)
    {
      return FALSE;
    }
    // Real-world HTML is rarely valid; collect parser complaints internally
    // instead of emitting one PHP warning per markup error.
    $previous = libxml_use_internal_errors(TRUE);
    $html = new DOMDocument();
    $html->loadHTML($html_content);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    $xpath = new DOMXPath($html);
    $url_info = array();
    $langs = $xpath->query('//html');
    foreach ($langs as $lang)
    {
      $url_info['language'] = $lang->getAttribute('lang');
    }
    $metas = $xpath->query('//meta');
    foreach ($metas as $meta)
    {
      if ($meta->getAttribute('name'))
      {
        $url_info[$meta->getAttribute('name')] = $meta->getAttribute('content');
      }
    }
    return $url_info;
  }
  /**
   *
   * Alexa rank
   * @param string $url
   * @return string|null digits of the TEXT attribute of the POPULARITY element, NULL when absent
   */
  public static function alexa_rank($url)
  {
    $domain  = self::domain_name($url);
    $request = "http://data.alexa.com/data?cli=10&dat=s&url=" . urlencode($domain);
    $data    = self::get_html($request);
    // Group 2 holds the rank; group 1 is the URL attribute.
    return self::first_capture('/<POPULARITY URL="(.*?)" TEXT="([\d]+)"\/>/si', $data, 2);
  }
  /**
   *
   * Alexa inbounds link
   * @param string $url
   * @return string|null digits of the NUM attribute of the LINKSIN element, NULL when absent
   */
  public static function alexa_links($url)
  {
    $domain  = self::domain_name($url);
    $request = "http://data.alexa.com/data?cli=10&dat=s&url=" . urlencode($domain);
    $data    = self::get_html($request);
    return self::first_capture('/<LINKSIN NUM="([\d]+)"\/>/si', $data, 1);
  }
  /**
   * Returns total amount of results for any Google search,
   * requesting the deprecated Websearch API.
   *
   * @param    string    $url     String, containing the search query.
   * @return    integer          Returns a total count, 0 when the response
   *                             carries no estimatedResultCount.
   */
  public static function google_pages($url)
  {
    //$query = self::domain_name($url);
    $request = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&rsz=1&q=' . urlencode($url);
    $str = self::get_html($request);
    $data = json_decode((string) $str);
    return (isset($data->responseData->cursor->estimatedResultCount))
        ? intval($data->responseData->cursor->estimatedResultCount)
        : 0;
  }
  /**
   *
   * gets the inbounds links from a site
   * @param string $url
   * @return string|null result count as printed by Google (may contain commas), NULL when not found
   */
  public static function google_links($url)
  {
    $request = "http://www.google.com/search?q=" . urlencode("link:" . $url) . "&hl=en";
    $data    = self::get_html($request);
    return self::first_capture('/<div id=resultStats>(About )?([\d,]+) result/si', $data, 2);
  }
  /**
   *
   * web of trust rating
   *
   * Reads the r attribute of the first <application> element of the answer.
   *
   * @param string $url
   * @return string|null numeric string, NULL when the answer has no usable rating
   */
  public static function WOT_rating($url)
  {
    $domain  = self::domain_name($url);
    $request = "http://api.mywot.com/0.4/public_query2?target=" . urlencode($domain);
    $data    = self::get_html($request);
    preg_match_all('/<application name="(\d+)" r="(\d+)" c="(\d+)"\/>/si', (string) $data, $regs);
    $trustworthiness = (!empty($regs[2][0])) ? $regs[2][0] : NULL;
    return (is_numeric($trustworthiness)) ? $trustworthiness : NULL;
  }

  /**
   *
   * how old is the domain?
   *
   * Accepts a bare domain or a full URL; the value is reduced to its domain
   * with domain_name() before the whois page is requested.
   *
   * @param string $domain
   * @return integer|bool unixtime of the "Creation Date" line, FALSE when it is missing or unparsable
   */
  public static function domain_age($domain)
  {
    $domain  = self::domain_name($domain);
    $request = "http://reports.internic.net/cgi/whois?whois_nic=" . urlencode($domain) . "&type=domain";
    $data    = self::get_html($request);
    $created = self::first_capture('/Creation Date: ([a-z0-9-]+)/si', $data, 1);
    return ($created === NULL) ? FALSE : strtotime($created);
  }
  /**
   *
   * counts how many tweets about the url
   * @param string $url
   * @return integer|null the numeric "count" field of the answer, NULL when unavailable
   */
  public static function tweet_count($url)
  {
    $twitterEndpoint = "http://urls.api.twitter.com/1/urls/count.json?url=%s";
    // Goes through get_html() like every other lookup, so it shares the same
    // timeout and does not depend on allow_url_fopen.
    $fileData = self::get_html(sprintf($twitterEndpoint, urlencode($url)));
    if ($fileData === FALSE)
    {
      return NULL;
    }
    $json = json_decode($fileData, true);
    return (is_array($json) && isset($json['count']) && is_numeric($json['count']))
        ? $json['count']
        : NULL;
  }
  /**
   * Returns the total amount of Facebook Shares for a single page
   *
   * @link     https://graph.facebook.com/
   * @param     string   The URL to check.
   * @return    integer  Returns the "shares" field of the answer, 0 when it is absent.
   */
  public static function facebook_shares($q)
  {
    //Execution and result of Json
    $str = self::get_html('http://graph.facebook.com/?id='.urlencode($q));
    $data = json_decode((string) $str);
    //Return only number of facebook shares
    return (isset($data->shares) && $data->shares != NULL) ? $data->shares : 0;
  }
  /**
   *
   * get the pagespeed rank over 100
   * @param string $url
   * @return integer results->score of the answer, 0 when it is absent
   */
  public static function page_speed($url)
  {
    $request = 'https://developers.google.com/_apps/pagespeed/run_pagespeed?url='.urlencode($url).'&format=json';
    $str = self::get_html($request);
    $data = json_decode((string) $str);
    return (isset($data->results->score)) ? intval($data->results->score) : 0;
  }
  /**
   *
   * get google page rank
   *
   * The toolbar answer is searched for "Rank_"; everything from 9 characters
   * after that position is returned with surrounding whitespace removed.
   *
   * @param string $url
   * @return string|null NULL when the answer does not contain "Rank_"
   */
  public static function page_rank($url)
  {
    // The checksum is computed over the raw URL; only the query parameter is encoded.
    $query = "http://toolbarqueries.google.com/tbr?client=navclient-auto&ch=".self::CheckHash(self::HashURL($url)). "&features=Rank&q=info:".urlencode($url)."&num=100&filter=0";
    $data = (string) self::get_html($query);
    $pos  = strpos($data, "Rank_");
    if ($pos === false)
    {
      return NULL;
    }
    // trim() drops the line break that follows the value in the raw answer.
    return trim(substr($data, $pos + 9));
  }
  // functions for google pagerank
  /**
   * To calculate PR functions
   *
   * Folds the bytes of $Str into $Check: for every byte the running value is
   * multiplied by $Magic, reduced modulo 2^32 once it reaches 2^32, and the
   * byte value is added.
   *
   * @param string $Str text to fold
   * @param integer $Check start value
   * @param integer $Magic multiplier
   * @return integer|float the folded value
   */
  public static function StrToNum($Str, $Check, $Magic)
  {
    $Int32Unit = 4294967296; // 2^32
    $length = strlen($Str);
    for ($i = 0; $i < $length; $i++) {
      $Check *= $Magic;
      //If the float is beyond the boundaries of integer (usually +/- 2.15e+9 = 2^31),
      // the result of converting to integer is undefined
      // refer to http://www.php.net/manual/en/language.types.integer.php
      if ($Check >= $Int32Unit) {
        $Check = ($Check - $Int32Unit * (int) ($Check / $Int32Unit));
        //if the check less than -2^31
        $Check = ($Check < -2147483648) ? ($Check + $Int32Unit) : $Check;
      }
      // Square-bracket offset: the curly-brace form no longer parses on PHP 8.
      $Check += ord($Str[$i]);
    }
    return $Check;
  }
  /**
   * Generate a hash for a url
   *
   * Combines two StrToNum() folds of the string (start 0x1505 / multiplier
   * 0x21 and start 0 / multiplier 0x1003F) with the shifts and masks below.
   *
   * @param string $String
   * @return integer
   */
  public static function HashURL($String)
  {
    $Check1 = self::StrToNum($String, 0x1505, 0x21);
    $Check2 = self::StrToNum($String, 0, 0x1003F);
    $Check1 >>= 2;
    $Check1 = (($Check1 >> 4) & 0x3FFFFC0 ) | ($Check1 & 0x3F);
    $Check1 = (($Check1 >> 4) & 0x3FFC00 ) | ($Check1 & 0x3FF);
    $Check1 = (($Check1 >> 4) & 0x3C000 ) | ($Check1 & 0x3FFF);
    $T1 = (((($Check1 & 0x3C0) << 4) | ($Check1 & 0x3C)) <<2 ) | ($Check2 & 0xF0F );
    $T2 = (((($Check1 & 0xFFFFC000) << 4) | ($Check1 & 0x3C00)) << 0xA) | ($Check2 & 0xF0F0000 );
    return ($T1 | $T2);
  }
  /**
   * generate a checksum for the hash string
   *
   * Walks the decimal digits of the hash from right to left, doubling every
   * second digit (and summing the digits of the doubled value), derives a
   * check digit from the total and returns "7" . check digit . hash.
   *
   * @param integer $Hashnum
   * @return string
   */
  public static function CheckHash($Hashnum)
  {
    $CheckByte = 0;
    $Flag = 0;
    $HashStr = sprintf('%u', $Hashnum) ;
    $length = strlen($HashStr);
    for ($i = $length - 1; $i >= 0; $i --) {
      $Re = (int) $HashStr[$i];
      if (1 === ($Flag % 2)) {
        $Re += $Re;
        $Re = (int)($Re / 10) + ($Re % 10);
      }
      $CheckByte += $Re;
      $Flag ++;
    }
    $CheckByte %= 10;
    if (0 !== $CheckByte) {
      $CheckByte = 10 - $CheckByte;
      if (1 === ($Flag % 2) ) {
        if (1 === ($CheckByte % 2)) {
          $CheckByte += 9;
        }
        $CheckByte >>= 1;
      }
    }
    return '7'.$CheckByte.$HashStr;
  }
}

使用范例

<?php
// Usage example: collect every metric for the URL given in ?url= (or a
// default site) and dump the resulting array.
require_once 'seoreport.php';
// The report performs about a dozen remote requests; allow it time to finish.
ini_set('max_execution_time', '180');
  // Only accept a plain string from the query string (?url[]=x yields an array).
  $url = (isset($_GET['url']) && is_string($_GET['url'])) ? $_GET['url'] : 'http://phpclasses.org';
  $meta_tags = seoreport::meta_info($url);
  //die(var_dump($meta_tags));
  //first check if site online
  if ($meta_tags!==FALSE)
  {
    $stats = array();
    $stats['meta'] = $meta_tags;
    $stats['alexa']['rank'] = seoreport::alexa_rank($url);
    $stats['alexa']['links'] = seoreport::alexa_links($url);
    $stats['domain']['WOT_rating'] = seoreport::WOT_rating($url);
    $stats['domain']['domain_age'] = seoreport::domain_age($url);
    $stats['social']['twitter'] = seoreport::tweet_count($url);
    $stats['social']['facebook'] = seoreport::facebook_shares($url);
    $stats['google']['page_rank'] = seoreport::page_rank($url);
    $stats['google']['page_speed'] = seoreport::page_speed($url);
    $stats['google']['pages'] = seoreport::google_pages($url);
    $stats['google']['links'] = seoreport::google_links($url);
    var_dump($stats);
  }
  else
  {
    // The URL comes from the request, so escape it before writing it to the page.
    echo 'Site not online. '.htmlspecialchars($url, ENT_QUOTES, 'UTF-8');
  }

希望本文所述对大家的php程序设计有所帮助。

PHP 相关文章推荐
详解PHP显示MySQL数据的三种方法
Jun 05 PHP
php下关于中英数字混排的字符串分割问题
Apr 06 PHP
php替换超长文本中的特殊字符的函数代码
May 22 PHP
php查看请求头信息获取远程图片大小的方法分享
Dec 25 PHP
php查看网页源代码的方法
Mar 13 PHP
php生成PDF格式文件并且加密
Jun 22 PHP
php实现网站文件批量压缩下载功能
Oct 28 PHP
php 截取GBK文档某个位置开始的n个字符方法
Mar 08 PHP
thinkphp3.2实现跨控制器调用其他模块的方法
Mar 14 PHP
PHP 进度条函数的简单实例
Sep 19 PHP
详细解读php的命名空间(二)
Feb 21 PHP
Smarty缓存机制实例详解【三种缓存方式】
Jul 20 PHP
php获得网站访问统计信息类Compete API用法实例
Apr 02 #PHP
php实现从上传文件创建缩略图的方法
Apr 02 #PHP
php调用KyotoTycoon简单实例
Apr 02 #PHP
PHP中数据类型转换的三种方式
Apr 02 #PHP
php在apache环境下实现gzip配置方法
Apr 02 #PHP
PHP中使用socket方式GET、POST数据实例
Apr 02 #PHP
php获取百度收录、百度热词及百度快照的方法
Apr 02 #PHP
You might like
《星际争霸2》终章已出 RTS时代宣告终结
2017/02/07 星际争霸
PHP学习笔记 用户注册模块用户类以及验证码类
2011/09/20 PHP
php生成固定长度纯数字编码的方法
2015/07/09 PHP
php时间计算相关问题小结
2016/05/09 PHP
PHP图片裁剪与缩放示例(无损裁剪图片)
2017/02/08 PHP
JavaScript学习点滴 call、apply的区别
2010/10/22 Javascript
jquery 插件学习(三)
2012/08/06 Javascript
javascript中indexOf技术详解
2015/05/07 Javascript
Javascript 是你的高阶函数(高级应用)
2015/06/15 Javascript
jquery常用函数与方法汇总
2015/09/01 Javascript
JavaScript实现获取某个元素相邻兄弟节点的prev与next方法
2016/01/25 Javascript
三种Node.js写文件的方式
2016/03/08 Javascript
JavaScript实战之菜单特效
2016/08/16 Javascript
jQuery EasyUI ProgressBar进度条组件
2017/02/28 Javascript
Bootstrap提示框效果的实例代码
2017/07/12 Javascript
html中通过JS获取JSON数据并加载的方法
2017/11/30 Javascript
Vue打包后出现一些map文件的解决方法
2018/02/13 Javascript
微信小程序 wepy框架与iview-weapp的用法详解
2019/04/10 Javascript
浅谈Python中用datetime包进行对时间的一些操作
2016/06/23 Python
PyCharm代码格式调整方法
2018/05/23 Python
python之cv2与图像的载入、显示和保存实例
2018/12/05 Python
详解pandas安装若干异常及解决方案总结
2019/01/10 Python
Python 项目转化为so文件实例
2019/12/23 Python
浅谈ROC曲线的最佳阈值如何选取
2020/02/28 Python
Python3开发环境搭建详细教程
2020/06/18 Python
使用PyWeChatSpy自动回复微信拍一拍功能的实现代码
2020/07/02 Python
python爬虫破解字体加密案例详解
2021/03/02 Python
澳大利亚儿童鞋在线:The Trybe
2019/07/16 全球购物
正宗的澳大利亚Ugg靴子零售商:UGG Express
2020/04/19 全球购物
override和overload的区别
2016/03/09 面试题
生物科学专业职业规划书范文
2014/02/11 职场文书
学生打架检讨书
2014/02/14 职场文书
数学教师个人总结
2015/02/06 职场文书
护士旷工检讨书
2015/08/15 职场文书
python 离散点图画法的实现
2022/04/01 Python
python三子棋游戏
2022/05/04 Python