每天迁移MySQL历史数据到历史库Python脚本


Posted in Python onApril 13, 2018

本文实例为大家分享了Python每天迁移MySQL历史数据到历史库的具体代码,供大家参考,具体内容如下

#!/usr/bin/env python 
# coding:utf-8 
__author__ = 'John' 
 

import MySQLdb 
import sys 
import datetime 
import time 
 
class ClassMigrate(object): 
  def _get_argv(self): 
    self.usage = """ 
      usage(): 
      python daily_migration.py --source=192.168.1.4:3306/db_name:tab_name/proxy/password \\ 
                    --dest=192.168.1.150:13301/db_name_archive:tab_name_201601/proxy/password \\ 
                    --delete_strategy=delete --primary_key=auto_id --date_col=ut --time_interval=180 
      """ 
    if len(sys.argv) == 1: 
      print self.usage 
      sys.exit(1) 
    elif sys.argv[1] == '--help' or sys.argv[1] == '-h': 
        print self.usage 
        sys.exit() 
    elif len(sys.argv) > 2: 
      for i in sys.argv[1:]: 
        _argv = i.split('=') 
        if _argv[0] == '--source': 
          _list = _argv[1].split('/') 
          self.source_host = _list[0].split(':')[0] 
          self.source_port = int(_list[0].split(':')[1]) 
          self.source_db = _list[1].split(':')[0] 
          self.source_tab = _list[1].split(':')[1] 
          self.source_user = _list[2] 
          self.source_password = _list[3] 
        elif _argv[0] == '--dest': 
          _list = _argv[1].split('/') 
          self.dest_host = _list[0].split(':')[0] 
          self.dest_port = int(_list[0].split(':')[1]) 
          self.dest_db = _list[1].split(':')[0] 
          self.dest_tab = _list[1].split(':')[1] 
          self.dest_user = _list[2] 
          self.dest_password = _list[3] 
        elif _argv[0] == '--delete_strategy': 
          self.deleteStrategy = _argv[1] 
          if self.deleteStrategy not in ('delete', 'drop'): 
            print (self.usage) 
            sys.exit(1) 
        elif _argv[0] == '--primary_key': 
          self.pk = _argv[1] 
        elif _argv[0] == '--date_col': 
          self.date_col = _argv[1] 
        elif _argv[0] == '--time_interval': 
          self.interval = _argv[1] 
        else: 
          print (self.usage) 
          sys.exit(1) 
 
  def __init__(self): 
    self._get_argv() 
## -------------------------------------------------------------------- 
    self.sourcedb_conn_str = MySQLdb.connect(host=self.source_host, port=self.source_port, user=self.source_user, passwd=self.source_password, db=self.source_db, charset='utf8') 
    self.sourcedb_conn_str.autocommit(True) 
    self.destdb_conn_str = MySQLdb.connect(host=self.dest_host, port=self.dest_port, user=self.dest_user, passwd=self.dest_password, db=self.dest_db, charset='utf8') 
    self.destdb_conn_str.autocommit(True) 
## -------------------------------------------------------------------- 
    self.template_tab = self.source_tab + '_template' 
    self.step_size = 20000 
## -------------------------------------------------------------------- 
    self._migCompleteState = False 
    self._deleteCompleteState = False 
## -------------------------------------------------------------------- 
    self.source_cnt = '' 
    self.source_min_id = '' 
    self.source_max_id = '' 
    self.source_checksum = '' 
    self.dest_cn = '' 
## -------------------------------------------------------------------- 
    self.today = time.strftime("%Y-%m-%d") 
    # self.today = '2016-05-30 09:59:40' 

def sourcedb_query(self, sql, sql_type): 
    try: 
      cr = self.sourcedb_conn_str.cursor() 
      cr.execute(sql) 
      if sql_type == 'select': 
        return cr.fetchall() 
      elif sql_type == 'dml': 
        rows = self.sourcedb_conn_str.affected_rows() 
        return rows 
      else: 
        return True 
    except Exception, e: 
      print (str(e) + "<br>") 
      return False 
    finally: 
      cr.close() 

  def destdb_query(self, sql, sql_type, values=''): 
    try: 
      cr = self.destdb_conn_str.cursor() 
      if sql_type == 'select': 
        cr.execute(sql) 
        return cr.fetchall() 
      elif sql_type == 'insertmany': 
        cr.executemany(sql, values) 
        rows = self.destdb_conn_str.affected_rows() 
        return rows 
      else: 
        cr.execute(sql) 
        return True 
    except Exception, e: 
      print (str(e) + "<br>") 
      return False 
    finally: 
      cr.close() 
 
 def create_table_from_source(self): 
    '''''因为tab_name表的数据需要迁移到archive引擎表,所以不适合使用这种方式。 预留作其他用途。''' 
    try: 
      sql = "show create table %s;" % self.source_tab 
      create_str = self.sourcedb_query(sql, 'select')[0][1] 
      create_str = create_str.replace('CREATE TABLE', 'CREATE TABLE IF NOT EXISTS') 
      self.destdb_query(create_str, 'ddl') 
      return True 
    except Exception, e: 
      print (str(e) + "<br>") 
      return False 

  def create_table_from_template(self): 
    try: 
      sql = 'CREATE TABLE IF NOT EXISTS %s like %s;' % (self.dest_tab, self.template_tab) 
      state = self.destdb_query(sql, 'ddl') 
      if state: 
        return True 
      else: 
        return False 
    except Exception, e: 
      print (str(e + "<br>") + "<br>") 
      return False 

  def get_min_max(self): 
    """ 创建目标表、并获取源表需要迁移的总条数、最小id、最大id """ 
    try: 
      print ("\nStarting Migrate at -- %s <br>") % (datetime.datetime.now().__str__()) 
      sql = """select count(*),IFNULL(min(%s),-1),IFNULL(max(%s),-1) from %s where %s >= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 00:00:00') \ 
           and %s <= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 23:59:59') """ \ 
            % (self.pk, self.pk, self.source_tab, self.date_col, self.today, self.interval, self.date_col, self.today, self.interval) 
      q = self.sourcedb_query(sql, 'select') 
      self.source_cnt = q[0][0] 
      self.source_min_id = q[0][1] 
      self.source_max_id = q[0][2] 
      self.source_checksum = str(self.source_cnt) + '_' + str(self.source_min_id) + '_' + str(self.source_max_id) 
      if self.source_cnt == 0 or self.source_min_id == -1 or self.source_max_id == -1: 
        print ("There is 0 record in source table been matched! <br>") 
        return False 
      else: 
        return True 
    except Exception, e: 
      print (str(e) + "<br>") 
      return False 
 
  def migrate_2_destdb(self): 
    try: 
      get_min_max_id = self.get_min_max() 
      if get_min_max_id: 
        k = self.source_min_id 
        desc_sql = "desc %s;" % self.source_tab 
        # self.filed = [] 
        cols = self.sourcedb_query(desc_sql, 'select') 
        # for j in cols: 
        #   self.filed.append(j[0]) 
        fileds = "%s," * len(cols) # 源表有多少个字段,就拼凑多少个%s,拼接到insert语句 
        fileds = fileds.rstrip(',') 
        while k <= self.source_max_id: 
          sql = """select * from %s where %s >= %d and %s< %d \ 
               and %s >= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 00:00:00') \ 
               and %s <= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 23:59:59') """\ 
             % (self.source_tab, self.pk, k, self.pk, k+self.step_size, self.date_col, self.today, self.interval, self.date_col, self.today, self.interval) 
          print ("\n%s <br>") % sql 
          starttime = datetime.datetime.now() 
          results = self.sourcedb_query(sql, 'select') 
          insert_sql = "insert into " + self.dest_tab + " values (%s)" % fileds 
          rows = self.destdb_query(insert_sql, 'insertmany', results) 
          if rows == False: 
            print ("Insert failed!! <br>") 
          else: 
            print ("Inserted %s rows. <br>") % rows 
          endtime = datetime.datetime.now() 
          timeinterval = endtime - starttime 
          print("Elapsed :" + str(timeinterval.seconds) + '.' + str(timeinterval.microseconds) + " seconds <br>") 
          k += self.step_size 
        print ("\nInsert complete at -- %s <br>") % (datetime.datetime.now().__str__()) 
        return True 
      else: 
        return False 
    except Exception, e: 
      print (str(e) + "<br>") 
      return False 
  
  def verify_total_cnt(self): 
    try: 
      sql = """select count(*),IFNULL(min(%s),-1),IFNULL(max(%s),-1) from %s where %s >= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 00:00:00') \ 
           and %s <= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 23:59:59') """ \ 
            % (self.pk, self.pk, self.dest_tab, self.date_col, self.today, self.interval, self.date_col, self.today, self.interval) 
      dest_result = self.destdb_query(sql, 'select') 
      self.dest_cnt = dest_result[0][0] 
      dest_checksum = str(self.dest_cnt) + '_' + str(dest_result[0][1]) + '_' + str(dest_result[0][2]) 
      print ("source_checksum: %s, dest_checksum: %s <br>") % (self.source_checksum, dest_checksum) 
      if self.source_cnt == dest_result[0][0] and dest_result[0][0] != 0 and self.source_checksum == dest_checksum: 
        self._migCompleteState = True 
        print ("Verify successfully !!<br>") 
      else: 
        print ("Verify failed !!<br>") 
        sys.exit(77) 
    except Exception, e: 
      print (str(e) + "<br>") 
  
  def drop_daily_partition(self): 
    try: 
      if self._migCompleteState: 
        sql = """explain partitions select * from %s where %s >= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 00:00:00') 
               and %s <= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 23:59:59') """\ 
             % (self.source_tab, self.date_col, self.today, self.interval, self.date_col, self.today, self.interval) 
        partition_name = self.sourcedb_query(sql, 'select') 
        partition_name = partition_name[0][3] 
 
 
 
 
        sql = """select count(*),IFNULL(min(%s),-1),IFNULL(max(%s),-1) from %s partition (%s)""" \ 
            % (self.pk, self.pk, self.source_tab, partition_name) 
        q = self.sourcedb_query(sql, 'select') 
        source_cnt = q[0][0] 
        source_min_id = q[0][1] 
        source_max_id = q[0][2] 
        checksum = str(source_cnt) + '_' + str(source_min_id) + '_' + str(source_max_id) 
        if source_cnt == 0 or source_min_id == -1 or source_max_id == -1: 
          print ("There is 0 record in source PARTITION been matched! <br>") 
        else: 
          if checksum == self.source_checksum: 
            drop_par_sql = "alter table %s drop partition %s;" % (self.source_tab, partition_name) 
            droped = self.sourcedb_query(drop_par_sql, 'ddl') 
            if droped: 
              print (drop_par_sql + " <br>") 
              print ("\nDrop partition complete at -- %s <br>") % (datetime.datetime.now().__str__()) 
              self._deleteCompleteState = True 
            else: 
              print (drop_par_sql + " <br>") 
              print ("Drop partition failed.. <br>") 
          else: 
            print ("The partition %s checksum failed !! Drop failed !!") % partition_name 
            sys.exit(77) 
    except Exception, e: 
      print (str(e) + "<br>") 
 
  def delete_data(self): 
    try: 
      if self._migCompleteState: 
        k = self.source_min_id 
        while k <= self.source_max_id: 
          sql = """delete from %s where %s >= %d and %s< %d \ 
               and %s >= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 00:00:00') \ 
               and %s <= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 23:59:59') """ \ 
             % (self.source_tab, self.pk, k, self.pk, k+self.step_size, self.date_col, self.today, self.interval, self.date_col, self.today, self.interval) 
          print ("\n%s <br>") % sql 
          starttime = datetime.datetime.now() 
          rows = self.sourcedb_query(sql, 'dml') 
          if rows == False: 
            print ("Delete failed!! <br>") 
          else: 
            print ("Deleted %s rows. <br>") % rows 
          endtime = datetime.datetime.now() 
          timeinterval = endtime - starttime 
          print("Elapsed :" + str(timeinterval.seconds) + '.' + str(timeinterval.microseconds) + " seconds <br>") 
          time.sleep(1) 
          k += self.step_size 
        print ("\nDelete complete at -- %s <br>") % (datetime.datetime.now().__str__()) 
        self._deleteCompleteState = True 
    except Exception, e: 
      print (str(e) + "<br>") 
       
  def do(self): 
    tab_create = self.create_table_from_template() 
    if tab_create: 
      migration = self.migrate_2_destdb() 
      if migration: 
        self.verify_total_cnt() 
        if self._migCompleteState: 
          if self.deleteStrategy == 'drop': 
            self.drop_daily_partition() 
          else: 
            self.delete_data() 
          print ("\n<br>") 
          print ("====="*5 + '<br>') 
          print ("source_total_cnt: %s <br>") % self.source_cnt 
          print ("dest_total_cnt: %s <br>") % self.dest_cnt 
          print ("====="*5 + '<br>') 
          if self._deleteCompleteState: 
            print ("\nFinal result: Successfully !! <br>") 
            sys.exit(88) 
          else: 
            print ("\nFinal result: Failed !! <br>") 
            sys.exit(254) 
    else: 
      print ("Create table failed ! Exiting. . .") 
      sys.exit(255) 
  
f = ClassMigrate() 
f.do()

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持三水点靠木。

Python 相关文章推荐
python自动截取需要区域,进行图像识别的方法
May 17 Python
python 读取目录下csv文件并绘制曲线v111的方法
Jul 06 Python
python 将print输出的内容保存到txt文件中
Jul 17 Python
pycharm: 恢复(reset) 误删文件的方法
Oct 22 Python
Ubuntu下升级 python3.7.1流程备忘(推荐)
Dec 10 Python
python识别图像并提取文字的实现方法
Jun 28 Python
django 消息框架 message使用详解
Jul 22 Python
Python generator生成器和yield表达式详解
Aug 08 Python
使用python接受tgam的脑波数据实例
Apr 09 Python
使用pygame实现垃圾分类小游戏功能(已获校级二等奖)
Jul 23 Python
Python 的 __str__ 和 __repr__ 方法对比
Sep 02 Python
Python PIL按比例裁剪图片
May 11 Python
python实现数据库跨服务器迁移
Apr 12 #Python
解决python3爬虫无法显示中文的问题
Apr 12 #Python
python读取中文txt文本的方法
Apr 12 #Python
基于python 处理中文路径的终极解决方法
Apr 12 #Python
解决Python2.7读写文件中的中文乱码问题
Apr 12 #Python
python 实现对文件夹内的文件排序编号
Apr 12 #Python
pandas数值计算与排序方法
Apr 12 #Python
You might like
小偷PHP+Html+缓存
2006/12/20 PHP
PHP+MYSQL 出现乱码的解决方法
2008/08/08 PHP
PHP获取文件绝对路径的代码(上一级目录)
2011/05/29 PHP
PHP中CURL方法curl_setopt()函数的参数分享
2013/01/19 PHP
PHP中创建图像并绘制文字的例子
2014/11/19 PHP
Laravel解决nesting level错误和隐藏index.php的问题
2019/10/12 PHP
javascript jQuery $.post $.ajax用法
2008/07/09 Javascript
JScript 脚本实现文件下载 一般用于下载木马
2009/10/29 Javascript
关于this和self的使用说明
2010/08/01 Javascript
某页码显示的helper 少量调整,另附js版
2010/09/12 Javascript
jQuery固定浮动侧边栏实现思路及代码
2014/09/28 Javascript
javascript无刷新评论实现方法
2015/05/13 Javascript
javascript实现类似百度分享功能的方法
2015/07/27 Javascript
D3.js实现柱状图的方法详解
2016/09/21 Javascript
jQuery插件扩展实例【添加回调函数】
2016/11/26 Javascript
JS数组搜索之折半搜索实现方法分析
2017/03/27 Javascript
Vue实现购物车功能
2017/04/27 Javascript
node.js中axios使用心得总结
2017/11/29 Javascript
使用FileReader API创建Vue文件阅读器组件
2018/04/03 Javascript
Vue实现自定义下拉菜单功能
2018/07/16 Javascript
通过js给网页加上水印背景实例
2019/06/17 Javascript
用python与文件进行交互的方法
2018/03/01 Python
Python 确定多项式拟合/回归的阶数实例
2018/12/29 Python
对Python3之进程池与回调函数的实例详解
2019/01/22 Python
Python编译为二进制so可执行文件实例
2019/12/23 Python
python剪切视频与合并视频的实现
2020/03/03 Python
keras打印loss对权重的导数方式
2020/06/10 Python
Django数据库迁移常见使用方法
2020/11/12 Python
详解CSS3:overflow属性
2020/11/17 HTML / CSS
精致的手工皮鞋:Shoe Embassy
2019/11/08 全球购物
你经历的项目中的SCM配置项主要有哪些?什么是配置项?
2013/11/04 面试题
客户表扬信范文
2014/01/10 职场文书
2014年平安创建工作总结
2014/11/24 职场文书
小学音乐课教学反思
2016/02/18 职场文书
Nginx反爬虫策略,防止UA抓取网站
2021/03/31 Servers
win10截图快捷键win+shift+s没有反应无法截图怎么解决?
2022/08/14 数码科技