每天迁移MySQL历史数据到历史库Python脚本


Posted in Python onApril 13, 2018

本文实例为大家分享了Python每天迁移MySQL历史数据到历史库的具体代码,供大家参考,具体内容如下

#!/usr/bin/env python 
# coding:utf-8 
__author__ = 'John' 
 

import MySQLdb 
import sys 
import datetime 
import time 
 
class ClassMigrate(object): 
  def _get_argv(self): 
    self.usage = """ 
      usage(): 
      python daily_migration.py --source=192.168.1.4:3306/db_name:tab_name/proxy/password \\ 
                    --dest=192.168.1.150:13301/db_name_archive:tab_name_201601/proxy/password \\ 
                    --delete_strategy=delete --primary_key=auto_id --date_col=ut --time_interval=180 
      """ 
    if len(sys.argv) == 1: 
      print self.usage 
      sys.exit(1) 
    elif sys.argv[1] == '--help' or sys.argv[1] == '-h': 
        print self.usage 
        sys.exit() 
    elif len(sys.argv) > 2: 
      for i in sys.argv[1:]: 
        _argv = i.split('=') 
        if _argv[0] == '--source': 
          _list = _argv[1].split('/') 
          self.source_host = _list[0].split(':')[0] 
          self.source_port = int(_list[0].split(':')[1]) 
          self.source_db = _list[1].split(':')[0] 
          self.source_tab = _list[1].split(':')[1] 
          self.source_user = _list[2] 
          self.source_password = _list[3] 
        elif _argv[0] == '--dest': 
          _list = _argv[1].split('/') 
          self.dest_host = _list[0].split(':')[0] 
          self.dest_port = int(_list[0].split(':')[1]) 
          self.dest_db = _list[1].split(':')[0] 
          self.dest_tab = _list[1].split(':')[1] 
          self.dest_user = _list[2] 
          self.dest_password = _list[3] 
        elif _argv[0] == '--delete_strategy': 
          self.deleteStrategy = _argv[1] 
          if self.deleteStrategy not in ('delete', 'drop'): 
            print (self.usage) 
            sys.exit(1) 
        elif _argv[0] == '--primary_key': 
          self.pk = _argv[1] 
        elif _argv[0] == '--date_col': 
          self.date_col = _argv[1] 
        elif _argv[0] == '--time_interval': 
          self.interval = _argv[1] 
        else: 
          print (self.usage) 
          sys.exit(1) 
 
  def __init__(self): 
    self._get_argv() 
## -------------------------------------------------------------------- 
    self.sourcedb_conn_str = MySQLdb.connect(host=self.source_host, port=self.source_port, user=self.source_user, passwd=self.source_password, db=self.source_db, charset='utf8') 
    self.sourcedb_conn_str.autocommit(True) 
    self.destdb_conn_str = MySQLdb.connect(host=self.dest_host, port=self.dest_port, user=self.dest_user, passwd=self.dest_password, db=self.dest_db, charset='utf8') 
    self.destdb_conn_str.autocommit(True) 
## -------------------------------------------------------------------- 
    self.template_tab = self.source_tab + '_template' 
    self.step_size = 20000 
## -------------------------------------------------------------------- 
    self._migCompleteState = False 
    self._deleteCompleteState = False 
## -------------------------------------------------------------------- 
    self.source_cnt = '' 
    self.source_min_id = '' 
    self.source_max_id = '' 
    self.source_checksum = '' 
    self.dest_cn = '' 
## -------------------------------------------------------------------- 
    self.today = time.strftime("%Y-%m-%d") 
    # self.today = '2016-05-30 09:59:40' 

def sourcedb_query(self, sql, sql_type): 
    try: 
      cr = self.sourcedb_conn_str.cursor() 
      cr.execute(sql) 
      if sql_type == 'select': 
        return cr.fetchall() 
      elif sql_type == 'dml': 
        rows = self.sourcedb_conn_str.affected_rows() 
        return rows 
      else: 
        return True 
    except Exception, e: 
      print (str(e) + "<br>") 
      return False 
    finally: 
      cr.close() 

  def destdb_query(self, sql, sql_type, values=''): 
    try: 
      cr = self.destdb_conn_str.cursor() 
      if sql_type == 'select': 
        cr.execute(sql) 
        return cr.fetchall() 
      elif sql_type == 'insertmany': 
        cr.executemany(sql, values) 
        rows = self.destdb_conn_str.affected_rows() 
        return rows 
      else: 
        cr.execute(sql) 
        return True 
    except Exception, e: 
      print (str(e) + "<br>") 
      return False 
    finally: 
      cr.close() 
 
 def create_table_from_source(self): 
    '''''因为tab_name表的数据需要迁移到archive引擎表,所以不适合使用这种方式。 预留作其他用途。''' 
    try: 
      sql = "show create table %s;" % self.source_tab 
      create_str = self.sourcedb_query(sql, 'select')[0][1] 
      create_str = create_str.replace('CREATE TABLE', 'CREATE TABLE IF NOT EXISTS') 
      self.destdb_query(create_str, 'ddl') 
      return True 
    except Exception, e: 
      print (str(e) + "<br>") 
      return False 

  def create_table_from_template(self): 
    try: 
      sql = 'CREATE TABLE IF NOT EXISTS %s like %s;' % (self.dest_tab, self.template_tab) 
      state = self.destdb_query(sql, 'ddl') 
      if state: 
        return True 
      else: 
        return False 
    except Exception, e: 
      print (str(e + "<br>") + "<br>") 
      return False 

  def get_min_max(self): 
    """ 创建目标表、并获取源表需要迁移的总条数、最小id、最大id """ 
    try: 
      print ("\nStarting Migrate at -- %s <br>") % (datetime.datetime.now().__str__()) 
      sql = """select count(*),IFNULL(min(%s),-1),IFNULL(max(%s),-1) from %s where %s >= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 00:00:00') \ 
           and %s <= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 23:59:59') """ \ 
            % (self.pk, self.pk, self.source_tab, self.date_col, self.today, self.interval, self.date_col, self.today, self.interval) 
      q = self.sourcedb_query(sql, 'select') 
      self.source_cnt = q[0][0] 
      self.source_min_id = q[0][1] 
      self.source_max_id = q[0][2] 
      self.source_checksum = str(self.source_cnt) + '_' + str(self.source_min_id) + '_' + str(self.source_max_id) 
      if self.source_cnt == 0 or self.source_min_id == -1 or self.source_max_id == -1: 
        print ("There is 0 record in source table been matched! <br>") 
        return False 
      else: 
        return True 
    except Exception, e: 
      print (str(e) + "<br>") 
      return False 
 
  def migrate_2_destdb(self): 
    try: 
      get_min_max_id = self.get_min_max() 
      if get_min_max_id: 
        k = self.source_min_id 
        desc_sql = "desc %s;" % self.source_tab 
        # self.filed = [] 
        cols = self.sourcedb_query(desc_sql, 'select') 
        # for j in cols: 
        #   self.filed.append(j[0]) 
        fileds = "%s," * len(cols) # 源表有多少个字段,就拼凑多少个%s,拼接到insert语句 
        fileds = fileds.rstrip(',') 
        while k <= self.source_max_id: 
          sql = """select * from %s where %s >= %d and %s< %d \ 
               and %s >= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 00:00:00') \ 
               and %s <= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 23:59:59') """\ 
             % (self.source_tab, self.pk, k, self.pk, k+self.step_size, self.date_col, self.today, self.interval, self.date_col, self.today, self.interval) 
          print ("\n%s <br>") % sql 
          starttime = datetime.datetime.now() 
          results = self.sourcedb_query(sql, 'select') 
          insert_sql = "insert into " + self.dest_tab + " values (%s)" % fileds 
          rows = self.destdb_query(insert_sql, 'insertmany', results) 
          if rows == False: 
            print ("Insert failed!! <br>") 
          else: 
            print ("Inserted %s rows. <br>") % rows 
          endtime = datetime.datetime.now() 
          timeinterval = endtime - starttime 
          print("Elapsed :" + str(timeinterval.seconds) + '.' + str(timeinterval.microseconds) + " seconds <br>") 
          k += self.step_size 
        print ("\nInsert complete at -- %s <br>") % (datetime.datetime.now().__str__()) 
        return True 
      else: 
        return False 
    except Exception, e: 
      print (str(e) + "<br>") 
      return False 
  
  def verify_total_cnt(self): 
    try: 
      sql = """select count(*),IFNULL(min(%s),-1),IFNULL(max(%s),-1) from %s where %s >= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 00:00:00') \ 
           and %s <= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 23:59:59') """ \ 
            % (self.pk, self.pk, self.dest_tab, self.date_col, self.today, self.interval, self.date_col, self.today, self.interval) 
      dest_result = self.destdb_query(sql, 'select') 
      self.dest_cnt = dest_result[0][0] 
      dest_checksum = str(self.dest_cnt) + '_' + str(dest_result[0][1]) + '_' + str(dest_result[0][2]) 
      print ("source_checksum: %s, dest_checksum: %s <br>") % (self.source_checksum, dest_checksum) 
      if self.source_cnt == dest_result[0][0] and dest_result[0][0] != 0 and self.source_checksum == dest_checksum: 
        self._migCompleteState = True 
        print ("Verify successfully !!<br>") 
      else: 
        print ("Verify failed !!<br>") 
        sys.exit(77) 
    except Exception, e: 
      print (str(e) + "<br>") 
  
  def drop_daily_partition(self): 
    try: 
      if self._migCompleteState: 
        sql = """explain partitions select * from %s where %s >= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 00:00:00') 
               and %s <= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 23:59:59') """\ 
             % (self.source_tab, self.date_col, self.today, self.interval, self.date_col, self.today, self.interval) 
        partition_name = self.sourcedb_query(sql, 'select') 
        partition_name = partition_name[0][3] 
 
 
 
 
        sql = """select count(*),IFNULL(min(%s),-1),IFNULL(max(%s),-1) from %s partition (%s)""" \ 
            % (self.pk, self.pk, self.source_tab, partition_name) 
        q = self.sourcedb_query(sql, 'select') 
        source_cnt = q[0][0] 
        source_min_id = q[0][1] 
        source_max_id = q[0][2] 
        checksum = str(source_cnt) + '_' + str(source_min_id) + '_' + str(source_max_id) 
        if source_cnt == 0 or source_min_id == -1 or source_max_id == -1: 
          print ("There is 0 record in source PARTITION been matched! <br>") 
        else: 
          if checksum == self.source_checksum: 
            drop_par_sql = "alter table %s drop partition %s;" % (self.source_tab, partition_name) 
            droped = self.sourcedb_query(drop_par_sql, 'ddl') 
            if droped: 
              print (drop_par_sql + " <br>") 
              print ("\nDrop partition complete at -- %s <br>") % (datetime.datetime.now().__str__()) 
              self._deleteCompleteState = True 
            else: 
              print (drop_par_sql + " <br>") 
              print ("Drop partition failed.. <br>") 
          else: 
            print ("The partition %s checksum failed !! Drop failed !!") % partition_name 
            sys.exit(77) 
    except Exception, e: 
      print (str(e) + "<br>") 
 
  def delete_data(self): 
    try: 
      if self._migCompleteState: 
        k = self.source_min_id 
        while k <= self.source_max_id: 
          sql = """delete from %s where %s >= %d and %s< %d \ 
               and %s >= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 00:00:00') \ 
               and %s <= CONCAT(DATE_FORMAT(DATE_ADD('%s', INTERVAL -%s day),'%%Y-%%m-%%d'), ' 23:59:59') """ \ 
             % (self.source_tab, self.pk, k, self.pk, k+self.step_size, self.date_col, self.today, self.interval, self.date_col, self.today, self.interval) 
          print ("\n%s <br>") % sql 
          starttime = datetime.datetime.now() 
          rows = self.sourcedb_query(sql, 'dml') 
          if rows == False: 
            print ("Delete failed!! <br>") 
          else: 
            print ("Deleted %s rows. <br>") % rows 
          endtime = datetime.datetime.now() 
          timeinterval = endtime - starttime 
          print("Elapsed :" + str(timeinterval.seconds) + '.' + str(timeinterval.microseconds) + " seconds <br>") 
          time.sleep(1) 
          k += self.step_size 
        print ("\nDelete complete at -- %s <br>") % (datetime.datetime.now().__str__()) 
        self._deleteCompleteState = True 
    except Exception, e: 
      print (str(e) + "<br>") 
       
  def do(self): 
    tab_create = self.create_table_from_template() 
    if tab_create: 
      migration = self.migrate_2_destdb() 
      if migration: 
        self.verify_total_cnt() 
        if self._migCompleteState: 
          if self.deleteStrategy == 'drop': 
            self.drop_daily_partition() 
          else: 
            self.delete_data() 
          print ("\n<br>") 
          print ("====="*5 + '<br>') 
          print ("source_total_cnt: %s <br>") % self.source_cnt 
          print ("dest_total_cnt: %s <br>") % self.dest_cnt 
          print ("====="*5 + '<br>') 
          if self._deleteCompleteState: 
            print ("\nFinal result: Successfully !! <br>") 
            sys.exit(88) 
          else: 
            print ("\nFinal result: Failed !! <br>") 
            sys.exit(254) 
    else: 
      print ("Create table failed ! Exiting. . .") 
      sys.exit(255) 
  
f = ClassMigrate() 
f.do()

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持三水点靠木。

Python 相关文章推荐
python线程池的实现实例
Nov 18 Python
python获取本机mac地址和ip地址的方法
Apr 29 Python
Python中splitlines()方法的使用简介
May 20 Python
深入解析Python中的__builtins__内建对象
Jun 21 Python
Python 删除连续出现的指定字符的实例
Jun 29 Python
python3.5绘制随机漫步图
Aug 27 Python
详解配置Django的Celery异步之路踩坑
Nov 25 Python
Python超越函数积分运算以及绘图实现代码
Nov 20 Python
使用OpenCV去除面积较小的连通域
Jul 05 Python
如何利用Python matplotlib绘制雷达图
Dec 21 Python
Python实现网络聊天室的示例代码(支持多人聊天与私聊)
Jan 27 Python
只需要这一行代码就能让python计算速度提高十倍
May 24 Python
python实现数据库跨服务器迁移
Apr 12 #Python
解决python3爬虫无法显示中文的问题
Apr 12 #Python
python读取中文txt文本的方法
Apr 12 #Python
基于python 处理中文路径的终极解决方法
Apr 12 #Python
解决Python2.7读写文件中的中文乱码问题
Apr 12 #Python
python 实现对文件夹内的文件排序编号
Apr 12 #Python
pandas数值计算与排序方法
Apr 12 #Python
You might like
DC漫画《蝙蝠侠和猫女》图透 猫女怀孕老爷当爹
2020/04/09 欧美动漫
PHP网页游戏学习之Xnova(ogame)源码解读(一)
2014/06/23 PHP
详解PHP防止直接访问.php 文件的实现方法
2017/07/28 PHP
彪哥1.1(智能表格)提供下载
2006/09/07 Javascript
JAVASCRIPT  THIS详解 面向对象
2009/03/25 Javascript
Javascript中的var_dump函数实现代码
2009/09/07 Javascript
JavaScript 浮点数运算 精度问题
2009/10/06 Javascript
Javascript isArray 数组类型检测函数
2009/10/08 Javascript
原生JS实现响应式瀑布流布局
2015/04/02 Javascript
jquery控制显示服务器生成的图片流
2015/08/04 Javascript
JavaScript驾驭网页-CSS与DOM
2016/03/24 Javascript
jQuery弹出层后禁用底部滚动条(移动端关闭回到原位置)
2016/08/29 Javascript
jquery与ajax获取特殊字符实例详解
2017/01/08 Javascript
vue 移动端适配方案详解
2018/11/15 Javascript
Python中列表(list)操作方法汇总
2014/08/18 Python
python网络编程实例简析
2014/09/26 Python
python实现文本去重且不打乱原本顺序
2016/01/26 Python
Python实现单词翻译功能
2017/06/06 Python
Python三种遍历文件目录的方法实例代码
2018/01/19 Python
python 不以科学计数法输出的方法
2018/07/16 Python
python如何基于redis实现ip代理池
2020/01/17 Python
numpy库ndarray多维数组的维度变换方法(reshape、resize、swapaxes、flatten)
2020/04/28 Python
Python3实现个位数字和十位数字对调, 其乘积不变
2020/05/03 Python
python中not、and和or的优先级与详细用法介绍
2020/11/03 Python
H&M美国官网:欧洲最大的服饰零售商
2016/09/07 全球购物
床上用品全球在线购物:BeddingInn
2016/12/18 全球购物
Booking.com亚太地区:Booking.com APAC
2020/02/07 全球购物
RIP版本1跟版本2的区别
2013/12/30 面试题
描述RIP和OSPF区别以及特点
2015/01/17 面试题
移动通信行业实习自我鉴定
2013/09/28 职场文书
创建绿色社区汇报材料
2014/08/22 职场文书
乱丢垃圾袋检讨书
2014/10/08 职场文书
干部作风建设个人剖析材料
2014/10/11 职场文书
学习三严三实心得体会
2014/10/13 职场文书
2015年煤矿安全工作总结
2015/05/23 职场文书
WIN10使用IIS部署ftp服务器详细教程
2022/08/05 Servers