python调用百度语音识别实现大音频文件语音识别功能


Posted in Python on August 30, 2018

本文为大家分享了python实现大音频文件语音识别功能的具体代码,供大家参考,具体内容如下

实现思路:先用ffmpeg将其他非wav格式的音频转换为wav格式,并转换音频的声道(百度支持声道为1)和采样率(值为8000);格式转换完成后,再用ffmpeg将音频切成百度支持的时长(30秒和60秒2种,本程序用的是30秒)。

# coding: utf-8
import json
import time
import base64
from inc import rtysdb
import urllib2
import requests
import os
import uuid
from inc import db_config
 
 
class BaiduRest:
  """Small client for Baidu's speech REST endpoints.

  Constructing an instance immediately requests an OAuth access token (a
  network call); the token is stored on ``self.token_str`` and reused by
  ``text2audio`` (speech synthesis) and ``audio2text`` (speech recognition).
  """

  def __init__(self, cu_id, api_key, api_secert):
    """Store the endpoint templates and fetch an access token.

    Args:
      cu_id: Value sent as ``cuid`` with every request.
      api_key: Substituted as ``client_id`` in the token URL.
      api_secert: Substituted as ``client_secret`` in the token URL.
    """
    self.token_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s"
    self.getvoice_url = "http://tsn.baidu.com/text2audio?tex=%s&lan=zh&cuid=%s&ctp=1&tok=%s"
    self.upvoice_url = 'http://vop.baidu.com/server_api'

    self.cu_id = cu_id
    self.get_token(api_key, api_secert)

  def get_token(self, api_key, api_secert):
    """Request an access token and keep it on ``self.token_str``.

    Returns:
      True once the token has been stored.

    Raises:
      KeyError: If the JSON response has no ``access_token`` field.
    """
    token_url = self.token_url % (api_key, api_secert)
    response = urllib2.urlopen(token_url)
    try:
      r_str = response.read()
    finally:
      # Release the connection even when reading fails.
      response.close()
    token_data = json.loads(r_str)
    self.token_str = token_data['access_token']
    return True

  # Speech synthesis
  def text2audio(self, text, filename):
    """Synthesize ``text`` and write the returned bytes to ``filename``.

    Returns:
      True once the response body has been written.
    """
    get_url = self.getvoice_url % (urllib2.quote(text), self.cu_id, self.token_str)
    response = urllib2.urlopen(get_url)
    try:
      voice_data = response.read()
    finally:
      response.close()
    with open(filename, 'wb') as voice_fp:
      voice_fp.write(voice_data)
    return True

  # Speech recognition
  def audio2text(self, filename):
    """Upload the audio file ``filename`` and return the recognized text.

    The request declares the audio as ``wav``, 8000 Hz, single channel, so
    the file is expected to already be in that form.

    Returns:
      The first entry of the response's ``result`` list when the service
      answers with ``err_msg == 'success.'``; otherwise False.
    """
    # The handle is closed as soon as the bytes are read (it used to leak).
    with open(filename, 'rb') as wav_fp:
      voice_data = wav_fp.read()

    data = {
      'format': 'wav',
      'rate': 8000,
      'channel': 1,
      'cuid': self.cu_id,
      'token': self.token_str,
      # Length of the raw audio, not of the base64 text.
      'len': len(voice_data),
      # b64encode never emits newlines; decode so the payload is text.
      'speech': base64.b64encode(voice_data).decode('ascii'),
    }
    result = requests.post(self.upvoice_url, json=data, headers={'Content-Type': 'application/json'})
    data_result = result.json()
    # A payload without err_msg/result is treated as a failed recognition
    # instead of raising KeyError/IndexError.
    if data_result.get('err_msg') == 'success.':
      candidates = data_result.get('result')
      if candidates:
        return candidates[0]
    return False
 
 
 
def test_voice(voice_file,
               api_key="vossGHIgEETS6IMRxBDeahv8",
               api_secert="3c1fe6a6312f41fa21fa2c394dad5510",
               cu_id="0-57-7B-9F-1F-A1"):
  """Run speech recognition on one audio file through ``BaiduRest``.

  A new ``BaiduRest`` client (and therefore a new access token) is created
  on every call.

  Args:
    voice_file: Path of the audio file handed to ``BaiduRest.audio2text``.
    api_key: API key used to obtain the token.
    api_secert: API secret used to obtain the token.
    cu_id: Device identifier sent as ``cuid``.

  Returns:
    Whatever ``BaiduRest.audio2text`` returns: the recognized text, or
    False when recognition did not succeed.
  """
  # NOTE(review): the defaults are the credentials that were embedded in the
  # original script; pass your own values rather than relying on them.
  bdr = BaiduRest(cu_id, api_key, api_secert)
  return bdr.audio2text(voice_file)
 
def get_master_audio(check_status='cut_status'):
  """Fetch rows of ``ocenter_recognition`` that still await a processing step.

  Args:
    check_status: ``'cut_status'`` selects rows with ``status=0``;
      ``'finished_status'`` selects rows with ``finished_status=0``.

  Returns:
    The value returned by ``rtysdb.select_data`` when it is truthy;
    False when nothing was found or ``check_status`` is not recognized.
  """
  statements = (
    ('cut_status',
     "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE status=0"),
    ('finished_status',
     "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE finished_status=0"),
  )
  for status_name, statement in statements:
    if check_status == status_name:
      rows = rtysdb.select_data(statement, 'more')
      return rows if rows else False
  # Unknown selector: no query is issued.
  return False
 
 
def go_recognize(master_id):
  """Recognize pending sections of one master recording and store the text.

  Up to 10 sections of ``ocenter_section`` with ``status=0`` are processed
  in id order. For each section whose file exists, the recognized text is
  written to the section row and appended to the parent row's ``content``.
  Afterwards the master row's ``finished_status`` is set to 1.

  Args:
    master_id: Integer id of the ``ocenter_recognition`` row.

  Returns:
    False when no pending section exists; otherwise None.
  """
  # NOTE(review): SQL is assembled with string formatting and the recognized
  # text is external data; switch to parameterized queries if rtysdb
  # supports them.
  # NOTE(review): only one batch of 10 sections is handled before the master
  # is flagged finished — confirm longer recordings are covered elsewhere.
  section_path = db_config.SYS_PATH
  sql = "SELECT id,rid,url,status FROM ocenter_section WHERE rid=%d AND status=0 order by id asc limit 10" % (master_id)
  record = rtysdb.select_data(sql,'more')
  if not record:
    return False
  for rec in record:
    section_id, parent_id, section_url = rec[0], rec[1], rec[2]
    voice_file = section_path+'/'+section_url
    if not os.path.exists(voice_file):
      # Missing files are skipped and keep status=0.
      continue
    # A leftover debugging print/exit used to stop the process right here,
    # which made everything below unreachable.
    result = test_voice(voice_file)
    if result:
      sql = "update ocenter_section set content='%s', status='%d' where id=%d" % (result,1,section_id)
      rtysdb.do_exec_sql(sql)
      parent_content = rtysdb.select_data("SELECT id,content FROM ocenter_recognition WHERE id=%d" % (parent_id))
      if parent_content:
        # Treat an empty/NULL parent content as an empty string.
        new_content = (parent_content[1] or '')+result
        update_content_sql = "update ocenter_recognition set content='%s' where id=%d" % (new_content,parent_id)
        rtysdb.do_exec_sql(update_content_sql)
    else:
      # The original passed three values to a two-placeholder format string
      # (TypeError). Presumably the intent is to mark the section as handled
      # so it is not retried forever — TODO confirm.
      rtysdb.do_exec_sql("update ocenter_section set status='%d' where id=%d" % (1,section_id))
    # Pause between requests; presumably to throttle API usage.
    time.sleep(5)
  # The loop has no break, so this always runs after the batch.
  rtysdb.do_exec_sql("UPDATE ocenter_recognition SET finished_status=1 WHERE id=%d" % (master_id))
# Convert audio files that Baidu speech recognition cannot process as-is.
def ffmpeg_convert():
  """Convert every not-yet-cut master recording to 8 kHz mono WAV and cut it.

  For each row returned by ``get_master_audio('cut_status')`` the source
  file is converted with ffmpeg into ``Uploads/Convert/convert_<uuid>.wav``
  below ``db_config.SYS_PATH``. When the converted file exists it is handed
  to ``ffmpeg_cut`` and the row is updated with ``status=1`` and the
  converted file name.

  Raises:
    OSError: If the ``ffmpeg`` executable cannot be started.
  """
  # Function-scope import keeps this block self-contained.
  import subprocess

  section_path = db_config.SYS_PATH
  used_audio = get_master_audio('cut_status')
  if not used_audio:
    return
  for audio in used_audio:
    audio_path = section_path+'/'+audio[1]
    convert_name = "Uploads/Convert/convert_" + str(uuid.uuid1()) + ".wav"
    target_path = section_path + "/" + convert_name
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact, and call() waits for ffmpeg to finish before
    # the existence check below.
    subprocess.call([
      "ffmpeg", "-i", audio_path,
      "-ar", "8000", "-ac", "1", "-f", "wav",
      target_path,
    ])
    if os.path.exists(target_path):
      ffmpeg_cut(convert_name,audio[3],audio[0])
      sql = "UPDATE ocenter_recognition SET status=1,convert_name='%s' where id=%d" % (convert_name,audio[0])
      rtysdb.do_exec_sql(sql)
# Format a number of seconds as an HH:MM:SS timestamp for ffmpeg.
def _format_offset(total_seconds):
  """Return ``total_seconds`` as a zero-padded ``HH:MM:SS`` string."""
  minutes, seconds = divmod(int(total_seconds), 60)
  hours, minutes = divmod(minutes, 60)
  return "%02d:%02d:%02d" % (hours, minutes, seconds)


# Split a large audio file into fixed-length pieces.
def ffmpeg_cut(convert_name,sharps,master_id,segment_seconds=30):
  """Cut a converted recording into segments and register each one.

  Segment ``i`` (0-based) starts at ``i * segment_seconds`` and is written
  to ``Uploads/Section/<uuid>-<i+1>.wav`` below ``db_config.SYS_PATH``; a
  row with ``status=0`` is inserted into ``ocenter_section`` for it.

  Args:
    convert_name: Path of the converted file, relative to ``SYS_PATH``.
    sharps: Number of segments to produce; nothing happens when it is
      empty or not positive.
    master_id: Stored as ``rid`` on every inserted section row.
    segment_seconds: Length of each segment in seconds (default 30).

  Raises:
    OSError: If the ``ffmpeg`` executable cannot be started.
  """
  if not sharps or sharps <= 0:
    return
  # Function-scope import keeps this block self-contained.
  import subprocess

  section_path = db_config.SYS_PATH
  cut_name = section_path+'/'+convert_name
  duration = _format_offset(segment_seconds)
  for i in range(0,sharps):
    # Pure arithmetic replaces the old localtime()-minus-8-hours trick,
    # which was only right in UTC+8 and malformed hours >= 10.
    start_time = _format_offset(i*segment_seconds)
    db_store_name = "Uploads/Section/"+str(uuid.uuid1())+'-'+str(i+1)+".wav"
    section_name = section_path+"/"+db_store_name
    # Same executable name as ffmpeg_convert; argument list avoids the shell.
    subprocess.call([
      "ffmpeg", "-i", cut_name,
      "-vn", "-acodec", "copy",
      "-ss", start_time, "-t", duration,
      section_name,
    ])
    data = {}
    data['rid'] = master_id
    data['url'] = db_store_name
    data['create_time'] = int(time.time())
    data['status'] = 0
    rtysdb.insert_one('ocenter_section',data)
 
if __name__ == "__main__":
  # Step 1: convert and cut every recording that has not been cut yet.
  ffmpeg_convert()
  # Step 2: run recognition for every recording not yet marked finished.
  unfinished = get_master_audio('finished_status')
  for master in (unfinished or ()):
    go_recognize(master[0])

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持三水点靠木。

Python 相关文章推荐
python中 ? : 三元表达式的使用介绍
Oct 09 Python
Python3实现从文件中读取指定行的方法
May 22 Python
Python实现的计数排序算法示例
Nov 29 Python
python 分离文件名和路径以及分离文件名和后缀的方法
Oct 21 Python
pycharm恢复默认设置或者是替换pycharm的解释器实例
Oct 29 Python
Python 实现还原已撤回的微信消息
Jun 18 Python
numpy数组广播的机制
Jul 12 Python
python按键按住不放持续响应的实例代码
Jul 17 Python
Python 使用 docopt 解析json参数文件过程讲解
Aug 13 Python
python 创建一维的0向量实例
Dec 02 Python
解析Python 偏函数用法全方位实现
Jun 26 Python
python中re模块知识点总结
Jan 17 Python
python的中异常处理机制
Aug 30 #Python
python调用百度REST API实现语音识别
Aug 30 #Python
python调用百度语音REST API
Aug 30 #Python
python调用百度语音识别api
Aug 30 #Python
python实现ID3决策树算法
Aug 29 #Python
python实现C4.5决策树算法
Aug 29 #Python
python机器学习之KNN分类算法
Aug 29 #Python
You might like
让PHP支持页面回退的两种方法[转]
2007/02/14 PHP
php一行代码获取文件后缀名实例分析
2014/11/12 PHP
php内存缓存实现方法
2015/01/24 PHP
非常漂亮的JS代码经典广告
2007/10/21 Javascript
iframe 父窗口和子窗口相互的调用方法集锦
2010/12/15 Javascript
document.write()及其输出内容的样式、位置控制
2013/08/12 Javascript
JavaScript动态操作表格实例(添加,删除行,列及单元格)
2013/11/25 Javascript
JQuery中层次选择器用法实例详解
2015/05/18 Javascript
jQuery表格行上移下移和置顶的实现方法
2015/10/08 Javascript
小巧强大的jquery layer弹窗弹层插件
2015/12/06 Javascript
Move.js入门
2017/02/08 Javascript
JavaScript实现向select下拉框中添加和删除元素的方法
2017/03/07 Javascript
小程序自定义组件实现城市选择功能
2018/07/18 Javascript
vue在手机中通过本机IP地址访问webApp的方法
2018/08/15 Javascript
Three.JS实现三维场景
2018/12/30 Javascript
vue组件之间的数据传递方法详解
2019/04/19 Javascript
深入理解redux之compose的具体应用
2020/01/12 Javascript
从局部变量和全局变量开始全面解析Python中变量的作用域
2016/06/16 Python
Python连接DB2数据库
2016/08/27 Python
python2.6.6如何升级到python2.7.14
2018/04/08 Python
Python Threading 线程/互斥锁/死锁/GIL锁
2019/07/21 Python
Python完成哈夫曼树编码过程及原理详解
2019/07/29 Python
python scipy卷积运算的实现方法
2019/09/16 Python
Python 网络编程之TCP客户端/服务端功能示例【基于socket套接字】
2019/10/12 Python
基于python调用psutil模块过程解析
2019/12/20 Python
如何将tensorflow训练好的模型移植到Android (MNIST手写数字识别)
2020/04/22 Python
Spark处理数据排序问题如何避免OOM
2020/05/21 Python
Python使用Matlab命令过程解析
2020/06/04 Python
python能自学吗
2020/06/18 Python
jupyter notebook远程访问不了的问题解决方法
2021/01/11 Python
2019史上最全Database工程师题库
2015/12/06 面试题
《陈涉世家》教学反思
2014/04/12 职场文书
教师考核评语
2014/04/28 职场文书
大学生就业自我推荐信
2014/05/10 职场文书
队名及霸气口号大全
2015/12/25 职场文书
如何做好员工培训计划?
2019/07/09 职场文书