python调用百度语音识别实现大音频文件语音识别功能


Posted in Python onAugust 30, 2018

本文为大家分享了python实现大音频文件语音识别功能的具体代码,供大家参考,具体内容如下

实现思路:先用ffmpeg将其他非wav格式的音频转换为wav格式,并转换音频的声道(百度支持声道为1),采样率(值为8000),格式转换完成后,再用ffmpeg将音频切成百度。

支持的时长(30秒和60秒2种,本程序用的是30秒)。

# coding: utf-8
import json
import time
import base64
from inc import rtysdb
import urllib2
import requests
import os
import uuid
from inc import db_config
 
 
class BaiduRest:
  def __init__(self, cu_id, api_key, api_secert):
    self.token_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s"
    self.getvoice_url = "http://tsn.baidu.com/text2audio?tex=%s&lan=zh&cuid=%s&ctp=1&tok=%s"
    self.upvoice_url = 'http://vop.baidu.com/server_api'
 
    self.cu_id = cu_id
    self.get_token(api_key, api_secert)
    return
 
  def get_token(self, api_key, api_secert):
    token_url = self.token_url % (api_key, api_secert)
    r_str = urllib2.urlopen(token_url).read()
    token_data = json.loads(r_str)
    self.token_str = token_data['access_token']
    return True
 
  # 语音合成
  def text2audio(self, text, filename):
    get_url = self.getvoice_url % (urllib2.quote(text), self.cu_id, self.token_str)
    voice_data = urllib2.urlopen(get_url).read()
    voice_fp = open(filename, 'wb+')
    voice_fp.write(voice_data)
    voice_fp.close()
    return True
 
  ##语音识别
  def audio2text(self, filename):
    data = {}
    data['format'] = 'wav'
    data['rate'] = 8000
    data['channel'] = 1
    data['cuid'] = self.cu_id
    data['token'] = self.token_str
 
    wav_fp = open(filename, 'rb')
    voice_data = wav_fp.read()
    data['len'] = len(voice_data)
    # data['speech'] = base64.b64encode(voice_data).decode('utf-8')
    data['speech'] = base64.b64encode(voice_data).replace('\n', '')
    # post_data = json.dumps(data)
    result = requests.post(self.upvoice_url, json=data, headers={'Content-Type': 'application/json'})
    data_result = result.json()
    if(data_result['err_msg'] == 'success.'):
      return data_result['result'][0]
    else:
      return False
 
 
 
def test_voice(voice_file):
  api_key = "vossGHIgEETS6IMRxBDeahv8"
  api_secert = "3c1fe6a6312f41fa21fa2c394dad5510"
  bdr = BaiduRest("0-57-7B-9F-1F-A1", api_key, api_secert)
 
  # 生成
  #start = time.time()
  #bdr.text2audio("你好啊", "out.wav")
  #using = time.time() - start
  #print using
 
  # 识别
  #start = time.time()
  result = bdr.audio2text(voice_file)
  # result = bdr.audio2text("weather.pcm")
  #using = time.time() - start
  return result
 
def get_master_audio(check_status='cut_status'):
  if check_status == 'cut_status':
    sql = "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE status=0"
  elif check_status == 'finished_status':
    sql = "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE finished_status=0"
  else:
    return False
  data = rtysdb.select_data(sql,'more')
  if data:
    return data
  else:
    return False
 
 
def go_recognize(master_id):
  section_path = db_config.SYS_PATH
  sql = "SELECT id,rid,url,status FROM ocenter_section WHERE rid=%d AND status=0 order by id asc limit 10" % (master_id)
  #print sql
  record = rtysdb.select_data(sql,'more')
  #print record
  if not record:
    return False
  for rec in record:
    #print section_path+'/'+rec[1]
    voice_file = section_path+'/'+rec[2]
    if not os.path.exists(voice_file):
      continue
    result = test_voice(voice_file)
    print result
    exit(0)
    if result:
      #rtysdb.update_by_pk('ocenter_section',rec[0],{'content':result,'status':1})
      sql = "update ocenter_section set content='%s', status='%d' where id=%d" % (result,1,rec[0])      #print sql
      rtysdb.do_exec_sql(sql)
      parent_content = rtysdb.select_data("SELECT id,content FROM ocenter_recognition WHERE id=%d" % (rec[1]))
      #print parent_content
      if parent_content:
        new_content = parent_content[1]+result
        update_content_sql = "update ocenter_recognition set content='%s' where id=%d" % (new_content,rec[1])
        rtysdb.do_exec_sql(update_content_sql)
    else:
      rtysdb.do_exec_sql("update ocenter_section set status='%d' where id=%d" % (result,1,rec[0]))
    time.sleep(5)
  else:
    rtysdb.do_exec_sql("UPDATE ocenter_recognition SET finished_status=1 WHERE id=%d" % (master_id))
#对百度语音识别不了的音频文件进行转换
def ffmpeg_convert():
  section_path = db_config.SYS_PATH
  #print section_path
  used_audio = get_master_audio('cut_status')
  #print used_audio
  if used_audio:
    for audio in used_audio:
      audio_path = section_path+'/'+audio[1]
      new_audio = uuid.uuid1()
      command_line = "ffmpeg -i "+audio_path +" -ar 8000 -ac 1 -f wav "+section_path+"/Uploads/Convert/convert_" + str(new_audio) +".wav";
      #print command_line
      os.popen(command_line)
      if os.path.exists(section_path+"/Uploads/Convert/convert_" + str(new_audio) +".wav"):
        convert_name = "Uploads/Convert/convert_" + str(new_audio) +".wav"
        ffmpeg_cut(convert_name,audio[3],audio[0])
        sql = "UPDATE ocenter_recognition SET status=1,convert_name='%s' where id=%d" % (convert_name,audio[0])
        rtysdb.do_exec_sql(sql)
#将大音频文件切成碎片
def ffmpeg_cut(convert_name,sharps,master_id):
  section_path = db_config.SYS_PATH
  if sharps>0:
    for i in range(0,sharps):
      timeArray = time.localtime(i*30)
      h = time.strftime("%H", timeArray)
      h = int(h) - 8
      h = "0" + str(h)
      ms = time.strftime("%M:%S",timeArray)
      start_time = h+':'+str(ms)
      cut_name = section_path+'/'+convert_name
      db_store_name = "Uploads/Section/"+str(uuid.uuid1())+'-'+str(i+1)+".wav"
      section_name = section_path+"/"+db_store_name
      command_line = "ffmpeg.exe -i "+cut_name+" -vn -acodec copy -ss "+start_time+" -t 00:00:30 "+section_name
      #print command_line
      os.popen(command_line)
      data = {}
      data['rid'] = master_id
      data['url'] = db_store_name
      data['create_time'] = int(time.time())
      data['status'] = 0
      rtysdb.insert_one('ocenter_section',data)
 
if __name__ == "__main__":
  ffmpeg_convert()
  audio = get_master_audio('finished_status')
  if audio:
     for ad in audio:
      go_recognize(ad[0])

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持三水点靠木。

Python 相关文章推荐
使用Python解析JSON数据的基本方法
Oct 15 Python
简单谈谈Python中的闭包
Nov 30 Python
django中的HTML控件及参数传递方法
Mar 20 Python
Python3正则匹配re.split,re.finditer及re.findall函数用法详解
Jun 11 Python
通过python的matplotlib包将Tensorflow数据进行可视化的方法
Jan 09 Python
Python使用修饰器进行异常日志记录操作示例
Mar 19 Python
在python3中实现更新界面
Feb 21 Python
使用python绘制cdf的多种实现方法
Feb 25 Python
Python json格式化打印实现过程解析
Jul 21 Python
用python对oracle进行简单性能测试
Dec 05 Python
Pytest之测试命名规则的使用
Apr 16 Python
Python如何用re模块实现简易tokenizer
May 02 Python
python的中异常处理机制
Aug 30 #Python
python调用百度REST API实现语音识别
Aug 30 #Python
python调用百度语音REST API
Aug 30 #Python
python调用百度语音识别api
Aug 30 #Python
python实现ID3决策树算法
Aug 29 #Python
python实现C4.5决策树算法
Aug 29 #Python
python机器学习之KNN分类算法
Aug 29 #Python
You might like
分享8个最佳的代码片段在线测试网站
2013/06/29 PHP
合并ThinkPHP配置文件以消除代码冗余的实现方法
2014/07/22 PHP
PHP 中 Orientation 属性判断上传图片是否需要旋转
2015/10/16 PHP
laravel中短信发送验证码的实现方法
2018/04/25 PHP
Laravel5.1 框架表单验证操作实例详解
2020/01/07 PHP
jquery 图片 上一张 下一张 链接效果(续篇)
2010/04/20 Javascript
JS实现self的resend
2010/07/22 Javascript
javascript权威指南 学习笔记之javascript数据类型
2011/09/24 Javascript
JS获取元素多层嵌套思路详解
2016/05/16 Javascript
json对象转为字符串,当做参数传递时加密解密的实现方法
2016/06/29 Javascript
微信小程序 两种为对象属性赋值的方式详解
2017/02/23 Javascript
使用MUI框架模拟手机端的下拉刷新和上拉加载功能
2017/09/04 Javascript
vue中axios实现数据交互与跨域问题
2019/05/12 Javascript
详解vue微信网页授权最终解决方案
2019/06/16 Javascript
laravel实现中文和英语互相切换的例子
2019/09/30 Javascript
js实现鼠标拖拽div左右滑动
2020/01/15 Javascript
Vue 使用typescript如何优雅的调用swagger API
2020/09/01 Javascript
浅谈js数组splice删除某个元素爬坑
2020/10/14 Javascript
[48:21]林俊杰圣堂刺客超神杀戮秀
2014/10/29 DOTA
python中base64加密解密方法实例分析
2015/05/16 Python
Python 字典与字符串的互转实例
2017/01/13 Python
Python使用matplotlib绘制正弦和余弦曲线的方法示例
2018/01/06 Python
django js实现部分页面刷新的示例代码
2018/05/28 Python
Python配置虚拟环境图文步骤
2019/05/20 Python
Python基于callable函数检测对象是否可被调用
2020/10/16 Python
python源文件的字符编码知识点详解
2021/03/04 Python
纯CSS3代码实现switch滑动开关按钮效果
2016/08/30 HTML / CSS
BudgetAir印度:预订航班、酒店和汽车租赁
2019/07/07 全球购物
XMLHttpRequest对象在IE和Firefox中创建方式有没有不同
2016/03/23 面试题
设计总监岗位职责
2013/12/07 职场文书
大学校园毕业自我鉴定
2014/01/15 职场文书
小学生竞选班干部演讲稿(5篇)
2014/09/12 职场文书
光棍节联谊晚会活动策划书
2014/10/10 职场文书
导游带团欢迎词
2015/09/30 职场文书
教师正风肃纪心得体会
2016/01/15 职场文书
Python访问Redis的详细操作
2021/06/26 Python