python调用百度语音识别实现大音频文件语音识别功能


Posted in Python onAugust 30, 2018

本文为大家分享了python实现大音频文件语音识别功能的具体代码,供大家参考,具体内容如下

实现思路:先用ffmpeg将其他非wav格式的音频转换为wav格式,并转换音频的声道(百度支持声道为1),采样率(值为8000),格式转换完成后,再用ffmpeg将音频切成百度。

支持的时长(30秒和60秒2种,本程序用的是30秒)。

# coding: utf-8
import json
import time
import base64
from inc import rtysdb
import urllib2
import requests
import os
import uuid
from inc import db_config
 
 
class BaiduRest:
  def __init__(self, cu_id, api_key, api_secert):
    self.token_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s"
    self.getvoice_url = "http://tsn.baidu.com/text2audio?tex=%s&lan=zh&cuid=%s&ctp=1&tok=%s"
    self.upvoice_url = 'http://vop.baidu.com/server_api'
 
    self.cu_id = cu_id
    self.get_token(api_key, api_secert)
    return
 
  def get_token(self, api_key, api_secert):
    token_url = self.token_url % (api_key, api_secert)
    r_str = urllib2.urlopen(token_url).read()
    token_data = json.loads(r_str)
    self.token_str = token_data['access_token']
    return True
 
  # 语音合成
  def text2audio(self, text, filename):
    get_url = self.getvoice_url % (urllib2.quote(text), self.cu_id, self.token_str)
    voice_data = urllib2.urlopen(get_url).read()
    voice_fp = open(filename, 'wb+')
    voice_fp.write(voice_data)
    voice_fp.close()
    return True
 
  ##语音识别
  def audio2text(self, filename):
    data = {}
    data['format'] = 'wav'
    data['rate'] = 8000
    data['channel'] = 1
    data['cuid'] = self.cu_id
    data['token'] = self.token_str
 
    wav_fp = open(filename, 'rb')
    voice_data = wav_fp.read()
    data['len'] = len(voice_data)
    # data['speech'] = base64.b64encode(voice_data).decode('utf-8')
    data['speech'] = base64.b64encode(voice_data).replace('\n', '')
    # post_data = json.dumps(data)
    result = requests.post(self.upvoice_url, json=data, headers={'Content-Type': 'application/json'})
    data_result = result.json()
    if(data_result['err_msg'] == 'success.'):
      return data_result['result'][0]
    else:
      return False
 
 
 
def test_voice(voice_file):
  api_key = "vossGHIgEETS6IMRxBDeahv8"
  api_secert = "3c1fe6a6312f41fa21fa2c394dad5510"
  bdr = BaiduRest("0-57-7B-9F-1F-A1", api_key, api_secert)
 
  # 生成
  #start = time.time()
  #bdr.text2audio("你好啊", "out.wav")
  #using = time.time() - start
  #print using
 
  # 识别
  #start = time.time()
  result = bdr.audio2text(voice_file)
  # result = bdr.audio2text("weather.pcm")
  #using = time.time() - start
  return result
 
def get_master_audio(check_status='cut_status'):
  if check_status == 'cut_status':
    sql = "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE status=0"
  elif check_status == 'finished_status':
    sql = "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE finished_status=0"
  else:
    return False
  data = rtysdb.select_data(sql,'more')
  if data:
    return data
  else:
    return False
 
 
def go_recognize(master_id):
  section_path = db_config.SYS_PATH
  sql = "SELECT id,rid,url,status FROM ocenter_section WHERE rid=%d AND status=0 order by id asc limit 10" % (master_id)
  #print sql
  record = rtysdb.select_data(sql,'more')
  #print record
  if not record:
    return False
  for rec in record:
    #print section_path+'/'+rec[1]
    voice_file = section_path+'/'+rec[2]
    if not os.path.exists(voice_file):
      continue
    result = test_voice(voice_file)
    print result
    exit(0)
    if result:
      #rtysdb.update_by_pk('ocenter_section',rec[0],{'content':result,'status':1})
      sql = "update ocenter_section set content='%s', status='%d' where id=%d" % (result,1,rec[0])      #print sql
      rtysdb.do_exec_sql(sql)
      parent_content = rtysdb.select_data("SELECT id,content FROM ocenter_recognition WHERE id=%d" % (rec[1]))
      #print parent_content
      if parent_content:
        new_content = parent_content[1]+result
        update_content_sql = "update ocenter_recognition set content='%s' where id=%d" % (new_content,rec[1])
        rtysdb.do_exec_sql(update_content_sql)
    else:
      rtysdb.do_exec_sql("update ocenter_section set status='%d' where id=%d" % (result,1,rec[0]))
    time.sleep(5)
  else:
    rtysdb.do_exec_sql("UPDATE ocenter_recognition SET finished_status=1 WHERE id=%d" % (master_id))
#对百度语音识别不了的音频文件进行转换
def ffmpeg_convert():
  section_path = db_config.SYS_PATH
  #print section_path
  used_audio = get_master_audio('cut_status')
  #print used_audio
  if used_audio:
    for audio in used_audio:
      audio_path = section_path+'/'+audio[1]
      new_audio = uuid.uuid1()
      command_line = "ffmpeg -i "+audio_path +" -ar 8000 -ac 1 -f wav "+section_path+"/Uploads/Convert/convert_" + str(new_audio) +".wav";
      #print command_line
      os.popen(command_line)
      if os.path.exists(section_path+"/Uploads/Convert/convert_" + str(new_audio) +".wav"):
        convert_name = "Uploads/Convert/convert_" + str(new_audio) +".wav"
        ffmpeg_cut(convert_name,audio[3],audio[0])
        sql = "UPDATE ocenter_recognition SET status=1,convert_name='%s' where id=%d" % (convert_name,audio[0])
        rtysdb.do_exec_sql(sql)
#将大音频文件切成碎片
def ffmpeg_cut(convert_name,sharps,master_id):
  section_path = db_config.SYS_PATH
  if sharps>0:
    for i in range(0,sharps):
      timeArray = time.localtime(i*30)
      h = time.strftime("%H", timeArray)
      h = int(h) - 8
      h = "0" + str(h)
      ms = time.strftime("%M:%S",timeArray)
      start_time = h+':'+str(ms)
      cut_name = section_path+'/'+convert_name
      db_store_name = "Uploads/Section/"+str(uuid.uuid1())+'-'+str(i+1)+".wav"
      section_name = section_path+"/"+db_store_name
      command_line = "ffmpeg.exe -i "+cut_name+" -vn -acodec copy -ss "+start_time+" -t 00:00:30 "+section_name
      #print command_line
      os.popen(command_line)
      data = {}
      data['rid'] = master_id
      data['url'] = db_store_name
      data['create_time'] = int(time.time())
      data['status'] = 0
      rtysdb.insert_one('ocenter_section',data)
 
if __name__ == "__main__":
  ffmpeg_convert()
  audio = get_master_audio('finished_status')
  if audio:
     for ad in audio:
      go_recognize(ad[0])

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持三水点靠木。

Python 相关文章推荐
flask + pymysql操作Mysql数据库的实例
Nov 13 Python
Django中的CBV和FBV示例介绍
Feb 25 Python
python利用高阶函数实现剪枝函数
Mar 20 Python
PyQt5每天必学之QSplitter实现窗口分隔
Apr 19 Python
详谈Pandas中iloc和loc以及ix的区别
Jun 08 Python
python numpy 按行归一化的实例
Jan 21 Python
Python实现的爬取百度文库功能示例
Feb 16 Python
django 消息框架 message使用详解
Jul 22 Python
Django如何将URL映射到视图
Jul 29 Python
jupyter notebook插入本地图片的实现
Apr 13 Python
使用darknet框架的imagenet数据分类预训练操作
Jul 07 Python
Python之qq自动发消息的示例代码
Feb 18 Python
python的中异常处理机制
Aug 30 #Python
python调用百度REST API实现语音识别
Aug 30 #Python
python调用百度语音REST API
Aug 30 #Python
python调用百度语音识别api
Aug 30 #Python
python实现ID3决策树算法
Aug 29 #Python
python实现C4.5决策树算法
Aug 29 #Python
python机器学习之KNN分类算法
Aug 29 #Python
You might like
php高级编程-函数-郑阿奇
2011/07/04 PHP
PDO::setAttribute讲解
2019/01/29 PHP
php实现的生成排列算法示例
2019/07/25 PHP
Jquery 高亮显示文本中重要的关键字
2009/12/24 Javascript
JavaScript 题型问答有答案参考
2010/02/17 Javascript
Package.js  现代化的JavaScript项目make工具
2012/05/23 Javascript
javascript上传图片前预览图片兼容大多数浏览器
2013/10/25 Javascript
jquery中页面Ajax方法$.load的功能使用介绍
2014/10/20 Javascript
node.js中的fs.writeSync方法使用说明
2014/12/15 Javascript
关于JS中的方法是否加括号的问题
2016/07/27 Javascript
JavaScript实现前端实时搜索功能
2020/03/26 Javascript
详解Vue.js 2.0 如何使用axios
2017/04/21 Javascript
移动端手指放大缩小插件与js源码
2017/05/22 Javascript
JavaScript实现简单的双色球(实例讲解)
2017/07/31 Javascript
js正则相关知识点专题
2018/05/10 Javascript
基于js Canvas实现二次贝塞尔曲线
2018/12/25 Javascript
webpack4之如何编写loader的方法步骤
2019/06/06 Javascript
微信小程序如何调用json数据接口并解析
2019/06/29 Javascript
layui 数据表格 根据值(1=业务,2=机构)显示中文名称示例
2019/10/26 Javascript
JS实现随机点名器
2020/04/12 Javascript
es6函数之尾调用优化实例分析
2020/04/25 Javascript
python常用web框架简单性能测试结果分享(包含django、flask、bottle、tornado)
2014/08/25 Python
Python制作钉钉加密/解密工具
2016/12/07 Python
python+opencv实现高斯平滑滤波
2020/07/21 Python
详解PyTorch中Tensor的高阶操作
2019/08/18 Python
python中sort和sorted排序的实例方法
2019/08/26 Python
浅谈PyTorch的可重复性问题(如何使实验结果可复现)
2020/02/20 Python
python集合能干吗
2020/07/19 Python
Pandas数据分析的一些常用小技巧
2021/02/07 Python
纯CSS3打造属于自己的“小黄人”
2016/03/14 HTML / CSS
Melissa香港官网:MDreams
2016/07/01 全球购物
运动会闭幕式解说词
2014/02/21 职场文书
心得体会的写法
2014/09/05 职场文书
2015年机关作风和效能建设工作总结
2015/07/23 职场文书
python删除csv文件的行列
2021/04/06 Python
CSS中妙用 drop-shadow 实现线条光影效果
2021/11/11 HTML / CSS