python调用百度语音识别实现大音频文件语音识别功能


Posted in Python onAugust 30, 2018

本文为大家分享了python实现大音频文件语音识别功能的具体代码,供大家参考,具体内容如下

实现思路:先用ffmpeg将其他非wav格式的音频转换为wav格式,并转换音频的声道(百度支持声道为1),采样率(值为8000),格式转换完成后,再用ffmpeg将音频切成百度。

支持的时长(30秒和60秒2种,本程序用的是30秒)。

# coding: utf-8
import json
import time
import base64
from inc import rtysdb
import urllib2
import requests
import os
import uuid
from inc import db_config
 
 
class BaiduRest:
  def __init__(self, cu_id, api_key, api_secert):
    self.token_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s"
    self.getvoice_url = "http://tsn.baidu.com/text2audio?tex=%s&lan=zh&cuid=%s&ctp=1&tok=%s"
    self.upvoice_url = 'http://vop.baidu.com/server_api'
 
    self.cu_id = cu_id
    self.get_token(api_key, api_secert)
    return
 
  def get_token(self, api_key, api_secert):
    token_url = self.token_url % (api_key, api_secert)
    r_str = urllib2.urlopen(token_url).read()
    token_data = json.loads(r_str)
    self.token_str = token_data['access_token']
    return True
 
  # 语音合成
  def text2audio(self, text, filename):
    get_url = self.getvoice_url % (urllib2.quote(text), self.cu_id, self.token_str)
    voice_data = urllib2.urlopen(get_url).read()
    voice_fp = open(filename, 'wb+')
    voice_fp.write(voice_data)
    voice_fp.close()
    return True
 
  ##语音识别
  def audio2text(self, filename):
    data = {}
    data['format'] = 'wav'
    data['rate'] = 8000
    data['channel'] = 1
    data['cuid'] = self.cu_id
    data['token'] = self.token_str
 
    wav_fp = open(filename, 'rb')
    voice_data = wav_fp.read()
    data['len'] = len(voice_data)
    # data['speech'] = base64.b64encode(voice_data).decode('utf-8')
    data['speech'] = base64.b64encode(voice_data).replace('\n', '')
    # post_data = json.dumps(data)
    result = requests.post(self.upvoice_url, json=data, headers={'Content-Type': 'application/json'})
    data_result = result.json()
    if(data_result['err_msg'] == 'success.'):
      return data_result['result'][0]
    else:
      return False
 
 
 
def test_voice(voice_file):
  api_key = "vossGHIgEETS6IMRxBDeahv8"
  api_secert = "3c1fe6a6312f41fa21fa2c394dad5510"
  bdr = BaiduRest("0-57-7B-9F-1F-A1", api_key, api_secert)
 
  # 生成
  #start = time.time()
  #bdr.text2audio("你好啊", "out.wav")
  #using = time.time() - start
  #print using
 
  # 识别
  #start = time.time()
  result = bdr.audio2text(voice_file)
  # result = bdr.audio2text("weather.pcm")
  #using = time.time() - start
  return result
 
def get_master_audio(check_status='cut_status'):
  if check_status == 'cut_status':
    sql = "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE status=0"
  elif check_status == 'finished_status':
    sql = "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE finished_status=0"
  else:
    return False
  data = rtysdb.select_data(sql,'more')
  if data:
    return data
  else:
    return False
 
 
def go_recognize(master_id):
  section_path = db_config.SYS_PATH
  sql = "SELECT id,rid,url,status FROM ocenter_section WHERE rid=%d AND status=0 order by id asc limit 10" % (master_id)
  #print sql
  record = rtysdb.select_data(sql,'more')
  #print record
  if not record:
    return False
  for rec in record:
    #print section_path+'/'+rec[1]
    voice_file = section_path+'/'+rec[2]
    if not os.path.exists(voice_file):
      continue
    result = test_voice(voice_file)
    print result
    exit(0)
    if result:
      #rtysdb.update_by_pk('ocenter_section',rec[0],{'content':result,'status':1})
      sql = "update ocenter_section set content='%s', status='%d' where id=%d" % (result,1,rec[0])      #print sql
      rtysdb.do_exec_sql(sql)
      parent_content = rtysdb.select_data("SELECT id,content FROM ocenter_recognition WHERE id=%d" % (rec[1]))
      #print parent_content
      if parent_content:
        new_content = parent_content[1]+result
        update_content_sql = "update ocenter_recognition set content='%s' where id=%d" % (new_content,rec[1])
        rtysdb.do_exec_sql(update_content_sql)
    else:
      rtysdb.do_exec_sql("update ocenter_section set status='%d' where id=%d" % (result,1,rec[0]))
    time.sleep(5)
  else:
    rtysdb.do_exec_sql("UPDATE ocenter_recognition SET finished_status=1 WHERE id=%d" % (master_id))
#对百度语音识别不了的音频文件进行转换
def ffmpeg_convert():
  section_path = db_config.SYS_PATH
  #print section_path
  used_audio = get_master_audio('cut_status')
  #print used_audio
  if used_audio:
    for audio in used_audio:
      audio_path = section_path+'/'+audio[1]
      new_audio = uuid.uuid1()
      command_line = "ffmpeg -i "+audio_path +" -ar 8000 -ac 1 -f wav "+section_path+"/Uploads/Convert/convert_" + str(new_audio) +".wav";
      #print command_line
      os.popen(command_line)
      if os.path.exists(section_path+"/Uploads/Convert/convert_" + str(new_audio) +".wav"):
        convert_name = "Uploads/Convert/convert_" + str(new_audio) +".wav"
        ffmpeg_cut(convert_name,audio[3],audio[0])
        sql = "UPDATE ocenter_recognition SET status=1,convert_name='%s' where id=%d" % (convert_name,audio[0])
        rtysdb.do_exec_sql(sql)
#将大音频文件切成碎片
def ffmpeg_cut(convert_name,sharps,master_id):
  section_path = db_config.SYS_PATH
  if sharps>0:
    for i in range(0,sharps):
      timeArray = time.localtime(i*30)
      h = time.strftime("%H", timeArray)
      h = int(h) - 8
      h = "0" + str(h)
      ms = time.strftime("%M:%S",timeArray)
      start_time = h+':'+str(ms)
      cut_name = section_path+'/'+convert_name
      db_store_name = "Uploads/Section/"+str(uuid.uuid1())+'-'+str(i+1)+".wav"
      section_name = section_path+"/"+db_store_name
      command_line = "ffmpeg.exe -i "+cut_name+" -vn -acodec copy -ss "+start_time+" -t 00:00:30 "+section_name
      #print command_line
      os.popen(command_line)
      data = {}
      data['rid'] = master_id
      data['url'] = db_store_name
      data['create_time'] = int(time.time())
      data['status'] = 0
      rtysdb.insert_one('ocenter_section',data)
 
if __name__ == "__main__":
  ffmpeg_convert()
  audio = get_master_audio('finished_status')
  if audio:
     for ad in audio:
      go_recognize(ad[0])

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持三水点靠木。

Python 相关文章推荐
利用Opencv中Houghline方法实现直线检测
Feb 11 Python
Python简单计算文件MD5值的方法示例
Apr 11 Python
python人民币小写转大写辅助工具
Jun 20 Python
mac安装pytorch及系统的numpy更新方法
Jul 26 Python
python 内置模块详解
Jan 01 Python
python中的协程深入理解
Jun 10 Python
如何在python开发工具PyCharm中搭建QtPy环境(教程详解)
Feb 04 Python
Python集合操作方法详解
Feb 09 Python
python deque模块简单使用代码实例
Mar 12 Python
简单的Python人脸识别系统
Jul 14 Python
Matplotlib 绘制饼图解决文字重叠的方法
Jul 24 Python
pytorch通过训练结果的复现设置随机种子
Jun 01 Python
python的中异常处理机制
Aug 30 #Python
python调用百度REST API实现语音识别
Aug 30 #Python
python调用百度语音REST API
Aug 30 #Python
python调用百度语音识别api
Aug 30 #Python
python实现ID3决策树算法
Aug 29 #Python
python实现C4.5决策树算法
Aug 29 #Python
python机器学习之KNN分类算法
Aug 29 #Python
You might like
PHP实现给定一列字符,生成指定长度的所有可能组合示例
2019/06/22 PHP
JavaScript 学习笔记(十六) js事件
2010/02/01 Javascript
简单的jquery拖拽排序效果实现代码
2011/09/20 Javascript
jquery 插件学习(三)
2012/08/06 Javascript
一个简单的网站访问JS计数器 刷新1次加1次访问
2012/09/20 Javascript
JS 各种网页尺寸判断实例方法
2013/04/18 Javascript
javascript获取网页宽高方法汇总
2015/07/19 Javascript
JS+CSS实现电子商务网站导航模板效果代码
2015/09/10 Javascript
基于jQuery仿淘宝产品图片放大镜特效
2020/10/19 Javascript
javascript实现粘贴qq截图功能(clipboardData)
2016/05/29 Javascript
js仿百度切换皮肤功能(html+css)
2016/07/10 Javascript
微信小程序 开发之全局配置
2017/05/05 Javascript
关于TypeScript中import JSON的正确姿势详解
2017/07/25 Javascript
React-native桥接Android原生开发详解
2018/01/17 Javascript
详解如何实现一个简单的 vuex
2018/02/10 Javascript
使用JavaScript破解web
2018/09/28 Javascript
JavaScript 继承 封装 多态实现及原理详解
2019/07/29 Javascript
vue中watch和computed的区别与使用方法
2020/08/23 Javascript
Python中的rfind()方法使用详解
2015/05/19 Python
详解如何利用Cython为Python代码加速
2018/01/27 Python
python定时复制远程文件夹中所有文件
2019/04/30 Python
Python 微信爬虫完整实例【单线程与多线程】
2019/07/06 Python
Python Matplotlib 基于networkx画关系网络图
2019/07/10 Python
python实现获取单向链表倒数第k个结点的值示例
2019/10/24 Python
python实现局域网内实时通信代码
2019/12/22 Python
快速解决Django关闭Debug模式无法加载media图片与static静态文件
2020/04/07 Python
python利用文件时间批量重命名照片和视频
2021/02/09 Python
客户经理竞聘演讲稿
2014/05/15 职场文书
学校交通安全责任书
2014/08/25 职场文书
优秀班集体事迹材料
2014/12/25 职场文书
餐饮服务员岗位职责
2015/02/09 职场文书
2015年度员工自我评价范文
2015/03/11 职场文书
2015年效能监察工作总结
2015/04/23 职场文书
创业计划书之孕婴生活馆
2019/11/11 职场文书
HTML基础-标签分类(闭合标签,空标签,块级元素,行内元素,行级块元素,可替换元素)
2021/03/31 HTML / CSS
Redis主从配置和底层实现原理解析(实战记录)
2021/06/30 Redis