对python 操作solr索引数据的实例详解


Posted in Python on December 07, 2018

测试代码1:

def test(self):
    '''
    Post one hard-coded sample document to the Solr JSON update handler
    and print the raw response body.

    NOTE(review): the "*字段名*" key looks like a placeholder for a real
    schema field name -- confirm before running against a real core.
    '''
    endpoint = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
    document = {"id": "100001", "*字段名*": u"我是一个大好人"}
    response = requests.post(
        endpoint,
        json={"add": {"doc": document}},
        params={"boost": 1.0, "overwrite": "true", "commitWithin": 1000},
        headers={"Content-Type": "application/json"},
    )
    print(response.text)


 def Index_data(self):
     '''
     Add two fixed sample documents through the pysolr client and print
     whatever ``Solr.add`` returns.
     '''
     # Sample payload: each document is a plain dict of field -> value.
     sample_docs = [
         {"id": "doc_1", "title": "A test document"},
         {"id": "doc_2", "title": "The Banana: Tasty or Dangerous?"},
     ]
     client = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)
     print(client.add(sample_docs))

测试代码2:

实际数据:

对python 操作solr索引数据的实例详解

def Index_Data_FromCSV(self, csvfile):
    '''
    Read rows from a CSV file and index them into Solr, one HTTP request
    per row.

    The first row is treated as the header and skipped. Column 0 becomes
    ``title``, column 1 ``link``, column 3 ``source`` and column 4
    ``keyword``; byte-string cells are decoded as GB2312. For every data
    row the Solr response body (or the error that made the row fail) is
    printed, followed by the row index.

    :param csvfile: CSV file, including the full path
    :return: None
    '''
    def _as_text(cell):
        # Only byte strings need decoding; text cells are passed through
        # so the same code runs on Python 2 and Python 3.
        return cell.decode('GB2312') if isinstance(cell, bytes) else cell

    params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
    url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
    headers = {"Content-Type": "application/json"}
    for index, item in enumerate(CSVOP.ReadCSV(csvfile)):
        if index > 0:  # the first row is the header
            try:
                doc = {
                    'title': _as_text(item[0]),
                    'link': item[1],
                    # 'date': item[2],
                    'source': _as_text(item[3]),
                    'keyword': _as_text(item[4]),
                }
                r = requests.post(url, json={"add": {"doc": doc}},
                                  params=params, headers=headers)
                print(r.text)
            except Exception as e:
                # Best effort: report the failing row and keep going.
                # Printing the exception itself replaces the deprecated,
                # frequently empty ``e.message``.
                print(e)

        print(index)

#pysolr客户端代码
 def pysolr_Index_Data_FromCSV(self, csvfile,url='http://127.0.0.1:8983/solr/mycore/'):
     '''
     Read rows from a CSV file and index them into Solr in a single
     pysolr ``add`` call.

     The first row is treated as the header and skipped. Column 0 becomes
     ``title``, column 1 ``link``, column 3 ``source`` and column 4
     ``keyword``; byte-string cells are decoded as GB2312. Rows that fail
     to convert are reported and left out of the batch.

     :param csvfile: CSV file, including the full path
     :param url: base URL of the Solr core
     :return: None (the value returned by ``Solr.add`` is printed)
     '''
     def _as_text(cell):
         # Only byte strings need decoding; text cells are passed through
         # so the same code runs on Python 2 and Python 3.
         return cell.decode('GB2312') if isinstance(cell, bytes) else cell

     listdocs = []
     for index, item in enumerate(CSVOP.ReadCSV(csvfile)):
         if index == 0:  # the first row is the header
             continue
         try:
             listdocs.append({
                 'title': _as_text(item[0]),
                 'link': item[1],
                 # 'date': item[2],
                 'source': _as_text(item[3]),
                 'keyword': _as_text(item[4]),
             })
         except Exception as e:
             # Best effort: report the bad row and continue with the rest.
             print(e)
     solr = pysolr.Solr(url, timeout=10)
     result = solr.add(listdocs)
     print(result)

查询代码:

def search_data(self,message='视频'):
    '''
    Query the ``title`` field through Solr's HTTP select handler, print
    the raw JSON response and then ``<message>:<numFound>``.

    :param message: text to look for in the ``title`` field
    :return: None
    '''
    url = 'http://127.0.0.1:8983/solr/mycore/select'
    params = {
        # The backslash used to come from the invalid escape '\%'; it is
        # now spelled out so the query sent to Solr stays the same.
        'q': 'title:"\\%s"' % message,
        'wt': 'json',
        'indent': 'true',
    }
    # Passing the query via ``params`` lets requests URL-encode it instead
    # of splicing raw quotes and non-ASCII text into the URL.
    r = requests.get(url, params=params, verify=False)
    print(r.text)
    num_found = r.json()['response']['numFound']
    print(message + ":" + str(num_found))
  
  #pysolr客户端
  def search_data(self,where='视频',url='http://127.0.0.1:8983/solr/mycore/'):
      '''
      Search the ``title`` field through the pysolr client and print the
      keyword, title, source and link of each returned document.

      The search is issued with start=10 and rows=30, restricted to the
      four printed fields.

      :param where: text to look for in the ``title`` field
      :param url: base URL of the Solr core
      :return: None
      '''
      solr = pysolr.Solr(url, timeout=10)
      options = {'start':10,'rows': 30,'fl':'title,keyword,source,link'}
      # Use the caller's term; it used to be ignored in favour of a
      # hard-coded 'title:视频'.
      result = solr.search('title:%s' % where, **options)
      # print(result.raw_response['response']['numFound'])

      for item in result:
          print('keyword: %s' % item['keyword'])
          print('title: %s' % item['title'])
          print('source: %s' % item['source'])
          print('link: %s' % item['link'])
          # Blank separator between hits (the literal had been split
          # across physical lines).
          print('\n\n')

输出结果:

{
 "responseHeader":{
 "status":0,
 "QTime":0,
 "params":{
  "q":"title:\"\\视频\"",
  "indent":"true",
  "wt":"json"}},
 "response":{"numFound":123,"start":0,"docs":[
  {
  "source":"中彩网",
  "link":"http://www.zhcw.com/video/kaijiangshipin-3D/11981126.shtml",
  "keyword":"视频",
  "title":"福彩3D开奖 视频 -中彩 视频",
  "id":"2f0a9d21-3771-4efa-a0cc-e0484cc97993",
  "_version_":1584214368617234432},
  {
  "source":"新浪视频",
  "link":"http://video.sina.com.cn/news/spj/topvideoes20170707/?opsubject_id=top1",
  "keyword":"视频",
  "title":"今日热门 视频 汇总20170707",
  "id":"c8aae0af-01e9-491f-b999-24b97004a4ba",
  "_version_":1584214367507841024},
  {
  "source":"网易新闻",
  "link":"http://news.163.com/17/0707/13/COOCNUIE00018AOR.html",
  "keyword":"视频",
  "title":"网传\"兰桂坊附近不雅 视频 \" 警方:传播 视频 将追责",
  "id":"353de48d-ede7-481b-89d3-bc20ab4b3884",
  "_version_":1584214367821365248},
  {
  "source":"凤凰视频",
  "link":"http://v.ifeng.com/video_7480871.shtml",
  "keyword":"视频",
  "title":"创想动画片:花粉过敏症的痛谁懂-凤凰 视频 -最具媒体品质的综合 视频 ...",
  "id":"dc5f19c4-180f-4004-a0db-4499d875a60f",
  "_version_":1584214366819975168},
  {
  "source":"凤凰视频",
  "link":"http://v.ifeng.com/video_7805858.shtml",
  "keyword":"视频",
  "title":"节气说:小暑时节就该这样养生-凤凰 视频 -最具媒体品质的综合 视频 门...",
  "id":"5e9eb7a7-48b8-4e41-9514-7712ae619d9a",
  "_version_":1584214367516229632},
  {
  "source":"凤凰视频",
  "link":"http://v.ifeng.com/video_7483506.shtml",
  "keyword":"视频",
  "title":"听导演讲《神奇女侠》的故事 -凤凰 视频 -最具媒体品质的综合 视频 门户-...",
  "id":"6b1482f1-c0c9-479f-bef7-7de324fb9372",
  "_version_":1584214367647301632},
  {
  "source":"汽车杂志",
  "link":"http://www.jiemian.com/article/1445267.html",
  "keyword":"视频",
  "title":"【视频】欧宝最近找了一堆穿睡衣的辣妈拍了一段超牛的视频",
  "id":"1d327555-a6f3-4513-9a21-43d59418ab82",
  "_version_":1584214368157958144},
  {
  "source":"味觉大师",
  "link":"http://www.jiemian.com/article/1453545.html",
  "keyword":"视频",
  "title":"【视频】大董没有肉的肉味烧茄子",
  "id":"7d777870-93cb-4c18-a32b-734af8f133f1",
  "_version_":1584213891451191296},
  {
  "source":"新浪汽车",
  "link":"http://auto.sina.com.cn/video/zz/2017-07-07/detail-ifyhwehx5311889.shtml",
  "keyword":"视频",
  "title":"视频 :两大神车pk!高尔夫思域怎么选?",
  "id":"3a50b303-6b54-4da3-aee1-a61c678c752d",
  "_version_":1584213892090822656},
  {
  "source":"味觉大师",
  "link":"http://www.jiemian.com/article/1453545.html",
  "keyword":"视频",
  "title":"【视频】大董没有肉的肉味烧茄子",
  "id":"01da8e11-77bc-4c31-ba3a-ba668e846d9d",
  "_version_":1584214366191878144}]
 }}

完整代码:

#-*- coding: UTF-8 -*-
import csv
import os
import codecs


def ReadCSV(filename):
    '''
    Load a CSV file into a list of rows.

    :param filename: path of the CSV file to read
    :return: list of rows, each row being the list of cell strings produced
             by ``csv.reader``; ``None`` when ``filename`` does not exist
    '''
    if not os.path.exists(filename):
        return None
    with open(filename, 'r') as handle:
        return [row for row in csv.reader(handle)]

#################################################
#coding=utf-8
import json
import requests

import os
import time
from os import walk
import CSVOP
from datetime import datetime
import pysolr
import math

class SolrClientObj:
    '''
    Small collection of demo helpers that index, search and delete
    documents in a local Solr core, using either raw HTTP calls
    (``requests``) or the ``pysolr`` client. Every method prints its result
    instead of returning it.
    '''

    @staticmethod
    def _to_text(value, encoding='GB2312'):
        '''Decode byte strings with ``encoding``; return text unchanged.'''
        # Keeps the CSV indexers working whether the cells arrive as byte
        # strings (Python 2) or as text (Python 3).
        return value.decode(encoding) if isinstance(value, bytes) else value

    def test(self):
        '''
        Post one hard-coded sample document to the Solr JSON update handler
        and print the raw response body.

        NOTE(review): the "*字段名*" key looks like a placeholder for a real
        schema field name -- confirm before running against a real core.
        '''
        data = {"add": {"doc": {"id": "100001", "*字段名*": u"我是一个大好人"}}}
        params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
        url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
        headers = {"Content-Type": "application/json"}
        r = requests.post(url, json=data, params=params, headers=headers)
        print(r.text)

    def pysolr_Index_Data_FromCSV(self, csvfile,url='http://127.0.0.1:8983/solr/mycore/'):
        '''
        Read rows from a CSV file and index them into Solr in a single
        pysolr ``add`` call.

        The first row is treated as the header and skipped. Column 0
        becomes ``title``, column 1 ``link``, column 3 ``source`` and
        column 4 ``keyword``. Rows that fail to convert are reported and
        left out of the batch.

        :param csvfile: CSV file, including the full path
        :param url: base URL of the Solr core
        :return: None (the value returned by ``Solr.add`` is printed)
        '''
        listdocs = []
        for index, item in enumerate(CSVOP.ReadCSV(csvfile)):
            if index == 0:  # the first row is the header
                continue
            try:
                listdocs.append({
                    'title': self._to_text(item[0]),
                    'link': item[1],
                    # 'date': item[2],
                    'source': self._to_text(item[3]),
                    'keyword': self._to_text(item[4]),
                })
            except Exception as e:
                # Best effort: report the bad row and continue with the rest.
                print(e)
        solr = pysolr.Solr(url, timeout=10)
        result = solr.add(listdocs)
        print(result)

    def Index_Data_FromCSV(self, csvfile):
        '''
        Read rows from a CSV file and index them into Solr, one HTTP
        request per row.

        Uses the same column mapping as ``pysolr_Index_Data_FromCSV``. For
        every data row the Solr response body (or the error that made the
        row fail) is printed, followed by the row index.

        :param csvfile: CSV file, including the full path
        :return: None
        '''
        params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
        url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
        headers = {"Content-Type": "application/json"}
        for index, item in enumerate(CSVOP.ReadCSV(csvfile)):
            if index > 0:  # the first row is the header
                try:
                    doc = {
                        'title': self._to_text(item[0]),
                        'link': item[1],
                        # 'date': item[2],
                        'source': self._to_text(item[3]),
                        'keyword': self._to_text(item[4]),
                    }
                    r = requests.post(url, json={"add": {"doc": doc}},
                                      params=params, headers=headers)
                    print(r.text)
                except Exception as e:
                    # Best effort: report the failing row and keep going.
                    print(e)

            print(index)

    def Index_data(self):
        '''
        Add two fixed sample documents through the pysolr client and print
        whatever ``Solr.add`` returns.
        '''
        solr = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)

        # How you'd index data.
        result = solr.add([
            {
                "id": "doc_1",
                "title": "A test document",
            },
            {
                "id": "doc_2",
                "title": "The Banana: Tasty or Dangerous?",
            },
        ])
        print(result)

    def search_data(self,where='视频',url='http://127.0.0.1:8983/solr/mycore/'):
        '''
        Search the ``title`` field through the pysolr client and print the
        keyword, title, source and link of each returned document.

        The search is issued with start=10 and rows=30, restricted to the
        four printed fields.

        :param where: text to look for in the ``title`` field
        :param url: base URL of the Solr core
        :return: None
        '''
        solr = pysolr.Solr(url, timeout=10)
        options = {'start':10,'rows': 30,'fl':'title,keyword,source,link'}
        # Use the caller's term; it used to be ignored in favour of a
        # hard-coded 'title:视频'.
        result = solr.search('title:%s' % where, **options)
        # print(result.raw_response['response']['numFound'])

        for item in result:
            print('keyword: %s' % item['keyword'])
            print('title: %s' % item['title'])
            print('source: %s' % item['source'])
            print('link: %s' % item['link'])
            print('    ')

    def delete_index_data(self,where,url='http://127.0.0.1:8983/solr/mycore/'):
        '''
        Delete indexed documents that match a query and print what
        ``Solr.delete`` returns.

        :param where: delete condition, passed to pysolr as the ``q`` query
                      (for example '*:*' removes every document)
        :param url: base URL of the Solr core
        :return: None
        '''
        solr = pysolr.Solr(url, timeout=10)
        # solr.delete(id=where)  # id='id1': delete the document whose id is "id1"
        result = solr.delete(q=where)  # q='*:*' deletes every document
        print(result)


# Demo driver: these statements run as soon as the module is executed.
# Only the search call is active; uncomment another line to try that operation.
obj = SolrClientObj()
# obj.delete_index_data('*:*') # delete every indexed document
# obj.Index_data()
# obj.search_data()
# obj.delete_index_data('doc_1')
obj.search_data('视频')
# csvfile = 'D:/work/Solr/other/exportExcels/2017-07-07_info.csv'
# obj.pysolr_Index_Data_FromCSV(csvfile)

以上这篇对python 操作solr索引数据的实例详解就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持三水点靠木。

Python 相关文章推荐
Python之父谈Python的未来形式
Jul 01 Python
利用Python批量提取Win10锁屏壁纸实战教程
Mar 27 Python
Python Selenium Cookie 绕过验证码实现登录示例代码
Apr 10 Python
Python简直是万能的,这5大主要用途你一定要知道!(推荐)
Apr 03 Python
Python OpenCV实现视频分帧
Jun 01 Python
django解决订单并发问题【推荐】
Jul 31 Python
Python脚本实现监听服务器的思路代码详解
May 28 Python
Pymysql实现往表中插入数据过程解析
Jun 02 Python
Python基于httpx模块实现发送请求
Jul 07 Python
利用pipenv和pyenv管理多个相互独立的Python虚拟开发环境
Nov 01 Python
Python爬虫进阶之爬取某视频并下载的实现
Dec 08 Python
只需要这一行代码就能让python计算速度提高十倍
May 24 Python
python用post访问restful服务接口的方法
Dec 07 #Python
python3 实现验证码图片切割的方法
Dec 07 #Python
python 用opencv调用训练好的模型进行识别的方法
Dec 07 #Python
Python cv2 图像自适应灰度直方图均衡化处理方法
Dec 07 #Python
浅析python3字符串格式化format()函数的简单用法
Dec 07 #Python
Python实现的批量修改文件后缀名操作示例
Dec 07 #Python
Python实现随机创建电话号码的方法示例
Dec 07 #Python
You might like
PHP可变函数的使用详解
2013/06/14 PHP
浅析51个PHP处理字符串的函数
2013/08/02 PHP
PHP实现Google plus的好友拖拽分组效果
2016/10/21 PHP
php生成微信红包数组的方法
2019/09/05 PHP
PHP常用header头定义代码示例汇总
2020/08/29 PHP
FireFox与IE 下js兼容触发click事件的代码
2008/11/20 Javascript
javascript闭包的理解和实例
2010/08/12 Javascript
jQuery 对Select的操作备忘记录
2011/07/04 Javascript
js实现广告漂浮效果的小例子
2013/07/02 Javascript
javascript常用方法总结
2015/05/14 Javascript
javascript带回调函数的异步脚本载入方法实例分析
2015/07/02 Javascript
jQuery滚动加载图片实现原理
2015/12/14 Javascript
Jquery使用小技巧汇总
2015/12/29 Javascript
实例详解jQuery表单验证插件validate
2016/01/18 Javascript
Vue.js实现一个SPA登录页面的过程【推荐】
2017/04/29 Javascript
微信小程序tabbar底部导航
2018/11/05 Javascript
jQuery使用ajax传递json对象到服务端及contentType的用法示例
2020/03/12 jQuery
详解vue高级特性
2020/06/09 Javascript
[52:37]完美世界DOTA2联赛循环赛 Forest vs DM BO2第一场 10.29
2020/10/29 DOTA
[54:43]DOTA2-DPC中国联赛 正赛 CDEC vs Dynasty BO3 第一场 2月22日
2021/03/11 DOTA
浅谈Python中的闭包
2015/07/08 Python
selenium+python设置爬虫代理IP的方法
2018/11/29 Python
PyQt5的安装配置过程,将ui文件转为py文件后显示窗口的实例
2019/06/19 Python
wxPython窗体拆分布局基础组件
2019/11/19 Python
Python算法的时间复杂度和空间复杂度(实例解析)
2019/11/19 Python
Python箱型图处理离群点的例子
2019/12/09 Python
Python2 与Python3的版本区别实例分析
2020/03/30 Python
Python-jenkins模块获取jobs的执行状态操作
2020/05/12 Python
简单介绍一下pyinstaller打包以及安全性的实现
2020/06/02 Python
Python实现微信表情包炸群功能
2021/01/28 Python
光荣入党自我鉴定
2014/01/22 职场文书
学雷锋志愿服务月活动总结
2014/03/09 职场文书
化学工程专业求职信
2014/08/10 职场文书
2016党员入党决心书
2015/09/22 职场文书
导游词之新疆-喀纳斯
2019/10/10 职场文书
MySQL池化框架学习接池自定义
2022/07/23 MySQL