对python 操作solr索引数据的实例详解


Posted in Python on December 07, 2018

测试代码1:

def test(self):
  """Post one hard-coded sample document to the local Solr update handler and print the reply body."""
  endpoint = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
  # Options sent as extra query-string parameters next to the JSON payload.
  query_args = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
  # NOTE(review): "*字段名*" reads as a placeholder ("field name") - swap in a real schema field before use.
  sample_doc = {"id": "100001", "*字段名*": u"我是一个大好人"}
  response = requests.post(
    endpoint,
    json={"add": {"doc": sample_doc}},
    params=query_args,
    headers={"Content-Type": "application/json"}
  )
  print(response.text)


 def Index_data(self):
  """Add two fixed sample documents to the local Solr core through pysolr and print the result."""
  # The documents to index; each one carries an id and a title.
  sample_docs = [
   {"id": "doc_1", "title": "A test document"},
   {"id": "doc_2", "title": "The Banana: Tasty or Dangerous?"},
  ]
  client = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)
  outcome = client.add(sample_docs)
  print(outcome)

测试代码2:

实际数据:

对python 操作solr索引数据的实例详解

def Index_Data_FromCSV(self, csvfile):
  '''
  Read rows from a CSV file and index them into Solr, one HTTP request per row.

  The first row is treated as the header and is not indexed. Columns 0, 1, 3
  and 4 are sent as title, link, source and keyword; column 2 is not sent.
  A row that fails is reported and skipped so the remaining rows still run.

  :param csvfile: path of the CSV file (full path included)
  :return: None; each Solr response body and each row index is printed
  '''
  rows = CSVOP.ReadCSV(csvfile)
  params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
  url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
  headers = {"Content-Type": "application/json"}
  for index, item in enumerate(rows):
   if index > 0: # the first row holds the column titles
    try:
     # NOTE(review): .decode('GB2312') assumes Python 2 byte strings in GB2312 - confirm against the CSV source.
     # A fresh dict per row keeps a half-filled failed row from leaking into the next one.
     doc = {
      'title': item[0].decode('GB2312'),
      'link': item[1],
      'source': item[3].decode('GB2312'),
      'keyword': item[4].decode('GB2312'),
     }
     r = requests.post(url, json={"add": {"doc": doc}}, params=params, headers=headers)
     print(r.text)
    except Exception as e:
     # repr() always shows the exception type and arguments; e.message is
     # empty for multi-argument exceptions such as UnicodeDecodeError.
     print(repr(e))

   print(index)

#pysolr客户端代码
 def pysolr_Index_Data_FromCSV(self, csvfile,url='http://127.0.0.1:8983/solr/mycore/'):
  '''
  Read rows from a CSV file and index them into Solr in one pysolr batch.

  The first row is treated as the header and is not indexed. Columns 0, 1, 3
  and 4 are sent as title, link, source and keyword; column 2 is not sent.
  A row that fails to convert is reported and left out of the batch.

  :param csvfile: path of the CSV file (full path included)
  :param url: base URL of the Solr core
  :return: None; the value returned by pysolr's add() is printed
  '''
  rows = CSVOP.ReadCSV(csvfile)
  listdocs = []
  for index, item in enumerate(rows):
   if index > 0: # the first row holds the column titles
    try:
     # NOTE(review): .decode('GB2312') assumes Python 2 byte strings in GB2312 - confirm against the CSV source.
     listdocs.append({
      'title': item[0].decode('GB2312'),
      'link': item[1],
      'source': item[3].decode('GB2312'),
      'keyword': item[4].decode('GB2312'),
     })
    except Exception as e:
     # repr() always shows the exception type and arguments; e.message is
     # empty for multi-argument exceptions such as UnicodeDecodeError.
     print(repr(e))
  solr = pysolr.Solr(url, timeout=10)
  result = solr.add(listdocs)
  print(result)

查询代码:

def search_data(self,message='视频'):
  """Run a phrase search on the title field over HTTP and print the raw reply and the hit count.

  :param message: text to look for in the title field
  :return: None; the response body and "<message>:<numFound>" are printed
  """
  # Escape backslashes and double quotes so the message cannot break out of the quoted phrase.
  phrase = message.replace('\\', '\\\\').replace('"', '\\"')
  url = 'http://127.0.0.1:8983/solr/mycore/select'
  # Handing the query to requests as params gets it URL-encoded, instead of
  # splicing raw text (spaces, '&', non-ASCII) into the URL by hand.
  query = {'q': 'title:"%s"' % phrase, 'wt': 'json', 'indent': 'true'}
  # verify=False turns off TLS certificate checks; it has no effect on this plain-http URL.
  r = requests.get(url, params=query, verify=False)
  print(r.text)
  num_found = r.json()['response']['numFound']
  print(message + ":" + str(num_found))
  
  #pysolr客户端
  def search_data(self,where='视频',url='http://127.0.0.1:8983/solr/mycore/'):
   """Search the title field through pysolr and print selected fields of each hit.

   :param where: term to look for in the title field
   :param url: base URL of the Solr core
   :return: None; keyword, title, source and link of every returned hit are printed
   """
   solr = pysolr.Solr(url, timeout=10)
   # start/rows page the result set (skip 10 hits, return up to 30); fl limits the returned fields.
   options = {'start':10,'rows': 30,'fl':'title,keyword,source,link'}
   # Build the query from the caller's term rather than a fixed literal.
   result = solr.search('title:%s' % where, **options)

   for item in result:
    print('keyword: %s' % item['keyword'])
    print('title: %s' % item['title'])
    print('source: %s' % item['source'])
    print('link: %s' % item['link'])
    # Blank lines separate one hit from the next.
    print('\n\n')

输出结果:

{
 "responseHeader":{
 "status":0,
 "QTime":0,
 "params":{
  "q":"title:\"\\视频\"",
  "indent":"true",
  "wt":"json"}},
 "response":{"numFound":123,"start":0,"docs":[
  {
  "source":"中彩网",
  "link":"http://www.zhcw.com/video/kaijiangshipin-3D/11981126.shtml",
  "keyword":"视频",
  "title":"福彩3D开奖 视频 -中彩 视频",
  "id":"2f0a9d21-3771-4efa-a0cc-e0484cc97993",
  "_version_":1584214368617234432},
  {
  "source":"新浪视频",
  "link":"http://video.sina.com.cn/news/spj/topvideoes20170707/?opsubject_id=top1",
  "keyword":"视频",
  "title":"今日热门 视频 汇总20170707",
  "id":"c8aae0af-01e9-491f-b999-24b97004a4ba",
  "_version_":1584214367507841024},
  {
  "source":"网易新闻",
  "link":"http://news.163.com/17/0707/13/COOCNUIE00018AOR.html",
  "keyword":"视频",
  "title":"网传\"兰桂坊附近不雅 视频 \" 警方:传播 视频 将追责",
  "id":"353de48d-ede7-481b-89d3-bc20ab4b3884",
  "_version_":1584214367821365248},
  {
  "source":"凤凰视频",
  "link":"http://v.ifeng.com/video_7480871.shtml",
  "keyword":"视频",
  "title":"创想动画片:花粉过敏症的痛谁懂-凤凰 视频 -最具媒体品质的综合 视频 ...",
  "id":"dc5f19c4-180f-4004-a0db-4499d875a60f",
  "_version_":1584214366819975168},
  {
  "source":"凤凰视频",
  "link":"http://v.ifeng.com/video_7805858.shtml",
  "keyword":"视频",
  "title":"节气说:小暑时节就该这样养生-凤凰 视频 -最具媒体品质的综合 视频 门...",
  "id":"5e9eb7a7-48b8-4e41-9514-7712ae619d9a",
  "_version_":1584214367516229632},
  {
  "source":"凤凰视频",
  "link":"http://v.ifeng.com/video_7483506.shtml",
  "keyword":"视频",
  "title":"听导演讲《神奇女侠》的故事 -凤凰 视频 -最具媒体品质的综合 视频 门户-...",
  "id":"6b1482f1-c0c9-479f-bef7-7de324fb9372",
  "_version_":1584214367647301632},
  {
  "source":"汽车杂志",
  "link":"http://www.jiemian.com/article/1445267.html",
  "keyword":"视频",
  "title":"【视频】欧宝最近找了一堆穿睡衣的辣妈拍了一段超牛的视频",
  "id":"1d327555-a6f3-4513-9a21-43d59418ab82",
  "_version_":1584214368157958144},
  {
  "source":"味觉大师",
  "link":"http://www.jiemian.com/article/1453545.html",
  "keyword":"视频",
  "title":"【视频】大董没有肉的肉味烧茄子",
  "id":"7d777870-93cb-4c18-a32b-734af8f133f1",
  "_version_":1584213891451191296},
  {
  "source":"新浪汽车",
  "link":"http://auto.sina.com.cn/video/zz/2017-07-07/detail-ifyhwehx5311889.shtml",
  "keyword":"视频",
  "title":"视频 :两大神车pk!高尔夫思域怎么选?",
  "id":"3a50b303-6b54-4da3-aee1-a61c678c752d",
  "_version_":1584213892090822656},
  {
  "source":"味觉大师",
  "link":"http://www.jiemian.com/article/1453545.html",
  "keyword":"视频",
  "title":"【视频】大董没有肉的肉味烧茄子",
  "id":"01da8e11-77bc-4c31-ba3a-ba668e846d9d",
  "_version_":1584214366191878144}]
 }}

完整代码:

#-*- coding: UTF-8 -*-
import csv
import os
import codecs


def ReadCSV(filename):
 """Load every row of a CSV file into memory.

 :param filename: path of the CSV file
 :return: list of rows, each row a list of cell strings; None when the
  path does not exist
 """
 if not os.path.exists(filename):
  return None
 with open(filename, 'r') as handle:
  return [row for row in csv.reader(handle)]

#################################################
#coding=utf-8
import json
import requests

import os
import time
from os import walk
import CSVOP
from datetime import datetime
import pysolr
import math

class SolrClientObj:
 """Helpers for indexing, searching and deleting documents in a Solr core.

 Two access styles are used: raw HTTP through ``requests`` and the ``pysolr``
 client. Every method prints its outcome and returns None.
 """

 def test(self):
  """Post one hard-coded sample document to the update handler and print the reply body."""
  # NOTE(review): "*字段名*" reads as a placeholder ("field name") - swap in a real schema field before use.
  data = {"add": {"doc": {"id": "100001", "*字段名*": u"我是一个大好人"}}}
  params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
  url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
  headers = {"Content-Type": "application/json"}
  r = requests.post(url, json=data, params=params, headers=headers)
  print(r.text)

 def pysolr_Index_Data_FromCSV(self, csvfile,url='http://127.0.0.1:8983/solr/mycore/'):
  '''
  Read rows from a CSV file and index them into Solr in one pysolr batch.

  The first row is treated as the header and is not indexed. Columns 0, 1, 3
  and 4 are sent as title, link, source and keyword; column 2 is not sent.
  A row that fails to convert is reported and left out of the batch.

  :param csvfile: path of the CSV file (full path included)
  :param url: base URL of the Solr core
  :return: None; the value returned by pysolr's add() is printed
  '''
  rows = CSVOP.ReadCSV(csvfile)
  listdocs = []
  for index, item in enumerate(rows):
   if index > 0: # the first row holds the column titles
    try:
     # NOTE(review): .decode('GB2312') assumes Python 2 byte strings in GB2312 - confirm against the CSV source.
     listdocs.append({
      'title': item[0].decode('GB2312'),
      'link': item[1],
      'source': item[3].decode('GB2312'),
      'keyword': item[4].decode('GB2312'),
     })
    except Exception as e:
     # repr() always shows the exception type and arguments; e.message is
     # empty for multi-argument exceptions such as UnicodeDecodeError.
     print(repr(e))
  solr = pysolr.Solr(url, timeout=10)
  result = solr.add(listdocs)
  print(result)

 def Index_Data_FromCSV(self, csvfile):
  '''
  Read rows from a CSV file and index them into Solr, one HTTP request per row.

  The first row is treated as the header and is not indexed. Columns 0, 1, 3
  and 4 are sent as title, link, source and keyword; column 2 is not sent.
  A row that fails is reported and skipped so the remaining rows still run.

  :param csvfile: path of the CSV file (full path included)
  :return: None; each Solr response body and each row index is printed
  '''
  rows = CSVOP.ReadCSV(csvfile)
  params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
  url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
  headers = {"Content-Type": "application/json"}
  for index, item in enumerate(rows):
   if index > 0: # the first row holds the column titles
    try:
     # A fresh dict per row keeps a half-filled failed row from leaking into the next one.
     doc = {
      'title': item[0].decode('GB2312'),
      'link': item[1],
      'source': item[3].decode('GB2312'),
      'keyword': item[4].decode('GB2312'),
     }
     r = requests.post(url, json={"add": {"doc": doc}}, params=params, headers=headers)
     print(r.text)
    except Exception as e:
     # See pysolr_Index_Data_FromCSV: repr() is used because e.message can be empty.
     print(repr(e))

   print(index)

 def Index_data(self):
  """Add two fixed sample documents to the local Solr core through pysolr and print the result."""
  solr = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)

  # How you'd index data.
  result = solr.add([
   {
    "id": "doc_1",
    "title": "A test document",
   },
   {
    "id": "doc_2",
    "title": "The Banana: Tasty or Dangerous?",
   },
  ])
  print(result)

 def search_data(self,where='视频',url='http://127.0.0.1:8983/solr/mycore/'):
  """Search the title field through pysolr and print selected fields of each hit.

  :param where: term to look for in the title field
  :param url: base URL of the Solr core
  :return: None; keyword, title, source and link of every returned hit are printed
  """
  solr = pysolr.Solr(url, timeout=10)
  # start/rows page the result set (skip 10 hits, return up to 30); fl limits the returned fields.
  options = {'start':10,'rows': 30,'fl':'title,keyword,source,link'}
  # Build the query from the caller's term rather than a fixed literal.
  result = solr.search('title:%s' % where, **options)

  for item in result:
   print('keyword: %s' % item['keyword'])
   print('title: %s' % item['title'])
   print('source: %s' % item['source'])
   print('link: %s' % item['link'])
   print('    ')

 def delete_index_data(self,where,url='http://127.0.0.1:8983/solr/mycore/'):
  '''
  Delete indexed documents that match a query.

  :param where: Solr query selecting the documents to delete; '*:*' deletes everything
  :param url: base URL of the Solr core
  :return: None; the value returned by pysolr's delete() is printed
  '''
  solr = pysolr.Solr(url, timeout=10)
  # To delete a single document by id instead, call solr.delete(id=where).
  result = solr.delete(q=where)
  print(result)


# Script entry: build the client and run the sample title search.
# The commented-out calls below are the other operations that can be run by hand.
obj = SolrClientObj()
# obj.delete_index_data('*:*') # delete all indexed documents
# obj.Index_data()
# obj.search_data()
# obj.delete_index_data('doc_1')
obj.search_data('视频')
# csvfile = 'D:/work/Solr/other/exportExcels/2017-07-07_info.csv'
# obj.pysolr_Index_Data_FromCSV(csvfile)

以上这篇《对 Python 操作 Solr 索引数据的实例详解》就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持三水点靠木。

Python 相关文章推荐
对于Python装饰器使用的一些建议
Jun 03 Python
简单谈谈Python中的几种常见的数据类型
Feb 10 Python
人生苦短我用python python如何快速入门?
Mar 12 Python
使用Python实现一个栈判断括号是否平衡
Aug 23 Python
Python抽象和自定义类定义与用法示例
Aug 23 Python
python3结合openpyxl库实现excel操作的实例代码
Sep 11 Python
python添加模块搜索路径和包的导入方法
Jan 19 Python
pygame实现俄罗斯方块游戏(AI篇2)
Oct 29 Python
python 使用cx-freeze打包程序的实现
Mar 14 Python
django模型动态修改参数,增加 filter 字段的方式
Mar 16 Python
python实现ftp文件传输系统(案例分析)
Mar 20 Python
Python 中的 copy()和deepcopy()
Nov 07 Python
python用post访问restful服务接口的方法
Dec 07 #Python
python3 实现验证码图片切割的方法
Dec 07 #Python
python 用opencv调用训练好的模型进行识别的方法
Dec 07 #Python
Python cv2 图像自适应灰度直方图均衡化处理方法
Dec 07 #Python
浅析python3字符串格式化format()函数的简单用法
Dec 07 #Python
Python实现的批量修改文件后缀名操作示例
Dec 07 #Python
Python实现随机创建电话号码的方法示例
Dec 07 #Python
You might like
PHP执行linux系统命令的常用函数使用说明
2010/04/27 PHP
PDO::beginTransaction讲解
2019/01/27 PHP
用JavaScript显示随机图像或引用
2009/04/21 Javascript
关于jquery性能最佳实践的讨论,与求教
2012/03/30 Javascript
js字符串转换成xml对象并使用技巧解读
2013/04/18 Javascript
JavaScript中document.forms[0]与getElementByName区别
2015/01/21 Javascript
asp.net中oracle 存储过程(图文)
2015/08/12 Javascript
js实现C#的StringBuilder效果完整实例
2015/12/22 Javascript
js正则表达式replace替换变量方法
2016/05/21 Javascript
如何使用jquery实现文字上下滚动效果
2016/10/12 Javascript
BootStrap注意事项小结(五)表单
2017/03/10 Javascript
Vue组件通信的四种方式汇总
2018/02/08 Javascript
jquery获取元素到屏幕四周可视距离的方法
2018/09/05 jQuery
video.js 一个页面同时播放多个视频的实例代码
2018/11/27 Javascript
vue实现带复选框的树形菜单
2019/05/27 Javascript
vue 的 solt 子组件过滤过程解析
2019/09/07 Javascript
layer.alert自定义关闭回调事件的方法
2019/09/27 Javascript
vue.js实现只能输入数字的输入框
2019/10/19 Javascript
vue-cli3访问public文件夹静态资源报错的解决方式
2020/09/02 Javascript
Vue时间轴 vue-light-timeline的用法说明
2020/10/29 Javascript
python实现可将字符转换成大写的tcp服务器实例
2015/04/29 Python
Python中subprocess的简单使用示例
2015/07/28 Python
Python实现简单拆分PDF文件的方法
2015/07/30 Python
python实现redis三种cas事务操作
2017/12/19 Python
Python应用库大全总结
2018/05/30 Python
浅谈在JupyterNotebook下导入自己的模块的问题
2020/04/16 Python
matplotlib实现数据实时刷新的示例代码
2021/01/05 Python
大都会艺术博物馆商店:The Met Store
2018/06/22 全球购物
高三毕业生自我鉴定
2013/12/20 职场文书
员工廉洁自律承诺书
2014/05/26 职场文书
支部鉴定材料
2014/06/02 职场文书
领导班子整改措施
2014/10/24 职场文书
保护环境建议书作文300字
2015/09/14 职场文书
小学数学新课改心得体会
2016/01/22 职场文书
导游词之凤凰古城
2019/10/22 职场文书
Oracle表空间与权限的深入讲解
2021/11/17 Oracle