对python 操作solr索引数据的实例详解


Posted in Python on December 07, 2018

测试代码1:

def test(self):
    """POST one sample document to the Solr JSON update handler and print the reply body."""
    endpoint = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
    # "*字段名*" is a placeholder key; replace it with a real schema field name.
    payload = {"add": {"doc": {"id": "100001", "*字段名*": u"我是一个大好人"}}}
    response = requests.post(
        endpoint,
        json=payload,
        params={"boost": 1.0, "overwrite": "true", "commitWithin": 1000},
        headers={"Content-Type": "application/json"},
    )
    print(response.text)


 def Index_data(self):
     """Add two sample documents through the pysolr client and print what ``add`` returns."""
     documents = [
         {"id": "doc_1", "title": "A test document"},
         {"id": "doc_2", "title": "The Banana: Tasty or Dangerous?"},
     ]
     client = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)
     outcome = client.add(documents)
     print(outcome)

测试代码2:

实际数据:

对python 操作solr索引数据的实例详解

def Index_Data_FromCSV(self, csvfile):
    """Read rows from a CSV file and index them into Solr one request per row.

    Every data row is sent as a JSON ``add`` command to the update handler
    and the response text is printed. A row that cannot be converted or
    posted is reported and skipped so the remaining rows are still indexed.

    :param csvfile: CSV file to load, including its full path
    :return: None
    """
    def to_text(value):
        # Cells are assumed to be GB2312-encoded byte strings (what the csv
        # module yields on Python 2); values that are already text pass through.
        return value.decode('GB2312') if isinstance(value, bytes) else value

    rows = CSVOP.ReadCSV(csvfile)
    params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
    url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
    headers = {"Content-Type": "application/json"}
    for index, item in enumerate(rows):
        if index > 0:  # the first row holds the column titles
            try:
                doc = {
                    'title': to_text(item[0]),
                    'link': item[1],
                    # column 2 (date) is not indexed
                    'source': to_text(item[3]),
                    'keyword': to_text(item[4]),
                }
                r = requests.post(url, json={"add": {"doc": doc}},
                                  params=params, headers=headers)
                print(r.text)
            except Exception as e:
                # Print the exception itself: ``e.message`` is empty for
                # UnicodeDecodeError, which hid the most likely failure.
                print(e)

        # Progress indicator, printed for every row including the header.
        print(index)

#pysolr客户端代码
 def pysolr_Index_Data_FromCSV(self, csvfile,url='http://127.0.0.1:8983/solr/mycore/'):
     """Read rows from a CSV file and index them into Solr in one pysolr batch.

     Rows that cannot be converted are reported and left out of the batch.

     :param csvfile: CSV file to load, including its full path
     :param url: base URL of the Solr core
     :return: None (the value returned by ``solr.add`` is printed)
     """
     def to_text(value):
         # Cells are assumed to be GB2312-encoded byte strings (what the csv
         # module yields on Python 2); values that are already text pass through.
         return value.decode('GB2312') if isinstance(value, bytes) else value

     listdocs = []
     for index, item in enumerate(CSVOP.ReadCSV(csvfile)):
         if index == 0:  # the first row holds the column titles
             continue
         try:
             listdocs.append({
                 'title': to_text(item[0]),
                 'link': item[1],
                 # column 2 (date) is not indexed
                 'source': to_text(item[3]),
                 'keyword': to_text(item[4]),
             })
         except Exception as e:
             # Print the exception itself: ``e.message`` is empty for
             # UnicodeDecodeError, which hid the most likely failure.
             print(e)
     solr = pysolr.Solr(url, timeout=10)
     result = solr.add(listdocs)
     print(result)

查询代码:

def search_data(self,message='视频'):
    """Run a phrase query on the ``title`` field over HTTP and print the hit count.

    The raw JSON response is printed first, followed by ``<message>:<numFound>``.

    :param message: phrase to look for in ``title``
    :return: None
    """
    url = 'http://127.0.0.1:8983/solr/mycore/select'
    # Let requests build the query string so the phrase is URL-encoded;
    # the previous hand-built URL also sent a stray backslash before it.
    params = {'q': 'title:"%s"' % message, 'wt': 'json', 'indent': 'true'}
    r = requests.get(url, params=params, verify=False)
    print(r.text)
    num_found = r.json()['response']['numFound']
    print('%s:%s' % (message, num_found))
  
  #pysolr客户端
  def search_data(self,where='视频',url='http://127.0.0.1:8983/solr/mycore/'):
      """Search the ``title`` field through pysolr and print each matching document.

      :param where: term to look for in ``title``
      :param url: base URL of the Solr core
      :return: None
      """
      solr = pysolr.Solr(url, timeout=10)
      # Paging and field list: skip the first 10 hits, return up to 30.
      options = {'start': 10, 'rows': 30, 'fl': 'title,keyword,source,link'}
      # Use the caller's term; the query used to be hard-coded to 'title:视频'.
      result = solr.search('title:%s' % where, **options)
      # Author's note: result.raw_response['response']['numFound'] gives the total hit count.

      for item in result:
          for field in ('keyword', 'title', 'source', 'link'):
              print('%s: %s' % (field, item[field]))
          print('    ')

输出结果:

{
 "responseHeader":{
 "status":0,
 "QTime":0,
 "params":{
  "q":"title:\"\\视频\"",
  "indent":"true",
  "wt":"json"}},
 "response":{"numFound":123,"start":0,"docs":[
  {
  "source":"中彩网",
  "link":"http://www.zhcw.com/video/kaijiangshipin-3D/11981126.shtml",
  "keyword":"视频",
  "title":"福彩3D开奖 视频 -中彩 视频",
  "id":"2f0a9d21-3771-4efa-a0cc-e0484cc97993",
  "_version_":1584214368617234432},
  {
  "source":"新浪视频",
  "link":"http://video.sina.com.cn/news/spj/topvideoes20170707/?opsubject_id=top1",
  "keyword":"视频",
  "title":"今日热门 视频 汇总20170707",
  "id":"c8aae0af-01e9-491f-b999-24b97004a4ba",
  "_version_":1584214367507841024},
  {
  "source":"网易新闻",
  "link":"http://news.163.com/17/0707/13/COOCNUIE00018AOR.html",
  "keyword":"视频",
  "title":"网传"兰桂坊附近不雅 视频 " 警方:传播 视频 将追责",
  "id":"353de48d-ede7-481b-89d3-bc20ab4b3884",
  "_version_":1584214367821365248},
  {
  "source":"凤凰视频",
  "link":"http://v.ifeng.com/video_7480871.shtml",
  "keyword":"视频",
  "title":"创想动画片:花粉过敏症的痛谁懂-凤凰 视频 -最具媒体品质的综合 视频 ...",
  "id":"dc5f19c4-180f-4004-a0db-4499d875a60f",
  "_version_":1584214366819975168},
  {
  "source":"凤凰视频",
  "link":"http://v.ifeng.com/video_7805858.shtml",
  "keyword":"视频",
  "title":"节气说:小暑时节就该这样养生-凤凰 视频 -最具媒体品质的综合 视频 门...",
  "id":"5e9eb7a7-48b8-4e41-9514-7712ae619d9a",
  "_version_":1584214367516229632},
  {
  "source":"凤凰视频",
  "link":"http://v.ifeng.com/video_7483506.shtml",
  "keyword":"视频",
  "title":"听导演讲《神奇女侠》的故事 -凤凰 视频 -最具媒体品质的综合 视频 门户-...",
  "id":"6b1482f1-c0c9-479f-bef7-7de324fb9372",
  "_version_":1584214367647301632},
  {
  "source":"汽车杂志",
  "link":"http://www.jiemian.com/article/1445267.html",
  "keyword":"视频",
  "title":"【视频】欧宝最近找了一堆穿睡衣的辣妈拍了一段超牛的视频",
  "id":"1d327555-a6f3-4513-9a21-43d59418ab82",
  "_version_":1584214368157958144},
  {
  "source":"味觉大师",
  "link":"http://www.jiemian.com/article/1453545.html",
  "keyword":"视频",
  "title":"【视频】大董没有肉的肉味烧茄子",
  "id":"7d777870-93cb-4c18-a32b-734af8f133f1",
  "_version_":1584213891451191296},
  {
  "source":"新浪汽车",
  "link":"http://auto.sina.com.cn/video/zz/2017-07-07/detail-ifyhwehx5311889.shtml",
  "keyword":"视频",
  "title":"视频 :两大神车pk!高尔夫思域怎么选?",
  "id":"3a50b303-6b54-4da3-aee1-a61c678c752d",
  "_version_":1584213892090822656},
  {
  "source":"味觉大师",
  "link":"http://www.jiemian.com/article/1453545.html",
  "keyword":"视频",
  "title":"【视频】大董没有肉的肉味烧茄子",
  "id":"01da8e11-77bc-4c31-ba3a-ba668e846d9d",
  "_version_":1584214366191878144}]
 }}

完整代码:

#-*- coding: UTF-8 -*-
import csv
import os
import codecs


def ReadCSV(filename):
    """Read every row of a CSV file into a list.

    :param filename: path of the CSV file to read
    :return: list of rows, each row being a list of column strings;
        an empty list when ``filename`` does not exist
    """
    # Return an empty list (not an implicit None) for a missing file so that
    # callers can iterate over the result unconditionally.
    if not os.path.exists(filename):
        return []
    with open(filename, 'r') as f:
        return list(csv.reader(f))

#################################################
#coding=utf-8
import json
import requests

import os
import time
from os import walk
import CSVOP
from datetime import datetime
import pysolr
import math

class SolrClientObj:
    """Examples of indexing, searching and deleting documents in a Solr core.

    Some methods talk to Solr over raw HTTP with ``requests``; the others go
    through the ``pysolr`` client. Results are printed rather than returned.
    """

    # Base URL of the Solr core used by the examples.
    _SOLR_URL = 'http://127.0.0.1:8983/solr/mycore/'
    # JSON update handler of that core and the options sent with every update.
    _UPDATE_URL = _SOLR_URL + 'update?wt=json'
    _UPDATE_PARAMS = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
    _JSON_HEADERS = {"Content-Type": "application/json"}

    @staticmethod
    def _to_text(value, encoding='GB2312'):
        """Decode a byte string with ``encoding``; return text values unchanged."""
        # On Python 2 the csv module yields byte strings, which need decoding.
        return value.decode(encoding) if isinstance(value, bytes) else value

    def _row_to_doc(self, item):
        """Build a Solr document from one CSV row (column 2, the date, is not indexed)."""
        return {
            'title': self._to_text(item[0]),
            'link': item[1],
            'source': self._to_text(item[3]),
            'keyword': self._to_text(item[4]),
        }

    def test(self):
        """POST one sample document to the JSON update handler and print the reply body."""
        # "*字段名*" is a placeholder key; replace it with a real schema field name.
        data = {"add": {"doc": {"id": "100001", "*字段名*": u"我是一个大好人"}}}
        r = requests.post(self._UPDATE_URL, json=data,
                          params=self._UPDATE_PARAMS, headers=self._JSON_HEADERS)
        print(r.text)

    def pysolr_Index_Data_FromCSV(self, csvfile,url=_SOLR_URL):
        """Read rows from a CSV file and index them into Solr in one pysolr batch.

        Rows that cannot be converted are reported and left out of the batch.

        :param csvfile: CSV file to load, including its full path
        :param url: base URL of the Solr core
        :return: None (the value returned by ``solr.add`` is printed)
        """
        listdocs = []
        for index, item in enumerate(CSVOP.ReadCSV(csvfile)):
            if index == 0:  # the first row holds the column titles
                continue
            try:
                listdocs.append(self._row_to_doc(item))
            except Exception as e:
                # Print the exception itself: ``e.message`` is empty for
                # UnicodeDecodeError, which hid the most likely failure.
                print(e)
        solr = pysolr.Solr(url, timeout=10)
        result = solr.add(listdocs)
        print(result)

    def Index_Data_FromCSV(self, csvfile):
        """Read rows from a CSV file and index them into Solr one request per row.

        Every data row is sent as a JSON ``add`` command and the response text
        is printed. A row that cannot be converted or posted is reported and
        skipped so the remaining rows are still indexed.

        :param csvfile: CSV file to load, including its full path
        :return: None
        """
        for index, item in enumerate(CSVOP.ReadCSV(csvfile)):
            if index > 0:  # the first row holds the column titles
                try:
                    data = {"add": {"doc": self._row_to_doc(item)}}
                    r = requests.post(self._UPDATE_URL, json=data,
                                      params=self._UPDATE_PARAMS,
                                      headers=self._JSON_HEADERS)
                    print(r.text)
                except Exception as e:
                    # See pysolr_Index_Data_FromCSV: print the exception, not e.message.
                    print(e)

            # Progress indicator, printed for every row including the header.
            print(index)

    def Index_data(self):
        """Add two sample documents through the pysolr client and print the result."""
        solr = pysolr.Solr(self._SOLR_URL, timeout=10)

        # How you'd index data.
        result = solr.add([
            {
                "id": "doc_1",
                "title": "A test document",
            },
            {
                "id": "doc_2",
                "title": "The Banana: Tasty or Dangerous?",
            },
        ])
        print(result)

    def search_data(self,where='视频',url=_SOLR_URL):
        """Search the ``title`` field through pysolr and print each matching document.

        :param where: term to look for in ``title``
        :param url: base URL of the Solr core
        :return: None
        """
        solr = pysolr.Solr(url, timeout=10)
        # Paging and field list: skip the first 10 hits, return up to 30.
        options = {'start': 10, 'rows': 30, 'fl': 'title,keyword,source,link'}
        # Use the caller's term; the query used to be hard-coded to 'title:视频'.
        result = solr.search('title:%s' % where, **options)
        # Author's note: result.raw_response['response']['numFound'] gives the total hit count.

        for item in result:
            for field in ('keyword', 'title', 'source', 'link'):
                print('%s: %s' % (field, item[field]))
            print('    ')

    def delete_index_data(self,where,url=_SOLR_URL):
        """Delete indexed documents that match a query and print the result.

        :param where: delete condition, passed to pysolr as the ``q`` query
            (``'*:*'`` removes every document)
        :param url: base URL of the Solr core
        :return: None
        """
        solr = pysolr.Solr(url, timeout=10)
        # Alternative: solr.delete(id=where) removes the document whose id is ``where``.
        result = solr.delete(q=where)
        print(result)


# Demo driver: runs when the module is executed or imported.
# Uncomment one of the calls below to try a different example.
obj = SolrClientObj()
# obj.delete_index_data('*:*') # delete every indexed document
# obj.Index_data()
# obj.search_data()
# obj.delete_index_data('doc_1')
obj.search_data('视频')
# csvfile = 'D:/work/Solr/other/exportExcels/2017-07-07_info.csv'
# obj.pysolr_Index_Data_FromCSV(csvfile)

以上这篇对python 操作solr索引数据的实例详解就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持三水点靠木。

Python 相关文章推荐
用ReactJS和Python的Flask框架编写留言板的代码示例
Dec 19 Python
python递归删除指定目录及其所有内容的方法
Jan 13 Python
简单谈谈Python中的元祖(Tuple)和字典(Dict)
Apr 21 Python
利用Python如何制作好玩的GIF动图详解
Jul 11 Python
Python常见数据类型转换操作示例
May 08 Python
对Python中小整数对象池和大整数对象池的使用详解
Jul 09 Python
pycharm中显示CSS提示的知识点总结
Jul 29 Python
python返回数组的索引实例
Nov 28 Python
对tensorflow中的strides参数使用详解
Jan 04 Python
Python实现冒泡排序算法的完整实例
Nov 04 Python
Python中threading库实现线程锁与释放锁
May 17 Python
python 离散点图画法的实现
Apr 01 Python
python用post访问restful服务接口的方法
Dec 07 #Python
python3 实现验证码图片切割的方法
Dec 07 #Python
python 用opencv调用训练好的模型进行识别的方法
Dec 07 #Python
Python cv2 图像自适应灰度直方图均衡化处理方法
Dec 07 #Python
浅析python3字符串格式化format()函数的简单用法
Dec 07 #Python
Python实现的批量修改文件后缀名操作示例
Dec 07 #Python
Python实现随机创建电话号码的方法示例
Dec 07 #Python
You might like
简单的页面缓冲技术
2006/10/09 PHP
php array_search() 函数使用
2010/04/13 PHP
使用PHP遍历文件夹与子目录的函数代码
2011/09/26 PHP
php输入流php://input使用浅析
2014/09/02 PHP
PHP+Ajax实现验证码的实时验证
2016/07/20 PHP
Yii2数据库操作常用方法小结
2017/05/04 PHP
php简单处理XML数据的方法示例
2017/05/19 PHP
php抽象方法和普通方法的区别点总结
2019/10/13 PHP
简明json介绍
2008/09/28 Javascript
JS禁用浏览器退格键实现思路及代码
2013/10/29 Javascript
Get中文乱码IE浏览器Get中文乱码解决方案
2013/12/26 Javascript
QQ空间顶部折页撕开效果示例代码
2014/06/15 Javascript
jquery中each方法示例和常用选择器
2014/07/08 Javascript
JS实现双击编辑可修改状态的方法
2015/08/14 Javascript
nodejs如何获取时间戳与时间差
2016/08/03 NodeJs
微信小程序 教程之事件
2016/10/18 Javascript
vue购物车插件编写代码
2017/11/27 Javascript
详解创建自定义的Angular Schematics
2018/06/06 Javascript
js blob类型url的视频下载问题的解决
2019/11/29 Javascript
基于JS实现table导出Excel并保留样式
2020/05/19 Javascript
解决nuxt 自定义全局方法,全局属性,全局变量的问题
2020/11/05 Javascript
Python开发之Nginx+uWSGI+virtualenv多项目部署教程
2019/05/13 Python
如何在python中判断变量的类型
2020/07/29 Python
html5是什么_动力节点Java学院整理
2017/07/07 HTML / CSS
美国休闲服装品牌:Express
2016/09/24 全球购物
VICHY薇姿英国官网:全球专业敏感肌护肤领先品牌
2017/07/04 全球购物
实现向右循环移位
2014/07/31 面试题
大学生个人总结的自我评价
2013/10/05 职场文书
函授本科自我鉴定
2014/02/04 职场文书
四年级语文教学反思
2014/02/05 职场文书
《蒙娜丽莎之约》教学反思
2014/02/27 职场文书
知识就是力量演讲稿
2014/09/13 职场文书
2014和解协议书范文
2014/09/15 职场文书
员工离职感谢信
2015/01/22 职场文书
奥巴马开学演讲观后感
2015/06/12 职场文书
Golang 结构体数据集合
2022/04/22 Golang