Golang Elasticsearches 批量修改查询及发送MQ


Posted in Golang onApril 19, 2022

update_by_query批量修改

POST post-v1_1-2021.02,post-v1_1-2021.03,post-v1_1-2021.04/_update_by_query
{
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "join_field": {
              "value": "post"
            }
          }
        },
        {
          "term": {
            "platform": {
              "value": "toutiao"
            }
          }
        },
        {
          "exists": {
            "field": "liked_count"
          }
        }
      ]
    }
  },
  "script":{
    "source":"ctx._source.liked_count=0",
    "lang":"painless"
  }
}

索引添加字段

PUT user_tiktok/_doc/_mapping?include_type_name=true
{
  "post_signature":{
    "StuClass":{
      "type":"keyword"
    },
    "post_token":{
      "type":"keyword"
    }
  }
}
PUT user_toutiao/_mapping
{
  "properties": {
    "user_token": {
      "type": "text"
    }
  }
}

查询es发送MQ

from celery import Celery
from elasticsearch import Elasticsearch
import logging
import arrow
import pytz
from elasticsearch.helpers import scan, streaming_bulk
import redis
pool_16_8 = redis.ConnectionPool(host='10.0.3.100', port=6379, db=8, password='EfcHGSzKqg6cfzWq')
rds_16_8 = redis.StrictRedis(connection_pool=pool_16_8)
logger = logging.getLogger('elasticsearch')
logger.disabled = False
logger.setLevel(logging.INFO)
es_zoo_connection = Elasticsearch('http://eswriter:e s密码@e sip:4000', dead_timeout=10,
                                  retry_on_timeout=True)
logger = logging.getLogger(__name__)
class ES(object):
    index = None
    doc_type = None
    id_field = '_id'
    version = ''
    source_id_field = ''
    aliase_field = ''
    separator = '-'
    aliase_func = None
    es = None
    tz = pytz.timezone('Asia/Shanghai')
    logger = logger
    @classmethod
    def mget(cls, ids=None, index=None, **kwargs):
        index = index or cls.index
        docs = cls.es.mget(body={'ids': ids}, doc_type=cls.doc_type, index=index, **kwargs)
        return docs
    @classmethod
    def count(cls, query=None, index=None, **kwargs):
        index = index or cls.index
        c = cls.es.count(doc_type=cls.doc_type, body=query, index=index, **kwargs)
        return c.get('count', 0)
    @classmethod
    def upsert(cls, doc, doc_id=None, index=None, doc_as_upsert=True, **kwargs):
        body = {
            "doc": doc,
        }
        if doc_as_upsert:
            body['doc_as_upsert'] = True
        id = doc_id or cls.id_name(doc)
        index = index or cls.index_name(doc)
        cls.es.update(index, id, cls.doc_type, body, **kwargs)
    @classmethod
    def search(cls, index=None, query=None, **kwargs):
        index = index or cls.index
        return cls.es.search(index=index, body=query, **kwargs)
    @classmethod
    def scan(cls, query, index=None, **kwargs):
        return scan(cls.es,
                    query=query,
                    index=index or cls.index,
                    **kwargs)
    @classmethod
    def index_name(cls, doc):
        if cls.aliase_field and cls.aliase_field in doc.keys():
            aliase_part = doc[cls.aliase_field]
            if isinstance(aliase_part, str):
                aliase_part = arrow.get(aliase_part)
            if isinstance(aliase_part, int):
                aliase_part = arrow.get(aliase_part).astimezone(cls.tz)
            if cls.version:
                index = '{}{}{}{}{}'.format(cls.index, cls.separator, cls.version, cls.separator,
                                            cls.aliase_func(aliase_part))
            else:
                index = '{}{}{}'.format(cls.index, cls.separator, cls.aliase_func(aliase_part))
        else:
            index = cls.index
        return index
    @classmethod
    def id_name(cls, doc):
        id = doc.get(cls.id_field) and doc.pop(cls.id_field) or doc.get(cls.source_id_field)
        if not id:
            print('========', doc)
        assert id, 'doc _id must not be None'
        return id
    @classmethod
    def bulk_upsert(cls, docs, **kwargs):
        """
        批量操作文章, 仅支持 index 和 update
        """
        op_type = kwargs.get('op_type') or 'update'
        chunk_size = kwargs.get('chunk_size')
        if op_type == 'update':
            upsert = kwargs.get('upsert', True)
            if upsert is None:
                upsert = True
        else:
            upsert = False
        actions = cls._gen_bulk_actions(docs, cls.index_name, cls.doc_type, cls.id_name, op_type, upsert=upsert)
        result = streaming_bulk(cls.es, actions, chunk_size=chunk_size, raise_on_error=False, raise_on_exception=False,
                                max_retries=5, request_timeout=25)
        return result
    @classmethod
    def _gen_bulk_actions(cls, docs, index_name, doc_type, id_name, op_type, upsert=True, **kwargs):
        assert not upsert or (upsert and op_type == 'update'), 'upsert should use "update" as op_type'
        for doc in docs:
            # 支持 index_name 作为一个工厂函数
            if callable(index_name):
                index = index_name(doc)
            else:
                index = index_name
            if op_type == 'index':
                _source = doc
            elif op_type == 'update' and not upsert:
                _source = {'doc': doc}
            elif op_type == 'update' and upsert:
                _source = {'doc': doc, 'doc_as_upsert': True}
            else:
                continue
            if callable(id_name):
                id = id_name(doc)
            else:
                id = id_name
            # 生成 Bulk 动作
            action = {
                "_op_type": op_type,
                "_index": index,
                "_type": doc_type,
                "_id": id,
                "_source": _source
            }
            yield action
class tiktokEsUser(ES):
    index = 'user_tiktok'
    doc_type = '_doc'
    id_field = '_id'
    source_id_field = 'user_id'
    es = es_zoo_connection
from kombu import Exchange, Queue, binding
def data_es_route_task_spider(name, args, kwargs, options, task=None, **kw):
    return {
        'exchange': 'tiktok',
        'exchange_type': 'topic',
        'routing_key': name
    }
class DataEsConfig_download(object):
    broker_url = 'amqp://用户:密码@ip:端口/'
    task_ignore_result = True
    task_serializer = 'json'
    accept_content = ['json']
    task_default_queue = 'default'
    task_default_exchange = 'default'
    task_default_routing_key = 'default'
    exchange = Exchange('tiktok', type='topic')
    task_queues = [
        Queue(
            'tiktok.user_avatar.download',
            [binding(exchange, routing_key='tiktok.user_avatar.download')],
            queue_arguments={'x-queue-mode': 'lazy'}
        ),
        Queue(
            'tiktok.post_avatar.download',
            [binding(exchange, routing_key='tiktok.post_avatar.download')],
            queue_arguments={'x-queue-mode': 'lazy'}
        ),
        Queue(
            'tiktok.post.spider',
            [binding(exchange, routing_key='tiktok.post.spider')],
            queue_arguments={'x-queue-mode': 'lazy'}
        ),
        Queue(
            'tiktok.post.save',
            [binding(exchange, routing_key='tiktok.post.save')],
            queue_arguments={'x-queue-mode': 'lazy'}
        ),
        Queue(
            'tiktok.user.save',
            [binding(exchange, routing_key='tiktok.user.save')],
            queue_arguments={'x-queue-mode': 'lazy'}
        ),
        Queue(
            'tiktok.post_avatar.invalid',
            [binding(exchange, routing_key='tiktok.post_avatar.invalid')],
            queue_arguments={'x-queue-mode': 'lazy'}
        ),
        Queue(
            'tiktok.user_avatar.invalid',
            [binding(exchange, routing_key='tiktok.user_avatar.invalid')],
            queue_arguments={'x-queue-mode': 'lazy'}
        ),
        Queue(
            'tiktok.comment.save',
            [binding(exchange, routing_key='tiktok.comment.save')],
            queue_arguments={'x-queue-mode': 'lazy'}
        ),
    ]
    task_routes = (data_es_route_task_spider,)
    enable_utc = True
    timezone = "Asia/Shanghai"
# 下载app
tiktok_app = Celery(
    'tiktok',
    include=[
        'task.tasks',
    ]
)
tiktok_app.config_from_object(DataEsConfig_download)
# 发任务生产者,更新舆情user历史信息
def send_post():
    query = {
        "query": {
            "bool": {
                "must": [
                    {
                        "exists": {
                            "field": "post_signature"
                        }
                    },
                    {
                        "range": {
                            "following_num": {
                                "gte": 1000
                            }
                        }
                    }
                ]
            }
        },
        "_source": ["region", "sec_uid", "post_signature"]
    }
    # query = {
    #     "query": {
    #         "bool": {
    #             "must": [
    #                 {"exists": {
    #                     "field": "post_signature"
    #                 }},
    #                 {
    #                     "match": {
    #                         "region": "MY"
    #                     }
    #                 }
    #             ]
    #         }
    #     },
    #     "_source": ["region", "sec_uid", "post_signature"]
    # }
    r = tiktokEsUser.scan(query=query, scroll='30m', request_timeout=100)
    for item in map(lambda x: x['_source'], r):
        tiktok_app.send_task('tiktok.post.spider', args=(item,))
def send_sign_token():
    query = {
        "query": {
            "bool": {
                "must": [
                    {
                        "exists": {
                            "field": "post_signature"
                        }
                    },
                    {
                        "range": {
                            "following_num": {
                                "gte": 1000
                            }
                        }
                    },
                    {
                        "range": {
                            "create_time": {
                                "gte": "2021-01-06T00:00:00",
                                "lte": "2021-01-06T01:00:00"
                            }
                        }
                    }
                ]
            }
        },
        "_source": ["user_id", "sec_uid"]
    }
    r = tiktokEsUser.scan(query=query, scroll='30m', request_timeout=100)
    for item in map(lambda x: x['_source'], r):
        tiktok_app.send_task('tiktok.user.sign_token', args=(item,))
if __name__ == '__main__':
    send_post()
    # send_sign_token()

以上就是go语言实现Elasticsearches批量修改查询及发送MQ操作示例的详细内容!

Golang 相关文章推荐
golang如何去除多余空白字符(含制表符)
Apr 25 Golang
golang 实现菜单树的生成方式
Apr 28 Golang
解决Golang time.Parse和time.Format的时区问题
Apr 29 Golang
golang日志包logger的用法详解
May 05 Golang
完美解决golang go get私有仓库的问题
May 05 Golang
golang 实现时间戳和时间的转化
May 07 Golang
Golang的继承模拟实例
Jun 30 Golang
golang fmt格式“占位符”的实例用法详解
Jul 04 Golang
go使用Gin框架利用阿里云实现短信验证码功能
Aug 04 Golang
Golang 遍历二叉树
Apr 19 Golang
Golang 对es的操作实例
Apr 20 Golang
Golang 切片(Slice)实现增删改查
Apr 22 Golang
GO语言异常处理分析 err接口及defer延迟
Apr 14 #Golang
GO语言字符串处理函数之处理Strings包
Apr 14 #Golang
golang的文件创建及读写操作
Apr 14 #Golang
golang定时器
Apr 14 #Golang
golang用type-switch判断interface的实际存储类型
Apr 14 #Golang
golang语言指针操作
Apr 14 #Golang
golang使用map实现去除重复数组
Apr 14 #Golang
You might like
php curl 登录163邮箱并抓取邮箱好友列表的代码(经测试)
2011/04/07 PHP
php中将网址转换为超链接的函数
2011/09/02 PHP
PHP如何读取由JavaScript设置的Cookie
2017/03/22 PHP
Yii2使用表单上传文件的实例代码
2017/08/03 PHP
php pdo连接数据库操作示例
2019/11/18 PHP
JavaScript forEach()遍历函数使用及介绍
2015/07/08 Javascript
js中最容易被忽视的事件问题大总结
2016/05/15 Javascript
AngularJS封装指令方法详解
2016/12/12 Javascript
详解jQuery简单的表格应用
2016/12/16 Javascript
angularJS利用ng-repeat遍历二维数组的实例代码
2017/06/03 Javascript
浅谈JS中的常用选择器及属性、方法的调用
2017/07/28 Javascript
JS实现中文汉字按拼音排序的方法
2017/10/09 Javascript
vue学习之mintui picker选择器实现省市二级联动示例
2017/10/12 Javascript
详解Node.js模板引擎Jade入门
2018/01/19 Javascript
vue.js element-ui tree树形控件改iview的方法
2018/03/29 Javascript
前端路由&webpack基础配置详解
2019/06/10 Javascript
Vue事件修饰符native、self示例详解
2019/07/09 Javascript
JS实现电商商品展示放大镜特效
2020/01/07 Javascript
js实现微信聊天效果
2020/08/09 Javascript
Openlayers学习之地图比例尺控件
2020/09/28 Javascript
[42:48]完美世界DOTA2联赛PWL S3 Magma vs INK ICE 第二场 12.11
2020/12/16 DOTA
python单线程实现多个定时器示例
2014/03/30 Python
基于Python实现的扫雷游戏实例代码
2014/08/01 Python
python flask实现分页效果
2017/06/27 Python
python使用百度文字识别功能方法详解
2019/07/23 Python
Python pandas RFM模型应用实例详解
2019/11/20 Python
浅析Python中字符串的intern机制
2020/10/03 Python
HTML5超炫酷粒子效果的进度条的实现示例
2019/08/23 HTML / CSS
德国婴儿推车和儿童安全座椅商店:BABYSHOP
2016/09/01 全球购物
Amara美国站:英国高端家居礼品网站,世界各地的奢侈家具品牌
2017/07/26 全球购物
校园达人秀策划书
2014/01/12 职场文书
入党积极分子自我鉴定范文
2014/03/25 职场文书
生活小常识广播稿
2014/09/16 职场文书
2016年元旦主持词
2015/07/06 职场文书
分析MySQL抛出异常的几种常见解决方式
2021/05/18 MySQL
实战Python爬虫爬取酷我音乐
2022/04/11 Python