世界上并没有完美的程序，但是我们并不因此而沮丧，因为写程序就是一个不断追求完美的过程。——侯氏工坊
文章目录
- 抽取评分最高的n个文档
sampler
from elasticsearch import Elasticsearch
import urllib3
urllib3.disable_warnings()
# Equivalent REST requests in console syntax: create the index, bulk-load
# six sample documents, then run the sampler search that the script below issues.
#
# PUT es_sampler
# {
#   "mappings": {
#     "properties": {
#       "name": {"type": "keyword"}
#     }
#   }
# }
#
# POST es_sampler/_bulk
# {"index": {"_id": 1}}
# {"name": "hello"}
# {"index": {"_id": 2}}
# {"name": "good"}
# {"index": {"_id": 3}}
# {"name": "es"}
# {"index": {"_id": 4}}
# {"name": "elastic"}
# {"index": {"_id": 5}}
# {"name": "es"}
# {"index": {"_id": 6}}
# {"name": "good"}
#
# GET es_sampler/_search
# {
#   "query": {"query_string": {
#     "default_field": "name",
#     "query": "name:hello OR name:good"
#   }},
#   "size": 0,
#   "aggs": {
#     "my_sampler": {
#       "sampler": {
#         "shard_size": 100
#       },
#       "aggs": {
#         "my_significant_terms": {
#           "significant_terms": {
#             "field": "name",
#             "min_doc_count": 1
#           }
#         }
#       }
#     }
#   }
# }
# Create the Elasticsearch client.
# The endpoint must be the node's own address: the basic-auth credentials are
# attached to every request, so a URL that resolves to any other host would
# hand them to that host.
# NOTE(review): verify_certs=False turns off TLS certificate verification and
# the password is hard-coded in source. Both are tolerable only for a throwaway
# local demo; load the secret from configuration and enable verification
# before using this anywhere else.
es = Elasticsearch(
    "https://192.168.2.64:9200",
    verify_certs=False,
    basic_auth=("elastic", "MuZkDqdW--VsfDjTcoex"),
    request_timeout=60,
    max_retries=3,
    retry_on_timeout=True,
    node_selector_class="round_robin",
)

# Refresh the index before searching so the sample documents are searchable.
es.indices.refresh(index="es_sampler")

# Match documents whose `name` is "hello" or "good".
query = {
    "query_string": {
        "default_field": "name",
        "query": "name:hello OR name:good",
    }
}

# Sampler aggregation: the nested significant_terms aggregation is computed
# over a sample of at most `shard_size` top-scoring documents per shard
# instead of over every matching document.
sampler = {
    "my_sampler": {
        "sampler": {
            "shard_size": 100,
        },
        "aggs": {
            "my_significant_terms": {
                "significant_terms": {
                    "field": "name",
                    "min_doc_count": 1,
                }
            }
        },
    }
}

# size=0: return no hits, only the aggregation result.
resp = es.search(index="es_sampler", size=0, query=query, aggregations=sampler)
print(resp['aggregations']['my_sampler'])