es的语法笔记

原创已于 2024-06-15 15:52:47 修改 · 1k 阅读

27 ·

CC 4.0 BY-SA版权

文章标签：

#elasticsearch #笔记 #数据库

于 2024-06-14 16:44:17 首次发布

写得有点乱，先把容易引起我误解的地方记录一下

一：模糊查询wildcard

注意：如果是用message则查不出来，因为message经过分词了

{
   "query": {
    "wildcard": {
      "message.keyword": {
        "value": "*开始执行查询es的*"
      }
    }
  }
  ,
        "_source":{
            "includes":["applicationName","message"]
        }
}

二：查询：分组后取其中一条记录的其他字段

在指定时间范围内，筛选applicationName为A服务、且requestUri关键词中有“test1”或者“user”的记录，以requestUri进行分组，按照数量降序取前20个requestUri，而且还要取每个requestUri分组中按照时间降序的第一条记录的其他字段信息

{
      "query": {
        "bool": {
          "filter": [
            {
              "range": {
                "@timestamp": {
                  "gte": "2024-06-12 15:01:01",
                 "lte": "2024-06-12 21:21:01",
                  "time_zone": "Asia/Shanghai",
                  "format": "yyyy-MM-dd HH:mm:ss"
                }
              }
            },
            {
              "term": {
                "applicationName.keyword": "A服务"
              }
            },
            {
              "terms": {
                "requestUri": ["test1","user"]
              }
            }
          ]
        }
      },
      "size": 0,
      "aggs": {
        "group_by_requestUri": {
          "terms": {
            "field": "requestUri.keyword",
            "size": "20",
            "order": [
              {
                "_count": "desc"
              }
            ]
          },
          "aggs": {
            "time_top_last": {
              "top_hits": {
                "sort": [
                  {
                    "@timestamp": {
                      "order": "desc"
                    }
                  }
                ],
                "size": 1,
                "_source": {
                  "includes": [
                    "userIp",
                    "ipCity",
                    "serverIp",
                    "systemId",
                    "requestID",
                    "userIdentity"
                  ]
                }
              }
            }
          }
        }
      }
    }

以下为查询结果：将top20改为了top2

{

    "took": 2384,

    "timed_out": false,

    "_shards": {

        "total": 161,

        "successful": 161,

        "skipped": 115,

        "failed": 0

    },

    "hits": {

        "total": {

            "value": 10000,

            "relation": "gte"

        },

        "max_score": null,

        "hits": []

    },

    "aggregations": {

        "group_by_requestUri": {

            "doc_count_error_upper_bound": 90,

            "sum_other_doc_count": 5035,

            "buckets": [

                {

                    "key": "/user/v1/login",

                    "doc_count": 5737,

                    "time_top_last": {

                        "hits": {

                            "total": {

                                "value": 5737,

                                "relation": "eq"

                            },

                            "max_score": null,

                            "hits": [

                                {

                                    "_index": "ubsp-monitor-prod-2024.06.12",

                                    "_type": "_doc",

                                    "_id": "31qaDJABHhV04n43xF0e",

                                    "_score": null,

                                    "_ignored": [

                                        "message.keyword"

                                    ],

                                    "_source": {

                                        "systemId": "000",

                                        "requestID": "6e685bbf6f066ecb"

                                    },

                                    "sort": [

                                        1718198385770

                                    ]

                                }

                            ]

                        }

                    }

                },

                {

                    "key": "/ibconfig/v1/user/auth/check",

                    "doc_count": 4972,

                    "time_top_last": {

                        "hits": {

                            "total": {

                                "value": 4972,

                                "relation": "eq"

                            },

                            "max_score": null,

                            "hits": [

                                {

                                    "_index": "ubsp-gateway-2024.06.12",

                                    "_type": "_doc",

                                    "_id": "QPGbDJABJdDMneTabMrr",

                                    "_score": null,

                                    "_ignored": [

                                        "message.keyword"

                                    ],

                                    "_source": {

                                        "systemId": "097",

                                        "ipCity": "内网IP",

                                        "userIp": "10.178.141.51",

                                        "userIdentity": "AT-115-xxx"

                                    },

                                    "sort": [

                                        1718198425834

                                    ]

                                }

                            ]

                        }

                    }

                }

            ]

        }

    }

}

三：并列分组

查找指定时间范围内且服务名为A服务的数据，以systemId进行分组，按数量倒序排列取前5个分组；在每个分组里再以requestUri分组并取数量最多的前两条，同时也以userIp进行分组并取数量最多的前两条

{
      "query": {
        "bool": {
          "filter": [
            {
              "range": {
                "@timestamp": {
                  "gte": "2024-06-12 15:01:01",
                  "lte": "2024-06-12 21:21:01",
                  "time_zone": "Asia/Shanghai",
                  "format": "yyyy-MM-dd HH:mm:ss"
                }
              }
            },
            {
              "term": {
                "applicationName.keyword": "A服务"
              }
            }
          ]
        }
      },
      "size": 0,
      "aggs": {
        "group_by_systemId": {
          "terms": {
            "field": "systemId.keyword",
            "size": 5,
            "order": [
              {
                "_count": "desc"
              }
            ]
          },
          "aggs": {
            "group_by_requestUri": {
              "terms": {
                "field": "requestUri.keyword",
                "size": 2,
                "order": [
                  {
                    "_count": "desc"
                  }
                ]
              }
            },
            "group_by_userIp": {
              "terms": {
                "field": "userIp.keyword",
                "size": 2,
                "order": [
                  {
                    "_count": "desc"
                  }
                ]
              }
            }
          }
        }
      }
    }

结果：

{

    "took": 1855,

    "timed_out": false,

    "_shards": {

        "total": 161,

        "successful": 161,

        "skipped": 78,

        "failed": 0

    },

    "hits": {

        "total": {

            "value": 10000,

            "relation": "gte"

        },

        "max_score": null,

        "hits": []

    },

    "aggregations": {

        "group_by_systemId": {

            "doc_count_error_upper_bound": 851,

            "sum_other_doc_count": 46777,

            "buckets": [

                {

                    "key": "000",

                    "doc_count": 12535,

                    "group_by_requestUri": {

                        "doc_count_error_upper_bound": 148,

                        "sum_other_doc_count": 6559,

                        "buckets": [

                            {

                                "key": "/user/v1/login",

                                "doc_count": 5061

                            },

                            {

                                "key": "/user/v1/info",

                                "doc_count": 915

                            }

                        ]

                    },

                    "group_by_userIp": {

                        "doc_count_error_upper_bound": 0,

                        "sum_other_doc_count": 27,

                        "buckets": [

                            {

                                "key": "10.178.152.0",

                                "doc_count": 10848

                            },

                            {

                                "key": "192.168.250.250",

                                "doc_count": 539

                            }

                        ]

                    }

                },

                {

                    "key": "069",

                    "doc_count": 11476,

                    "group_by_requestUri": {

                        "doc_count_error_upper_bound": 152,

                        "sum_other_doc_count": 10285,

                        "buckets": [

                            {

                                "key": "/ocrmmanage/v1/system/org/path/16",

                                "doc_count": 898

                            },

                            {

                                "key": "/ocrmlz/v1/pc/email/queryorgname",

                                "doc_count": 293

                            }

                        ]

                    },

                    "group_by_userIp": {

                        "doc_count_error_upper_bound": 3,

                        "sum_other_doc_count": 1177,

                        "buckets": [

                            {

                                "key": "10.0.0.182",

                                "doc_count": 1654

                            },

                            {

                                "key": "192.168.236.165",

                                "doc_count": 1208

                            }

                        ]

                    }

                }

            ]

        }

    }

}

四：ES查询时只能查询10000条数据解决方案

总的来说：设置 "track_total_hits": true 会返回真实的总条数；设为 false 则不显示总数量；不设置时总数最多只统计到10000（超过时 hits.total.value 显示为10000，relation 为 "gte"）

参考文档： ES查询时只能查询10000条数据解决方案_es查询超过一万怎么解决-CSDN博客

五：es默认检索数据为1w，那么在查询后聚合的数据也是限制在1w内吗？

不限制

虽然Elasticsearch默认设置中的某些参数（如index.max_result_window）可能暗示了对返回数据数量的限制，但这并不代表聚合分析只能针对10000条数据进行。例如上面“三：并列分组”的结果中，hits.total.value只显示10000（relation为gte），但systemId为"000"的分组doc_count就有12535

六：关于doc_count_error_upper_bound和sum_other_doc_count

举个例子：水果种类有100种，当size为20时只展示前20种，其余80种不会在聚合结果中展示出来，

则：sum_other_doc_count代表的是其他相关却未展示的文档的数量，例如因为在每个分片只取top前20，未取到的后80种水果所聚合的文档数量

doc_count_error_upper_bound：则是有可能与前20有关但是没有返回的文档数量，表示没有在这次聚合中返回、但是可能存在的潜在聚合结果。比如可能还有另一个种类的水果，实际上或许应该排在第19位

"aggs": {
"group_by_systemId": {
"terms": {
"field": "水果种类",
"size": 20,
"order": [
{
"_count": "desc"
}
]
}
       }
   }

参考：Elasticsearch核心技术与实战学习笔记_doccounterrorupperbound-CSDN博客

七：query_string加上\" 用的是短语查询还是关键词查询？

用的是短语查询

假设message里有很多信息，我们随便挑一个

"message": "【API网关】【请求错误】【请求地址】【/monitor/health】 \n【响应状态】 : 401 【响应信息】：Full authentication is required to access this resource\n",

因为message经过分词后，是不可能分出这样一个完整的关键词的："应信息】：Full authentication is required"

{
      "query": {
        "bool": {
          "filter": [
              {
                "term": {
                 "message":"应信息】：Full authentication is required "
    
                }
              },
            {
              "range": {
                "@timestamp": {
                  "gte": "2024-06-12 15:01:01",
                  "lte": "2024-06-12 21:21:01",
                  "time_zone": "Asia/Shanghai",
                  "format": "yyyy-MM-dd HH:mm:ss"
                }
              }
            }
          ]
        }
      }
    }

上述查询出的结果为空

但是如果用下面语法是可以查出结果的：

注意：要用message，不能用message.keyword

{
      "query": {
        "bool": {
          "filter": [
              {
                "query_string": {
                  "query": "message:\"应信息】：Full authentication is required \"",
                  "analyze_wildcard": true,
                  "time_zone": "Asia/Shanghai"
                }
              },
            {
              "range": {
                "@timestamp": {
                  "gte": "2024-06-12 15:01:01",
                  "lte": "2024-06-12 21:21:01",
                  "time_zone": "Asia/Shanghai",
                  "format": "yyyy-MM-dd HH:mm:ss"
                }
              }
            }
          ]
        }
      }
    }