（五）elasticsearch 源码之查询流程分析

最新推荐文章于 2025-06-23 14:57:56 发布

大叶子不小

最新推荐文章于 2025-06-23 14:57:56 发布

阅读量1.4k

点赞数 8

CC 4.0 BY-SA版权

文章标签： elasticsearch jenkins 大数据

本文链接：https://blog.csdn.net/qq_32907195/article/details/136044397

https://www.cnblogs.com/darcy-yuan/p/17039526.html

1.概述

上文我们讨论了es（elasticsearch，下同）索引流程，本文讨论es查询流程，以下是基本流程图

2.查询流程

为了方便调试代码，笔者在电脑上启动了两个节点，并创建了一个索引（如下），该索引有两个主分片，没有副本分片。

使用postman发送如下请求：

接下来，我们看代码（本系列文章源代码版本为7.4.0），search查询也是rest请求

// org.elasticsearch.action.support.TransportAction.RequestFilterChain

        /**
         * Advances the chain by one step: reads and increments {@code index}, then either
         * applies the filter at that position, executes the action itself once the position
         * equals the number of filters, or reports an error when called beyond that point.
         *
         * Any exception thrown while dispatching is logged at trace level and forwarded to
         * {@code listener.onFailure}.
         *
         * @param task       the task passed through to the filter or the action
         * @param actionName the action name passed to the filter
         * @param request    the request being processed
         * @param listener   receives failures raised here; also handed to the filter/action
         */
        public void proceed(Task task, String actionName, Request request, ActionListener<Response> listener) {
            final int position = index.getAndIncrement();
            try {
                final int filterCount = this.action.filters.length;
                if (position > filterCount) {
                    // the action has already been dispatched once; a further call is a misuse
                    listener.onFailure(new IllegalStateException("proceed was called too many times"));
                    return;
                }
                if (position == filterCount) {
                    // every filter has been applied: run the action itself
                    this.action.doExecute(task, request, listener);
                    return;
                }
                // filters are handled first; each one is given this chain so it can continue it
                this.action.filters[position].apply(task, actionName, request, listener, this);
            } catch (Exception e) {
                logger.trace("Error during transport action execution.", e);
                listener.onFailure(e);
            }
        }

具体执行操作的是 TransportSearchAction，TransportSearchAction 对查询索引的顺序做了一些优化，我们这里用的是 QUERY_THEN_FETCH

// org.elasticsearch.action.search.TransportSearchAction    

    /**
     * Executes a search request.
     *
     * Flow, as visible in this method:
     * 1. Capture the relative start time and build a {@code SearchTimeProvider}.
     * 2. If the request has a source, rewrite it asynchronously via
     *    {@code Rewriteable.rewriteAndFetch}; otherwise invoke the rewrite listener directly.
     * 3. In the rewrite callback, group the requested indices by cluster and remove the
     *    local-cluster entry from that map. Then:
     *    - no remote entries left: {@code executeLocalSearch};
     *    - {@code shouldMinimizeRoundtrips} is true: {@code ccsRemoteReduce};
     *    - otherwise: {@code collectSearchShards} on the remote clusters, then
     *      {@code executeSearch} with the remote shard iterators.
     *
     * Failures from the rewrite step and from shard collection are forwarded to
     * {@code listener::onFailure}.
     *
     * @param task          the task associated with this search (cast to {@code SearchTask} on the
     *                      shard-collection path)
     * @param searchRequest the search request; its source is replaced when the rewrite yields a
     *                      different instance
     * @param listener      receives the final {@code SearchResponse} or the failure
     */
    protected void doExecute(Task task, SearchRequest searchRequest, ActionListener<SearchResponse> listener) {
        final long relativeStartNanos = System.nanoTime();
        final SearchTimeProvider timeProvider =
             new SearchTimeProvider(searchRequest.getOrCreateAbsoluteStartMillis(), relativeStartNanos, System::nanoTime);
        ActionListener<SearchSourceBuilder> rewriteListener = ActionListener.wrap(source -> {
            if (source != searchRequest.source()) {
                // only set it if it changed - we don't allow null values to be set but it might be already null. this way we catch
                // situations when source is rewritten to null due to a bug
                searchRequest.source(source);
            }
            final ClusterState clusterState = clusterService.state();
            final Map<String, OriginalIndices> remoteClusterIndices = remoteClusterService.groupIndices(searchRequest.indicesOptions(),
                searchRequest.indices(), idx -> indexNameExpressionResolver.hasIndexOrAlias(idx, clusterState));
            // remove() takes the local-cluster entry out of the map, so only remote clusters remain in it
            OriginalIndices localIndices = remoteClusterIndices.remove(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY);
            if (remoteClusterIndices.isEmpty()) {
                executeLocalSearch(task, timeProvider, searchRequest, localIndices, clusterState, listener); // no remote clusters involved: search locally
            } else {
                // NOTE(review): the original note here said "uses collapse"; the actual criteria live in
                // shouldMinimizeRoundtrips, which is not shown here - confirm before relying on that
                if (shouldMinimizeRoundtrips(searchRequest)) {
                    ccsRemoteReduce(searchRequest, localIndices, remoteClusterIndices, timeProvider, searchService::createReduceContext,
                        remoteClusterService, threadPool, listener,
                        (r, l) -> executeLocalSearch(task, timeProvider, r, localIndices, clusterState, l));
                } else {
                    AtomicInteger skippedClusters = new AtomicInteger(0);
                    collectSearchShards(searchRequest.indicesOptions(), searchRequest.preference(), searchRequest.routing(),
                        skippedClusters, remoteClusterIndices, remoteClusterService, threadPool,
                        ActionListener.wrap(
                            searchShardsResponses -> {
                                List<SearchShardIterator> remoteShardIterators = new ArrayList<>();
                                Map<String, AliasFilter> remoteAliasFilters = new HashMap<>();
                                BiFunction<String, String, DiscoveryNode> clusterNodeLookup = processRemoteShards(
                                    searchShardsResponses, remoteClusterIndices, remoteShardIterators, remoteAliasFilters);
                                // the local cluster counts as one cluster only when local indices were requested
                                int localClusters = localIndices == null ? 0 : 1;
                                int totalClusters = remoteClusterIndices.size() + localClusters;
                                int successfulClusters = searchShardsResponses.size() + localClusters;
                                executeSearch((SearchTask) task, timeProvider, searchRequest, localIndices,
                                    remoteShardIterators, clusterNodeLookup, clusterState, remoteAliasFilters, listener,
                                    new SearchResponse.Clusters(totalClusters, successfulClusters, skippedClusters.get()));
                            },
                            listener::onFailure));
                }
            }
        }, listener::onFailure);
        if (searchRequest.source() == null) {
            // nothing to rewrite: invoke the callback directly with the (null) source
            rewriteListener.onResponse(searchRequest.source());
        } else {
            Rewriteable.rewriteAndFetch(searchRequest.source(), searchService.getRewriteContext(timeProvider::getAbsoluteStartMillis),
                rewriteListener); // rewriteListener is called back after the rewrite
        }
    }

...
    private void executeSearch(SearchTask task, SearchTimeProvider timeProvider, SearchRequest searchRequest,
                               OriginalIndices localIndices, List<SearchShardIterator> remoteShardIterators,
                               BiFunction<String, String, DiscoveryNode> remoteConnections, ClusterState clusterState,
                               Map<String, AliasFilter> remoteAliasMap, ActionListener<SearchResponse> listener,
                               SearchResponse.Clusters clusters) {

        clusterState.blocks().globalBlockedRaiseException(ClusterBlockLevel.READ); // 读锁
        // TODO: I think startTime() should become part of ActionRequest and that should be used both for index name
        // date math expressions and $now in scripts. This way all apis will deal with now in the same way instead
        // of just for the _search api
        final Index[] indices = resolveLocalIndices(localIndices, searchRequest.indicesOptions(), clusterState, timeProvider);
        Map<String, AliasFilter> aliasFilter = buildPerIndexAliasFilter(searchRequest, clusterState, indices, remoteAliasMap);
        Map<String, Set<String>> routingMap = indexNameExpressionResolver.resolveSearchRouting(clusterState, searchRequest.routing(),
            searchRequest.indices());
        routingMap = routingMap == null ? Collections.emptyMap() : Collections.unmodifiableMap(routingMap);
        Map<String, Float> concreteIndexBoosts = resolveIndexBoosts(searchRequest, clusterState);

        if (shouldSplitIndices(searchRequest)) { // 分开查询只读索引和在写索引，并且优先查在写索引
            //Execute two separate searches when we can, so that indices that are being written to are searched as quickly as possible.
            //Otherwise their search context would need to stay open for too long between the query and the fetch phase, due to other
            //indices (possibly slower) being searched at the same time.
            List<String> writeIndicesList = new ArrayList<>();
            List<String> readOnlyIndicesList = new ArrayList<>();
            splitIndices(indices, clusterState, writeIndicesList, readOnlyIndicesList);
            String[] writeIndices = writeIndicesList.toArray(new String[0]);
            String[] readOnlyIndices = readOnlyIndicesList.toArray(new String[0]);

            if (readOnlyIndices.length == 0) {
                executeSearch(task, timeProvider, searchRequest, localIndices, writeIndices, routingMap,
                    aliasFilter, concreteIndexBoosts, remoteShardIterators, remoteConnections, clusterState, listener, clusters);
            } else if (writeIndices.length == 0 && remoteShardIterators.isEmpty()) {
                executeSearch(task, timeProvider, searchRequest, localIndices, readOnlyIndices, routingMap,
                    aliasFilter, concreteIndexBoosts, remoteShardIterators, remoteConne

最低0.47元/天解锁文章

200万优质内容无限畅学