Skip to content

Commit be703a0

Browse files
authored
Switch IVF Writer to ES Logger (#129224)
update to use ES logger instead of infostream and fixing native access warnings
1 parent 916cd05 commit be703a0

File tree

9 files changed

+51
-105
lines changed

9 files changed

+51
-105
lines changed

qa/vector/build.gradle

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
99

10+
import org.elasticsearch.gradle.internal.test.TestUtil
11+
1012
apply plugin: 'elasticsearch.java'
1113
apply plugin: 'elasticsearch.build'
1214

@@ -23,6 +25,8 @@ dependencies {
2325
api "org.apache.lucene:lucene-core:${versions.lucene}"
2426
api "org.apache.lucene:lucene-queries:${versions.lucene}"
2527
api "org.apache.lucene:lucene-codecs:${versions.lucene}"
28+
implementation project(':libs:simdvec')
29+
implementation project(':libs:native')
2630
implementation project(':libs:logging')
2731
implementation project(':server')
2832
}
@@ -37,6 +41,7 @@ tasks.register("checkVec", JavaExec) {
3741
// Configure logging to console
3842
systemProperty "es.logger.out", "console"
3943
systemProperty "es.logger.level", "INFO" // Change to DEBUG if needed
44+
systemProperty 'es.nativelibs.path', TestUtil.getTestLibraryPath(file("../../libs/native/libraries/build/platform/").toString())
4045

4146
if (buildParams.getRuntimeJavaVersion().map { it.majorVersion.toInteger() }.get() >= 21) {
4247
jvmArgs '-Xms4g', '-Xmx4g', '--add-modules=jdk.incubator.vector', '--enable-native-access=ALL-UNNAMED', '-Djava.util.concurrent.ForkJoinPool.common.parallelism=8', '-XX:+UnlockDiagnosticVMOptions', '-XX:+DebugNonSafepoints', '-XX:+HeapDumpOnOutOfMemoryError'

qa/vector/src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
requires org.elasticsearch.base;
1212
requires org.elasticsearch.server;
1313
requires org.elasticsearch.xcontent;
14+
requires org.elasticsearch.cli;
1415
requires org.apache.lucene.core;
1516
requires org.apache.lucene.codecs;
1617
requires org.apache.lucene.queries;

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java

Lines changed: 17 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,19 @@
1515
import org.apache.lucene.codecs.KnnVectorsFormat;
1616
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
1717
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
18+
import org.elasticsearch.cli.ProcessInfo;
1819
import org.elasticsearch.common.Strings;
1920
import org.elasticsearch.common.logging.LogConfigurator;
21+
import org.elasticsearch.common.settings.Settings;
2022
import org.elasticsearch.core.PathUtils;
2123
import org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat;
2224
import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat;
2325
import org.elasticsearch.index.codec.vectors.IVFVectorsFormat;
2426
import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat;
2527
import org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat;
2628
import org.elasticsearch.logging.Level;
29+
import org.elasticsearch.logging.LogManager;
30+
import org.elasticsearch.logging.Logger;
2731
import org.elasticsearch.xcontent.XContentParser;
2832
import org.elasticsearch.xcontent.XContentParserConfiguration;
2933
import org.elasticsearch.xcontent.XContentType;
@@ -35,19 +39,26 @@
3539
import java.util.ArrayList;
3640
import java.util.List;
3741
import java.util.Locale;
42+
import java.util.Map;
3843

3944
/**
4045
* A utility class to create and test KNN indices using Lucene.
4146
* It supports various index types (HNSW, FLAT, IVF) and configurations.
4247
*/
4348
public class KnnIndexTester {
44-
static final Level LOG_LEVEL = Level.DEBUG;
45-
46-
static final SysOutLogger logger = new SysOutLogger();
49+
static final Logger logger;
4750

4851
static {
4952
LogConfigurator.loadLog4jPlugins();
50-
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
53+
54+
// necessary otherwise the es.logger.level system configuration in build.gradle is ignored
55+
ProcessInfo pinfo = ProcessInfo.fromSystem();
56+
Map<String, String> sysprops = pinfo.sysprops();
57+
String loggerLevel = sysprops.getOrDefault("es.logger.level", Level.INFO.name());
58+
Settings settings = Settings.builder().put("logger.level", loggerLevel).build();
59+
LogConfigurator.configureWithoutConfig(settings);
60+
61+
logger = LogManager.getLogger(KnnIndexTester.class);
5162
}
5263

5364
static final String INDEX_DIR = "target/knn_index";
@@ -163,7 +174,7 @@ public static void main(String[] args) throws Exception {
163174
FormattedResults formattedResults = new FormattedResults();
164175
for (CmdLineArgs cmdLineArgs : cmdLineArgsList) {
165176
Results result = new Results(cmdLineArgs.indexType().name().toLowerCase(Locale.ROOT), cmdLineArgs.numDocs());
166-
System.out.println("Running KNN index tester with arguments: " + cmdLineArgs);
177+
logger.info("Running KNN index tester with arguments: " + cmdLineArgs);
167178
Codec codec = createCodec(cmdLineArgs);
168179
Path indexPath = PathUtils.get(formatIndexPath(cmdLineArgs));
169180
if (cmdLineArgs.reindex() || cmdLineArgs.forceMerge()) {
@@ -195,8 +206,7 @@ public static void main(String[] args) throws Exception {
195206
}
196207
formattedResults.results.add(result);
197208
}
198-
System.out.println("Results:");
199-
System.out.println(formattedResults);
209+
logger.info("Results: \n" + formattedResults);
200210
}
201211

202212
static class FormattedResults {
@@ -326,57 +336,6 @@ static class Results {
326336
}
327337
}
328338

329-
static final class SysOutLogger {
330-
331-
void warn(String message) {
332-
if (LOG_LEVEL.ordinal() >= Level.WARN.ordinal()) {
333-
System.out.println(message);
334-
}
335-
}
336-
337-
void warn(String message, Object... params) {
338-
if (LOG_LEVEL.ordinal() >= Level.WARN.ordinal()) {
339-
System.out.println(String.format(Locale.ROOT, message, params));
340-
}
341-
}
342-
343-
void info(String message) {
344-
if (LOG_LEVEL.ordinal() >= Level.INFO.ordinal()) {
345-
System.out.println(message);
346-
}
347-
}
348-
349-
void info(String message, Object... params) {
350-
if (LOG_LEVEL.ordinal() >= Level.INFO.ordinal()) {
351-
System.out.println(String.format(Locale.ROOT, message, params));
352-
}
353-
}
354-
355-
void debug(String message) {
356-
if (LOG_LEVEL.ordinal() >= Level.DEBUG.ordinal()) {
357-
System.out.println(message);
358-
}
359-
}
360-
361-
void debug(String message, Object... params) {
362-
if (LOG_LEVEL.ordinal() >= Level.DEBUG.ordinal()) {
363-
System.out.println(String.format(Locale.ROOT, message, params));
364-
}
365-
}
366-
367-
void trace(String message) {
368-
if (LOG_LEVEL == Level.TRACE) {
369-
System.out.println(message);
370-
}
371-
}
372-
373-
void trace(String message, Object... params) {
374-
if (LOG_LEVEL == Level.TRACE) {
375-
System.out.println(String.format(Locale.ROOT, message, params));
376-
}
377-
}
378-
}
379-
380339
static final class ThreadDetails {
381340
private static final ThreadMXBean threadBean = (ThreadMXBean) java.lang.management.ManagementFactory.getThreadMXBean();
382341
public final long[] threadIDs;

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexer.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -117,15 +117,15 @@ public boolean isEnabled(String component) {
117117
}
118118
});
119119
logger.debug(
120-
"KnnIndexer: using codec=%s, vectorEncoding=%s, dim=%d, similarityFunction=%s",
120+
"KnnIndexer: using codec={}, vectorEncoding={}, dim={}, similarityFunction={}",
121121
codec.getName(),
122122
vectorEncoding,
123123
dim,
124124
similarityFunction
125125
);
126126

127127
if (Files.exists(indexPath)) {
128-
logger.debug("KnnIndexer: existing index at %s", indexPath);
128+
logger.debug("KnnIndexer: existing index at {}", indexPath);
129129
} else {
130130
Files.createDirectories(indexPath);
131131
}
@@ -143,7 +143,7 @@ public boolean isEnabled(String component) {
143143
);
144144
}
145145
logger.info(
146-
"docsPathSizeInBytes=%d, dim=%d, vectorEncoding=%s, byteSize=%d",
146+
"docsPathSizeInBytes={}, dim={}, vectorEncoding={}, byteSize={}",
147147
docsPathSizeInBytes,
148148
dim,
149149
vectorEncoding,
@@ -170,7 +170,7 @@ public boolean isEnabled(String component) {
170170
}
171171

172172
long elapsed = System.nanoTime() - start;
173-
logger.debug("Indexing took %d ms for %d docs", TimeUnit.NANOSECONDS.toMillis(elapsed), numDocs);
173+
logger.debug("Indexing took {} ms for {} docs", TimeUnit.NANOSECONDS.toMillis(elapsed), numDocs);
174174
result.indexTimeMS = TimeUnit.NANOSECONDS.toMillis(elapsed);
175175
}
176176

@@ -183,14 +183,14 @@ public boolean isEnabled(String component) {
183183
}
184184
});
185185
iwc.setCodec(codec);
186-
logger.debug("KnnIndexer: forceMerge in %s", indexPath);
186+
logger.debug("KnnIndexer: forceMerge in {}", indexPath);
187187
long startNS = System.nanoTime();
188188
try (IndexWriter iw = new IndexWriter(FSDirectory.open(indexPath), iwc)) {
189189
iw.forceMerge(1);
190190
}
191191
long endNS = System.nanoTime();
192192
long elapsedNSec = (endNS - startNS);
193-
logger.info("forceMerge took %d ms", TimeUnit.NANOSECONDS.toMillis(elapsedNSec));
193+
logger.info("forceMerge took {} ms", TimeUnit.NANOSECONDS.toMillis(elapsedNSec));
194194
results.forceMergeTimeMS = TimeUnit.NANOSECONDS.toMillis(elapsedNSec);
195195
}
196196

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnSearcher.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ void runSearch(KnnIndexTester.Results finalResults) throws IOException {
181181
resultIds[i] = getResultIds(results[i], storedFields);
182182
}
183183
logger.info(
184-
"completed %d searches in %d ms: %d QPS CPU time=%dms",
184+
"completed {} searches in {} ms: {} QPS CPU time={}ms",
185185
numQueryVectors,
186186
elapsed,
187187
(1000L * numQueryVectors) / elapsed,

server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java

Lines changed: 19 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,12 @@
1717
import org.apache.lucene.internal.hppc.IntArrayList;
1818
import org.apache.lucene.store.IndexInput;
1919
import org.apache.lucene.store.IndexOutput;
20-
import org.apache.lucene.util.InfoStream;
2120
import org.apache.lucene.util.VectorUtil;
2221
import org.apache.lucene.util.quantization.OptimizedScalarQuantizer;
2322
import org.elasticsearch.index.codec.vectors.cluster.HierarchicalKMeans;
2423
import org.elasticsearch.index.codec.vectors.cluster.KMeansResult;
24+
import org.elasticsearch.logging.LogManager;
25+
import org.elasticsearch.logging.Logger;
2526
import org.elasticsearch.simdvec.ES91OSQVectorsScorer;
2627

2728
import java.io.IOException;
@@ -31,14 +32,14 @@
3132
import static org.apache.lucene.codecs.lucene102.Lucene102BinaryQuantizedVectorsFormat.INDEX_BITS;
3233
import static org.apache.lucene.util.quantization.OptimizedScalarQuantizer.discretize;
3334
import static org.apache.lucene.util.quantization.OptimizedScalarQuantizer.packAsBinary;
34-
import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.IVF_VECTOR_COMPONENT;
3535

3636
/**
3737
* Default implementation of {@link IVFVectorsWriter}. It uses {@link HierarchicalKMeans} algorithm to
3838
* partition the vector space, and then stores the centroids and posting list in a sequential
3939
* fashion.
4040
*/
4141
public class DefaultIVFVectorsWriter extends IVFVectorsWriter {
42+
private static final Logger logger = LogManager.getLogger(DefaultIVFVectorsWriter.class);
4243

4344
private final int vectorPerCluster;
4445

@@ -53,7 +54,6 @@ long[] buildAndWritePostingsLists(
5354
CentroidSupplier centroidSupplier,
5455
FloatVectorValues floatVectorValues,
5556
IndexOutput postingsOutput,
56-
InfoStream infoStream,
5757
IntArrayList[] assignmentsByCluster
5858
) throws IOException {
5959
// write the posting lists
@@ -79,14 +79,14 @@ long[] buildAndWritePostingsLists(
7979
writePostingList(cluster, postingsOutput, binarizedByteVectorValues);
8080
}
8181

82-
if (infoStream.isEnabled(IVF_VECTOR_COMPONENT)) {
83-
printClusterQualityStatistics(assignmentsByCluster, infoStream);
82+
if (logger.isDebugEnabled()) {
83+
printClusterQualityStatistics(assignmentsByCluster);
8484
}
8585

8686
return offsets;
8787
}
8888

89-
private static void printClusterQualityStatistics(IntArrayList[] clusters, InfoStream infoStream) {
89+
private static void printClusterQualityStatistics(IntArrayList[] clusters) {
9090
float min = Float.MAX_VALUE;
9191
float max = Float.MIN_VALUE;
9292
float mean = 0;
@@ -105,20 +105,14 @@ private static void printClusterQualityStatistics(IntArrayList[] clusters, InfoS
105105
max = Math.max(max, cluster.size());
106106
}
107107
float variance = m2 / (clusters.length - 1);
108-
infoStream.message(
109-
IVF_VECTOR_COMPONENT,
110-
"Centroid count: "
111-
+ clusters.length
112-
+ " min: "
113-
+ min
114-
+ " max: "
115-
+ max
116-
+ " mean: "
117-
+ mean
118-
+ " stdDev: "
119-
+ Math.sqrt(variance)
120-
+ " variance: "
121-
+ variance
108+
logger.debug(
109+
"Centroid count: {} min: {} max: {} mean: {} stdDev: {} variance: {}",
110+
clusters.length,
111+
min,
112+
max,
113+
mean,
114+
Math.sqrt(variance),
115+
variance
122116
);
123117
}
124118

@@ -208,17 +202,16 @@ CentroidAssignments calculateAndWriteCentroids(
208202
float[] globalCentroid
209203
) throws IOException {
210204
// TODO: take advantage of prior generated clusters from mergeState in the future
211-
return calculateAndWriteCentroids(fieldInfo, floatVectorValues, centroidOutput, mergeState.infoStream, globalCentroid, false);
205+
return calculateAndWriteCentroids(fieldInfo, floatVectorValues, centroidOutput, globalCentroid, false);
212206
}
213207

214208
CentroidAssignments calculateAndWriteCentroids(
215209
FieldInfo fieldInfo,
216210
FloatVectorValues floatVectorValues,
217211
IndexOutput centroidOutput,
218-
InfoStream infoStream,
219212
float[] globalCentroid
220213
) throws IOException {
221-
return calculateAndWriteCentroids(fieldInfo, floatVectorValues, centroidOutput, infoStream, globalCentroid, true);
214+
return calculateAndWriteCentroids(fieldInfo, floatVectorValues, centroidOutput, globalCentroid, true);
222215
}
223216

224217
/**
@@ -228,7 +221,6 @@ CentroidAssignments calculateAndWriteCentroids(
228221
* @param fieldInfo merging field info
229222
* @param floatVectorValues the float vector values to merge
230223
* @param centroidOutput the centroid output
231-
* @param infoStream the merge state
232224
* @param globalCentroid the global centroid, calculated by this method and used to quantize the centroids
233225
* @param cacheCentroids whether the centroids are kept or discarded once computed
234226
* @return the vector assignments, soar assignments, and if asked the centroids themselves that were computed
@@ -238,7 +230,6 @@ CentroidAssignments calculateAndWriteCentroids(
238230
FieldInfo fieldInfo,
239231
FloatVectorValues floatVectorValues,
240232
IndexOutput centroidOutput,
241-
InfoStream infoStream,
242233
float[] globalCentroid,
243234
boolean cacheCentroids
244235
) throws IOException {
@@ -266,12 +257,9 @@ CentroidAssignments calculateAndWriteCentroids(
266257
// write centroids
267258
writeCentroids(centroids, fieldInfo, globalCentroid, centroidOutput);
268259

269-
if (infoStream.isEnabled(IVF_VECTOR_COMPONENT)) {
270-
infoStream.message(
271-
IVF_VECTOR_COMPONENT,
272-
"calculate centroids and assign vectors time ms: " + ((System.nanoTime() - nanoTime) / 1000000.0)
273-
);
274-
infoStream.message(IVF_VECTOR_COMPONENT, "final centroid count: " + centroids.length);
260+
if (logger.isDebugEnabled()) {
261+
logger.debug("calculate centroids and assign vectors time ms: {}", (System.nanoTime() - nanoTime) / 1000000.0);
262+
logger.debug("final centroid count: {}", centroids.length);
275263
}
276264

277265
IntArrayList[] assignmentsByCluster = new IntArrayList[centroids.length];

server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,8 +242,8 @@ static final class ESFlatVectorsScorer implements FlatVectorsScorer {
242242
final FlatVectorsScorer delegate;
243243
final VectorScorerFactory factory;
244244

245-
ESFlatVectorsScorer(FlatVectorsScorer delegte) {
246-
this.delegate = delegte;
245+
ESFlatVectorsScorer(FlatVectorsScorer delegate) {
246+
this.delegate = delegate;
247247
factory = VectorScorerFactory.instance().orElse(null);
248248
}
249249

server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormat.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545
*/
4646
public class IVFVectorsFormat extends KnnVectorsFormat {
4747

48-
public static final String IVF_VECTOR_COMPONENT = "IVF";
4948
public static final String NAME = "IVFVectorsFormat";
5049
// centroid ordinals -> centroid values, offsets
5150
public static final String CENTROID_EXTENSION = "cenivf";

0 commit comments

Comments
 (0)