/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.orc;

import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.orc.OrcFile.Version;
import org.apache.orc.OrcFile.WriterOptions;
import org.apache.orc.impl.DataReaderProperties;
import org.apache.orc.impl.InStream;
import org.apache.orc.impl.KeyProvider;
import org.apache.orc.impl.MemoryManagerImpl;
import org.apache.orc.impl.OrcCodecPool;
import org.apache.orc.impl.OrcIndex;
import org.apache.orc.impl.ReaderImpl;
import org.apache.orc.impl.RecordReaderImpl;
import org.apache.orc.impl.RecordReaderUtils;
import org.apache.orc.impl.WriterImpl;
import org.apache.orc.impl.reader.ReaderEncryption;
import org.apache.orc.impl.reader.StripePlanner;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.TestInfo;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.mockito.Mockito;
import java.io.File;
import java.io.IOException;
import java.math.BigInteger;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.sql.Date;
import java.sql.Timestamp;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.TimeZone;
import java.util.UUID;
import java.util.function.IntFunction;
import java.util.stream.Stream;

import static org.apache.orc.impl.ReaderImpl.DEFAULT_COMPRESSION_BLOCK_SIZE;
import static org.apache.orc.impl.mask.SHA256MaskFactory.printHexBinary;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNotSame;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertSame;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
import static org.junit.jupiter.api.Assumptions.assumeTrue;

/**
* Tests for the vectorized reader and writer for ORC files.
*/
public class TestVectorOrcFile {
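/**
* Supplies the ORC file format versions used to parameterize each test.
*/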
private static Stream<Arguments> data() {
return Stream.of(
Arguments.of(Version.V_0_11),
Arguments.of(Version.V_0_12),
Arguments.of(Version.UNSTABLE_PRE_2_0)
);
}
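/**
* Resolves the named resource on the classpath and returns its filesystem
* path, failing fast if the resource cannot be found.
*/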
public static String getFileFromClasspath(String name) {
URL url = ClassLoader.getSystemResource(name);
if (url == null) {
throw new IllegalArgumentException("Could not find " + name);
}
return url.getPath();
}
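/**
* A simple struct of an int field and a string field, used as the innermost
* element of the test schemas.
*/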
public static class InnerStruct {
int int1;
Text string1 = new Text();
InnerStruct(int int1, Text string1) {
this.int1 = int1;
this.string1.set(string1);
}
InnerStruct(int int1, String string1) {
this.int1 = int1;
this.string1.set(string1);
}
@Override
public String toString() {
return "{" + int1 + ", " + string1 + "}";
}
}
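/**
* A struct wrapping a list of InnerStruct values.
*/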
public static class MiddleStruct {
List<InnerStruct> list = new ArrayList<>();
MiddleStruct(InnerStruct... items) {
list.addAll(Arrays.asList(items));
}
}
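/**
* Shorthand factory for an InnerStruct.
*/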
private static InnerStruct inner(int i, String s) {
return new InnerStruct(i, s);
}
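/**
* Builds a map keyed by each item's string field.
*/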
private static Map<String, InnerStruct> map(InnerStruct... items) {
Map<String, InnerStruct> result = new HashMap<>();
for(InnerStruct i: items) {
result.put(i.string1.toString(), i);
}
return result;
}
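/**
* Builds a mutable list from the given items.
*/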
private static List<InnerStruct> list(InnerStruct... items) {
return new ArrayList<>(Arrays.asList(items));
}
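/**
* Builds a BytesWritable whose contents are the low-order bytes of the
* given ints.
*/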
protected static BytesWritable bytes(int... items) {
BytesWritable result = new BytesWritable();
result.setSize(items.length);
for(int i=0; i < items.length; ++i) {
result.getBytes()[i] = (byte) items[i];
}
return result;
}
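/**
* Builds a byte array from the low-order bytes of the given ints.
*/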
protected static byte[] bytesArray(int... items) {
byte[] result = new byte[items.length];
for(int i=0; i < items.length; ++i) {
result[i] = (byte) items[i];
}
return result;
}
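/**
* Builds a flipped ByteBuffer, ready for reading, from the low-order bytes
* of the given ints.
*/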
private static ByteBuffer byteBuf(int... items) {
ByteBuffer result = ByteBuffer.allocate(items.length);
for(int item: items) {
result.put((byte) item);
}
result.flip();
return result;
}
Path workDir = new Path(System.getProperty("test.tmp.dir",
"target" + File.separator + "test" + File.separator + "tmp"));
Configuration conf;
FileSystem fs;
Path testFilePath;
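/**
* Sets up the local filesystem and a unique per-test ORC output path,
* deleting any leftover file from a previous run.
*/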
@BeforeEach
public void openFileSystem(TestInfo testInfo) throws Exception {
conf = new Configuration();
fs = FileSystem.getLocal(conf);
testFilePath = new Path(workDir, "TestVectorOrcFile." +
testInfo.getTestMethod().get().getName().replaceFirst("\\[[0-9]+\\]", "")
+ "." + UUID.randomUUID() + ".orc");
fs.delete(testFilePath, false);
}
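/**
* Reads the pre-generated orc-file-11-format.orc file and verifies its
* stripe layout; skipped for file format versions other than 0.11.
*/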
@ParameterizedTest
@MethodSource("data")
public void testReadFormat_0_11(Version fileFormat) throws Exception {
assumeTrue(fileFormat == Version.V_0_11);
Path oldFilePath =
new Path(getFileFromClasspath("orc-file-11-format.orc"));
Reader reader = OrcFile.createReader(oldFilePath,
OrcFile.readerOptions(conf).filesystem(fs));
int stripeCount = 0;
int rowCount = 0;
long currentOffset = -1;
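// Track the expected start offset of the next stripe as the stripes are walked.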
for(StripeInformation stripe : reader.getStripes()) {
stripeCount += 1;
rowCount += stripe.getNumberOfRows();
if (currentOffset < 0) {
currentOffset = stripe.getOffset() + stripe.getIndexLength() +
stripe.getDataLength() + stripe.getFooterLength();