package org.apache.pinot.hadoop.job.preprocess;

import com.google.common.base.Preconditions;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.orc.OrcConf;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;
import org.apache.orc.mapred.OrcStruct;
import org.apache.orc.mapred.OrcValue;
import org.apache.orc.mapreduce.OrcInputFormat;
import org.apache.orc.mapreduce.OrcOutputFormat;
import org.apache.pinot.hadoop.job.mappers.OrcDataPreprocessingMapper;
import org.apache.pinot.hadoop.job.partitioners.OrcDataPreprocessingPartitioner;
import org.apache.pinot.hadoop.job.reducers.OrcDataPreprocessingReducer;
import org.apache.pinot.hadoop.utils.preprocess.HadoopUtils;
import org.apache.pinot.segment.spi.partition.PartitionFunctionFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/pinot/hadoop/job/preprocess/OrcDataPreprocessingHelper.class */
public class OrcDataPreprocessingHelper extends DataPreprocessingHelper {
    private static final Logger LOGGER = LoggerFactory.getLogger(OrcDataPreprocessingHelper.class);

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: org.apache.pinot.hadoop.job.preprocess.OrcDataPreprocessingHelper$1, reason: invalid class name */
    /* loaded from: input_file:org/apache/pinot/hadoop/job/preprocess/OrcDataPreprocessingHelper$1.class */
    /**
     * Ordinal-indexed lookup table for {@link TypeDescription.Category}.
     *
     * <p>Each supported category's {@code ordinal()} slot holds a small positive case label (1..13);
     * slots that are never assigned keep the array default of 0.
     */
    public static /* synthetic */ class AnonymousClass1 {
        static final /* synthetic */ int[] $SwitchMap$org$apache$orc$TypeDescription$Category = new int[TypeDescription.Category.values().length];

        static {
            // Each constant is read lazily inside assign(), so one missing constant does not stop the others.
            assign(() -> TypeDescription.Category.BOOLEAN, 1);
            assign(() -> TypeDescription.Category.BYTE, 2);
            assign(() -> TypeDescription.Category.SHORT, 3);
            assign(() -> TypeDescription.Category.INT, 4);
            assign(() -> TypeDescription.Category.LONG, 5);
            assign(() -> TypeDescription.Category.DATE, 6);
            assign(() -> TypeDescription.Category.TIMESTAMP, 7);
            assign(() -> TypeDescription.Category.FLOAT, 8);
            assign(() -> TypeDescription.Category.DOUBLE, 9);
            assign(() -> TypeDescription.Category.STRING, 10);
            assign(() -> TypeDescription.Category.VARCHAR, 11);
            assign(() -> TypeDescription.Category.CHAR, 12);
            assign(() -> TypeDescription.Category.BINARY, 13);
        }

        /**
         * Stores {@code label} in the slot of the category produced by {@code constant}.
         *
         * @param constant deferred access to one enum constant
         * @param label    case label to record for that constant
         */
        private static void assign(java.util.function.Supplier<TypeDescription.Category> constant, int label) {
            try {
                $SwitchMap$org$apache$orc$TypeDescription$Category[constant.get().ordinal()] = label;
            } catch (NoSuchFieldError ignored) {
                // Constant is absent from the TypeDescription.Category loaded at runtime; leave its slot at 0.
            }
        }
    }

    /**
     * Creates an ORC preprocessing helper by forwarding both arguments unchanged to the
     * {@link DataPreprocessingHelper} constructor.
     *
     * @param list paths handed to the superclass (presumably the input data files; confirm against
     *             DataPreprocessingHelper)
     * @param path path handed to the superclass (presumably the output location; confirm against
     *             DataPreprocessingHelper)
     */
    public OrcDataPreprocessingHelper(List<Path> list, Path path) {
        super(list, path);
    }

    /**
     * Returns the partitioner class used for ORC preprocessing.
     *
     * @return {@link OrcDataPreprocessingPartitioner}
     */
    @Override // org.apache.pinot.hadoop.job.preprocess.DataPreprocessingHelper
    Class<? extends Partitioner> getPartitioner() {
        return OrcDataPreprocessingPartitioner.class;
    }

    /**
     * Wires the ORC-specific input format, mapper, reducer and output format into {@code job}.
     *
     * <p>The schema is read from the sample raw data file and validated against the partition and
     * sorting settings before the job is touched. Its string form is then registered as both the
     * shuffle value schema and the output schema.
     *
     * @param job the MapReduce job to configure
     */
    @Override
    void setUpMapperReducerConfigs(Job job) {
        final TypeDescription sampleSchema = getOrcSchema(this._sampleRawDataPath);
        final String schemaText = sampleSchema.toString();
        LOGGER.info("Orc schema is: {}", schemaText);
        validateConfigsAgainstSchema(sampleSchema);

        final Configuration jobConf = job.getConfiguration();

        // Map side: ORC input, values shuffled as OrcValue with the sample file's schema.
        job.setInputFormatClass(OrcInputFormat.class);
        job.setMapperClass(OrcDataPreprocessingMapper.class);
        job.setMapOutputValueClass(OrcValue.class);
        OrcConf.MAPRED_SHUFFLE_VALUE_SCHEMA.setString(jobConf, schemaText);

        // Reduce side: OrcStruct records written through OrcOutputFormat, wrapped in LazyOutputFormat.
        job.setReducerClass(OrcDataPreprocessingReducer.class);
        LazyOutputFormat.setOutputFormatClass(job, OrcOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(OrcStruct.class);
        OrcConf.MAPRED_OUTPUT_SCHEMA.setString(jobConf, schemaText);
    }

    /**
     * Reads the value of the named column from the first row of the sample raw data file.
     *
     * <p>Both the ORC {@link Reader} and the {@link RecordReader} obtained from it are closed on
     * every exit path. Previously the record reader was never closed.
     *
     * @param str name of the top-level column to read
     * @return the first row's value rendered as a string, or {@code null} when no batch could be
     *         read, the column is not in the schema, or the value itself is null
     * @throws IOException if the file cannot be opened, read or closed
     * @throws IllegalStateException if the column's ORC category is not supported
     */
    @Override
    String getSampleTimeColumnValue(String str) throws IOException {
        Reader.Options readOptions = new Reader.Options();
        // NOTE(review): Reader.Options.range takes a byte offset and length, not a row range.
        // Confirm that (0, 1) actually selects the first stripe of the file.
        readOptions.range(0L, 1L);

        try (Reader reader = OrcFile.createReader(this._sampleRawDataPath, OrcFile.readerOptions(HadoopUtils.DEFAULT_CONFIGURATION));
                RecordReader rows = reader.rows(readOptions)) {
            TypeDescription schema = reader.getSchema();
            VectorizedRowBatch batch = schema.createRowBatch();
            if (!rows.nextBatch(batch)) {
                return null;
            }

            // Top-level field names and child types are parallel lists, so one index addresses both.
            int columnIndex = schema.getFieldNames().indexOf(str);
            if (columnIndex < 0) {
                return null;
            }
            TypeDescription.Category category = schema.getChildren().get(columnIndex).getCategory();
            return getValue(str, batch.cols[columnIndex], category);
        }
    }

    /**
     * Renders row 0 of a column vector as a string, according to the column's ORC category.
     *
     * <p>The vector is cast to the concrete type for the category before the null check, so a
     * vector of the wrong type fails with {@link ClassCastException} even when row 0 is null.
     *
     * @param str          column name, used only in the error message
     * @param columnVector vector holding the column's values; only index 0 is read
     * @param category     ORC category of the column
     * @return the string form of the value at row 0, or {@code null} if that row is null
     * @throws IllegalStateException if {@code category} is not one of the handled categories
     */
    private String getValue(String str, ColumnVector columnVector, TypeDescription.Category category) {
        switch (category) {
            case BOOLEAN: {
                LongColumnVector booleans = (LongColumnVector) columnVector;
                return isFirstRowNull(booleans) ? null : Boolean.toString(booleans.vector[0] == 1);
            }
            case BYTE:
            case SHORT:
            case INT: {
                LongColumnVector ints = (LongColumnVector) columnVector;
                return isFirstRowNull(ints) ? null : Integer.toString((int) ints.vector[0]);
            }
            case LONG:
            case DATE: {
                LongColumnVector longs = (LongColumnVector) columnVector;
                return isFirstRowNull(longs) ? null : Long.toString(longs.vector[0]);
            }
            case TIMESTAMP: {
                TimestampColumnVector timestamps = (TimestampColumnVector) columnVector;
                return isFirstRowNull(timestamps) ? null : Long.toString(timestamps.time[0]);
            }
            case FLOAT: {
                DoubleColumnVector floats = (DoubleColumnVector) columnVector;
                return isFirstRowNull(floats) ? null : Float.toString((float) floats.vector[0]);
            }
            case DOUBLE: {
                DoubleColumnVector doubles = (DoubleColumnVector) columnVector;
                return isFirstRowNull(doubles) ? null : Double.toString(doubles.vector[0]);
            }
            case STRING:
            case VARCHAR:
            case CHAR:
            case BINARY: {
                // BINARY is decoded as UTF-8 like the text categories. Decoding straight from the
                // backing array with offset/length gives the same result as copying the slice first.
                BytesColumnVector bytes = (BytesColumnVector) columnVector;
                if (isFirstRowNull(bytes)) {
                    return null;
                }
                return new String(bytes.vector[0], bytes.start[0], bytes.length[0], StandardCharsets.UTF_8);
            }
            default:
                throw new IllegalStateException("Unsupported field type: " + category + " for field: " + str);
        }
    }

    /** Returns whether row 0 is null: the vector may contain nulls and flags index 0 as null. */
    private static boolean isFirstRowNull(ColumnVector vector) {
        return !vector.noNulls && vector.isNull[0];
    }

    /**
     * Opens the given ORC file and returns its schema.
     *
     * @param path location of the ORC file
     * @return the schema reported by the file's reader
     * @throws IllegalStateException wrapping any exception raised while opening, reading or closing
     *         the file
     */
    private TypeDescription getOrcSchema(Path path) {
        // try-with-resources closes the reader on every exit path, including a failure in getSchema().
        try (Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(HadoopUtils.DEFAULT_CONFIGURATION))) {
            return reader.getSchema();
        } catch (Exception e) {
            throw new IllegalStateException("Caught exception while extracting ORC schema from file: " + path, e);
        }
    }

    /**
     * Checks the partitioning and sorting settings against the schema of the input files.
     *
     * <p>When a partition column is set, it must be a field of the schema, the number of partitions
     * must be positive, and the partition function must be non-null and accepted by
     * {@link PartitionFunctionFactory.PartitionFunctionType#fromString}. When a sorting column is
     * set, it must be a field of the schema.
     *
     * @param typeDescription schema of the input ORC files
     * @throws IllegalArgumentException if any of the checks fails
     */
    private void validateConfigsAgainstSchema(TypeDescription typeDescription) {
        List<String> fieldNames = typeDescription.getFieldNames();

        if (this._partitionColumn != null) {
            Preconditions.checkArgument(fieldNames.contains(this._partitionColumn),
                    "Partition column: %s is not found from the schema of input files.", this._partitionColumn);
            Preconditions.checkArgument(this._numPartitions > 0,
                    "Number of partitions should be positive. Current value: %s", this._numPartitions);
            Preconditions.checkArgument(this._partitionFunction != null, "Partition function should not be null!");
            try {
                PartitionFunctionFactory.PartitionFunctionType.fromString(this._partitionFunction);
            } catch (IllegalArgumentException e) {
                // Log the rejected function name; the message previously printed the partition column here.
                LOGGER.error("Partition function needs to be one of Modulo, Murmur, ByteArray, HashCode, it is currently {}", this._partitionFunction);
                throw new IllegalArgumentException(e);
            }
        }

        if (this._sortingColumn != null) {
            Preconditions.checkArgument(fieldNames.contains(this._sortingColumn),
                    "Sorted column: %s is not found from the schema of input files.", this._sortingColumn);
        }
    }
}
