package org.apache.pinot.hadoop.job.preprocess;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.pinot.hadoop.job.HadoopSegmentPreprocessingJob;
import org.apache.pinot.hadoop.job.InternalConfigConstants;
import org.apache.pinot.hadoop.job.partitioners.GenericPartitioner;
import org.apache.pinot.hadoop.utils.preprocess.HadoopUtils;
import org.apache.pinot.hadoop.utils.preprocess.TextComparator;
import org.apache.pinot.spi.config.table.SegmentsValidationAndRetentionConfig;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.data.DateTimeFieldSpec;
import org.apache.pinot.spi.data.DateTimeFormatSpec;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.utils.IngestionConfigUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/pinot/hadoop/job/preprocess/DataPreprocessingHelper.class */
/**
 * Base class that assembles the Hadoop {@link Job} used to pre-process raw input data.
 *
 * <p>The helper collects table, partitioning, sorting and sizing settings via
 * {@link #registerConfigs}, then {@link #setUpJob()} copies them into the job configuration and
 * delegates the format-specific parts (partitioner, mapper/reducer wiring, sampling of the time
 * column) to subclasses.
 */
public abstract class DataPreprocessingHelper {
    private static final Logger LOGGER = LoggerFactory.getLogger(DataPreprocessingHelper.class);

    // Partitioning settings; partitioning is enabled only when _partitionColumn is non-null.
    String _partitionColumn;
    int _numPartitions;
    String _partitionFunction;
    String _partitionColumnDefaultNullValue;

    // Sorting settings; sorting is enabled only when _sortingColumn is non-null.
    String _sortingColumn;
    private FieldSpec.DataType _sortingColumnType;
    String _sortingColumnDefaultNullValue;

    // Output sizing settings.
    private int _numOutputFiles;
    private int _maxNumRecordsPerFile;

    private TableConfig _tableConfig;
    private Schema _pinotTableSchema;

    List<Path> _inputDataPaths;
    // First entry of _inputDataPaths; handed to setValidationConfigs by setUpJob.
    Path _sampleRawDataPath;
    Path _outputPath;

    /**
     * Creates a helper for the given input files and output location.
     *
     * @param inputDataPaths input paths added to the job; the first element is kept as the sample
     *     path, so the list must contain at least one entry
     * @param outputPath path configured as the job's output path
     */
    public DataPreprocessingHelper(List<Path> inputDataPaths, Path outputPath) {
        this._inputDataPaths = inputDataPaths;
        this._sampleRawDataPath = inputDataPaths.get(0);
        this._outputPath = outputPath;
    }

    /**
     * Stores the settings that {@link #setUpJob()} later writes into the job configuration.
     *
     * @param tableConfig table config consulted for ingestion type, push frequency and time column
     * @param tableSchema schema used to look up the time column's field spec
     * @param partitionColumn partition column name, or {@code null} to disable partitioning
     * @param numPartitions number of partitions; used as the reducer count when partitioning
     * @param partitionFunction partition function name, may be {@code null}
     * @param partitionColumnDefaultNullValue default null value for the partition column
     * @param sortingColumn sorting column name, or {@code null} to disable sorting
     * @param sortingColumnType data type of the sorting column
     * @param sortingColumnDefaultNullValue default null value for the sorting column
     * @param numOutputFiles reducer count when not partitioning; values {@code <= 0} fall back to
     *     the number of input paths
     * @param maxNumRecordsPerFile maximum number of records per output file
     */
    public void registerConfigs(TableConfig tableConfig, Schema tableSchema, String partitionColumn,
            int numPartitions, String partitionFunction, String partitionColumnDefaultNullValue,
            String sortingColumn, FieldSpec.DataType sortingColumnType, String sortingColumnDefaultNullValue,
            int numOutputFiles, int maxNumRecordsPerFile) {
        this._tableConfig = tableConfig;
        this._pinotTableSchema = tableSchema;
        this._partitionColumn = partitionColumn;
        this._numPartitions = numPartitions;
        this._partitionFunction = partitionFunction;
        this._partitionColumnDefaultNullValue = partitionColumnDefaultNullValue;
        this._sortingColumn = sortingColumn;
        this._sortingColumnType = sortingColumnType;
        this._sortingColumnDefaultNullValue = sortingColumnDefaultNullValue;
        this._numOutputFiles = numOutputFiles;
        this._maxNumRecordsPerFile = maxNumRecordsPerFile;
    }

    /**
     * Builds and configures the pre-processing job from the registered settings.
     *
     * @return the configured (not yet submitted) job
     * @throws IOException if job creation, input path registration or a subclass hook fails
     * @throws IllegalStateException if the sorting column type is not INT, LONG, FLOAT, DOUBLE or
     *     STRING
     */
    public Job setUpJob() throws IOException {
        LOGGER.info("Initializing a pre-processing job");
        Job job = Job.getInstance(HadoopUtils.DEFAULT_CONFIGURATION);
        Configuration configuration = job.getConfiguration();
        configuration.setInt("mapreduce.job.maps", this._inputDataPaths.size());
        setValidationConfigs(job, this._sampleRawDataPath);
        for (Path inputPath : this._inputDataPaths) {
            FileInputFormat.addInputPath(job, inputPath);
        }
        setHadoopJobConfigs(job);

        configureSorting(job, configuration);
        int numReduceTasks = configurePartitioning(configuration);

        // The subclass-provided partitioner is used whether or not partitioning is enabled.
        job.setPartitionerClass(getPartitioner());
        configuration.set(InternalConfigConstants.PREPROCESSING_MAX_NUM_RECORDS_PER_FILE,
                Integer.toString(this._maxNumRecordsPerFile));
        LOGGER.info("Number of reduce tasks for pre-processing job: {}", numReduceTasks);
        job.setNumReduceTasks(numReduceTasks);
        setUpMapperReducerConfigs(job);
        return job;
    }

    /** Sets the map output key class (and sort-related configs) according to the sorting column. */
    private void configureSorting(Job job, Configuration configuration) {
        if (this._sortingColumn == null) {
            job.setMapOutputKeyClass(NullWritable.class);
            return;
        }
        LOGGER.info("Adding sorting column: {} to job config", this._sortingColumn);
        configuration.set(InternalConfigConstants.SORTING_COLUMN_CONFIG, this._sortingColumn);
        configuration.set(InternalConfigConstants.SORTING_COLUMN_TYPE, this._sortingColumnType.name());
        configuration.set(InternalConfigConstants.SORTING_COLUMN_DEFAULT_NULL_VALUE,
                this._sortingColumnDefaultNullValue);
        switch (this._sortingColumnType) {
            case INT:
                job.setMapOutputKeyClass(IntWritable.class);
                break;
            case LONG:
                job.setMapOutputKeyClass(LongWritable.class);
                break;
            case FLOAT:
                job.setMapOutputKeyClass(FloatWritable.class);
                break;
            case DOUBLE:
                job.setMapOutputKeyClass(DoubleWritable.class);
                break;
            case STRING:
                job.setMapOutputKeyClass(Text.class);
                job.setSortComparatorClass(TextComparator.class);
                break;
            default:
                throw new IllegalStateException("Unsupported sorting column type: " + this._sortingColumnType);
        }
    }

    /** Writes the partitioning configs when enabled and returns the number of reduce tasks to use. */
    private int configurePartitioning(Configuration configuration) {
        if (this._partitionColumn == null) {
            // Without partitioning, fall back to the number of input paths unless an explicit
            // positive output file count was registered.
            return this._numOutputFiles > 0 ? this._numOutputFiles : this._inputDataPaths.size();
        }
        configuration.set(InternalConfigConstants.ENABLE_PARTITIONING, "true");
        configuration.set(InternalConfigConstants.PARTITION_COLUMN_CONFIG, this._partitionColumn);
        if (this._partitionFunction != null) {
            configuration.set(InternalConfigConstants.PARTITION_FUNCTION_CONFIG, this._partitionFunction);
        }
        configuration.set(InternalConfigConstants.PARTITION_COLUMN_DEFAULT_NULL_VALUE,
                this._partitionColumnDefaultNullValue);
        configuration.setInt(InternalConfigConstants.NUM_PARTITIONS_CONFIG, this._numPartitions);
        return this._numPartitions;
    }

    /** Returns the partitioner class installed on the job by {@link #setUpJob()}. */
    abstract Class<? extends Partitioner> getPartitioner();

    /**
     * Hook invoked as the last step of {@link #setUpJob()} for subclass-specific job setup.
     *
     * @param job the job being configured
     * @throws IOException if the subclass-specific setup fails
     */
    abstract void setUpMapperReducerConfigs(Job job) throws IOException;

    /**
     * Returns a sample value of the given time column, or {@code null} if none is available.
     *
     * @param timeColumnName name of the time column; never {@code null} when called by this class
     * @throws IOException if the sample value cannot be read
     */
    abstract String getSampleTimeColumnValue(String timeColumnName) throws IOException;

    /**
     * Copies time-column and push-frequency settings into the job configuration for tables whose
     * batch ingestion type is APPEND (case-insensitive); does nothing for other ingestion types.
     *
     * <p>Time-column related keys are only written when the table config declares a time column:
     * Hadoop's {@code Configuration.set} rejects null values, so writing the name unconditionally
     * would fail before the null check could take effect.
     *
     * @param job job whose configuration is populated
     * @param path sample data path (currently unused here; sampling is delegated to
     *     {@link #getSampleTimeColumnValue(String)})
     */
    private void setValidationConfigs(Job job, Path path) throws IOException {
        if (!IngestionConfigUtils.getBatchSegmentIngestionType(this._tableConfig).equalsIgnoreCase("APPEND")) {
            return;
        }
        Configuration configuration = job.getConfiguration();
        configuration.set(InternalConfigConstants.IS_APPEND, "true");

        SegmentsValidationAndRetentionConfig validationConfig = this._tableConfig.getValidationConfig();
        String timeColumnName = validationConfig.getTimeColumnName();
        if (timeColumnName != null) {
            configuration.set(InternalConfigConstants.TIME_COLUMN_CONFIG, timeColumnName);
            DateTimeFieldSpec timeFieldSpec = this._pinotTableSchema.getSpecForTimeColumn(timeColumnName);
            if (timeFieldSpec != null) {
                DateTimeFormatSpec formatSpec = timeFieldSpec.getFormatSpec();
                configuration.set(InternalConfigConstants.SEGMENT_TIME_TYPE, formatSpec.getColumnUnit().toString());
                configuration.set(InternalConfigConstants.SEGMENT_TIME_FORMAT, formatSpec.getTimeFormat().toString());
                if (formatSpec.getSDFPattern() != null) {
                    configuration.set(InternalConfigConstants.SEGMENT_TIME_SDF_PATTERN, formatSpec.getSDFPattern());
                }
            }
        }

        configuration.set(InternalConfigConstants.SEGMENT_PUSH_FREQUENCY,
                IngestionConfigUtils.getBatchSegmentIngestionFrequency(this._tableConfig));

        // There is nothing to sample when the table has no time column.
        if (timeColumnName != null) {
            String sampleTimeColumnValue = getSampleTimeColumnValue(timeColumnName);
            if (sampleTimeColumnValue != null) {
                configuration.set(InternalConfigConstants.TIME_COLUMN_VALUE, sampleTimeColumnValue);
            }
        }
    }

    /**
     * Applies the generic job settings: jar, job name, output path, classpath precedence,
     * success-marker suppression and, when {@code HADOOP_TOKEN_FILE_LOCATION} is set in the
     * environment, the binary credentials file.
     */
    private void setHadoopJobConfigs(Job job) {
        String jobName = getClass().getName();
        job.setJarByClass(HadoopSegmentPreprocessingJob.class);
        job.setJobName(jobName);
        FileOutputFormat.setOutputPath(job, this._outputPath);

        Configuration configuration = job.getConfiguration();
        configuration.set("mapreduce.job.name", jobName);
        configuration.set("mapreduce.job.user.classpath.first", "true");
        configuration.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "false");
        String tokenFileLocation = System.getenv("HADOOP_TOKEN_FILE_LOCATION");
        if (tokenFileLocation != null) {
            configuration.set("mapreduce.job.credentials.binary", tokenFileLocation);
        }
    }
}
