package org.apache.pinot.hadoop.job;

import com.google.common.base.Preconditions;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.pinot.hadoop.job.preprocess.DataPreprocessingHelper;
import org.apache.pinot.hadoop.job.preprocess.DataPreprocessingHelperFactory;
import org.apache.pinot.hadoop.utils.PinotHadoopJobPreparationHelper;
import org.apache.pinot.hadoop.utils.preprocess.DataPreprocessingUtils;
import org.apache.pinot.hadoop.utils.preprocess.HadoopUtils;
import org.apache.pinot.ingestion.common.ControllerRestApi;
import org.apache.pinot.ingestion.jobs.SegmentPreprocessingJob;
import org.apache.pinot.spi.config.table.FieldConfig;
import org.apache.pinot.spi.config.table.SegmentPartitionConfig;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.config.table.TableCustomConfig;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.Schema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Hadoop job that pre-processes input data ahead of segment generation.
 *
 * <p>When pre-processing is enabled, {@link #run()} loads the table config and schema, reads the
 * requested pre-processing operations (partition / sort / resize) from the table's custom configs,
 * hands the resolved settings to a {@link DataPreprocessingHelper}, and then runs the Hadoop
 * {@link Job} built by that helper, blocking until it completes.
 */
public class HadoopSegmentPreprocessingJob extends SegmentPreprocessingJob {
    private static final Logger LOGGER = LoggerFactory.getLogger(HadoopSegmentPreprocessingJob.class);
    private static final String AT_MOST_ONE_SORTED_COLUMN_MESSAGE =
            "There should be at most 1 sorted column in the table.";

    // Partitioning settings; left at their defaults when partitioning is disabled or not configured.
    private String _partitionColumn;
    private int _numPartitions;
    private String _partitionFunction;
    private String _partitionColumnDefaultNullValue;

    // Sorting settings; left at their defaults when sorting is disabled or no sorted column is configured.
    private String _sortingColumn;
    private FieldSpec.DataType _sortingColumnType;
    private String _sortingColumnDefaultNullValue;

    // Resizing settings; 0 means "not configured".
    private int _numOutputFiles;
    private int _maxNumRecordsPerFile;

    private TableConfig _tableConfig;
    private Schema _pinotTableSchema;
    private Set<DataPreprocessingUtils.Operation> _preprocessingOperations;

    public HadoopSegmentPreprocessingJob(Properties properties) {
        super(properties);
    }

    /**
     * Runs the pre-processing job, or returns immediately when pre-processing is disabled.
     *
     * @throws RuntimeException if the Hadoop job finishes unsuccessfully
     * @throws Exception on any failure while resolving configs or while setting up / running the job
     */
    public void run() throws Exception {
        if (!_enablePreprocessing) {
            LOGGER.info("Pre-processing job is disabled.");
            return;
        }
        LOGGER.info("Starting {}", getClass().getSimpleName());

        // Resolve everything the helper needs before touching the output directory.
        setTableConfigAndSchema();
        fetchPreProcessingOperations();
        fetchPartitioningConfig();
        fetchSortingConfig();
        fetchResizingConfig();

        // Remove output left over from a previous run.
        cleanUpPreprocessedOutputs(_preprocessedOutputDir);

        DataPreprocessingHelper preprocessingHelper =
                DataPreprocessingHelperFactory.generateDataPreprocessingHelper(_inputSegmentDir, _preprocessedOutputDir);
        preprocessingHelper.registerConfigs(_tableConfig, _pinotTableSchema, _partitionColumn, _numPartitions,
                _partitionFunction, _partitionColumnDefaultNullValue, _sortingColumn, _sortingColumnType,
                _sortingColumnDefaultNullValue, _numOutputFiles, _maxNumRecordsPerFile);
        Job job = preprocessingHelper.setUpJob();

        LOGGER.info("HDFS class path: {}", _pathToDependencyJar);
        if (_pathToDependencyJar != null) {
            LOGGER.info("Copying jars locally.");
            PinotHadoopJobPreparationHelper.addDepsJarToDistributedCacheHelper(HadoopUtils.DEFAULT_FILE_SYSTEM, job,
                    _pathToDependencyJar);
        } else {
            LOGGER.info("Property '{}' not specified.", "path.to.deps.jar");
        }

        long startTimeMs = System.currentTimeMillis();
        job.waitForCompletion(true);
        if (!job.isSuccessful()) {
            throw new RuntimeException("Job failed : " + job);
        }
        LOGGER.info("Finished pre-processing job in {}ms", System.currentTimeMillis() - startTimeMs);
    }

    /**
     * Populates {@code _preprocessingOperations} from the table's custom configs. The set is left
     * empty when the table has no custom configs.
     */
    private void fetchPreProcessingOperations() {
        _preprocessingOperations = new HashSet<>();
        TableCustomConfig customConfig = _tableConfig.getCustomConfig();
        if (customConfig == null) {
            return;
        }
        Map<String, String> customConfigs = customConfig.getCustomConfigs();
        if (customConfigs == null || customConfigs.isEmpty()) {
            return;
        }
        String operations = customConfigs.getOrDefault(InternalConfigConstants.PREPROCESS_OPERATIONS, "");
        DataPreprocessingUtils.getOperations(_preprocessingOperations, operations);
    }

    /**
     * Resolves the partition column, partition count, partition function and the column's default
     * null value from the table's segment partition config. Does nothing when the PARTITION operation
     * is not enabled or the table has no partition config.
     *
     * @throws IllegalArgumentException if more than one partition column is configured
     * @throws IllegalStateException if the partition column is not present in the schema
     */
    private void fetchPartitioningConfig() {
        if (!_preprocessingOperations.contains(DataPreprocessingUtils.Operation.PARTITION)) {
            LOGGER.info("Partitioning is disabled.");
            return;
        }
        SegmentPartitionConfig segmentPartitionConfig = _tableConfig.getIndexingConfig().getSegmentPartitionConfig();
        if (segmentPartitionConfig == null) {
            LOGGER.info("Segment partition config is null for table: {}", _tableConfig.getTableName());
            return;
        }
        Map<String, ?> columnPartitionMap = segmentPartitionConfig.getColumnPartitionMap();
        Preconditions.checkArgument(columnPartitionMap.size() <= 1,
                "There should be at most 1 partition setting in the table.");
        if (columnPartitionMap.isEmpty()) {
            return;
        }

        _partitionColumn = columnPartitionMap.keySet().iterator().next();
        _numPartitions = segmentPartitionConfig.getNumPartitions(_partitionColumn);
        _partitionFunction = segmentPartitionConfig.getFunctionName(_partitionColumn);

        // Fail with a descriptive message instead of a bare NullPointerException when the column is unknown.
        FieldSpec partitionFieldSpec = _pinotTableSchema.getFieldSpecFor(_partitionColumn);
        Preconditions.checkState(partitionFieldSpec != null, "Failed to find partition column: %s in the schema",
                _partitionColumn);
        _partitionColumnDefaultNullValue = partitionFieldSpec.getDefaultNullValueString();
    }

    /**
     * Resolves the sorting column together with its data type and default null value. The field
     * config list is consulted first; the indexing config's sorted-column list is the fallback. Does
     * nothing when the SORT operation is not enabled or no sorted column is configured.
     *
     * @throws IllegalArgumentException if more than one sorted column is configured
     * @throws IllegalStateException if the column is missing from the schema, is multi-valued, or has
     *     a data type that cannot be sorted
     */
    private void fetchSortingConfig() {
        if (!_preprocessingOperations.contains(DataPreprocessingUtils.Operation.SORT)) {
            LOGGER.info("Sorting is disabled.");
            return;
        }

        String sortingColumn = findSortedColumnInFieldConfigs();
        if (sortingColumn == null) {
            sortingColumn = findSortedColumnInIndexingConfig();
        }
        if (sortingColumn == null) {
            return;
        }
        _sortingColumn = sortingColumn;

        // Validate and resolve type / default null value regardless of which config supplied the column, so the
        // helper never receives a sorting column without its type.
        FieldSpec sortingFieldSpec = _pinotTableSchema.getFieldSpecFor(_sortingColumn);
        Preconditions.checkState(sortingFieldSpec != null, "Failed to find sorting column: %s in the schema",
                _sortingColumn);
        Preconditions.checkState(sortingFieldSpec.isSingleValueField(), "Cannot sort on multi-value column: %s",
                _sortingColumn);
        _sortingColumnType = sortingFieldSpec.getDataType();
        Preconditions.checkState(_sortingColumnType.canBeASortedColumn(), "Cannot sort on %s column: %s",
                _sortingColumnType, _sortingColumn);
        _sortingColumnDefaultNullValue = sortingFieldSpec.getDefaultNullValueString();
        LOGGER.info("Sorting the data with column: {} of type: {}", _sortingColumn, _sortingColumnType);
    }

    /** Returns the single column whose field config has index type SORTED, or {@code null} if there is none. */
    private String findSortedColumnInFieldConfigs() {
        List<FieldConfig> fieldConfigs = _tableConfig.getFieldConfigList();
        if (fieldConfigs == null || fieldConfigs.isEmpty()) {
            return null;
        }
        List<String> sortedColumns = new ArrayList<>();
        for (FieldConfig fieldConfig : fieldConfigs) {
            if (fieldConfig.getIndexType() == FieldConfig.IndexType.SORTED) {
                sortedColumns.add(fieldConfig.getName());
            }
        }
        if (sortedColumns.isEmpty()) {
            return null;
        }
        Preconditions.checkArgument(sortedColumns.size() == 1, AT_MOST_ONE_SORTED_COLUMN_MESSAGE);
        return sortedColumns.get(0);
    }

    /** Returns the single sorted column declared in the indexing config, or {@code null} if there is none. */
    private String findSortedColumnInIndexingConfig() {
        List<String> sortedColumns = _tableConfig.getIndexingConfig().getSortedColumn();
        if (sortedColumns == null) {
            return null;
        }
        Preconditions.checkArgument(sortedColumns.size() <= 1, AT_MOST_ONE_SORTED_COLUMN_MESSAGE);
        return sortedColumns.isEmpty() ? null : sortedColumns.get(0);
    }

    /**
     * Resolves the number of output files and the maximum number of records per file from the table's
     * custom configs. Does nothing when the RESIZE operation is not enabled.
     *
     * @throws IllegalStateException if the configured number of reducers is not positive
     * @throws IllegalArgumentException if the configured max number of records per file is not positive
     * @throws NumberFormatException if either configured value is not a valid integer
     */
    private void fetchResizingConfig() {
        if (!_preprocessingOperations.contains(DataPreprocessingUtils.Operation.RESIZE)) {
            LOGGER.info("Resizing is disabled.");
            return;
        }
        TableCustomConfig customConfig = _tableConfig.getCustomConfig();
        Map<String, String> customConfigs = customConfig == null ? null : customConfig.getCustomConfigs();
        if (customConfigs == null) {
            _numOutputFiles = 0;
            return;
        }

        if (customConfigs.containsKey(InternalConfigConstants.PREPROCESSING_NUM_REDUCERS)) {
            _numOutputFiles = Integer.parseInt(customConfigs.get(InternalConfigConstants.PREPROCESSING_NUM_REDUCERS));
            Preconditions.checkState(_numOutputFiles > 0, "The value of %s should be positive! Current value: %s",
                    InternalConfigConstants.PREPROCESSING_NUM_REDUCERS, _numOutputFiles);
        } else {
            _numOutputFiles = 0;
        }

        // The deprecated key takes precedence over its replacement when both are present.
        String maxNumRecordsValue;
        if (customConfigs.containsKey(InternalConfigConstants.PARTITION_MAX_RECORDS_PER_FILE)) {
            LOGGER.warn("The config: {} from custom config is deprecated. Use {} instead.",
                    InternalConfigConstants.PARTITION_MAX_RECORDS_PER_FILE,
                    InternalConfigConstants.PREPROCESSING_MAX_NUM_RECORDS_PER_FILE);
            maxNumRecordsValue = customConfigs.get(InternalConfigConstants.PARTITION_MAX_RECORDS_PER_FILE);
        } else if (customConfigs.containsKey(InternalConfigConstants.PREPROCESSING_MAX_NUM_RECORDS_PER_FILE)) {
            maxNumRecordsValue = customConfigs.get(InternalConfigConstants.PREPROCESSING_MAX_NUM_RECORDS_PER_FILE);
        } else {
            return;
        }
        int maxNumRecords = Integer.parseInt(maxNumRecordsValue);
        Preconditions.checkArgument(maxNumRecords > 0,
                "The value of preprocessing.max.num.records.per.file should be positive. Current value: %s",
                maxNumRecords);
        LOGGER.info("Setting {} to {}", InternalConfigConstants.PREPROCESSING_MAX_NUM_RECORDS_PER_FILE, maxNumRecords);
        _maxNumRecordsPerFile = maxNumRecords;
    }

    /**
     * Loads the table schema from the controller REST API when one is available, otherwise from the
     * schema file on the file system. Both the REST API handle and the file stream are closed before
     * returning, including when reading fails.
     *
     * @return the schema read from the controller or from the schema file
     * @throws IOException if the schema cannot be fetched or read
     */
    protected Schema getSchema() throws IOException {
        // A null resource is permitted in try-with-resources; close() is simply skipped for it.
        try (ControllerRestApi controllerRestApi = getControllerRestApi()) {
            if (controllerRestApi != null) {
                return controllerRestApi.getSchema();
            }
            try (FSDataInputStream schemaStream = FileSystem.get(_schemaFile.toUri(), getConf()).open(_schemaFile)) {
                return Schema.fromInputStream(schemaStream);
            }
        }
    }

    /** No additional job properties are set by this job. */
    protected void addAdditionalJobProperties(Job job) {
    }

    /**
     * Loads the table config and the schema into this job.
     *
     * @throws IllegalStateException if either the table config or the schema is {@code null}
     * @throws IOException if the schema cannot be read
     */
    private void setTableConfigAndSchema() throws IOException {
        _tableConfig = getTableConfig();
        Preconditions.checkState(_tableConfig != null, "Table config cannot be null.");
        _pinotTableSchema = getSchema();
        Preconditions.checkState(_pinotTableSchema != null, "Schema cannot be null");
    }

    /**
     * Recursively deletes the given output path on the default file system if it exists.
     *
     * @param path output directory to remove
     * @throws IOException if the existence check or the delete fails
     */
    public static void cleanUpPreprocessedOutputs(Path path) throws IOException {
        if (HadoopUtils.DEFAULT_FILE_SYSTEM.exists(path)) {
            LOGGER.warn("Found output folder {}, deleting", path);
            HadoopUtils.DEFAULT_FILE_SYSTEM.delete(path, true);
        }
    }
}
