package org.apache.pinot.segment.local.segment.creator.impl;

import com.google.common.base.Preconditions;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.io.FileUtils;
import org.apache.pinot.segment.local.recordtransformer.ComplexTypeTransformer;
import org.apache.pinot.segment.local.recordtransformer.RecordTransformer;
import org.apache.pinot.segment.local.segment.creator.RecordReaderSegmentCreationDataSource;
import org.apache.pinot.segment.local.segment.creator.TransformPipeline;
import org.apache.pinot.segment.local.segment.index.converter.SegmentFormatConverterFactory;
import org.apache.pinot.segment.local.segment.index.dictionary.DictionaryIndexType;
import org.apache.pinot.segment.local.segment.index.loader.IndexLoadingConfig;
import org.apache.pinot.segment.local.segment.readers.PinotSegmentRecordReader;
import org.apache.pinot.segment.local.startree.v2.builder.MultipleTreesBuilder;
import org.apache.pinot.segment.local.utils.CrcUtils;
import org.apache.pinot.segment.local.utils.IngestionUtils;
import org.apache.pinot.segment.spi.IndexSegment;
import org.apache.pinot.segment.spi.creator.ColumnIndexCreationInfo;
import org.apache.pinot.segment.spi.creator.ColumnStatistics;
import org.apache.pinot.segment.spi.creator.SegmentCreationDataSource;
import org.apache.pinot.segment.spi.creator.SegmentCreator;
import org.apache.pinot.segment.spi.creator.SegmentGeneratorConfig;
import org.apache.pinot.segment.spi.creator.SegmentIndexCreationDriver;
import org.apache.pinot.segment.spi.creator.SegmentPreIndexStatsContainer;
import org.apache.pinot.segment.spi.creator.SegmentVersion;
import org.apache.pinot.segment.spi.creator.StatsCollectorConfig;
import org.apache.pinot.segment.spi.index.IndexService;
import org.apache.pinot.segment.spi.index.IndexType;
import org.apache.pinot.segment.spi.index.creator.SegmentIndexCreationInfo;
import org.apache.pinot.segment.spi.loader.SegmentDirectoryLoaderContext;
import org.apache.pinot.segment.spi.loader.SegmentDirectoryLoaderRegistry;
import org.apache.pinot.segment.spi.store.SegmentDirectory;
import org.apache.pinot.segment.spi.store.SegmentDirectoryPaths;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.IngestionSchemaValidator;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.data.SchemaValidatorFactory;
import org.apache.pinot.spi.data.readers.FileFormat;
import org.apache.pinot.spi.data.readers.GenericRow;
import org.apache.pinot.spi.data.readers.RecordReader;
import org.apache.pinot.spi.data.readers.RecordReaderFactory;
import org.apache.pinot.spi.env.PinotConfiguration;
import org.apache.pinot.spi.recordenricher.RecordEnricherPipeline;
import org.apache.pinot.spi.utils.ByteArray;
import org.apache.pinot.spi.utils.ReadMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/pinot/segment/local/segment/creator/impl/SegmentIndexCreationDriverImpl.class */
public class SegmentIndexCreationDriverImpl implements SegmentIndexCreationDriver {
    private static final Logger LOGGER = LoggerFactory.getLogger(SegmentIndexCreationDriverImpl.class);
    // Segment generation settings handed to init(); consulted throughout the build.
    private SegmentGeneratorConfig _config;
    // Reader obtained from the data source in init(); rewound, consumed and closed by build().
    private RecordReader _recordReader;
    // Pre-index statistics gathered from the data source in init().
    private SegmentPreIndexStatsContainer _segmentStats;
    // Per-column creation info keyed by column name; populated by buildIndexCreationInfo().
    private TreeMap<String, ColumnIndexCreationInfo> _indexCreationInfoMap;
    // Creator that receives the rows/columns; instantiated as a SegmentColumnarIndexCreator in init().
    private SegmentCreator _indexCreator;
    // Segment-level creation info; buildIndexCreationInfo() stores the total doc count in it.
    private SegmentIndexCreationInfo _segmentIndexCreationInfo;
    // Schema taken from the config in init().
    private Schema _dataSchema;
    // Enricher pipeline run on every record in build() before the transform pipeline.
    private RecordEnricherPipeline _recordEnricherPipeline;
    // Transform pipeline run on every record in build(); its transformed rows are what get indexed.
    private TransformPipeline _transformPipeline;
    // Validator created in init() from the schema, the reader class name and the input file path.
    private IngestionSchemaValidator _ingestionSchemaValidator;
    // Scratch directory "tmp-<uuid>" under the output dir; moved to the final segment dir by handlePostCreation().
    private File _tempIndexDir;
    // Segment name produced by the configured name generator in handlePostCreation(); unset before that.
    private String _segmentName;
    // When true, build() counts a failing row as incomplete instead of aborting.
    private boolean _continueOnError;
    // Total document count reported by the collected stats.
    private int _totalDocs = 0;
    // Accumulated per-record time in nanoseconds, updated by build(); logged in milliseconds.
    private long _totalRecordReadTimeNs = 0;
    // Accumulated indexing time, updated by build(); logged as-is by handlePostCreation().
    private long _totalIndexTime = 0;
    // Logged by handlePostCreation(); no code in this class updates it, so it stays 0 here.
    private long _totalStatsCollectorTime = 0;

    /**
     * Initializes the driver from the config alone, creating the record reader for the configured input file.
     *
     * @param segmentGeneratorConfig segment generation settings, including the input file path
     * @throws Exception if the record reader cannot be created or the initialization fails
     */
    public void init(SegmentGeneratorConfig segmentGeneratorConfig) throws Exception {
        RecordReader inputReader = getRecordReader(segmentGeneratorConfig);
        init(segmentGeneratorConfig, inputReader);
    }

    /**
     * Creates the record reader for the input file named in the config.
     *
     * <p>A configured record reader class takes precedence over the file format; without one, the PINOT format gets a
     * {@link PinotSegmentRecordReader} and every other format is resolved through {@link RecordReaderFactory}.
     *
     * @param segmentGeneratorConfig segment generation settings
     * @return a reader over the configured input file
     * @throws IllegalStateException if the input file does not exist
     * @throws Exception if the reader cannot be created
     */
    private RecordReader getRecordReader(SegmentGeneratorConfig segmentGeneratorConfig) throws Exception {
        File inputFile = new File(segmentGeneratorConfig.getInputFilePath());
        Preconditions.checkState(inputFile.exists(), "Input file: " + inputFile.getAbsolutePath() + " does not exist");

        Schema schema = segmentGeneratorConfig.getSchema();
        TableConfig tableConfig = segmentGeneratorConfig.getTableConfig();
        FileFormat fileFormat = segmentGeneratorConfig.getFormat();
        String readerClassName = segmentGeneratorConfig.getRecordReaderPath();
        Set<String> sourceFields = IngestionUtils.getFieldsForRecordExtractor(tableConfig.getIngestionConfig(), schema);

        if (readerClassName != null) {
            // An explicit reader class overrides whatever file format was configured.
            if (fileFormat != FileFormat.OTHER) {
                LOGGER.warn("Using class: {} to read segment, ignoring configured file format: {}", readerClassName, fileFormat);
            }
            return RecordReaderFactory.getRecordReaderByClass(readerClassName, inputFile, sourceFields, segmentGeneratorConfig.getReaderConfig());
        }
        if (fileFormat == FileFormat.PINOT) {
            return new PinotSegmentRecordReader(inputFile, schema, segmentGeneratorConfig.getColumnSortOrder());
        }
        return RecordReaderFactory.getRecordReader(fileFormat, inputFile, sourceFields, segmentGeneratorConfig.getReaderConfig());
    }

    /**
     * Returns the record reader taken from the data source during initialization.
     *
     * @return the reader in use, or {@code null} if the driver has not been initialized
     */
    public RecordReader getRecordReader() {
        return _recordReader;
    }

    /**
     * Initializes the driver with a caller-supplied record reader, deriving the enricher and transform pipelines from
     * the table config and schema in the segment generator config.
     *
     * @param segmentGeneratorConfig segment generation settings
     * @param recordReader reader providing the records to index
     * @throws Exception if the pipelines cannot be built or the initialization fails
     */
    public void init(SegmentGeneratorConfig segmentGeneratorConfig, RecordReader recordReader) throws Exception {
        SegmentCreationDataSource dataSource = new RecordReaderSegmentCreationDataSource(recordReader);
        RecordEnricherPipeline enricherPipeline = RecordEnricherPipeline.fromTableConfig(segmentGeneratorConfig.getTableConfig());
        TransformPipeline transformPipeline = new TransformPipeline(segmentGeneratorConfig.getTableConfig(), segmentGeneratorConfig.getSchema());
        init(segmentGeneratorConfig, dataSource, enricherPipeline, transformPipeline);
    }

    /**
     * Initializes the driver from a record transformer and an optional complex type transformer, which are wrapped
     * into a {@link TransformPipeline}; the enricher pipeline is derived from the table config.
     *
     * @param segmentGeneratorConfig segment generation settings
     * @param segmentCreationDataSource source of the records and their statistics
     * @param recordTransformer transformer applied to the records
     * @param complexTypeTransformer optional complex type transformer, may be {@code null}
     * @throws Exception if the initialization fails
     * @deprecated use the overload that accepts a {@link RecordEnricherPipeline} and a {@link TransformPipeline}
     */
    @Deprecated
    public void init(SegmentGeneratorConfig segmentGeneratorConfig, SegmentCreationDataSource segmentCreationDataSource, RecordTransformer recordTransformer, @Nullable ComplexTypeTransformer complexTypeTransformer) throws Exception {
        RecordEnricherPipeline enricherPipeline = RecordEnricherPipeline.fromTableConfig(segmentGeneratorConfig.getTableConfig());
        TransformPipeline wrappedTransformers = new TransformPipeline(recordTransformer, complexTypeTransformer);
        init(segmentGeneratorConfig, segmentCreationDataSource, enricherPipeline, wrappedTransformers);
    }

    /**
     * Initializes the driver: stores the config, reader and pipelines, gathers the pre-index statistics from the data
     * source, prepares the index creator, makes sure the output directory exists, and picks a unique temporary
     * directory ({@code tmp-<uuid>}) underneath it for the segment being built.
     *
     * @param segmentGeneratorConfig segment generation settings
     * @param segmentCreationDataSource source of the records and their statistics
     * @param recordEnricherPipeline enricher pipeline applied to every record
     * @param transformPipeline transform pipeline applied to every record
     * @throws IllegalStateException if {@code failOnEmptySegment} is set and the reader has no record, or if the
     *     output directory cannot be created
     * @throws Exception if gathering the statistics fails
     */
    public void init(SegmentGeneratorConfig segmentGeneratorConfig, SegmentCreationDataSource segmentCreationDataSource, RecordEnricherPipeline recordEnricherPipeline, TransformPipeline transformPipeline) throws Exception {
        _config = segmentGeneratorConfig;
        _recordReader = segmentCreationDataSource.getRecordReader();
        _dataSchema = segmentGeneratorConfig.getSchema();
        _continueOnError = segmentGeneratorConfig.isContinueOnError();
        if (segmentGeneratorConfig.isFailOnEmptySegment()) {
            Preconditions.checkState(_recordReader.hasNext(), "No record in data source");
        }

        _recordEnricherPipeline = recordEnricherPipeline;
        _transformPipeline = transformPipeline;
        // Reader-backed data sources need the pipelines as well, so the stats pass sees the same rows as indexing.
        if (segmentCreationDataSource instanceof RecordReaderSegmentCreationDataSource) {
            RecordReaderSegmentCreationDataSource readerDataSource = (RecordReaderSegmentCreationDataSource) segmentCreationDataSource;
            readerDataSource.setRecordEnricherPipeline(recordEnricherPipeline);
            readerDataSource.setTransformPipeline(transformPipeline);
        }

        StatsCollectorConfig statsCollectorConfig = new StatsCollectorConfig(segmentGeneratorConfig.getTableConfig(), _dataSchema, segmentGeneratorConfig.getSegmentPartitionConfig());
        _segmentStats = segmentCreationDataSource.gatherStats(statsCollectorConfig);
        _totalDocs = _segmentStats.getTotalDocCount();

        _segmentIndexCreationInfo = new SegmentIndexCreationInfo();
        _indexCreationInfoMap = new TreeMap<>();
        _indexCreator = new SegmentColumnarIndexCreator();

        File outDir = new File(segmentGeneratorConfig.getOutDir());
        if (!outDir.exists()) {
            // mkdirs() also returns false when another thread/process created the directory in the meantime, so only
            // treat it as a failure when the directory is still missing afterwards.
            Preconditions.checkState(outDir.mkdirs() || outDir.isDirectory(), "Failed to create output directory: " + outDir.getAbsolutePath());
        }

        _ingestionSchemaValidator = SchemaValidatorFactory.getSchemaValidator(_dataSchema, _recordReader.getClass().getName(), segmentGeneratorConfig.getInputFilePath());
        _tempIndexDir = new File(outDir, "tmp-" + UUID.randomUUID());
        LOGGER.debug("tempIndexDir:{}", _tempIndexDir);
    }

    /**
     * Builds the segment row by row: derives the per-column creation info, initializes the index creator, rewinds the
     * record reader, runs every record through the enricher and transform pipelines, indexes the transformed rows and
     * finally hands over to {@code handlePostCreation()}.
     *
     * <p>When {@code continueOnError} is set, a record that fails is counted as incomplete and the loop moves on to
     * the next one; otherwise the failure is rethrown wrapped in a {@link RuntimeException}. The index creator is
     * closed when anything fails, and the record reader is always closed.
     *
     * @throws Exception if initialization, indexing or the post-creation handling fails
     */
    public void build() throws Exception {
        LOGGER.debug("Start building StatsCollector!");
        buildIndexCreationInfo();
        LOGGER.info("Finished building StatsCollector!");
        LOGGER.info("Collected stats for {} documents", _totalDocs);

        int incompleteRowsFound = 0;
        try {
            _indexCreator.init(_config, _segmentIndexCreationInfo, _indexCreationInfoMap, _dataSchema, _tempIndexDir);
            _recordReader.rewind();
            LOGGER.info("Start building IndexCreator!");

            GenericRow reuse = new GenericRow();
            TransformPipeline.Result reusedResult = new TransformPipeline.Result();
            // System.nanoTime() readings are only meaningful relative to other nanoTime() readings, so every
            // timestamp below comes from that one clock.
            long indexTimeNs = 0;
            while (_recordReader.hasNext()) {
                reuse.clear();
                try {
                    // The record timer covers reading, enriching and transforming the record.
                    long readStartNs = System.nanoTime();
                    GenericRow decodedRow = _recordReader.next(reuse);
                    _recordEnricherPipeline.run(decodedRow);
                    _transformPipeline.processRow(decodedRow, reusedResult);
                    long readStopNs = System.nanoTime();
                    _totalRecordReadTimeNs += readStopNs - readStartNs;

                    for (GenericRow transformedRow : reusedResult.getTransformedRows()) {
                        _indexCreator.indexRow(transformedRow);
                    }
                    indexTimeNs += System.nanoTime() - readStopNs;
                    incompleteRowsFound += reusedResult.getIncompleteRowCount();
                } catch (Exception e) {
                    if (!_continueOnError) {
                        throw new RuntimeException("Error occurred while reading row during indexing", e);
                    }
                    incompleteRowsFound++;
                    LOGGER.debug("Error occurred while reading row during indexing", e);
                }
            }
            // NOTE(review): _totalIndexTime is logged without conversion, unlike _totalRecordReadTimeNs, so it is
            // assumed to be in milliseconds; converting once here avoids per-row truncation. Confirm the unit.
            _totalIndexTime += TimeUnit.NANOSECONDS.toMillis(indexTimeNs);

            if (incompleteRowsFound > 0) {
                LOGGER.warn("Incomplete data found for {} records. This can be due to error during reader or transformations", incompleteRowsFound);
            }
            LOGGER.info("Finished records indexing in IndexCreator!");
            handlePostCreation();
        } catch (Exception e) {
            // Release the creator's resources before propagating the failure.
            _indexCreator.close();
            throw e;
        } finally {
            _recordReader.close();
        }
    }

    /**
     * Builds the segment column by column from an existing index segment: derives the per-column creation info,
     * initializes the index creator, indexes every physical column of the schema using the sorted doc ids provided by
     * the record reader, and finally hands over to {@code handlePostCreation()}.
     *
     * <p>The record reader must be a {@link PinotSegmentRecordReader}; the cast fails otherwise. The index creator is
     * closed when anything fails, and the record reader is always closed.
     *
     * @param indexSegment segment the column values are taken from
     * @throws Exception if initialization, indexing or the post-creation handling fails
     */
    public void buildByColumn(IndexSegment indexSegment) throws Exception {
        LOGGER.debug("Start building StatsCollector!");
        buildIndexCreationInfo();
        LOGGER.info("Finished building StatsCollector!");
        LOGGER.info("Collected stats for {} documents", _totalDocs);

        try {
            _indexCreator.init(_config, _segmentIndexCreationInfo, _indexCreationInfoMap, _dataSchema, _tempIndexDir);
            LOGGER.info("Start building Index by column");

            TreeSet<String> columnNames = _dataSchema.getPhysicalColumnNames();
            PinotSegmentRecordReader segmentReader = (PinotSegmentRecordReader) _recordReader;
            int[] sortedDocIds = segmentReader.getSortedDocIds();
            for (String columnName : columnNames) {
                _indexCreator.indexColumn(columnName, sortedDocIds, indexSegment);
            }

            LOGGER.info("Finished records indexing by column in IndexCreator!");
            handlePostCreation();
        } catch (Exception e) {
            // Release the creator's resources before propagating the failure.
            _indexCreator.close();
            throw e;
        } finally {
            _recordReader.close();
        }
    }

    /**
     * Finalizes the segment after all rows/columns have been indexed.
     *
     * <p>Steps, in order: generate the segment name (from the time column's min/max when statistics exist and the
     * segment has documents, from the current time for an empty segment, or without time values when there are no
     * statistics for the time column); seal and close the index creator; move the temporary directory to the final
     * segment directory (replacing an existing one); convert the segment format if needed; build star-trees for
     * non-empty segments; update the post-segment-creation indexes; compute the CRC and persist it together with the
     * creation time; log the collected timings.
     *
     * @throws IllegalArgumentException if the segment is empty and {@code failOnEmptySegment} is set
     * @throws Exception if any of the steps fails
     */
    private void handlePostCreation() throws Exception {
        ColumnStatistics timeColumnStats = _segmentStats.getColumnProfileFor(_config.getTimeColumnName());
        int sequenceId = _config.getSequenceId();
        if (timeColumnStats == null) {
            _segmentName = _config.getSegmentNameGenerator().generateSegmentName(sequenceId, (Object) null, (Object) null);
        } else if (_totalDocs > 0) {
            _segmentName = _config.getSegmentNameGenerator().generateSegmentName(sequenceId, timeColumnStats.getMinValue(), timeColumnStats.getMaxValue());
        } else {
            // Empty segment: there are no time values, so the current time stands in for both bounds.
            Preconditions.checkArgument(!_config.isFailOnEmptySegment(), "Failing the empty segment creation as the option 'failOnEmptySegment' is set to: " + _config.isFailOnEmptySegment());
            long now = System.currentTimeMillis();
            _segmentName = _config.getSegmentNameGenerator().generateSegmentName(sequenceId, Long.valueOf(now), Long.valueOf(now));
        }

        // Close the creator exactly once, whether or not sealing succeeds; later failures must not close it again
        // from within this method.
        try {
            _indexCreator.setSegmentName(_segmentName);
            _indexCreator.seal();
        } finally {
            _indexCreator.close();
        }
        LOGGER.info("Finished segment seal!");

        File segmentOutputDir = new File(new File(_config.getOutDir()), _segmentName);
        if (segmentOutputDir.exists()) {
            FileUtils.deleteDirectory(segmentOutputDir);
        }
        FileUtils.moveDirectory(_tempIndexDir, segmentOutputDir);
        FileUtils.deleteQuietly(_tempIndexDir);

        convertFormatIfNecessary(segmentOutputDir);
        if (_totalDocs > 0) {
            buildStarTreeV2IfNecessary(segmentOutputDir);
        }
        updatePostSegmentCreationIndexes(segmentOutputDir);

        long crc = CrcUtils.forAllFilesInFolder(segmentOutputDir).computeCrc();
        long creationTime;
        String configuredCreationTime = _config.getCreationTime();
        if (configuredCreationTime == null) {
            creationTime = System.currentTimeMillis();
        } else {
            try {
                creationTime = Long.parseLong(configuredCreationTime);
            } catch (NumberFormatException e) {
                // The exception message carries the offending value, so pass it on to the log.
                LOGGER.error("Caught exception while parsing creation time in config, use current time as creation time", e);
                creationTime = System.currentTimeMillis();
            }
        }
        persistCreationMeta(segmentOutputDir, crc, creationTime);

        LOGGER.info("Driver, record read time : {}", TimeUnit.NANOSECONDS.toMillis(_totalRecordReadTimeNs));
        LOGGER.info("Driver, stats collector time : {}", _totalStatsCollectorTime);
        LOGGER.info("Driver, indexing time : {}", _totalIndexTime);
    }

    /**
     * Builds the indexes whose build lifecycle is {@code POST_SEGMENT_CREATION} on the finished segment directory.
     *
     * <p>Does nothing when no registered index type uses that lifecycle. Otherwise the segment directory is loaded in
     * mmap read mode and each matching index type's handler updates its indices through a single writer. Both the
     * writer and the segment directory are closed even when a handler fails.
     *
     * @param file the final segment directory
     * @throws Exception if loading the segment directory or updating an index fails
     */
    private void updatePostSegmentCreationIndexes(File file) throws Exception {
        Set<IndexType<?, ?, ?>> postCreationIndexes = IndexService.getInstance().getAllIndexes().stream()
                .filter(indexType -> indexType.getIndexBuildLifecycle() == IndexType.BuildLifecycle.POST_SEGMENT_CREATION)
                .collect(Collectors.toSet());
        if (postCreationIndexes.isEmpty()) {
            return;
        }

        HashMap<String, Object> loaderProps = new HashMap<>();
        loaderProps.put(IndexLoadingConfig.READ_MODE_KEY, ReadMode.mmap);
        SegmentDirectoryLoaderContext loaderContext = new SegmentDirectoryLoaderContext.Builder()
                .setTableConfig(_config.getTableConfig())
                .setSchema(_config.getSchema())
                .setSegmentName(_segmentName)
                .setSegmentDirectoryConfigs(new PinotConfiguration(loaderProps))
                .build();
        IndexLoadingConfig indexLoadingConfig = new IndexLoadingConfig(null, _config.getTableConfig(), _config.getSchema());

        // try-with-resources closes the writer first and then the directory, on success and on failure alike.
        try (SegmentDirectory segmentDirectory = SegmentDirectoryLoaderRegistry.getDefaultSegmentDirectoryLoader().load(file.toURI(), loaderContext);
                SegmentDirectory.Writer segmentWriter = segmentDirectory.createWriter()) {
            for (IndexType<?, ?, ?> indexType : postCreationIndexes) {
                indexType.createIndexHandler(segmentDirectory, indexLoadingConfig.getFieldIndexConfigByColName(), _config.getSchema(), _config.getTableConfig())
                        .updateIndices(segmentWriter);
            }
        }
    }

    /**
     * Builds the star-tree indexes for the segment when star-tree configs are present or the default star-tree is
     * enabled; otherwise does nothing. The build mode follows the config's on-heap flag.
     *
     * @param file the final segment directory
     * @throws Exception if building the star-trees fails
     */
    private void buildStarTreeV2IfNecessary(File file) throws Exception {
        List starTreeConfigs = _config.getStarTreeIndexConfigs();
        boolean defaultStarTreeEnabled = _config.isEnableDefaultStarTree();
        if (CollectionUtils.isEmpty(starTreeConfigs) && !defaultStarTreeEnabled) {
            return;
        }

        MultipleTreesBuilder.BuildMode buildMode;
        if (_config.isOnHeap()) {
            buildMode = MultipleTreesBuilder.BuildMode.ON_HEAP;
        } else {
            buildMode = MultipleTreesBuilder.BuildMode.OFF_HEAP;
        }
        // The builder is closed whether or not build() succeeds.
        try (MultipleTreesBuilder treesBuilder = new MultipleTreesBuilder(starTreeConfigs, defaultStarTreeEnabled, file, buildMode)) {
            treesBuilder.build();
        }
    }

    /**
     * Converts the segment directory from v1 to v3 unless the configured segment version is v1.
     *
     * @param file the final segment directory
     * @throws Exception if the conversion fails
     */
    private void convertFormatIfNecessary(File file) throws Exception {
        boolean keepV1 = _config.getSegmentVersion().equals(SegmentVersion.v1);
        if (!keepV1) {
            SegmentFormatConverterFactory.getConverter(SegmentVersion.v1, SegmentVersion.v3).convert(file);
        }
    }

    /**
     * Looks up the collected statistics for a column.
     *
     * @param str the column name
     * @return whatever the stats container holds for that column
     * @throws Exception declared by the method signature
     */
    public ColumnStatistics getColumnStatisticsCollector(String str) throws Exception {
        ColumnStatistics columnStats = _segmentStats.getColumnProfileFor(str);
        return columnStats;
    }

    /**
     * Writes the {@code creation.meta} file into the segment directory resolved from {@code file}: the CRC followed
     * by the creation time, each as an 8-byte long.
     *
     * @param file the segment index directory
     * @param j the segment CRC
     * @param j2 the segment creation time
     * @throws IOException if the file cannot be written
     */
    public static void persistCreationMeta(File file, long j, long j2) throws IOException {
        File creationMetaFile = new File(SegmentDirectoryPaths.findSegmentDirectory(file), "creation.meta");
        try (DataOutputStream metaOut = new DataOutputStream(new FileOutputStream(creationMetaFile))) {
            metaOut.writeLong(j);
            metaOut.writeLong(j2);
        }
    }

    /**
     * Populates the per-column index creation info for every non-virtual column of the schema and records the total
     * document count in the segment-level creation info.
     *
     * <p>A column gets a dictionary unless it is listed among the raw index creation columns or has a raw index
     * compression type configured. BYTES default null values are wrapped in a {@link ByteArray}.
     *
     * @throws Exception declared by the method signature
     */
    void buildIndexCreationInfo() throws Exception {
        Set<String> varLengthDictionaryColumns = new HashSet<>(_config.getVarLengthDictionaryColumns());
        Set<String> rawIndexColumns = _config.getRawIndexCreationColumns();
        Set<String> rawCompressionColumns = _config.getRawIndexCompressionType().keySet();

        for (FieldSpec fieldSpec : _dataSchema.getAllFieldSpecs()) {
            if (fieldSpec.isVirtualColumn()) {
                continue;
            }
            String columnName = fieldSpec.getName();
            FieldSpec.DataType storedType = fieldSpec.getDataType().getStoredType();
            ColumnStatistics columnStats = _segmentStats.getColumnProfileFor(columnName);
            boolean useVarLengthDictionary = DictionaryIndexType.shouldUseVarLengthDictionary(columnName, varLengthDictionaryColumns, storedType, columnStats);

            Object defaultNullValue = fieldSpec.getDefaultNullValue();
            if (storedType == FieldSpec.DataType.BYTES) {
                defaultNullValue = new ByteArray((byte[]) defaultNullValue);
            }

            boolean createDictionary = !rawIndexColumns.contains(columnName) && !rawCompressionColumns.contains(columnName);
            ColumnIndexCreationInfo creationInfo = new ColumnIndexCreationInfo(columnStats, createDictionary, useVarLengthDictionary, false, defaultNullValue);
            _indexCreationInfoMap.put(columnName, creationInfo);
        }
        _segmentIndexCreationInfo.setTotalDocs(_totalDocs);
    }

    /**
     * Delegates to {@link DictionaryIndexType#shouldUseVarLengthDictionary} with the arguments unchanged.
     *
     * @param str the column name
     * @param set the columns configured to use a var-length dictionary
     * @param dataType the stored data type of the column
     * @param columnStatistics the collected statistics of the column
     * @return the delegate's decision
     * @deprecated call {@link DictionaryIndexType#shouldUseVarLengthDictionary} directly
     */
    @Deprecated
    public static boolean shouldUseVarLengthDictionary(String str, Set<String> set, FieldSpec.DataType dataType, ColumnStatistics columnStatistics) {
        boolean useVarLength = DictionaryIndexType.shouldUseVarLengthDictionary(str, set, dataType, columnStatistics);
        return useVarLength;
    }

    /**
     * Returns the name generated for the segment during post-creation handling.
     *
     * @return the segment name, or {@code null} if no name has been generated yet
     */
    public String getSegmentName() {
        return _segmentName;
    }

    /**
     * Returns the final segment directory: the segment name resolved against the configured output directory.
     *
     * @return the segment's output directory
     */
    public File getOutputDirectory() {
        File outDir = new File(_config.getOutDir());
        return new File(outDir, _segmentName);
    }

    /**
     * Returns the schema validator created during initialization.
     *
     * @return the validator obtained from the schema validator factory
     */
    public IngestionSchemaValidator getIngestionSchemaValidator() {
        return _ingestionSchemaValidator;
    }

    /**
     * Returns the pre-index statistics gathered from the data source during initialization.
     *
     * @return the segment statistics container
     */
    public SegmentPreIndexStatsContainer getSegmentStats() {
        return _segmentStats;
    }
}
