package org.apache.pinot.shaded.org.apache.parquet.hadoop.util;

import com.ctc.wstx.cfg.InputConfigFlags;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.pinot.shaded.org.apache.parquet.ParquetReadOptions;
import org.apache.pinot.shaded.org.apache.parquet.bytes.BytesInput;
import org.apache.pinot.shaded.org.apache.parquet.column.ColumnDescriptor;
import org.apache.pinot.shaded.org.apache.parquet.column.impl.ColumnReadStoreImpl;
import org.apache.pinot.shaded.org.apache.parquet.column.page.DictionaryPage;
import org.apache.pinot.shaded.org.apache.parquet.column.page.PageReadStore;
import org.apache.pinot.shaded.org.apache.parquet.column.statistics.Statistics;
import org.apache.pinot.shaded.org.apache.parquet.compression.CompressionCodecFactory;
import org.apache.pinot.shaded.org.apache.parquet.format.DataPageHeader;
import org.apache.pinot.shaded.org.apache.parquet.format.DataPageHeaderV2;
import org.apache.pinot.shaded.org.apache.parquet.format.DictionaryPageHeader;
import org.apache.pinot.shaded.org.apache.parquet.format.PageHeader;
import org.apache.pinot.shaded.org.apache.parquet.format.Util;
import org.apache.pinot.shaded.org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.pinot.shaded.org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.pinot.shaded.org.apache.parquet.hadoop.ParquetFileWriter;
import org.apache.pinot.shaded.org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.pinot.shaded.org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.pinot.shaded.org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.pinot.shaded.org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.pinot.shaded.org.apache.parquet.internal.column.columnindex.ColumnIndex;
import org.apache.pinot.shaded.org.apache.parquet.internal.column.columnindex.OffsetIndex;
import org.apache.pinot.shaded.org.apache.parquet.io.InputFile;
import org.apache.pinot.shaded.org.apache.parquet.io.ParquetEncodingException;
import org.apache.pinot.shaded.org.apache.parquet.io.SeekableInputStream;
import org.apache.pinot.shaded.org.apache.parquet.io.api.Converter;
import org.apache.pinot.shaded.org.apache.parquet.io.api.GroupConverter;
import org.apache.pinot.shaded.org.apache.parquet.io.api.PrimitiveConverter;
import org.apache.pinot.shaded.org.apache.parquet.schema.MessageType;
import org.apache.pinot.shaded.org.apache.parquet.schema.PrimitiveType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/pinot/shaded/org/apache/parquet/hadoop/util/CompressionConverter.class */
public class CompressionConverter {
    private static final Logger LOG = LoggerFactory.getLogger(CompressionConverter.class);

    /**
     * Capacity, in bytes, of the reusable page buffer: 2 MiB (2097152).
     *
     * <p>Spelled out explicitly rather than borrowed from an unrelated library constant that merely
     * happens to share the same numeric value.
     */
    private final int pageBufferSize = 2 * 1024 * 1024;

    /** Scratch buffer that page reads reuse instead of allocating a new array for every page. */
    private final byte[] pageBuffer = new byte[pageBufferSize];

    /* loaded from: input_file:org/apache/pinot/shaded/org/apache/parquet/hadoop/util/CompressionConverter$DummyConverter.class */
    /**
     * Placeholder primitive converter handed out by {@link DummyGroupConverter#getConverter(int)}.
     *
     * <p>It overrides nothing except {@link #asGroupConverter()}, which answers with a fresh
     * {@link DummyGroupConverter} on every call.
     */
    private static final class DummyConverter extends PrimitiveConverter {

        /** Returns a newly created {@link DummyGroupConverter}. */
        @Override
        public GroupConverter asGroupConverter() {
            final GroupConverter groupView = new DummyGroupConverter();
            return groupView;
        }
    }

    /* loaded from: input_file:org/apache/pinot/shaded/org/apache/parquet/hadoop/util/CompressionConverter$DummyGroupConverter.class */
    /**
     * Placeholder group converter whose start/end callbacks do nothing and whose children are
     * always new {@link DummyConverter} instances.
     */
    private static final class DummyGroupConverter extends GroupConverter {

        /** Intentionally a no-op. */
        @Override
        public void start() {
            // nothing to do
        }

        /** Intentionally a no-op. */
        @Override
        public void end() {
            // nothing to do
        }

        /**
         * Returns a newly created {@link DummyConverter}; the field index is ignored.
         *
         * @param i index of the requested field (unused)
         */
        @Override
        public Converter getConverter(int i) {
            final Converter child = new DummyConverter();
            return child;
        }
    }

    /* loaded from: input_file:org/apache/pinot/shaded/org/apache/parquet/hadoop/util/CompressionConverter$TransParquetFileReader.class */
    /**
     * {@link ParquetFileReader} subclass that exposes low-level operations on the inherited
     * input stream {@code f}: seeking, raw block reads, page-header reads and position queries.
     */
    public static final class TransParquetFileReader extends ParquetFileReader {

        /**
         * Opens a reader by delegating to the {@link ParquetFileReader} constructor.
         *
         * @param inputFile file to read
         * @param parquetReadOptions options forwarded to the superclass
         * @throws IOException if the superclass constructor fails
         */
        public TransParquetFileReader(InputFile inputFile, ParquetReadOptions parquetReadOptions) throws IOException {
            super(inputFile, parquetReadOptions);
        }

        /**
         * Seeks the underlying stream.
         *
         * @param j position handed to the stream's {@code seek}
         * @throws IOException if the seek fails
         */
        public void setStreamPosition(long j) throws IOException {
            f.seek(j);
        }

        /**
         * Fills part of {@code bArr} from the underlying stream via {@code readFully}.
         *
         * @param bArr destination array
         * @param i offset in {@code bArr} at which to start writing
         * @param i2 number of bytes to read
         * @throws IOException if the read fails
         */
        public void blockRead(byte[] bArr, int i, int i2) throws IOException {
            f.readFully(bArr, i, i2);
        }

        /**
         * Reads a page header from the underlying stream using {@link Util#readPageHeader}.
         *
         * @return the page header that was read
         * @throws IOException if the read fails
         */
        public PageHeader readPageHeader() throws IOException {
            final SeekableInputStream in = f;
            return Util.readPageHeader(in);
        }

        /**
         * @return the position reported by the underlying stream
         * @throws IOException if the stream cannot report its position
         */
        public long getPos() throws IOException {
            final long position = f.getPos();
            return position;
        }

        /** @return the underlying stream itself (not a copy) */
        public SeekableInputStream getStream() {
            return f;
        }
    }

    /**
     * Copies every remaining row group from the reader to the writer, rewriting each column chunk
     * with the requested compression codec.
     *
     * <p>Row groups are pulled with {@code readNextRowGroup()} until it returns {@code null}. The
     * n-th row group read is paired with the n-th block of {@code parquetMetadata}, and each column
     * chunk of that block is matched to a column of {@code messageType} by its path.
     *
     * @param transParquetFileReader source of row groups and raw page bytes
     * @param parquetFileWriter destination; receives startBlock/startColumn/endColumn/endBlock calls
     * @param parquetMetadata metadata whose blocks list the column chunks of each row group
     * @param messageType schema used to look up column descriptors and to build column readers
     * @param str forwarded to {@link ColumnReadStoreImpl} and to statistics conversion
     *     (presumably the file's "created by" string; confirm against callers)
     * @param compressionCodecName codec the rewritten chunks are compressed with
     * @throws IOException if reading or writing fails, or if a column chunk's path has no matching
     *     column in {@code messageType}
     */
    public void processBlocks(TransParquetFileReader transParquetFileReader, ParquetFileWriter parquetFileWriter, ParquetMetadata parquetMetadata, MessageType messageType, String str, CompressionCodecName compressionCodecName) throws IOException {
        // The schema is the same for every row group, so index its columns by path only once.
        Map<ColumnPath, ColumnDescriptor> descriptorsByPath = messageType.getColumns().stream()
                .collect(Collectors.toMap(descriptor -> ColumnPath.get(descriptor.getPath()), descriptor -> descriptor));

        int blockIndex = 0;
        PageReadStore rowGroup = transParquetFileReader.readNextRowGroup();
        while (rowGroup != null) {
            parquetFileWriter.startBlock(rowGroup.getRowCount());
            List<ColumnChunkMetaData> chunks = parquetMetadata.getBlocks().get(blockIndex).getColumns();
            for (ColumnChunkMetaData chunk : chunks) {
                ColumnDescriptor descriptor = descriptorsByPath.get(chunk.getPath());
                if (descriptor == null) {
                    // Fail with a clear message instead of a NullPointerException further down.
                    throw new IOException("Column " + chunk.getPath() + " of block " + blockIndex + " is not present in the schema");
                }
                ColumnReadStoreImpl readStore = new ColumnReadStoreImpl(rowGroup, new DummyGroupConverter(), messageType, str);
                long totalValueCount = readStore.getColumnReader(descriptor).getTotalValueCount();
                parquetFileWriter.startColumn(descriptor, totalValueCount, compressionCodecName);
                processChunk(transParquetFileReader, parquetFileWriter, chunk, str, compressionCodecName);
                parquetFileWriter.endColumn();
            }
            parquetFileWriter.endBlock();
            rowGroup = transParquetFileReader.readNextRowGroup();
            blockIndex++;
        }
    }

    /**
     * Rewrites one column chunk page by page: each page is read from the reader at the chunk's
     * starting position, its payload is re-compressed with {@code targetCodec}, and the result is
     * written through {@code writer}.
     *
     * <p>Pages are consumed until the number of values seen in data pages reaches the chunk's
     * value count. Dictionary pages do not count towards that total. Page types other than
     * dictionary, data and data-v2 are skipped.
     *
     * @param reader source positioned by this method at {@code chunk.getStartingPos()}
     * @param writer destination of the rewritten pages
     * @param chunk metadata of the chunk being rewritten (codec, start position, value count, type)
     * @param createdBy forwarded to statistics conversion
     * @param targetCodec codec used to compress the rewritten pages
     * @throws IOException if reading or writing fails, or if the chunk holds more than one
     *     dictionary page
     */
    private void processChunk(TransParquetFileReader reader, ParquetFileWriter writer, ColumnChunkMetaData chunk, String createdBy, CompressionCodecName targetCodec) throws IOException {
        CompressionCodecFactory codecFactory = HadoopCodecs.newFactory(0);
        try {
            CompressionCodecFactory.BytesInputDecompressor decompressor = codecFactory.getDecompressor(chunk.getCodec());
            CompressionCodecFactory.BytesInputCompressor compressor = codecFactory.getCompressor(targetCodec);
            ColumnIndex columnIndex = reader.readColumnIndex(chunk);
            OffsetIndex offsetIndex = reader.readOffsetIndex(chunk);
            // Reading the indexes moves the stream, so position it on the chunk afterwards.
            reader.setStreamPosition(chunk.getStartingPos());

            ParquetMetadataConverter converter = new ParquetMetadataConverter();
            long totalValues = chunk.getValueCount();
            long valuesRead = 0;
            int pageIndex = 0;
            boolean dictionaryPageSeen = false;

            while (valuesRead < totalValues) {
                PageHeader pageHeader = reader.readPageHeader();
                int compressedSize = pageHeader.getCompressed_page_size();
                int uncompressedSize = pageHeader.getUncompressed_page_size();
                switch (pageHeader.type) {
                    case DICTIONARY_PAGE: {
                        if (dictionaryPageSeen) {
                            throw new IOException("has more than one dictionary page in column chunk");
                        }
                        dictionaryPageSeen = true;
                        DictionaryPageHeader dictionaryHeader = pageHeader.dictionary_page_header;
                        byte[] payload = translatePageLoad(reader, true, compressor, decompressor, compressedSize, uncompressedSize);
                        writer.writeDictionaryPage(new DictionaryPage(
                                BytesInput.from(payload),
                                uncompressedSize,
                                dictionaryHeader.getNum_values(),
                                converter.getEncoding(dictionaryHeader.getEncoding())));
                        break;
                    }
                    case DATA_PAGE: {
                        DataPageHeader dataHeader = pageHeader.data_page_header;
                        byte[] payload = translatePageLoad(reader, true, compressor, decompressor, compressedSize, uncompressedSize);
                        Statistics<?> statistics = convertStatistics(createdBy, chunk.getPrimitiveType(), dataHeader.getStatistics(), columnIndex, pageIndex, converter);
                        valuesRead += dataHeader.getNum_values();
                        if (offsetIndex != null) {
                            // NOTE(review): the chunk's value count is passed where getLastRowIndex
                            // takes its second argument; confirm a row count is not expected here.
                            int rowCount = toIntWithCheck((1 + offsetIndex.getLastRowIndex(pageIndex, totalValues)) - offsetIndex.getFirstRowIndex(pageIndex));
                            writer.writeDataPage(
                                    toIntWithCheck(dataHeader.getNum_values()),
                                    uncompressedSize,
                                    BytesInput.from(payload),
                                    statistics,
                                    rowCount,
                                    converter.getEncoding(dataHeader.getRepetition_level_encoding()),
                                    converter.getEncoding(dataHeader.getDefinition_level_encoding()),
                                    converter.getEncoding(dataHeader.getEncoding()));
                        } else {
                            writer.writeDataPage(
                                    toIntWithCheck(dataHeader.getNum_values()),
                                    uncompressedSize,
                                    BytesInput.from(payload),
                                    statistics,
                                    converter.getEncoding(dataHeader.getRepetition_level_encoding()),
                                    converter.getEncoding(dataHeader.getDefinition_level_encoding()),
                                    converter.getEncoding(dataHeader.getEncoding()));
                        }
                        pageIndex++;
                        break;
                    }
                    case DATA_PAGE_V2: {
                        DataPageHeaderV2 v2Header = pageHeader.data_page_header_v2;
                        // The level sections are read with the byte lengths given in the header and
                        // passed to the writer unchanged; only the remaining bytes are translated.
                        int repetitionLength = v2Header.getRepetition_levels_byte_length();
                        BytesInput repetitionLevels = readBlockAllocate(repetitionLength, reader);
                        int definitionLength = v2Header.getDefinition_levels_byte_length();
                        BytesInput definitionLevels = readBlockAllocate(definitionLength, reader);
                        int compressedDataSize = (compressedSize - repetitionLength) - definitionLength;
                        int uncompressedDataSize = (uncompressedSize - repetitionLength) - definitionLength;
                        valuesRead += v2Header.getNum_values();
                        writer.writeDataPageV2(
                                v2Header.getNum_rows(),
                                v2Header.getNum_nulls(),
                                v2Header.getNum_values(),
                                repetitionLevels,
                                definitionLevels,
                                converter.getEncoding(v2Header.getEncoding()),
                                BytesInput.from(translatePageLoad(reader, v2Header.is_compressed, compressor, decompressor, compressedDataSize, uncompressedDataSize)),
                                uncompressedDataSize,
                                convertStatistics(createdBy, chunk.getPrimitiveType(), v2Header.getStatistics(), columnIndex, pageIndex, converter));
                        pageIndex++;
                        break;
                    }
                    default:
                        LOG.debug("skipping page of type {} of size {}", pageHeader.getType(), compressedSize);
                        // Actually step over the page body; otherwise the next header read would
                        // start in the middle of this page's bytes.
                        reader.setStreamPosition(reader.getPos() + compressedSize);
                        break;
                }
            }
        } finally {
            // The factory is local to this chunk, so give its codecs back once the chunk is done.
            codecFactory.release();
        }
    }

    /**
     * Produces the statistics to write for one page.
     *
     * <p>Without a column index, the page header's own statistics are converted (or {@code null}
     * is returned when the header has none). With a column index, the statistics are rebuilt from
     * the index entry at {@code pageIndex}: the null count when the index provides null counts,
     * plus min/max unless the page is flagged as a null page.
     *
     * @param createdBy forwarded to {@code fromParquetStatistics}
     * @param type primitive type of the column
     * @param pageStatistics statistics from the page header; may be {@code null}
     * @param columnIndex column index of the chunk; may be {@code null}
     * @param pageIndex zero-based index of the data page within the chunk
     * @param converter converter used for header statistics
     * @return the statistics for the page, or {@code null} if neither source provides any
     * @throws IOException if the column index has no null-page list or has no entry for
     *     {@code pageIndex}
     */
    private Statistics convertStatistics(String createdBy, PrimitiveType type, org.apache.pinot.shaded.org.apache.parquet.format.Statistics pageStatistics, ColumnIndex columnIndex, int pageIndex, ParquetMetadataConverter converter) throws IOException {
        if (columnIndex == null) {
            if (pageStatistics == null) {
                return null;
            }
            return converter.fromParquetStatistics(createdBy, pageStatistics, type);
        }

        List<Boolean> nullPages = columnIndex.getNullPages();
        if (nullPages == null) {
            throw new IOException("columnIndex has null variable 'nullPages' which indicates corrupted data for type: " + type.getName());
        }
        // pageIndex is zero-based, so an index equal to the size is already out of range.
        if (pageIndex >= nullPages.size()) {
            throw new IOException("There are more pages " + pageIndex + " found in the column than in the columnIndex " + nullPages.size());
        }

        Statistics.Builder builder = Statistics.getBuilderForReading(type);
        List<Long> nullCounts = columnIndex.getNullCounts();
        if (nullCounts != null) {
            // Guard against an index that carries no null counts instead of failing with an NPE.
            builder.withNumNulls(nullCounts.get(pageIndex));
        }
        if (!nullPages.get(pageIndex)) {
            // NOTE(review): array() exposes the whole backing array; this presumes each buffer
            // wraps exactly the value bytes. Confirm against the ColumnIndex implementation.
            builder.withMin(columnIndex.getMinValues().get(pageIndex).array().clone());
            builder.withMax(columnIndex.getMaxValues().get(pageIndex).array().clone());
        }
        return builder.build();
    }

    /**
     * Reads a page payload from the reader and returns it compressed with {@code compressor}.
     *
     * @param reader source of the payload bytes
     * @param isCompressed whether the bytes read must first go through {@code decompressor}
     * @param compressor compressor applied to the (possibly decompressed) payload
     * @param decompressor decompressor applied only when {@code isCompressed} is true
     * @param payloadLength number of bytes to read from the reader
     * @param uncompressedLength size handed to the decompressor
     * @return the compressor's output as a byte array
     * @throws IOException if reading, decompressing or compressing fails
     */
    private byte[] translatePageLoad(TransParquetFileReader reader, boolean isCompressed, CompressionCodecFactory.BytesInputCompressor compressor, CompressionCodecFactory.BytesInputDecompressor decompressor, int payloadLength, int uncompressedLength) throws IOException {
        final BytesInput raw = readBlock(payloadLength, reader);
        final BytesInput plain = isCompressed ? decompressor.decompress(raw, uncompressedLength) : raw;
        final BytesInput recompressed = compressor.compress(plain);
        return recompressed.toByteArray();
    }

    /**
     * Reads {@code i} bytes from the reader into a buffer and returns them as a {@link BytesInput}.
     *
     * <p>Requests that fit in the shared page buffer reuse it; larger requests get a freshly
     * allocated array. When the shared buffer is used, the returned {@link BytesInput} is created
     * over that same array, so a later call to this method may overwrite its contents. Consume the
     * result before reading the next block.
     *
     * @param i number of bytes to read
     * @param transParquetFileReader reader the bytes are taken from
     * @return a {@link BytesInput} over the first {@code i} bytes of the buffer that was filled
     * @throws IOException if the read fails
     */
    public BytesInput readBlock(int i, TransParquetFileReader transParquetFileReader) throws IOException {
        // Compare against the buffer's real capacity so a read can never run past its end.
        byte[] target = i > this.pageBuffer.length ? new byte[i] : this.pageBuffer;
        transParquetFileReader.blockRead(target, 0, i);
        return BytesInput.from(target, 0, i);
    }

    /**
     * Reads {@code i} bytes from the reader into a newly allocated array of exactly that size and
     * returns them as a {@link BytesInput}.
     *
     * @param i number of bytes to read, which is also the size of the new array
     * @param transParquetFileReader reader the bytes are taken from
     * @return a {@link BytesInput} over the new array
     * @throws IOException if the read fails
     */
    public BytesInput readBlockAllocate(int i, TransParquetFileReader transParquetFileReader) throws IOException {
        final byte[] freshBuffer = new byte[i];
        transParquetFileReader.blockRead(freshBuffer, 0, i);
        final BytesInput wrapped = BytesInput.from(freshBuffer, 0, i);
        return wrapped;
    }

    /**
     * Narrows a {@code long} to an {@code int}, refusing values the narrowing would change.
     *
     * @param size value to narrow
     * @return {@code size} as an {@code int}
     * @throws ParquetEncodingException if {@code size} does not survive the cast unchanged
     */
    private int toIntWithCheck(long size) {
        final int narrowed = (int) size;
        if (narrowed == size) {
            return narrowed;
        }
        throw new ParquetEncodingException("size is bigger than 2147483647 bytes: " + size);
    }
}
