package org.apache.pinot.tools.anonymizer;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.pinot.common.request.context.FilterContext;
import org.apache.pinot.core.query.request.context.utils.QueryContextConverterUtils;
import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
import org.apache.pinot.segment.local.segment.readers.PinotSegmentRecordReader;
import org.apache.pinot.segment.spi.ColumnMetadata;
import org.apache.pinot.segment.spi.ImmutableSegment;
import org.apache.pinot.segment.spi.SegmentMetadata;
import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
import org.apache.pinot.segment.spi.index.reader.Dictionary;
import org.apache.pinot.shaded.com.google.common.base.Stopwatch;
import org.apache.pinot.spi.data.DateTimeFieldSpec;
import org.apache.pinot.spi.data.DimensionFieldSpec;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.MetricFieldSpec;
import org.apache.pinot.spi.data.Schema;
import org.apache.pinot.spi.data.TimeFieldSpec;
import org.apache.pinot.spi.data.readers.GenericRow;
import org.apache.pinot.spi.utils.ReadMode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/pinot/tools/anonymizer/PinotDataAndQueryAnonymizer.class */
public class PinotDataAndQueryAnonymizer {
    /** Logger shared by all instances of this class. */
    private static final Logger LOGGER = LoggerFactory.getLogger((Class<?>) PinotDataAndQueryAnonymizer.class);
    /** Base value added to a random float when deriving a replacement for a FLOAT value. */
    private static final float FLOAT_BASE_VALUE = 100.23f;
    /** Base value added to a random double when deriving a replacement for a DOUBLE value. */
    private static final double DOUBLE_BASE_VALUE = 1000.2375d;
    // NOTE(review): not referenced in this file; writeColumnMapping() uses the same text as an inline literal
    // for the mapping file name - presumably the constant was inlined by the compiler/decompiler.
    private static final String COLUMN_MAPPING_FILE_KEY = "columns.mapping";
    // NOTE(review): not referenced in this file; writeColumnMapping() uses the same ":" literal between the
    // original and derived column name on each line of the mapping file.
    private static final String COLUMN_MAPPING_SEPARATOR = ":";
    /** Directory that receives the column mapping file, the serialized global dictionaries and the Avro files. */
    private final String _outputDir;
    /** Number of Avro files to generate; set to the number of entries found in {@link #_segmentDir}. */
    private int _numFilesToGenerate;
    /** Directory whose entries are treated as segment directories. */
    private final String _segmentDir;
    /** Prefix of each generated Avro file name; the segment index is appended to it. */
    private final String _filePrefix;
    /** Global dictionaries holding original values and their derived replacements (map- or array-based). */
    private final GlobalDictionaries _globalDictionaries;
    /** Maps each original column name to its anonymized (derived) column name. */
    private final Map<String, String> _origToDerivedColumnsMap;
    /** Measures the time spent building the global dictionaries. */
    private final Stopwatch _timeToBuildDictionaries = Stopwatch.createUnstarted();
    /** Measures the time spent generating the Avro files. */
    private final Stopwatch _timeToGenerateAvroFiles = Stopwatch.createUnstarted();
    /** Pinot schema taken from the first segment metadata processed; null until then. */
    private Schema _pinotSchema = null;
    /** Avro schema (with derived column names) built from {@link #_pinotSchema}; null until then. */
    private org.apache.avro.Schema _avroSchema = null;
    /** Maps each original column name to its Pinot field spec; filled while the Avro schema is built. */
    private final Map<String, FieldSpec> _columnToFieldSpecMap;
    /** Columns that get a global dictionary, mapped to the value logged as their cardinality. */
    private final Map<String, Integer> _globalDictionaryColumns;
    /** Columns whose values are copied to the output unchanged. */
    private final Set<String> _columnsNotAnonymized;
    /** Names of the entries listed in {@link #_segmentDir}; null until buildGlobalDictionaries() runs. */
    private String[] _segmentDirectories;

    /* loaded from: input_file:org/apache/pinot/tools/anonymizer/PinotDataAndQueryAnonymizer$FilterColumnExtractor.class */
    /**
     * Collects the columns referenced by the filter of every query stored in a query file.
     */
    public static class FilterColumnExtractor {
        /**
         * Reads a query file line by line (one query per line) and gathers every column that appears in
         * a query's filter.
         *
         * @param str directory that contains the query file
         * @param str2 name of the query file inside that directory
         * @return the set of column names used in the filters of the queries
         * @throws Exception if the file cannot be read or a query cannot be converted to a query context
         */
        public static Set<String> extractColumnsUsedInFilter(String str, String str2) throws Exception {
            Set<String> filterColumns = new HashSet<>();
            // try-with-resources so the file handle is released on both the normal and the failure path
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(str + "/" + str2))))) {
                String query;
                while ((query = reader.readLine()) != null) {
                    examineWhereClause(query, filterColumns);
                }
            }
            return filterColumns;
        }

        /**
         * Adds the columns used by the filter of a single query to the given set; queries without a
         * filter contribute nothing.
         *
         * @param str the query text
         * @param set accumulator that receives the filter columns
         */
        private static void examineWhereClause(String str, Set<String> set) {
            FilterContext filter = QueryContextConverterUtils.getQueryContext(str).getFilter();
            if (filter != null) {
                filter.getColumns(set);
            }
        }
    }

    /**
     * Creates an anonymizer and logs which columns are retained and which get a global dictionary.
     *
     * <p>Columns listed in {@code set} are removed from {@code map}; the passed-in map itself is
     * modified and kept by reference.
     *
     * @param str directory containing the segment directories
     * @param str2 output directory
     * @param str3 prefix for the names of the generated Avro files
     * @param map columns to build global dictionaries for, mapped to their cardinality
     * @param set columns whose data is retained as is
     * @param z {@code true} to use map-based global dictionaries, {@code false} for array-based ones
     */
    public PinotDataAndQueryAnonymizer(String str, String str2, String str3, Map<String, Integer> map, Set<String> set, boolean z) {
        this._segmentDir = str;
        this._outputDir = str2;
        this._filePrefix = str3;
        if (z) {
            this._globalDictionaries = new MapBasedGlobalDictionaries();
        } else {
            this._globalDictionaries = new ArrayBasedGlobalDictionaries();
        }
        this._origToDerivedColumnsMap = new HashMap<>();
        this._columnToFieldSpecMap = new HashMap<>();
        this._globalDictionaryColumns = map;
        this._columnsNotAnonymized = set;

        // A retained column never needs a global dictionary.
        for (String retainedColumn : set) {
            this._globalDictionaryColumns.remove(retainedColumn);
        }

        StringBuilder retainedMessage = new StringBuilder("Columns to retain data for: ");
        for (String retainedColumn : this._columnsNotAnonymized) {
            retainedMessage.append(retainedColumn);
            retainedMessage.append(", ");
        }
        LOGGER.info(retainedMessage.toString());

        StringBuilder dictionaryMessage = new StringBuilder("Columns to build global dictionary for: ");
        for (Map.Entry<String, Integer> columnAndCardinality : this._globalDictionaryColumns.entrySet()) {
            dictionaryMessage.append("Column: ");
            dictionaryMessage.append(columnAndCardinality.getKey());
            dictionaryMessage.append(" Cardinality: ");
            dictionaryMessage.append(columnAndCardinality.getValue());
            dictionaryMessage.append(", ");
        }
        LOGGER.info(dictionaryMessage.toString());
    }

    /**
     * Lists the segment directory, derives the schemas and, when global dictionary columns are
     * configured, builds and writes the global dictionaries together with the column name mapping.
     *
     * <p>When no global dictionary columns are configured, only the schema is read (from the first
     * listed segment) and the column mapping is written.
     *
     * @throws IllegalStateException if the segment directory cannot be listed, or if it is empty while
     *     the schema has to be read from its first segment
     * @throws Exception if reading a segment or writing the output fails
     */
    public void buildGlobalDictionaries() throws Exception {
        String[] segmentNames = new File(this._segmentDir).list();
        if (segmentNames == null) {
            // File.list() returns null when the path does not exist, is not a directory or cannot be read
            throw new IllegalStateException("Unable to list segment directory (missing or not a directory): " + this._segmentDir);
        }
        this._segmentDirectories = segmentNames;
        this._numFilesToGenerate = segmentNames.length;
        LOGGER.info("Total number of segments: {}", this._numFilesToGenerate);
        if (this._globalDictionaryColumns.isEmpty()) {
            LOGGER.info("Set of global dictionary columns is empty. Not building global dictionaries");
            if (segmentNames.length == 0) {
                throw new IllegalStateException("No segments found in " + this._segmentDir + "; cannot read schema");
            }
            getSchemaFromFirstSegment(this._segmentDir + "/" + segmentNames[0]);
            writeColumnMapping();
            return;
        }
        this._timeToBuildDictionaries.start();
        for (String segmentName : segmentNames) {
            readDictionariesFromSegment(this._segmentDir + "/" + segmentName);
        }
        this._globalDictionaries.sortOriginalValuesInGlobalDictionaries();
        this._globalDictionaries.addDerivedValuesToGlobalDictionaries();
        this._timeToBuildDictionaries.stop();
        writeGlobalDictionariesAndColumnMapping();
        LOGGER.info("Finished building global dictionaries. Time taken: {}secs", this._timeToBuildDictionaries.elapsed(TimeUnit.SECONDS));
    }

    /**
     * Loads the metadata of the given segment directory and derives the Pinot and Avro schemas from it.
     *
     * @param str path of the segment directory
     * @throws Exception if the segment metadata cannot be read
     */
    private void getSchemaFromFirstSegment(String str) throws Exception {
        LOGGER.info("Reading metadata from segment: " + str);
        File segmentDirectory = new File(str);
        SegmentMetadata metadata = new SegmentMetadataImpl(segmentDirectory);
        pinotToAvroSchema(metadata);
    }

    /**
     * Initializes the Pinot schema, the anonymized column names and the Avro schema from the given
     * segment metadata. Does nothing once the Pinot schema has been set.
     *
     * @param segmentMetadata metadata of a segment whose schema is used
     */
    private void pinotToAvroSchema(SegmentMetadata segmentMetadata) {
        if (this._pinotSchema != null) {
            // Schemas were already derived from an earlier segment.
            return;
        }
        this._pinotSchema = segmentMetadata.getSchema();
        anonymizeColumnNames(this._pinotSchema);
        this._avroSchema = getAvroSchemaFromPinotSchema(this._pinotSchema);
        String pinotSchemaJson = this._pinotSchema.toPrettyJsonString();
        LOGGER.info("Pinot schema: " + pinotSchemaJson);
        String avroSchemaJson = this._avroSchema.toString(true);
        LOGGER.info("Avro schema: " + avroSchemaJson);
    }

    /**
     * Loads one segment and adds every dictionary value of each configured global dictionary column
     * to the global dictionaries.
     *
     * <p>The segment is destroyed again before returning so that its memory-mapped buffers are not
     * kept open for the lifetime of the process.
     *
     * @param str path of the segment directory
     * @throws UnsupportedOperationException if a global dictionary column has no dictionary in the segment
     * @throws Exception if the segment metadata or the segment itself cannot be loaded
     */
    private void readDictionariesFromSegment(String str) throws Exception {
        File segmentDirectory = new File(str);
        SegmentMetadataImpl segmentMetadata = new SegmentMetadataImpl(segmentDirectory);
        pinotToAvroSchema(segmentMetadata);
        ImmutableSegment segment = ImmutableSegmentLoader.load(segmentDirectory, ReadMode.mmap);
        try {
            for (Map.Entry<String, ColumnMetadata> entry : segmentMetadata.getColumnMetadataMap().entrySet()) {
                String column = entry.getKey();
                Integer configuredCardinality = this._globalDictionaryColumns.get(column);
                if (configuredCardinality == null) {
                    // Not a global dictionary column.
                    continue;
                }
                ColumnMetadata columnMetadata = entry.getValue();
                Dictionary dictionary = segment.getDictionary(column);
                if (dictionary == null) {
                    throw new UnsupportedOperationException("Data generator currently does not support filter columns without dictionary");
                }
                int segmentCardinality = columnMetadata.getCardinality();
                for (int dictId = 0; dictId < segmentCardinality; dictId++) {
                    this._globalDictionaries.addOrigValueToGlobalDictionary(dictionary.get(dictId), column, columnMetadata, configuredCardinality.intValue());
                }
            }
        } finally {
            // Release the mmap-ed segment; otherwise every processed segment stays mapped.
            segment.destroy();
        }
    }

    /**
     * Writes the column name mapping and the serialized global dictionaries to the output directory
     * and logs how long that took.
     *
     * @throws Exception if writing the mapping or serializing the dictionaries fails
     */
    private void writeGlobalDictionariesAndColumnMapping() throws Exception {
        Stopwatch writeTimer = Stopwatch.createUnstarted().start();
        writeColumnMapping();
        this._globalDictionaries.serialize(this._outputDir);
        writeTimer.stop();
        long elapsedSeconds = writeTimer.elapsed(TimeUnit.SECONDS);
        LOGGER.info("Finished writing global dictionaries and column name mapping to disk. Time taken: {}secs. Please see the files in {}", Long.valueOf(elapsedSeconds), this._outputDir);
    }

    /**
     * Writes the original-to-derived column name mapping to {@code columns.mapping} in the output
     * directory, one {@code original:derived} pair per line.
     *
     * @throws Exception if the mapping file cannot be opened for writing
     */
    private void writeColumnMapping() throws Exception {
        // try-with-resources closes (and thereby flushes) the writer even if writing fails midway
        try (PrintWriter printWriter = new PrintWriter(new BufferedWriter(new FileWriter(this._outputDir + "/columns.mapping")))) {
            for (Map.Entry<String, String> entry : this._origToDerivedColumnsMap.entrySet()) {
                printWriter.println(entry.getKey() + ":" + entry.getValue());
            }
        }
    }

    /**
     * Generates one Avro file per segment. Every row of a segment is read, anonymized and appended to
     * the file {@code <outputDir>/<filePrefix><segmentIndex>}.
     *
     * <p>The segment reader and the Avro writer are both closed for each segment, also when
     * processing that segment fails.
     *
     * @throws Exception if a segment cannot be read or an Avro file cannot be written
     */
    public void generateAvroFiles() throws Exception {
        this._timeToGenerateAvroFiles.start();
        // long: the total across all segments may exceed the int range
        long totalRows = 0;
        for (int fileIndex = 0; fileIndex < this._numFilesToGenerate; fileIndex++) {
            String segmentPath = this._segmentDir + "/" + this._segmentDirectories[fileIndex];
            try (PinotSegmentRecordReader recordReader = new PinotSegmentRecordReader(new File(segmentPath), this._pinotSchema, null);
                    DataFileWriter<GenericData.Record> avroWriter = new DataFileWriter<>(new GenericDatumWriter<GenericData.Record>(this._avroSchema))) {
                String avroPath = this._outputDir + "/" + this._filePrefix + fileIndex;
                LOGGER.info("Using segment {} to generate Avro file {}", segmentPath, avroPath);
                avroWriter.create(this._avroSchema, new File(avroPath));
                // A single row object is reused for all rows of the segment.
                GenericRow row = new GenericRow();
                int rowsInFile = 0;
                while (recordReader.hasNext()) {
                    row = recordReader.next(row);
                    buildAvroRow(avroWriter, row);
                    rowsInFile++;
                    row.clear();
                }
                totalRows += rowsInFile;
                LOGGER.info("Generated Avro File {} with {} rows", avroPath, rowsInFile);
            }
        }
        this._timeToGenerateAvroFiles.stop();
        LOGGER.info("Finished generating {} rows across {} avro files. Time taken {}secs", totalRows, this._numFilesToGenerate, this._timeToGenerateAvroFiles.elapsed(TimeUnit.SECONDS));
    }

    /**
     * Converts one Pinot row into an Avro record with derived column names and appends it to the writer.
     *
     * <p>For each column the value written is:
     * <ul>
     *   <li>the original value, if the column is not anonymized;</li>
     *   <li>a random derived value, if the column has no global dictionary;</li>
     *   <li>the derived value looked up in the global dictionaries otherwise.</li>
     * </ul>
     * Multi-value columns are written as a list; a null multi-value is written as null in all three cases.
     *
     * @param dataFileWriter Avro writer that receives the record
     * @param genericRow the Pinot row to convert
     * @throws Exception if appending the record fails
     */
    private void buildAvroRow(DataFileWriter<GenericData.Record> dataFileWriter, GenericRow genericRow) throws Exception {
        GenericData.Record record = new GenericData.Record(this._avroSchema);
        for (Map.Entry<String, Object> entry : genericRow.getFieldToValueMap().entrySet()) {
            String column = entry.getKey();
            Object value = entry.getValue();
            String derivedColumn = this._origToDerivedColumnsMap.get(column);
            FieldSpec fieldSpec = this._columnToFieldSpecMap.get(column);
            boolean isSingleValueField = fieldSpec.isSingleValueField();
            final Object avroValue;
            if (this._columnsNotAnonymized.contains(column)) {
                avroValue = isSingleValueField ? value : multiValueToAvroList(value);
            } else if (!this._globalDictionaryColumns.containsKey(column)) {
                Object randomValue = generateRandomDerivedValue(value, fieldSpec);
                avroValue = isSingleValueField ? randomValue : multiValueToAvroList(randomValue);
            } else if (isSingleValueField) {
                avroValue = this._globalDictionaries.getDerivedValueForOrigValueSV(column, value);
            } else if (value == null) {
                avroValue = null;
            } else {
                avroValue = Arrays.asList(this._globalDictionaries.getDerivedValuesForOrigValuesMV(column, (Object[]) value));
            }
            record.put(derivedColumn, avroValue);
        }
        dataFileWriter.append(record);
    }

    /** Wraps a multi-value (an {@code Object[]}) in a list for Avro; a null multi-value stays null. */
    private static Object multiValueToAvroList(Object multiValue) {
        return multiValue == null ? null : Arrays.asList((Object[]) multiValue);
    }

    /**
     * Produces a random replacement for a column value.
     *
     * @param obj the original value; an {@code Object[]} for multi-value columns
     * @param fieldSpec field spec of the column the value belongs to
     * @return a single derived value for single-value columns; for multi-value columns an
     *     {@code Object[]} with one derived value per original element, or null if {@code obj} is null
     */
    private Object generateRandomDerivedValue(Object obj, FieldSpec fieldSpec) {
        if (!fieldSpec.isSingleValueField()) {
            if (obj == null) {
                return null;
            }
            Object[] originals = (Object[]) obj;
            Object[] derived = new Object[originals.length];
            int position = 0;
            for (Object original : originals) {
                derived[position] = generateDerivedRandomValueHelper(original, fieldSpec.getDataType());
                position++;
            }
            return derived;
        }
        return generateDerivedRandomValueHelper(obj, fieldSpec.getDataType());
    }

    /**
     * Generates a random value of the given data type to stand in for an original value.
     *
     * <ul>
     *   <li>INT / LONG: a random int / long.</li>
     *   <li>FLOAT / DOUBLE: the respective base value plus a random fraction.</li>
     *   <li>STRING: a random alphanumeric string of the same length; null, {@code ""}, {@code " "} and
     *       {@code "null"} are returned unchanged.</li>
     *   <li>BYTES: random bytes of the same length; null and empty arrays are returned unchanged.</li>
     * </ul>
     *
     * @param obj the original value (only inspected for STRING and BYTES)
     * @param dataType data type of the column
     * @return the derived value
     * @throws IllegalStateException if the data type is not one of the types listed above
     */
    private Object generateDerivedRandomValueHelper(Object obj, FieldSpec.DataType dataType) {
        // This runs once per cell; reuse the per-thread generator instead of allocating and seeding a
        // new Random on every call.
        Random random = ThreadLocalRandom.current();
        switch (dataType) {
            case INT:
                return Integer.valueOf(random.nextInt());
            case LONG:
                return Long.valueOf(random.nextLong());
            case FLOAT:
                return Float.valueOf(FLOAT_BASE_VALUE + random.nextFloat());
            case DOUBLE:
                return Double.valueOf(DOUBLE_BASE_VALUE + random.nextDouble());
            case STRING:
                String original = (String) obj;
                if (original == null || original.equals("") || original.equals(" ") || original.equals("null")) {
                    // Placeholder-like values are kept as they are.
                    return original;
                }
                return RandomStringUtils.randomAlphanumeric(original.length());
            case BYTES:
                byte[] originalBytes = (byte[]) obj;
                if (originalBytes == null || originalBytes.length == 0) {
                    return originalBytes;
                }
                byte[] derivedBytes = new byte[originalBytes.length];
                random.nextBytes(derivedBytes);
                return derivedBytes;
            default:
                throw new IllegalStateException("Unexpected data type");
        }
    }

    /**
     * Assigns every column of the schema an anonymized name of the form
     * {@code <KIND>_<SV|MV>_COL_<index>} and records it in the original-to-derived column map.
     *
     * <p>{@code KIND} is DIMENSION, METRIC, TIME or DATE_TIME depending on the field spec class, and
     * empty for any other field spec class. {@code index} is the position of the column in the
     * iteration order of the schema's field spec map.
     *
     * @param schema the Pinot schema whose columns are renamed
     */
    private void anonymizeColumnNames(Schema schema) {
        int columnIndex = 0;
        for (Map.Entry<String, FieldSpec> entry : schema.getFieldSpecMap().entrySet()) {
            FieldSpec fieldSpec = entry.getValue();
            // Computed afresh for every column so that an unrecognized field spec type cannot inherit
            // the previous column's prefix and suffix.
            String kind;
            if (fieldSpec instanceof DimensionFieldSpec) {
                kind = "DIMENSION";
            } else if (fieldSpec instanceof MetricFieldSpec) {
                kind = "METRIC";
            } else if (fieldSpec instanceof TimeFieldSpec) {
                kind = "TIME";
            } else if (fieldSpec instanceof DateTimeFieldSpec) {
                kind = "DATE_TIME";
            } else {
                kind = "";
            }
            String valueKind = fieldSpec.isSingleValueField() ? "_SV" : "_MV";
            this._origToDerivedColumnsMap.put(entry.getKey(), kind + valueKind + "_COL_" + columnIndex);
            columnIndex++;
        }
    }

    /**
     * Builds the Avro record schema that mirrors the Pinot schema, using the derived column names.
     * Each field spec is also registered in the column-to-field-spec map under its original name.
     *
     * @param schema the Pinot schema to translate
     * @return an Avro record schema named {@code record} with one field per Pinot column
     * @throws UnsupportedOperationException if a column has a data type that is not supported
     */
    private org.apache.avro.Schema getAvroSchemaFromPinotSchema(Schema schema) {
        SchemaBuilder.FieldAssembler<org.apache.avro.Schema> assembler = SchemaBuilder.record("record").fields();
        for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
            FieldSpec.DataType dataType = fieldSpec.getDataType();
            String originalName = fieldSpec.getName();
            String derivedName = this._origToDerivedColumnsMap.get(originalName);
            this._columnToFieldSpecMap.put(originalName, fieldSpec);
            if (fieldSpec.isSingleValueField()) {
                assembler = appendSingleValueAvroField(assembler, derivedName, dataType);
            } else {
                assembler = appendMultiValueAvroField(assembler, derivedName, dataType);
            }
        }
        return assembler.endRecord();
    }

    /** Appends a scalar Avro field (no default) for a single-value column of the given type. */
    private static SchemaBuilder.FieldAssembler<org.apache.avro.Schema> appendSingleValueAvroField(
            SchemaBuilder.FieldAssembler<org.apache.avro.Schema> assembler, String fieldName, FieldSpec.DataType dataType) {
        switch (dataType) {
            case INT:
                return assembler.name(fieldName).type().intType().noDefault();
            case LONG:
                return assembler.name(fieldName).type().longType().noDefault();
            case FLOAT:
                return assembler.name(fieldName).type().floatType().noDefault();
            case DOUBLE:
                return assembler.name(fieldName).type().doubleType().noDefault();
            case STRING:
                return assembler.name(fieldName).type().stringType().noDefault();
            case BYTES:
                return assembler.name(fieldName).type().bytesType().noDefault();
            default:
                throw new UnsupportedOperationException("Data generator does not support type: " + dataType);
        }
    }

    /** Appends an Avro array field (no default) for a multi-value column of the given element type. */
    private static SchemaBuilder.FieldAssembler<org.apache.avro.Schema> appendMultiValueAvroField(
            SchemaBuilder.FieldAssembler<org.apache.avro.Schema> assembler, String fieldName, FieldSpec.DataType dataType) {
        switch (dataType) {
            case INT:
                return assembler.name(fieldName).type().array().items().intType().noDefault();
            case LONG:
                return assembler.name(fieldName).type().array().items().longType().noDefault();
            case FLOAT:
                return assembler.name(fieldName).type().array().items().floatType().noDefault();
            case DOUBLE:
                return assembler.name(fieldName).type().array().items().doubleType().noDefault();
            case STRING:
                return assembler.name(fieldName).type().array().items().stringType().noDefault();
            default:
                throw new UnsupportedOperationException("Data generator does not support type: " + dataType);
        }
    }
}
