package com.alibaba.alink.operator.batch.feature;

import com.alibaba.alink.common.MLEnvironmentFactory;
import com.alibaba.alink.common.annotation.InputPorts;
import com.alibaba.alink.common.annotation.NameCn;
import com.alibaba.alink.common.annotation.NameEn;
import com.alibaba.alink.common.annotation.OutputPorts;
import com.alibaba.alink.common.annotation.PortDesc;
import com.alibaba.alink.common.annotation.PortSpec;
import com.alibaba.alink.common.annotation.PortType;
import com.alibaba.alink.common.annotation.SelectedColsWithFirstInputSpec;
import com.alibaba.alink.common.utils.JsonConverter;
import com.alibaba.alink.common.utils.TableUtil;
import com.alibaba.alink.common.viz.AlinkViz;
import com.alibaba.alink.common.viz.VizDataWriterInterface;
import com.alibaba.alink.operator.batch.BatchOperator;
import com.alibaba.alink.operator.batch.statistics.utils.StatisticsHelper;
import com.alibaba.alink.operator.batch.utils.DataSetConversionUtil;
import com.alibaba.alink.operator.batch.utils.DataSetUtil;
import com.alibaba.alink.operator.batch.utils.WithModelInfoBatchOp;
import com.alibaba.alink.operator.common.dataproc.StringIndexerUtil;
import com.alibaba.alink.operator.common.evaluation.ClassificationEvaluationUtil;
import com.alibaba.alink.operator.common.feature.BinningModelDataConverter;
import com.alibaba.alink.operator.common.feature.BinningModelInfo;
import com.alibaba.alink.operator.common.feature.BinningModelMapper;
import com.alibaba.alink.operator.common.feature.OneHotModelDataConverter;
import com.alibaba.alink.operator.common.feature.QuantileDiscretizerModelDataConverter;
import com.alibaba.alink.operator.common.feature.binning.BinDivideType;
import com.alibaba.alink.operator.common.feature.binning.BinTypes;
import com.alibaba.alink.operator.common.feature.binning.BinningModelInfoBatchOp;
import com.alibaba.alink.operator.common.feature.binning.Bins;
import com.alibaba.alink.operator.common.feature.binning.FeatureBinsCalculator;
import com.alibaba.alink.operator.common.feature.binning.FeatureBinsUtil;
import com.alibaba.alink.operator.common.nlp.WordCountUtil;
import com.alibaba.alink.operator.common.outlier.TimeSeriesAnomsUtils;
import com.alibaba.alink.operator.common.similarity.SerializableComparator;
import com.alibaba.alink.operator.common.statistics.basicstatistic.TableSummary;
import com.alibaba.alink.operator.common.statistics.statistics.IntervalCalculator;
import com.alibaba.alink.operator.common.tree.Criteria;
import com.alibaba.alink.params.dataproc.HasHandleInvalid;
import com.alibaba.alink.params.feature.HasDropLast;
import com.alibaba.alink.params.feature.HasEncodeWithoutWoe;
import com.alibaba.alink.params.feature.QuantileDiscretizerTrainParams;
import com.alibaba.alink.params.finance.BinningTrainParams;
import com.alibaba.alink.params.finance.HasBinningMethod;
import com.alibaba.alink.params.shared.HasMLEnvironmentId;
import com.alibaba.alink.params.shared.colname.HasSelectedCols;
import com.alibaba.alink.pipeline.EstimatorTrainerAnnotation;
import com.alibaba.alink.pipeline.PipelineModel;
import com.alibaba.alink.pipeline.TransformerBase;
import com.alibaba.alink.pipeline.feature.OneHotEncoderModel;
import com.alibaba.alink.pipeline.feature.QuantileDiscretizerModel;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.MapPartitionFunction;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.functions.RichMapPartitionFunction;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.operators.Operator;
import org.apache.flink.api.java.operators.SingleInputUdfOperator;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.ml.api.misc.param.ParamInfo;
import org.apache.flink.ml.api.misc.param.Params;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;
import org.apache.flink.util.Preconditions;

@InputPorts(values = {@PortSpec(PortType.DATA)})
@OutputPorts(values = {@PortSpec(value = PortType.MODEL, desc = PortDesc.OUTPUT_RESULT)})
@SelectedColsWithFirstInputSpec
@NameCn("分箱训练")
@NameEn("binning trainer")
@EstimatorTrainerAnnotation(estimatorName = "com.alibaba.alink.pipeline.feature.Binning")
/* loaded from: input_file:com/alibaba/alink/operator/batch/feature/BinningTrainBatchOp.class */
public final class BinningTrainBatchOp extends BatchOperator<BinningTrainBatchOp> implements BinningTrainParams<BinningTrainBatchOp>, AlinkViz<BinningTrainBatchOp>, WithModelInfoBatchOp<BinningModelInfo, BinningTrainBatchOp, BinningModelInfoBatchOp> {
    private static final long serialVersionUID = 2424584385121349839L;
    // Not referenced in the visible part of this file; presumably separates features in a serialized parameter string — TODO confirm.
    private static String FEATURE_DELIMITER = ",";
    // Not referenced in the visible part of this file; presumably separates a key from its value — TODO confirm.
    private static String KEY_VALUE_DELIMITER = TimeSeriesAnomsUtils.VAL_DELIMITER;
    // Second argument handed to IntervalCalculator.create(...) when generateGivenNumberBucket builds its buckets.
    private static int BUCKET_NUMBER = WordCountUtil.BOUND_SIZE;
    // Viz data id under which writePreciseVizData writes its id map; per-column payloads use DATA_ID_MAP + 1 + column index.
    private static long DATA_ID_MAP = 1;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/alibaba/alink/operator/batch/feature/BinningTrainBatchOp$FeatureBinsKey.class */
    public static class FeatureBinsKey implements KeySelector<FeatureBinsCalculator, String> {
        private static final long serialVersionUID = 4363650897636276540L;

        private FeatureBinsKey() {
        }

        public String getKey(FeatureBinsCalculator featureBinsCalculator) {
            return featureBinsCalculator.getFeatureName();
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/alibaba/alink/operator/batch/feature/BinningTrainBatchOp$IntervalStatistics.class */
    /**
     * Serializable value holder with a total count and an optional positive count.
     */
    public static class IntervalStatistics implements Serializable {
        private static final long serialVersionUID = 7680582727625414983L;
        /** Total count; stored exactly as passed to the constructor. */
        Integer total;
        /** Positive count; stored exactly as passed to the constructor and may be {@code null}. */
        Integer positive;

        /**
         * @param totalCount    value stored in {@code total}
         * @param positiveCount value stored in {@code positive}
         */
        public IntervalStatistics(Integer totalCount, Integer positiveCount) {
            this.positive = positiveCount;
            this.total = totalCount;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/alibaba/alink/operator/batch/feature/BinningTrainBatchOp$SerializeFeatureBinsModel.class */
    public static class SerializeFeatureBinsModel extends RichMapPartitionFunction<FeatureBinsCalculator, Row> {
        private static final long serialVersionUID = 2703991631667194660L;

        SerializeFeatureBinsModel() {
        }

        public void mapPartition(Iterable<FeatureBinsCalculator> iterable, Collector<Row> collector) {
            new BinningModelDataConverter().save2(iterable, collector);
        }
    }

    /** Creates the operator without explicitly supplied parameters. */
    public BinningTrainBatchOp() {
    }

    /**
     * Creates the operator with the given parameters.
     *
     * @param params parameters forwarded unchanged to the superclass constructor
     */
    public BinningTrainBatchOp(Params params) {
        super(params);
    }

    /* JADX WARN: Can't rename method to resolve collision */
    /**
     * Builds the binning model from the first input operator.
     *
     * <p>Bins come either from the user-defined bin description (when {@code fromUserDefined} is set)
     * or from training on the input. In both cases discrete and numeric features are handled
     * separately, their calculators are unioned, per-bin statistics are attached through
     * {@link #featureBinsStatistics}, and the result is serialized as the operator output.
     * When a viz data writer is available, visualization data is written as well.
     *
     * <p>When the user-defined bins contain no numeric feature, the binning method and
     * {@code leftOpen} parameters are left untouched; previously this case dereferenced a
     * {@code null} divide type.
     *
     * @param batchOperatorArr the inputs; only the first one is used
     * @return this operator
     * @throws NullPointerException if user-defined bins are requested but not set, or if no
     *                              binning could be generated
     */
    @Override // com.alibaba.alink.operator.batch.BatchOperator
    public BinningTrainBatchOp linkFrom(BatchOperator<?>... batchOperatorArr) {
        BatchOperator<?> input = checkAndGetFirst(batchOperatorArr);
        List<TransformerBase<?>> encoders = new ArrayList<>();
        DataSet<FeatureBinsCalculator> bins = null;
        if (getFromUserDefined().booleanValue()) {
            Preconditions.checkNotNull(getUserDefinedBin(), "User defined bin is empty!");
            Tuple5<List<FeatureBinsCalculator>, HashSet<String>, List<FeatureBinsCalculator>, HashSet<String>, String[]> userDefined = parseUserDefined(getParams());
            Tuple2<BinDivideType, Boolean> numericSettings = parseUserDefinedNumeric(userDefined.f0);
            setSelectedCols(userDefined.f4);
            // parseUserDefinedNumeric yields (null, null) when there is no numeric bin at all.
            if (numericSettings.f0 != null) {
                setBinningMethod(HasBinningMethod.BinningMethod.valueOf(numericSettings.f0.name())).setLeftOpen(numericSettings.f1);
            }
            if (userDefined.f2.size() > 0) {
                Tuple2<DataSet<FeatureBinsCalculator>, OneHotEncoderModel> discrete = discreteFromUserDefined(userDefined.f2, getMLEnvironmentId().longValue(), userDefined.f3.toArray(new String[0]));
                bins = discrete.f0;
                encoders.add(discrete.f1);
            }
            if (userDefined.f0.size() > 0) {
                Tuple2<DataSet<FeatureBinsCalculator>, QuantileDiscretizerModel> numeric = numericFromUserDefined(userDefined.f0, getMLEnvironmentId().longValue(), userDefined.f1.toArray(new String[0]));
                encoders.add(numeric.f1);
                bins = null == bins ? numeric.f0 : bins.union(numeric.f0);
            }
        } else {
            // f0 holds the numeric column names, f1 the discrete ones.
            Tuple2<List<String>, List<String>> numericAndDiscrete = BinningModelMapper.distinguishNumericDiscrete(getSelectedCols(), input.getSchema());
            if (numericAndDiscrete.f1.size() > 0) {
                Tuple2<DataSet<FeatureBinsCalculator>, OneHotEncoderModel> discrete = discreteFromTrain(input, numericAndDiscrete.f1.toArray(new String[0]), getParams());
                bins = discrete.f0;
                encoders.add(discrete.f1);
            }
            if (numericAndDiscrete.f0.size() > 0) {
                Tuple2<DataSet<FeatureBinsCalculator>, QuantileDiscretizerModel> numeric = numericFromTrain(input, numericAndDiscrete.f0.toArray(new String[0]), getParams());
                encoders.add(numeric.f1);
                bins = null == bins ? numeric.f0 : bins.union(numeric.f0);
            }
        }
        Preconditions.checkNotNull(bins, "No binning is generated, please check input!");
        // Encode the input with the collected models so that statistics are computed per bin index.
        BatchOperator<?> encoded = new PipelineModel((TransformerBase<?>[]) encoders.toArray(new TransformerBase[0])).transform(input);
        DataSet<FeatureBinsCalculator> binsWithStatistics = featureBinsStatistics(bins, encoded, getParams());
        DataSet<Row> modelRows = binsWithStatistics.mapPartition(new SerializeFeatureBinsModel()).name("SerializeModel");
        VizDataWriterInterface vizDataWriter = getVizDataWriter();
        if (vizDataWriter != null) {
            writeVizData(binsWithStatistics, vizDataWriter, getSelectedCols());
            writePreciseVizData(input, binsWithStatistics, getParams(), vizDataWriter);
        }
        setOutput(modelRows, new BinningModelDataConverter().getModelSchema());
        return this;
    }

    /**
     * Attaches statistics to the given calculators.
     *
     * <p>Without a label column only per-bin totals are set; with a label column the positive
     * label value must be present in {@code params} and totals plus WOE are set.
     *
     * @param dataSet       the calculators to enrich
     * @param batchOperator the data the statistics are computed from
     * @param params        training parameters
     * @return the enriched calculators
     * @throws IllegalArgumentException if a label column is set but the positive value is not
     */
    static DataSet<FeatureBinsCalculator> featureBinsStatistics(DataSet<FeatureBinsCalculator> dataSet, BatchOperator batchOperator, Params params) {
        final boolean hasLabel = params.get(BinningTrainParams.LABEL_COL) != null;
        if (hasLabel) {
            Preconditions.checkArgument(params.contains(BinningTrainParams.POS_LABEL_VAL_STR), "PositiveValue is not set!");
            return setFeatureBinsTotalAndWoe(dataSet, batchOperator, params);
        }
        String[] selectedCols = (String[]) params.get(BinningTrainParams.SELECTED_COLS);
        return setFeatureBinsTotal(dataSet, batchOperator, selectedCols);
    }

    /**
     * Trains numeric bins with the binning method configured in {@code params}.
     *
     * @param batchOperator training data
     * @param strArr        numeric columns to bin
     * @param params        training parameters
     * @return the calculators together with the model that encodes the columns
     * @throws IllegalArgumentException if the binning method is neither QUANTILE nor BUCKET
     */
    public static Tuple2<DataSet<FeatureBinsCalculator>, QuantileDiscretizerModel> numericFromTrain(BatchOperator batchOperator, String[] strArr, Params params) {
        final HasBinningMethod.BinningMethod method = (HasBinningMethod.BinningMethod) params.get(BinningTrainParams.BINNIG_METHOD);
        final Tuple2<DataSet<FeatureBinsCalculator>, QuantileDiscretizerModel> trained;
        switch (method) {
            case QUANTILE:
                trained = quantileTrain(batchOperator, strArr, params);
                break;
            case BUCKET:
                trained = bucketTrain(batchOperator, strArr, params);
                break;
            default:
                throw new IllegalArgumentException("Not support binningMethod: " + method);
        }
        return trained;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Trains discrete bins with a one-hot trainer.
     *
     * @param batchOperator training data
     * @param strArr        discrete columns to bin
     * @param params        training parameters (discrete thresholds, ML environment id)
     * @return the calculators together with the one-hot model that encodes the columns
     */
    public static Tuple2<DataSet<FeatureBinsCalculator>, OneHotEncoderModel> discreteFromTrain(BatchOperator batchOperator, String[] strArr, Params params) {
        OneHotTrainBatchOp configured = new OneHotTrainBatchOp().setSelectedCols(strArr);
        Integer[] thresholds = getValueArray(params, BinningTrainParams.DISCRETE_THRESHOLDS, BinningTrainParams.DISCRETE_THRESHOLDS_ARRAY, BinningTrainParams.DISCRETE_THRESHOLDS_MAP, strArr);
        OneHotTrainBatchOp trained = configured.setDiscreteThresholdsArray(thresholds).linkFrom(new BatchOperator<?>[]{batchOperator});
        DataSet<FeatureBinsCalculator> bins = OneHotTrainBatchOp.transformModelToFeatureBins(trained.getDataSet());
        long envId = ((Long) params.get(ML_ENVIRONMENT_ID)).longValue();
        OneHotEncoderModel encoder = setOneHotModelData(trained, strArr, envId);
        return Tuple2.of(bins, encoder);
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Trains numeric bins with the equal-width discretizer and tags them as BUCKET bins.
     *
     * @param batchOperator training data
     * @param strArr        numeric columns to bin
     * @param params        training parameters
     * @return the calculators together with the model that encodes the columns
     */
    private static Tuple2<DataSet<FeatureBinsCalculator>, QuantileDiscretizerModel> bucketTrain(BatchOperator batchOperator, String[] strArr, Params params) {
        Params trainParams = generateNumericTrainParams(params, strArr);
        EqualWidthDiscretizerTrainBatchOp trained = new EqualWidthDiscretizerTrainBatchOp(trainParams).linkFrom(new BatchOperator<?>[]{batchOperator});
        DataSet<FeatureBinsCalculator> bins = QuantileDiscretizerTrainBatchOp.transformModelToFeatureBins(trained.getDataSet(), BinDivideType.BUCKET);
        long envId = ((Long) params.get(ML_ENVIRONMENT_ID)).longValue();
        QuantileDiscretizerModel encoder = setQuantileDiscretizerModelData(trained, strArr, envId);
        return Tuple2.of(bins, encoder);
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Trains numeric bins with the quantile discretizer and tags them as QUANTILE bins.
     *
     * @param batchOperator training data
     * @param strArr        numeric columns to bin
     * @param params        training parameters
     * @return the calculators together with the model that encodes the columns
     */
    private static Tuple2<DataSet<FeatureBinsCalculator>, QuantileDiscretizerModel> quantileTrain(BatchOperator batchOperator, String[] strArr, Params params) {
        Params trainParams = generateNumericTrainParams(params, strArr);
        QuantileDiscretizerTrainBatchOp trained = new QuantileDiscretizerTrainBatchOp(trainParams).linkFrom(new BatchOperator<?>[]{batchOperator});
        DataSet<FeatureBinsCalculator> bins = QuantileDiscretizerTrainBatchOp.transformModelToFeatureBins(trained.getDataSet(), BinDivideType.QUANTILE);
        long envId = ((Long) params.get(ML_ENVIRONMENT_ID)).longValue();
        QuantileDiscretizerModel encoder = setQuantileDiscretizerModelData(trained, strArr, envId);
        return Tuple2.of(bins, encoder);
    }

    /**
     * Turns user-defined discrete calculators into a data set plus a one-hot model built from them.
     *
     * @param list   user-defined discrete calculators
     * @param j      ML environment id
     * @param strArr columns the model is applied to
     * @return the calculators as a data set together with the one-hot model
     */
    private static Tuple2<DataSet<FeatureBinsCalculator>, OneHotEncoderModel> discreteFromUserDefined(List<FeatureBinsCalculator> list, long j, String[] strArr) {
        final Long envId = Long.valueOf(j);
        DataSet<FeatureBinsCalculator> bins = MLEnvironmentFactory.get(envId).getExecutionEnvironment().fromCollection(list).name("DiscreteFromUserDefined");
        BatchOperator<?> modelOp = BatchOperator.fromTable(DataSetConversionUtil.toTable(envId, OneHotTrainBatchOp.transformFeatureBinsToModel(bins), new OneHotModelDataConverter().getModelSchema()));
        OneHotEncoderModel encoder = setOneHotModelData(modelOp, strArr, j);
        return Tuple2.of(bins, encoder);
    }

    /**
     * Turns user-defined numeric calculators into a data set plus a discretizer model built from them.
     *
     * @param list   user-defined numeric calculators
     * @param j      ML environment id
     * @param strArr columns the model is applied to
     * @return the calculators as a data set together with the discretizer model
     */
    private static Tuple2<DataSet<FeatureBinsCalculator>, QuantileDiscretizerModel> numericFromUserDefined(List<FeatureBinsCalculator> list, long j, String[] strArr) {
        final Long envId = Long.valueOf(j);
        DataSet<FeatureBinsCalculator> bins = MLEnvironmentFactory.get(envId).getExecutionEnvironment().fromCollection(list).name("NumericFromUserDefined");
        BatchOperator<?> modelOp = BatchOperator.fromTable(DataSetConversionUtil.toTable(envId, QuantileDiscretizerTrainBatchOp.transformFeatureBinsToModel(bins), new QuantileDiscretizerModelDataConverter().getModelSchema()));
        QuantileDiscretizerModel encoder = setQuantileDiscretizerModelData(modelOp, strArr, j);
        return Tuple2.of(bins, encoder);
    }

    /**
     * Deserializes the user-defined bins and splits them into numeric and discrete calculators.
     *
     * @param params parameters holding the selected columns and the serialized user-defined bins
     * @return (numeric calculators, numeric names, discrete calculators, discrete names,
     *         the selected columns that occur in either name set, in selected-column order)
     */
    private static Tuple5<List<FeatureBinsCalculator>, HashSet<String>, List<FeatureBinsCalculator>, HashSet<String>, String[]> parseUserDefined(Params params) {
        HashSet<String> numericNames = new HashSet<>();
        HashSet<String> discreteNames = new HashSet<>();
        String[] selectedCols = (String[]) params.get(SELECTED_COLS);
        Tuple2<List<FeatureBinsCalculator>, List<FeatureBinsCalculator>> split = BinningModelMapper.distinguishNumericDiscrete(FeatureBinsUtil.deSerialize((String) params.get(USER_DEFINED_BIN)), selectedCols, numericNames, discreteNames);
        String[] binnedCols = new String[discreteNames.size() + numericNames.size()];
        int next = 0;
        for (String col : selectedCols) {
            boolean binned = discreteNames.contains(col) || numericNames.contains(col);
            if (!binned) {
                continue;
            }
            binnedCols[next++] = col;
        }
        return Tuple5.of(split.f0, numericNames, split.f1, discreteNames, binnedCols);
    }

    /**
     * Determines the divide type and left-open flag shared by all numeric calculators.
     *
     * @param list numeric calculators
     * @return (divide type, left-open flag); both are {@code null} when {@code list} is empty
     * @throws IllegalArgumentException if a calculator is not numeric, or calculators disagree
     *                                  on divide type or left-open flag
     */
    private static Tuple2<BinDivideType, Boolean> parseUserDefinedNumeric(List<FeatureBinsCalculator> list) {
        BinDivideType sharedType = null;
        Boolean sharedLeftOpen = null;
        for (FeatureBinsCalculator calculator : list) {
            Preconditions.checkArgument(calculator.isNumeric(), "parseUserDefinedNumeric only supports numeric bins!");
            if (sharedType != null) {
                // Every later calculator must agree with the values recorded so far.
                Preconditions.checkArgument(sharedType.equals(calculator.getBinDivideType()), "Features have different BinDivideType!");
                Preconditions.checkArgument(sharedLeftOpen.equals(calculator.getLeftOpen()), "Features have different leftOpen params!");
                continue;
            }
            sharedType = calculator.getBinDivideType();
            sharedLeftOpen = calculator.getLeftOpen();
        }
        return Tuple2.of(sharedType, sharedLeftOpen);
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Runs WOE training on the given data and applies its result to the calculators.
     *
     * @param dataSet       the calculators to enrich
     * @param batchOperator data handed to the WOE trainer
     * @param params        training parameters; the label column must not be a selected column
     * @return the enriched calculators
     * @throws IllegalArgumentException if the label column is one of the selected columns
     */
    public static DataSet<FeatureBinsCalculator> setFeatureBinsTotalAndWoe(DataSet<FeatureBinsCalculator> dataSet, BatchOperator batchOperator, Params params) {
        String[] selectedCols = (String[]) params.get(SELECTED_COLS);
        String labelCol = (String) params.get(LABEL_COL);
        boolean labelNotSelected = TableUtil.findColIndex(selectedCols, labelCol) < 0;
        Preconditions.checkArgument(labelNotSelected, "labelCol is included in selectedCols");
        WoeTrainBatchOp woeTrainer = new WoeTrainBatchOp(params).linkFrom(new BatchOperator<?>[]{batchOperator});
        return WoeTrainBatchOp.setFeatureBinsWoe(dataSet, woeTrainer.getDataSet());
    }

    /**
     * Sets the per-bin totals on each calculator.
     *
     * <p>Token counts of the selected columns are grouped by column index into a map from the
     * token (parsed as a long) to its count, and joined with the calculators by feature name.
     *
     * @param dataSet       the calculators to enrich
     * @param batchOperator data whose selected columns are counted
     * @param strArr        selected columns; the counted column index is resolved against this array
     * @return the calculators with totals set
     */
    static DataSet<FeatureBinsCalculator> setFeatureBinsTotal(DataSet<FeatureBinsCalculator> dataSet, BatchOperator batchOperator, final String[] strArr) {
        DataSet<Tuple2<String, FeatureBinsCalculator>> binsByName = dataSet.map(new MapFunction<FeatureBinsCalculator, Tuple2<String, FeatureBinsCalculator>>() {
            private static final long serialVersionUID = 5564348245552253677L;

            public Tuple2<String, FeatureBinsCalculator> map(FeatureBinsCalculator calculator) {
                return Tuple2.of(calculator.getFeatureName(), calculator);
            }
        });
        DataSet<Tuple2<String, Map<Long, Long>>> totalsByName = StringIndexerUtil.countTokens(batchOperator.select(strArr).getDataSet(), true).groupBy(new int[]{0}).reduceGroup(new GroupReduceFunction<Tuple3<Integer, String, Long>, Tuple2<String, Map<Long, Long>>>() {
            private static final long serialVersionUID = -3648772506771320958L;

            public void reduce(Iterable<Tuple3<Integer, String, Long>> counts, Collector<Tuple2<String, Map<Long, Long>>> out) {
                String featureName = null;
                Map<Long, Long> totals = new HashMap<>();
                for (Tuple3<Integer, String, Long> count : counts) {
                    featureName = strArr[count.f0.intValue()];
                    totals.put(Long.valueOf(count.f1), count.f2);
                }
                // An empty group yields no name and therefore no output.
                if (featureName != null) {
                    out.collect(Tuple2.of(featureName, totals));
                }
            }
        }).name("GetBinTotalMap");
        return binsByName.join(totalsByName).where(new int[]{0}).equalTo(new int[]{0}).with(new JoinFunction<Tuple2<String, FeatureBinsCalculator>, Tuple2<String, Map<Long, Long>>, FeatureBinsCalculator>() {
            private static final long serialVersionUID = -7326232704777819964L;

            public FeatureBinsCalculator join(Tuple2<String, FeatureBinsCalculator> named, Tuple2<String, Map<Long, Long>> totals) {
                FeatureBinsCalculator calculator = named.f1;
                calculator.setTotal(totals.f1);
                return calculator;
            }
        }).name("SetBinTotal");
    }

    /**
     * Builds the parameters for the numeric discretizer trainers.
     *
     * <p>The malformed generic casts left by the decompiler (for example casting a
     * {@code String[]} to {@code ParamInfo<String[]>}) are removed; the values set are unchanged.
     *
     * @param params binning training parameters (bucket numbers, left-open flag)
     * @param strArr numeric columns to train on
     * @return parameters holding the selected columns, the per-column bucket numbers and the left-open flag
     */
    private static Params generateNumericTrainParams(Params params, String[] strArr) {
        return new Params()
            .set(QuantileDiscretizerTrainParams.SELECTED_COLS, strArr)
            .set(QuantileDiscretizerTrainParams.NUM_BUCKETS_ARRAY, getValueArray(params, BinningTrainParams.NUM_BUCKETS, BinningTrainParams.NUM_BUCKETS_ARRAY, BinningTrainParams.NUM_BUCKETS_MAP, strArr))
            .set(QuantileDiscretizerTrainParams.LEFT_OPEN, params.get(BinningTrainParams.LEFT_OPEN));
    }

    /**
     * Creates a one-hot model configured by {@link #encodeIndexForWoeTrainParams} and backed by the given model data.
     *
     * @param batchOperator operator providing the model data
     * @param strArr        columns the model is applied to
     * @param j             ML environment id
     * @return the configured model
     */
    private static OneHotEncoderModel setOneHotModelData(BatchOperator<?> batchOperator, String[] strArr, long j) {
        final Params modelParams = encodeIndexForWoeTrainParams(strArr, j);
        final OneHotEncoderModel model = new OneHotEncoderModel(modelParams);
        model.setModelData(batchOperator);
        return model;
    }

    /**
     * Creates a discretizer model configured by {@link #encodeIndexForWoeTrainParams} and backed by the given model data.
     *
     * @param batchOperator operator providing the model data
     * @param strArr        columns the model is applied to
     * @param j             ML environment id
     * @return the configured model
     */
    static QuantileDiscretizerModel setQuantileDiscretizerModelData(BatchOperator<?> batchOperator, String[] strArr, long j) {
        final Params modelParams = encodeIndexForWoeTrainParams(strArr, j);
        final QuantileDiscretizerModel model = new QuantileDiscretizerModel(modelParams);
        model.setModelData(batchOperator);
        return model;
    }

    /**
     * Builds the parameters used by the encoding models: the given environment id and columns,
     * invalid values kept, no last-category drop, and INDEX encoding.
     *
     * <p>The malformed generic casts left by the decompiler are removed; the values set are unchanged.
     *
     * @param strArr columns the model is applied to
     * @param j      ML environment id
     * @return the model parameters
     */
    public static Params encodeIndexForWoeTrainParams(String[] strArr, long j) {
        return new Params()
            .set(HasMLEnvironmentId.ML_ENVIRONMENT_ID, Long.valueOf(j))
            .set(HasSelectedCols.SELECTED_COLS, strArr)
            .set(HasHandleInvalid.HANDLE_INVALID, HasHandleInvalid.HandleInvalid.KEEP)
            .set(HasDropLast.DROP_LAST, Boolean.FALSE)
            .set(HasEncodeWithoutWoe.ENCODE, HasEncodeWithoutWoe.Encode.INDEX);
    }

    /**
     * Writes fine-grained visualization data for the numeric features.
     *
     * <p>For every numeric calculator a denser set of split points is generated (round values
     * around the existing splits plus buckets spanning the column's min/max from the broadcast
     * summary). Statistics are computed for these dense bins and forwarded, together with the
     * original calculators, to the data-set overload of this method. Nothing is written when no
     * selected column is numeric.
     *
     * <p>The explicit synthetic bridge {@code flatMap(Object, Collector)} and the malformed
     * {@code ParamInfo} cast produced by the decompiler are removed; the compiler generates the
     * bridge itself.
     *
     * <p>NOTE(review): {@code params} is modified in place — its selected columns are replaced by
     * the numeric columns only. Confirm that callers passing their own parameters expect this.
     *
     * @param batchOperator          the training data
     * @param dataSet                the trained calculators
     * @param params                 training parameters (modified, see note)
     * @param vizDataWriterInterface the writer receiving the visualization data
     */
    private static void writePreciseVizData(BatchOperator batchOperator, DataSet<FeatureBinsCalculator> dataSet, Params params, VizDataWriterInterface vizDataWriterInterface) {
        Tuple2<List<String>, List<String>> numericAndDiscrete = BinningModelMapper.distinguishNumericDiscrete((String[]) params.get(BinningTrainParams.SELECTED_COLS), batchOperator.getSchema());
        if (numericAndDiscrete.f0.isEmpty()) {
            return;
        }
        final String[] numericCols = numericAndDiscrete.f0.toArray(new String[0]);
        params.set(BinningTrainParams.SELECTED_COLS, numericCols);
        DataSet<FeatureBinsCalculator> preciseBins = dataSet.flatMap(new RichFlatMapFunction<FeatureBinsCalculator, FeatureBinsCalculator>() {
            private static final long serialVersionUID = -1054858627945256161L;

            @Override
            public void flatMap(FeatureBinsCalculator featureBinsCalculator, Collector<FeatureBinsCalculator> collector) {
                BinTypes.ColType colType = featureBinsCalculator.getColType();
                if (colType.isNumeric) {
                    Number[] splitsArray = featureBinsCalculator.getSplitsArray();
                    // Treat the feature as floating point if its type says so or any split is fractional-typed.
                    boolean isFloat = colType.equals(BinTypes.ColType.FLOAT);
                    for (Number split : splitsArray) {
                        isFloat |= (split instanceof Double) || (split instanceof Float);
                    }
                    TreeSet<Number> denseSplits = BinningTrainBatchOp.generateCloseBucket(splitsArray, isFloat);
                    String featureName = featureBinsCalculator.getFeatureName();
                    TableSummary tableSummary = (TableSummary) getRuntimeContext().getBroadcastVariable("summary").get(0);
                    denseSplits.addAll(BinningTrainBatchOp.generateGivenNumberBucket(tableSummary.minDouble(featureName), tableSummary.maxDouble(featureName), isFloat));
                    collector.collect(FeatureBinsCalculator.createNumericCalculator(featureBinsCalculator.getBinDivideType(), featureBinsCalculator.getFeatureName(), FeatureBinsUtil.getFlinkType(featureBinsCalculator.getFeatureType()), (Number[]) denseSplits.toArray(new Number[0]), featureBinsCalculator.getLeftOpen()));
                }
            }
        }).withBroadcastSet(StatisticsHelper.summary(batchOperator, numericCols), "summary");
        BatchOperator<?> preciseModelOp = BatchOperator.fromTable(DataSetConversionUtil.toTable(batchOperator.getMLEnvironmentId(), QuantileDiscretizerTrainBatchOp.transformFeatureBinsToModel(preciseBins), new QuantileDiscretizerModelDataConverter().getModelSchema()));
        QuantileDiscretizerModel discretizer = setQuantileDiscretizerModelData(preciseModelOp, numericCols, batchOperator.getMLEnvironmentId().longValue());
        BatchOperator<?> discretized = discretizer.transform((BatchOperator<?>) batchOperator);
        writePreciseVizData(featureBinsStatistics(preciseBins, discretized, params), dataSet, numericCols, vizDataWriterInterface);
    }

    /**
     * Generates bucket boundaries for the value range, using an {@link IntervalCalculator}
     * created with {@code BUCKET_NUMBER}.
     *
     * <p>Starting at the calculator's left bound, the step is added repeatedly. For floating
     * point the range is widened by 0.1 on both sides and Doubles are produced; otherwise the
     * range is truncated to longs, widened by 1 on both sides, and Longs are produced.
     *
     * @param d  minimum value
     * @param d2 maximum value
     * @param z  whether to generate floating-point boundaries
     * @return the generated boundaries in ascending generation order
     */
    static List<Number> generateGivenNumberBucket(double d, double d2, boolean z) {
        final List<Number> bounds = new ArrayList<>();
        if (!z) {
            IntervalCalculator longCalc = IntervalCalculator.create(new long[]{((long) d) - 1, ((long) d2) + 1}, BUCKET_NUMBER);
            long bound = longCalc.getLeftBound().longValue();
            final long step = longCalc.getStep().longValue();
            for (int k = 0; k < longCalc.getCount().length; k++, bound += step) {
                bounds.add(Long.valueOf(bound));
            }
            return bounds;
        }
        IntervalCalculator doubleCalc = IntervalCalculator.create(new double[]{d - 0.1d, d2 + 0.1d}, BUCKET_NUMBER);
        double bound = doubleCalc.getLeftBound().doubleValue();
        final double step = doubleCalc.getStep().doubleValue();
        // Accumulate rather than multiply so the floating-point results match the running sum.
        for (int k = 0; k < doubleCalc.n; k++, bound += step) {
            bounds.add(Double.valueOf(bound));
        }
        return bounds;
    }

    /**
     * Generates "round" values surrounding each split point, at successive powers of ten.
     *
     * <p>For every split the value itself is added, followed by the nearest lower and upper
     * multiples of each decimal unit:
     * <ul>
     *   <li>floating point, magnitude below 1: units 1, 0.1, 0.01, ... until the split is an exact
     *       multiple of the unit;</li>
     *   <li>floating point, magnitude of at least 1, and integers: units 1, 10, 100, ... while the
     *       unit does not exceed ten times the magnitude.</li>
     * </ul>
     * Negative splits are mirrored. The floating-point branch produces Doubles only, the integer
     * branch Longs only.
     *
     * <p>The decompiled {@code while (true)} loops had lost their exit condition and never
     * terminated; they are restored as bounded {@code for} loops. The sign test compares against
     * {@code 0.0} directly instead of an unrelated gain constant.
     *
     * <p>NOTE(review): in the sub-1 branch the scale is an {@code int} and wraps for splits with
     * a long fractional expansion; the loop then ends at the latest once the scale wraps to 0.
     *
     * @param numberArr the split points
     * @param z         whether the splits are treated as floating point
     * @return the sorted set of generated values
     */
    static TreeSet<Number> generateCloseBucket(Number[] numberArr, boolean z) {
        TreeSet<Number> buckets = new TreeSet<>();
        if (z) {
            for (Number split : numberArr) {
                double value = split.doubleValue();
                buckets.add(Double.valueOf(value));
                boolean negative = value < 0.0d;
                double magnitude = Math.abs(value);
                if (magnitude < 1.0d) {
                    // Refine by one decimal digit per step until the scaled value is integral.
                    for (int scale = 1; Double.compare(magnitude * scale, Math.floor(magnitude * scale)) != 0; scale *= 10) {
                        double floor = Math.floor(magnitude * scale);
                        buckets.add(Double.valueOf(negative ? (-floor) / scale : floor / scale));
                        buckets.add(Double.valueOf((negative ? -(floor + 1.0d) : floor + 1.0d) / scale));
                    }
                } else {
                    long whole = Math.abs(split.longValue());
                    for (long unit = 1; unit <= magnitude * 10.0d; unit *= 10) {
                        long floor = (whole / unit) * unit;
                        buckets.add(Double.valueOf((double) (negative ? -floor : floor)));
                        buckets.add(Double.valueOf((double) (negative ? -(floor + unit) : floor + unit)));
                    }
                }
            }
        } else {
            for (Number split : numberArr) {
                long value = split.longValue();
                buckets.add(Long.valueOf(value));
                boolean negative = value < 0;
                long magnitude = Math.abs(value);
                for (long unit = 1; unit <= magnitude * 10; unit *= 10) {
                    long floor = (magnitude / unit) * unit;
                    buckets.add(Long.valueOf(negative ? -floor : floor));
                    buckets.add(Long.valueOf(negative ? -(floor + unit) : floor + unit));
                }
            }
        }
        return buckets;
    }

    /**
     * Writes the cumulative statistics of the dense bins, one payload per feature.
     *
     * <p>First a map from column name to data id ({@code DATA_ID_MAP + 1 + column index}) is
     * written under {@code DATA_ID_MAP}. Then each dense calculator is joined with the original
     * calculator of the same feature, and the cumulative totals (and positives, when available)
     * of the retained bins are written under the feature's data id.
     *
     * <p>Fixes the id map: the map itself was used as the key instead of the column name, so the
     * map contained itself and hashing it recursed.
     *
     * @param dataSet                calculators with dense splits and statistics
     * @param dataSet2               the original calculators, used to keep user-visible splits
     * @param strArr                 the numeric columns, defining the data-id order
     * @param vizDataWriterInterface the writer receiving the visualization data
     */
    private static void writePreciseVizData(DataSet<FeatureBinsCalculator> dataSet, DataSet<FeatureBinsCalculator> dataSet2, final String[] strArr, final VizDataWriterInterface vizDataWriterInterface) {
        Map<String, Long> colToDataId = new HashMap<>();
        long nextId = DATA_ID_MAP + 1;
        for (String col : strArr) {
            colToDataId.put(col, Long.valueOf(nextId++));
        }
        vizDataWriterInterface.writeBatchData(DATA_ID_MAP, JsonConverter.toJson(colToDataId), System.currentTimeMillis());
        DataSetUtil.linkDummySink(dataSet.join(dataSet2).where(new FeatureBinsKey()).equalTo(new FeatureBinsKey()).with(new JoinFunction<FeatureBinsCalculator, FeatureBinsCalculator, Row>() {
            private static final long serialVersionUID = 4855360130213173442L;

            public Row join(FeatureBinsCalculator featureBinsCalculator, FeatureBinsCalculator featureBinsCalculator2) throws Exception {
                Preconditions.checkArgument(featureBinsCalculator.isNumeric(), "Precise only support numeric bins!");
                featureBinsCalculator.calcStatistics();
                List<Number> originalSplits = Arrays.asList(featureBinsCalculator2.getSplitsArray());
                Number[] splitsArray = featureBinsCalculator.getSplitsArray();
                Bins bin = featureBinsCalculator.getBin();
                ArrayList<IntervalStatistics> statistics = new ArrayList<>();
                ArrayList<Number> intervals = new ArrayList<>();
                Integer positiveSum = null;
                int totalSum = 0;
                // Total of the previously retained bin; -1 means "none yet", which also keeps the first bin.
                int previousTotal = -1;
                for (int i = 0; i < bin.normBins.size(); i++) {
                    Bins.BaseBin baseBin = bin.normBins.get(i);
                    // Skip an empty bin only if it follows an empty bin and its left split is not an original split.
                    if (baseBin.getTotal().longValue() > 0 || previousTotal != 0 || originalSplits.contains(splitsArray[i - 1])) {
                        previousTotal = baseBin.getTotal().intValue();
                        totalSum += previousTotal;
                        if (featureBinsCalculator.getPositiveTotal() != null) {
                            positiveSum = Integer.valueOf(null == positiveSum ? baseBin.getPositive().intValue() : positiveSum.intValue() + baseBin.getPositive().intValue());
                        }
                        statistics.add(new IntervalStatistics(Integer.valueOf(totalSum), positiveSum));
                        if (i > 0) {
                            intervals.add(splitsArray[i - 1]);
                        }
                    }
                }
                int colIndex = TableUtil.findColIndexWithAssert(strArr, featureBinsCalculator.getFeatureName());
                Params params = new Params().set("Interval", intervals);
                params.set(ClassificationEvaluationUtil.STATISTICS_OUTPUT, statistics);
                vizDataWriterInterface.writeBatchData(BinningTrainBatchOp.DATA_ID_MAP + 1 + colIndex, params.toJson(), System.currentTimeMillis());
                return new Row(1);
            }
        }));
    }

    /**
     * Attaches a sink task, run with parallelism 1, that gathers all feature bins calculators,
     * orders them, serializes them and hands the result to the visualization writer in one batch.
     *
     * <p>The calculators are ordered by the index that {@code TableUtil.findColIndex} reports for
     * their feature name within {@code strArr}. The payload is written under data id {@code 0}
     * together with the current system time.
     *
     * @param dataSet                calculators to publish
     * @param vizDataWriterInterface writer that receives the serialized calculators
     * @param strArr                 column names used to order the calculators
     */
    private static void writeVizData(DataSet<FeatureBinsCalculator> dataSet, final VizDataWriterInterface vizDataWriterInterface, final String[] strArr) {
        DataSetUtil.linkDummySink(dataSet.mapPartition(new MapPartitionFunction<FeatureBinsCalculator, Row>() {
            private static final long serialVersionUID = 1298967177624132843L;

            public void mapPartition(Iterable<FeatureBinsCalculator> iterable, Collector<Row> collector) {
                // Materialize the partition so it can be sorted; nothing is emitted through the collector.
                List<FeatureBinsCalculator> calculators = new ArrayList<>();
                iterable.forEach(calculators::add);
                calculators.sort(new SerializableComparator<FeatureBinsCalculator>() {
                    private static final long serialVersionUID = -6390285370495541755L;

                    @Override // java.util.Comparator
                    public int compare(FeatureBinsCalculator left, FeatureBinsCalculator right) {
                        // Integer.compare yields 0 for equal indices, which keeps the comparator
                        // consistent (compare(a, b) == -compare(b, a)) as List.sort requires.
                        return Integer.compare(
                            TableUtil.findColIndex(strArr, left.getFeatureName()),
                            TableUtil.findColIndex(strArr, right.getFeatureName()));
                    }
                });
                vizDataWriterInterface.writeBatchData(0L, FeatureBinsUtil.serialize(calculators.toArray(new FeatureBinsCalculator[0])), System.currentTimeMillis());
            }
        }).setParallelism(1).name("WriteFeatureBinsViz"));
    }

    /**
     * Resolves one {@code Integer} per training column from three alternative parameters.
     *
     * <p>If {@code paramInfo2} is set, its array is returned after checking that its length equals
     * {@code strArr.length}. Otherwise every slot is filled with the value of {@code paramInfo}, and
     * when {@code paramInfo3} is set, the entries parsed from it replace the slots of the named columns.
     *
     * @param params     parameter container to read from
     * @param paramInfo  parameter holding one value applied to every column
     * @param paramInfo2 parameter holding an explicit value per column
     * @param paramInfo3 parameter holding a textual column-to-value map, parsed by {@code parseInputMap}
     * @param strArr     training column names
     * @return an array with one value per entry of {@code strArr}
     * @throws IllegalArgumentException presumably, via {@code Preconditions.checkArgument}, when
     *                                  {@code paramInfo2} is combined with {@code paramInfo} or
     *                                  {@code paramInfo3}, or when its length differs from
     *                                  {@code strArr.length} - exception type to be confirmed
     */
    static Integer[] getValueArray(Params params, ParamInfo<Integer> paramInfo, ParamInfo<Integer[]> paramInfo2, ParamInfo<String> paramInfo3, String[] strArr) {
        Preconditions.checkArgument(
            !params.contains(paramInfo) || !params.contains(paramInfo2),
            "It can not set " + paramInfo.getName() + " " + paramInfo2.getName() + " at the same time!");
        Preconditions.checkArgument(
            !params.contains(paramInfo2) || !params.contains(paramInfo3),
            "It can not set " + paramInfo3.getName() + " " + paramInfo2.getName() + " at the same time!");

        if (!params.contains(paramInfo2)) {
            // No explicit per-column array: start from the shared value, then apply named overrides.
            final Integer[] filled = new Integer[strArr.length];
            Arrays.fill(filled, params.get(paramInfo));
            if (params.contains(paramInfo3)) {
                Map<String, Integer> overrides = parseInputMap(params.get(paramInfo3));
                overrides.forEach((column, value) ->
                    filled[TableUtil.findColIndexWithAssertAndHint(strArr, column)] = value);
            }
            return filled;
        }

        Integer[] explicit = params.get(paramInfo2);
        Preconditions.checkArgument(
            explicit.length == strArr.length,
            "The length of %s must be equal to the length of train cols!",
            paramInfo2.getName());
        return explicit;
    }

    /**
     * Parses a textual map of names to integers.
     *
     * <p>The input is split into entries on {@code FEATURE_DELIMITER}; each entry is split into a
     * name and a value on {@code KEY_VALUE_DELIMITER}. Surrounding whitespace is removed from both
     * the name and the value before the value is converted with {@link Integer#valueOf(String)}.
     *
     * @param str the text to parse
     * @return a map from trimmed name to parsed integer; a later duplicate name replaces an earlier one
     * @throws NumberFormatException if a value is not a valid integer after trimming
     */
    static Map<String, Integer> parseInputMap(String str) {
        Map<String, Integer> parsed = new HashMap<>();
        for (String entry : str.split(FEATURE_DELIMITER)) {
            String[] nameAndValue = entry.split(KEY_VALUE_DELIMITER);
            // Each entry must consist of exactly a name part and a value part.
            Preconditions.checkArgument(nameAndValue.length == 2, "Input Map parse fail!");
            // Trim the value as well as the name: Integer.valueOf rejects surrounding whitespace.
            parsed.put(nameAndValue[0].trim(), Integer.valueOf(nameAndValue[1].trim()));
        }
        return parsed;
    }

    /**
     * Creates a new {@code BinningModelInfoBatchOp} and links it from this operator.
     *
     * @return the result of linking the new model info operator to this operator
     */
    @Override // com.alibaba.alink.operator.batch.utils.WithModelInfoBatchOp
    public BinningModelInfoBatchOp getModelInfoBatchOp() {
        BinningModelInfoBatchOp modelInfoOp = new BinningModelInfoBatchOp();
        return modelInfoOp.linkFrom(this);
    }

    /**
     * Raw-array overload of {@code linkFrom} that forwards to the {@code BatchOperator<?>[]} variant.
     *
     * <p>NOTE(review): the inline bridge/synthetic markers suggest this method is compiler-generated
     * and was surfaced by decompilation rather than written by hand - confirm before keeping it in
     * maintained source.
     *
     * @param batchOperatorArr the input operators, passed through unchanged
     * @return whatever the delegated {@code linkFrom} call returns
     */
    @Override // com.alibaba.alink.operator.batch.BatchOperator
    public /* bridge */ /* synthetic */ BinningTrainBatchOp linkFrom(BatchOperator[] batchOperatorArr) {
        // The cast only changes the static type, selecting the BatchOperator<?>[] overload.
        return linkFrom((BatchOperator<?>[]) batchOperatorArr);
    }
}
