package com.alibaba.alink.operator.local.nlp;

import com.alibaba.alink.common.MTable;
import com.alibaba.alink.common.annotation.InputPorts;
import com.alibaba.alink.common.annotation.NameCn;
import com.alibaba.alink.common.annotation.OutputPorts;
import com.alibaba.alink.common.annotation.ParamSelectColumnSpec;
import com.alibaba.alink.common.annotation.PortSpec;
import com.alibaba.alink.common.annotation.PortType;
import com.alibaba.alink.common.annotation.TypeCollections;
import com.alibaba.alink.common.utils.RowCollector;
import com.alibaba.alink.common.utils.TableUtil;
import com.alibaba.alink.operator.common.nlp.DocHashCountVectorizerModelData;
import com.alibaba.alink.operator.common.nlp.DocHashCountVectorizerModelDataConverter;
import com.alibaba.alink.operator.local.LocalOperator;
import com.alibaba.alink.params.nlp.DocHashCountVectorizerTrainParams;
import com.alibaba.alink.pipeline.EstimatorTrainerAnnotation;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.flink.ml.api.misc.param.Params;
import org.apache.flink.shaded.guava18.com.google.common.hash.HashFunction;
import org.apache.flink.shaded.guava18.com.google.common.hash.Hashing;
import org.apache.flink.types.Row;

/**
 * Local trainer for the hashing-based document count vectorizer.
 *
 * <p>Reads the selected text column of the first input operator, splits each document on a single
 * space, hashes every token into one of {@code numFeatures} buckets with 32-bit Murmur3 (seed 0),
 * and counts how many tokens land in each bucket. Buckets whose count is below the {@code minDF}
 * threshold are dropped; the remaining counts are replaced by the smoothed inverse frequency
 * {@code log((docCount + 1) / (count + 1))}. The result is emitted as a model table.
 */
@InputPorts(values = {@PortSpec(PortType.DATA)})
@OutputPorts(values = {@PortSpec(PortType.MODEL)})
@ParamSelectColumnSpec(name = "selectedCol", allowedTypeCollections = {TypeCollections.STRING_TYPES})
@NameCn("文本哈希特征生成训练")
@EstimatorTrainerAnnotation(estimatorName = "com.alibaba.alink.pipeline.nlp.DocHashCountVectorizer")
public class DocHashCountVectorizerTrainLocalOp extends LocalOperator<DocHashCountVectorizerTrainLocalOp> implements DocHashCountVectorizerTrainParams<DocHashCountVectorizerTrainLocalOp> {
    /** Hash function used to map a token to a feature bucket (Murmur3 32-bit, seed 0). */
    private static final HashFunction HASH = Hashing.murmur3_32(0);

    /** Separator (used as a regex by {@link String#split(String)}) between tokens of a document. */
    private static final String WORD_DELIMITER = " ";

    /** Creates the operator with an empty parameter set. */
    public DocHashCountVectorizerTrainLocalOp() {
        super(new Params());
    }

    /**
     * Creates the operator with the given parameters.
     *
     * @param params operator parameters, passed through to the superclass
     */
    public DocHashCountVectorizerTrainLocalOp(Params params) {
        super(params);
    }

    /**
     * Trains the model from the first input operator and stores it as this operator's output table.
     *
     * <p>No explicit bridge overload taking a raw {@code LocalOperator[]} is declared: it would have
     * the same erasure as this method and the class would not compile. The compiler generates the
     * bridge for the covariant return type on its own.
     *
     * @param inputs upstream operators; only the first one is used
     * @return this operator, with the model table set as output
     */
    @Override // com.alibaba.alink.operator.local.LocalOperator
    public DocHashCountVectorizerTrainLocalOp linkFrom(LocalOperator<?>... inputs) {
        LocalOperator<?> in = checkAndGetFirst(inputs);
        int textColIndex = TableUtil.findColIndexWithAssertAndHint(in.getColNames(), getSelectedCol());
        List<Row> rows = in.getOutputTable().getRows();
        int numFeatures = getNumFeatures();

        // Initial capacity is kept at numFeatures, as in the original implementation.
        HashMap<Integer, Double> bucketStats = new HashMap<>(numFeatures);
        long documentCount = 0;
        for (Row row : rows) {
            documentCount++;
            String content = (String) row.getField(textColIndex);
            // NOTE(review): every token occurrence is counted, so a token repeated inside one
            // document contributes more than once to its bucket even though the count is later
            // used as a document frequency — confirm this matches the batch trainer.
            for (String word : content.split(WORD_DELIMITER)) {
                bucketStats.merge(bucketOf(word, numFeatures), 1.0d, Double::sum);
            }
        }

        double minDF = getMinDF();
        String featureType = getFeatureType().name();
        double minTF = getMinTF();

        // minDF >= 1 is an absolute count; a smaller value is a fraction of the document count.
        final double minCount = minDF >= 1.0d ? minDF : minDF * documentCount;
        final double smoothedDocCount = documentCount + 1.0d;

        // Written as !(count >= minCount) rather than (count < minCount) so that a NaN threshold
        // still removes every bucket, exactly as the original ">= keeps, otherwise remove" logic.
        bucketStats.values().removeIf(count -> !(count >= minCount));
        bucketStats.replaceAll((bucket, count) -> Math.log(smoothedDocCount / (count + 1.0d)));

        DocHashCountVectorizerModelData modelData = new DocHashCountVectorizerModelData();
        modelData.numFeatures = numFeatures;
        modelData.minTF = minTF;
        modelData.featureType = featureType;
        modelData.idfMap = bucketStats;

        DocHashCountVectorizerModelDataConverter converter = new DocHashCountVectorizerModelDataConverter();
        RowCollector collector = new RowCollector();
        converter.save(modelData, collector);
        setOutputTable(new MTable(collector.getRows(), converter.getModelSchema()));
        return this;
    }

    /**
     * Maps a token to its feature bucket in {@code [0, numFeatures)}.
     *
     * <p>{@code Math.abs} is kept before {@code Math.floorMod} (rather than {@code floorMod} alone)
     * because the two give different buckets for negative hashes, and changing the mapping would
     * change the trained model.
     */
    private static int bucketOf(String word, int numFeatures) {
        int hashValue = Math.abs(HASH.hashUnencodedChars(word).asInt());
        return Math.floorMod(hashValue, numFeatures);
    }
}
