package com.alibaba.alink.operator.common.nlp;

import com.alibaba.alink.common.mapper.SISOMapper;
import com.alibaba.alink.params.nlp.StopWordsRemoverParams;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Locale;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.ml.api.misc.param.Params;
import org.apache.flink.table.api.TableSchema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/alibaba/alink/operator/common/nlp/StopWordsRemoverMapper.class */
/**
 * Single-input/single-output mapper that removes stop words from a string column.
 *
 * <p>The input value is split on the space character; every non-empty token that is found in
 * the stop-word set is dropped and the remaining tokens are re-joined with single spaces.
 *
 * <p>The stop-word set is built in {@link #open()} from the optional
 * {@code StopWordsRemoverParams.STOP_WORDS} parameter plus the default dictionary resource named
 * by {@code TextRankConst.STOPWORDDICT}. When {@code StopWordsRemoverParams.CASE_SENSITIVE} is
 * false, both the stop words and the tokens are lower-cased before comparison.
 */
public class StopWordsRemoverMapper extends SISOMapper {
    private static final long serialVersionUID = -1436770924854778074L;

    /** Stop words used for filtering; transient, so it is (re)built by {@link #open()}. */
    private transient HashSet<String> stopWordsSet;

    /** Whether stop-word matching distinguishes upper and lower case. */
    private final boolean caseSensitive;

    private static final Logger LOG = LoggerFactory.getLogger(StopWordsRemoverMapper.class);

    /**
     * Creates the mapper and reads the case-sensitivity flag from {@code params}.
     *
     * @param tableSchema schema handed to the parent mapper
     * @param params      parameters; {@code CASE_SENSITIVE} is read here, {@code STOP_WORDS} in
     *                    {@link #open()}
     */
    public StopWordsRemoverMapper(TableSchema tableSchema, Params params) {
        super(tableSchema, params);
        this.stopWordsSet = null;
        this.caseSensitive = ((Boolean) this.params.get(StopWordsRemoverParams.CASE_SENSITIVE)).booleanValue();
    }

    /**
     * Builds the stop-word set from the user-supplied words (if any) and the default dictionary.
     */
    @Override // com.alibaba.alink.common.mapper.Mapper
    public void open() {
        this.stopWordsSet = new HashSet<>();
        String[] userStopWords = (String[]) this.params.get(StopWordsRemoverParams.STOP_WORDS);
        if (userStopWords != null) {
            for (String word : userStopWords) {
                // A null entry can never match a token, and would throw when case-folded.
                if (word != null) {
                    this.stopWordsSet.add(normalize(word));
                }
            }
        }
        loadDefaultStopWords();
    }

    /**
     * Adds every non-empty line of the default stop-word dictionary resource to the set.
     *
     * <p>Loading is best effort: a missing resource or a read error is logged as a warning and
     * the mapper continues with whatever words were collected so far.
     */
    private void loadDefaultStopWords() {
        InputStream dictStream = getClass().getResourceAsStream(TextRankConst.STOPWORDDICT);
        if (dictStream == null) {
            // getResourceAsStream returns null when the resource is absent; treat it like a
            // read failure instead of letting a NullPointerException escape from open().
            LOG.warn("Load default stopWords failure!");
            return;
        }
        // Decode explicitly instead of relying on the platform default charset.
        // NOTE(review): assumes the bundled dictionary is UTF-8 encoded - confirm.
        try (BufferedReader reader =
                 new BufferedReader(new InputStreamReader(dictStream, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (!line.isEmpty()) {
                    this.stopWordsSet.add(normalize(line));
                }
            }
        } catch (IOException e) {
            LOG.warn("Load default stopWords failure!", e);
        }
    }

    /**
     * Returns the form of {@code word} used for stop-word comparison: unchanged when matching is
     * case sensitive, otherwise lower-cased with {@link Locale#ROOT} so that the result does not
     * depend on the JVM's default locale.
     */
    private String normalize(String word) {
        return this.caseSensitive ? word : word.toLowerCase(Locale.ROOT);
    }

    /**
     * Removes stop words from one input value.
     *
     * @param obj the input value, cast to {@code String}; may be null
     * @return null for null input, otherwise the surviving tokens joined by single spaces
     */
    @Override // com.alibaba.alink.common.mapper.SISOMapper
    protected Object mapColumn(Object obj) {
        if (null == obj) {
            return null;
        }
        StringBuilder kept = new StringBuilder();
        for (String token : ((String) obj).split(" ")) {
            // Consecutive spaces produce empty tokens; skip them.
            if (token.isEmpty()) {
                continue;
            }
            if (!this.stopWordsSet.contains(normalize(token))) {
                kept.append(token).append(" ");
            }
        }
        // trim() drops the separator appended after the last kept token.
        return kept.toString().trim();
    }

    /**
     * @return the output column type, always {@code Types.STRING}
     */
    @Override // com.alibaba.alink.common.mapper.SISOMapper
    protected TypeInformation initOutputColType() {
        return Types.STRING;
    }
}
