package com.mayabot.nlp.module.nwd;

import com.mayabot.nlp.Mynlp;
import com.mayabot.nlp.algorithm.collection.dat.DoubleArrayTrieMap;
import com.mayabot.nlp.common.Lists;
import com.mayabot.nlp.common.ParagraphReaderSmart;
import com.mayabot.nlp.common.utils.Characters;
import com.mayabot.nlp.segment.common.BaseSegmentComponent;
import com.mayabot.nlp.segment.lexer.bigram.CoreDictionary;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.TuplesKt;
import kotlin.Unit;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.comparisons.ComparisonsKt;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.math.MathKt;
import kotlin.text.CharsKt;
import org.jetbrains.annotations.NotNull;

/* compiled from: NewWordFindEngine.kt */
@Metadata(mv = {1, 4, 1}, bv = {1, 0, 3}, k = 1, d1 = {"��d\n\u0002\u0018\u0002\n\u0002\u0010��\n��\n\u0002\u0010\b\n\u0002\b\u0003\n\u0002\u0010\u000b\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0006\n\u0002\u0010\u0015\n\u0002\b\b\n\u0002\u0018\u0002\n\u0002\b\n\n\u0002\u0010\u0002\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0010\u0007\n\u0002\b\u0003\u0018��2\u00020\u0001B-\u0012\b\b\u0002\u0010\u0002\u001a\u00020\u0003\u0012\b\b\u0002\u0010\u0004\u001a\u00020\u0003\u0012\b\b\u0002\u0010\u0005\u001a\u00020\u0003\u0012\b\b\u0002\u0010\u0006\u001a\u00020\u0007¢\u0006\u0002\u0010\bJ\u0006\u00100\u001a\u000201J\u0006\u00102\u001a\u000201J\u000e\u00103\u001a\u0002012\u0006\u00104\u001a\u00020\u000bJ \u00105\u001a\b\u0012\u0004\u0012\u000207062\b\b\u0002\u00108\u001a\u0002092\b\b\u0002\u0010:\u001a\u000209J\u000e\u0010;\u001a\u0002012\u0006\u00104\u001a\u00020\u000bR&\u0010\t\u001a\u000e\u0012\u0004\u0012\u00020\u000b\u0012\u0004\u0012\u00020\f0\nX\u0086\u000e¢\u0006\u000e\n��\u001a\u0004\b\r\u0010\u000e\"\u0004\b\u000f\u0010\u0010R\u0011\u0010\u0011\u001a\u00020\u0012¢\u0006\b\n��\u001a\u0004\b\u0013\u0010\u0014R\u0014\u0010\u0015\u001a\b\u0012\u0004\u0012\u00020\u000b0\u0016X\u0082\u000e¢\u0006\u0002\n��R\u001a\u0010\u0017\u001a\u00020\u0003X\u0086\u000e¢\u0006\u000e\n��\u001a\u0004\b\u0018\u0010\u0019\"\u0004\b\u001a\u0010\u001bR\u000e\u0010\u0006\u001a\u00020\u0007X\u0082\u0004¢\u0006\u0002\n��R\u0011\u0010\u001c\u001a\u00020\u001d¢\u0006\b\n��\u001a\u0004\b\u001e\u0010\u001fR\u0011\u0010 
\u001a\u00020\u001d¢\u0006\b\n��\u001a\u0004\b!\u0010\u001fR\u000e\u0010\u0004\u001a\u00020\u0003X\u0082\u0004¢\u0006\u0002\n��R\u000e\u0010\u0002\u001a\u00020\u0003X\u0082\u0004¢\u0006\u0002\n��R\u001a\u0010\"\u001a\u00020\u0003X\u0086\u000e¢\u0006\u000e\n��\u001a\u0004\b#\u0010\u0019\"\u0004\b$\u0010\u001bR\u000e\u0010%\u001a\u00020&X\u0082\u0004¢\u0006\u0002\n��R\u001a\u0010'\u001a\u00020\u0007X\u0086\u000e¢\u0006\u000e\n��\u001a\u0004\b(\u0010)\"\u0004\b*\u0010+R\u001a\u0010,\u001a\u00020\u0003X\u0086\u000e¢\u0006\u000e\n��\u001a\u0004\b-\u0010\u0019\"\u0004\b.\u0010\u001bR\u000e\u0010/\u001a\u00020\u001dX\u0082\u0004¢\u0006\u0002\n��¨\u0006<"}, d2 = {"Lcom/mayabot/nlp/module/nwd/NewWordFindEngine;", "", "minGroup", "", "maxGroup", "minOccurCount", "excludeCoreDict", "", "(IIIZ)V", "candidateMap", "Ljava/util/HashMap;", "", "Lcom/mayabot/nlp/module/nwd/WordInfo;", "getCandidateMap", "()Ljava/util/HashMap;", "setCandidateMap", "(Ljava/util/HashMap;)V", "coreDictionary", "Lcom/mayabot/nlp/segment/lexer/bigram/CoreDictionary;", "getCoreDictionary", "()Lcom/mayabot/nlp/segment/lexer/bigram/CoreDictionary;", "dict", "Lcom/mayabot/nlp/algorithm/collection/dat/DoubleArrayTrieMap;", "docCount", "getDocCount", "()I", "setDocCount", "(I)V", "filterContainsChar", "", "getFilterContainsChar", "()[I", "filterStartChar", "getFilterStartChar", "partCount", "getPartCount", "setPartCount", "topWordCounter", "Lcom/mayabot/nlp/module/nwd/TopCounter;", "verbose", "getVerbose", "()Z", "setVerbose", "(Z)V", "ziCountTotal", "getZiCountTotal", "setZiCountTotal", "ziFreqArray", "endSecond", "", "finishFirst", "firstScan", "document", "result", "Ljava/util/ArrayList;", "Lcom/mayabot/nlp/module/nwd/NewWord;", "minMi", "", "minEntropy", "secondScan", "mynlp"})
/* loaded from: input_file:com/mayabot/nlp/module/nwd/NewWordFindEngine.class */
/**
 * Two-pass engine that collects candidate character n-grams from documents and
 * computes per-candidate statistics.
 *
 * <p>Usage order, as implied by the data each step reads and writes:
 * <ol>
 *   <li>{@link #firstScan(String)} for every document: counts characters and feeds
 *       every admissible n-gram into the top-word counter;</li>
 *   <li>{@link #finishFirst()}: turns the surviving n-grams into the candidate map
 *       and builds the lookup trie;</li>
 *   <li>{@link #secondScan(String)} for every document: counts candidate occurrences
 *       and records the neighbouring characters and the document number;</li>
 *   <li>{@link #endSecond()}: derives the per-candidate scores;</li>
 *   <li>{@link #result(float, float)}: filters and sorts the candidates.</li>
 * </ol>
 *
 * <p>The class keeps mutable counters and performs no synchronization.
 */
public final class NewWordFindEngine {
    /** One slot per possible {@code char} value (0..65535 inclusive). */
    private static final int CHAR_TABLE_SIZE = Character.MAX_VALUE + 1;

    /** Marker value stored in the filter tables for characters that are rejected. */
    private static final int FILTERED = 1;

    /** Number of documents passed to {@link #secondScan(String)} so far. */
    private int docCount;

    @NotNull
    private final CoreDictionary coreDictionary;

    /** Total number of characters seen by {@link #firstScan(String)}. */
    private int ziCountTotal;

    /** Number of n-grams handed to the top-word counter by {@link #firstScan(String)}. */
    private int partCount;

    /** Occurrence count of every character, indexed by its {@code char} value. */
    private final int[] ziFreqArray;
    private final TopCounter topWordCounter;

    @NotNull
    private HashMap<String, WordInfo> candidateMap;

    /** {@code 1} for characters that may not start or end a candidate. */
    @NotNull
    private final int[] filterStartChar;

    /** {@code 1} for characters that may not appear anywhere in a candidate. */
    @NotNull
    private final int[] filterContainsChar;

    /** Trie over the candidate words; rebuilt by {@link #finishFirst()}. */
    private DoubleArrayTrieMap<String> dict;

    /** Stored flag; it is not read anywhere inside this class. */
    private boolean verbose;
    private final int minGroup;
    private final int maxGroup;
    private final boolean excludeCoreDict;

    public final int getDocCount() {
        return this.docCount;
    }

    public final void setDocCount(int i) {
        this.docCount = i;
    }

    @NotNull
    public final CoreDictionary getCoreDictionary() {
        return this.coreDictionary;
    }

    public final int getZiCountTotal() {
        return this.ziCountTotal;
    }

    public final void setZiCountTotal(int i) {
        this.ziCountTotal = i;
    }

    public final int getPartCount() {
        return this.partCount;
    }

    public final void setPartCount(int i) {
        this.partCount = i;
    }

    @NotNull
    public final HashMap<String, WordInfo> getCandidateMap() {
        return this.candidateMap;
    }

    public final void setCandidateMap(@NotNull HashMap<String, WordInfo> hashMap) {
        Intrinsics.checkNotNullParameter(hashMap, "<set-?>");
        this.candidateMap = hashMap;
    }

    @NotNull
    public final int[] getFilterStartChar() {
        return this.filterStartChar;
    }

    @NotNull
    public final int[] getFilterContainsChar() {
        return this.filterContainsChar;
    }

    public final boolean getVerbose() {
        return this.verbose;
    }

    public final void setVerbose(boolean z) {
        this.verbose = z;
    }

    /**
     * Builds the list of candidates that pass the given thresholds, sorted by
     * {@link NewWord#getScore()} in descending order.
     *
     * @param minMi      a candidate is kept only if its {@code mi} is strictly greater
     * @param minEntropy a candidate is kept only if its {@code entropy} is strictly greater
     * @return a new list; candidates whose score is not below
     *         {@code BaseSegmentComponent.LEVEL5} are also dropped
     */
    @NotNull
    public final ArrayList<NewWord> result(float minMi, float minEntropy) {
        ArrayList<NewWord> words = Lists.newArrayListWithExpectedSize(this.candidateMap.size());
        for (WordInfo info : this.candidateMap.values()) {
            if (info.getScore() < BaseSegmentComponent.LEVEL5 && info.getMi() > minMi && info.getEntropy() > minEntropy) {
                NewWord word = new NewWord(info.getWord(), info.getWord().length(), info.getCount(), info.getDoc(), info.getMi(), info.getMi_avg(), info.getEntropy(), info.getLe(), info.getRe(), info.getIdf(), info.isBlock());
                word.doScore();
                words.add(word);
            }
        }
        if (words.size() > 1) {
            // Arguments are swapped on purpose: highest score first.
            CollectionsKt.sortWith(words, new Comparator<NewWord>() {
                @Override
                public int compare(NewWord first, NewWord second) {
                    return Float.compare(second.getScore(), first.getScore());
                }
            });
        }
        return words;
    }

    /** Kotlin default-argument bridge: bit 0 of {@code i} defaults {@code f}, bit 1 defaults {@code f2}, both to 1.0. */
    public static /* synthetic */ ArrayList result$default(NewWordFindEngine newWordFindEngine, float f, float f2, int i, Object obj) {
        if ((i & 1) != 0) {
            f = 1.0f;
        }
        if ((i & 2) != 0) {
            f2 = 1.0f;
        }
        return newWordFindEngine.result(f, f2);
    }

    /**
     * First pass over one document: updates the character frequencies and feeds every
     * admissible n-gram of length {@code minGroup..maxGroup} into the top-word counter.
     *
     * <p>An n-gram is admissible when its first and last characters are not flagged in
     * {@code filterStartChar}, none of its characters is flagged in
     * {@code filterContainsChar}, and (for lengths up to 5 only) none of its characters
     * is flagged in {@code filterStartChar}. When {@code excludeCoreDict} is set,
     * n-grams contained in the core dictionary are skipped as well.
     *
     * @param str the document text; must not be {@code null}
     */
    public final void firstScan(@NotNull String str) {
        Intrinsics.checkNotNullParameter(str, "document");
        ParagraphReaderSmart reader = new ParagraphReaderSmart(new StringReader(str));
        for (String paragraph = reader.next(); paragraph != null; paragraph = reader.next()) {
            char[] chars = paragraph.toCharArray();
            int length = chars.length;
            this.ziCountTotal += length;
            for (char c : chars) {
                this.ziFreqArray[c]++;
            }
            for (int start = 0; start < length; start++) {
                if (this.filterStartChar[chars[start]] == FILTERED) {
                    continue;
                }
                for (int size = this.minGroup; size <= this.maxGroup; size++) {
                    int end = start + size;
                    if (end > length) {
                        // Longer n-grams starting here cannot fit either.
                        break;
                    }
                    if (this.filterStartChar[chars[end - 1]] == FILTERED) {
                        continue;
                    }
                    // Short n-grams are additionally rejected when any inner character is a start-filtered one.
                    if (size <= 5 && containsFiltered(this.filterStartChar, chars, start, end)) {
                        continue;
                    }
                    if (containsFiltered(this.filterContainsChar, chars, start, end)) {
                        continue;
                    }
                    String candidate = paragraph.substring(start, end);
                    if (!this.excludeCoreDict || !this.coreDictionary.contains(candidate)) {
                        this.topWordCounter.put(candidate);
                        this.partCount++;
                    }
                }
            }
        }
    }

    /** Returns whether any character of {@code chars[from, to)} is flagged in {@code table}. */
    private static boolean containsFiltered(int[] table, char[] chars, int from, int to) {
        for (int i = from; i < to; i++) {
            if (table[chars[i]] == FILTERED) {
                return true;
            }
        }
        return false;
    }

    /**
     * Closes the first pass: cleans the top-word counter, registers every remaining
     * n-gram (minus core-dictionary words when {@code excludeCoreDict} is set) in the
     * candidate map, and rebuilds the lookup trie from those n-grams.
     */
    public final void finishFirst() {
        this.topWordCounter.clean();
        TreeMap<String, String> words = new TreeMap<>();
        for (Map.Entry<String, IntCount> entry : this.topWordCounter.getData().entrySet()) {
            String word = entry.getKey();
            if (!this.excludeCoreDict || !this.coreDictionary.contains(word)) {
                words.put(word, "");
                this.candidateMap.put(word, new WordInfo(word));
            }
        }
        this.dict = new DoubleArrayTrieMap<>(words);
    }

    /**
     * Second pass over one document: increments {@code docCount}, then for every
     * occurrence of a candidate word bumps its count, records the character on each
     * side ({@code '^'} at paragraph start, {@code '$'} at paragraph end) and adds the
     * current document number to its document set. A candidate found directly between
     * a matching pair of quotes or book-title marks is flagged as a block.
     *
     * @param str the document text; must not be {@code null}
     */
    public final void secondScan(@NotNull String str) {
        Intrinsics.checkNotNullParameter(str, "document");
        this.docCount++;
        ParagraphReaderSmart reader = new ParagraphReaderSmart(new StringReader(str));
        for (String paragraph = reader.next(); paragraph != null; paragraph = reader.next()) {
            int length = paragraph.length();
            for (int start = 0; start < length; start++) {
                if (this.filterStartChar[paragraph.charAt(start)] == FILTERED) {
                    continue;
                }
                for (int size = this.minGroup; size <= this.maxGroup; size++) {
                    int end = start + size;
                    if (end > length) {
                        // Longer n-grams starting here cannot fit either.
                        break;
                    }
                    if (this.dict.get(paragraph, start, size) == null) {
                        continue;
                    }
                    WordInfo info = this.candidateMap.get(paragraph.substring(start, end));
                    if (info == null) {
                        continue;
                    }
                    info.setCount(info.getCount() + 1);
                    char left = start >= 1 ? paragraph.charAt(start - 1) : '^';
                    char right = end < length ? paragraph.charAt(end) : '$';
                    ValueObjectsKt.addTo(info.getLeft(), left, 1);
                    ValueObjectsKt.addTo(info.getRight(), right, 1);
                    info.getDocSet().add(Integer.valueOf(this.docCount));
                    // U+201C/U+201D curly quotes, ASCII double quotes, U+300A/U+300B book-title marks.
                    boolean curlyQuoted = left == '\u201c' && right == '\u201d';
                    boolean asciiQuoted = left == '"' && right == '"';
                    boolean titleQuoted = left == '\u300a' && right == '\u300b';
                    if (curlyQuoted || asciiQuoted || titleQuoted) {
                        info.setBlock(true);
                    }
                }
            }
        }
    }

    /**
     * Closes the second pass by computing the scores of every candidate.
     *
     * <p>{@code mi} is {@code log2(p(word) / product of p(char))}, where
     * {@code p(word) = count / ziCountTotal} and {@code p(char)} is the character's
     * frequency divided by {@code ziCountTotal} (presumably a pointwise mutual
     * information estimate). {@code mi_avg} is {@code mi} divided by the word length.
     * Entropy and tf-idf are delegated to {@link WordInfo}.
     */
    public final void endSecond() {
        double documents = this.docCount;
        double totalChars = this.ziCountTotal;
        for (WordInfo info : this.candidateMap.values()) {
            String word = info.getWord();
            // Float division: integer division would truncate every probability to 0.
            float wordProbability = (float) info.getCount() / this.ziCountTotal;
            float independentProbability = 1.0f;
            for (int i = 0; i < word.length(); i++) {
                independentProbability *= (float) this.ziFreqArray[word.charAt(i)] / this.ziCountTotal;
            }
            info.setMi(MathKt.log2(wordProbability / independentProbability));
            info.setMi_avg(info.getMi() / word.length());
            info.entropy();
            info.tfIdf(documents, totalChars);
        }
    }

    /**
     * Creates an engine.
     *
     * @param i  minimum n-gram length ({@code minGroup})
     * @param i2 maximum n-gram length ({@code maxGroup})
     * @param i3 second argument passed to the {@link TopCounter} (named
     *           {@code minOccurCount} in the Kotlin metadata)
     * @param z  whether n-grams contained in the core dictionary are excluded
     */
    public NewWordFindEngine(int i, int i2, int i3, boolean z) {
        this.minGroup = i;
        this.maxGroup = i2;
        this.excludeCoreDict = z;
        this.coreDictionary = (CoreDictionary) Mynlp.Companion.instance().getInstance(CoreDictionary.class);
        // Tables are indexed by char, so they need a slot for every value up to and including U+FFFF.
        this.ziFreqArray = new int[CHAR_TABLE_SIZE];
        this.topWordCounter = new TopCounter(2000000, i3);
        this.candidateMap = new HashMap<>();
        this.filterStartChar = new int[CHAR_TABLE_SIZE];
        this.filterContainsChar = new int[CHAR_TABLE_SIZE];
        // Placeholder trie with a single entry; finishFirst() replaces it.
        TreeMap<String, String> placeholder = new TreeMap<>();
        placeholder.put("a", "");
        this.dict = new DoubleArrayTrieMap<>(placeholder);
        this.verbose = true;
        // Punctuation, ASCII and whitespace can neither start nor appear inside a candidate.
        for (int code = 0; code < CHAR_TABLE_SIZE; code++) {
            char c = (char) code;
            if (Characters.isPunctuation(c) || Characters.isASCII(c) || CharsKt.isWhitespace(c)) {
                this.filterStartChar[code] = FILTERED;
                this.filterContainsChar[code] = FILTERED;
            }
        }
        // Characters that may appear inside a candidate but not at its start or end.
        // NOTE(review): the first two characters of this literal look mis-decoded by the
        // decompiler; restore them from the original Kotlin source before relying on it.
        char[] boundaryChars = "˦�来将就这的了和与想我你他为或是对并以于由有个之在把等再从及".toCharArray();
        for (char c : boundaryChars) {
            this.filterStartChar[c] = FILTERED;
        }
    }

    /**
     * Kotlin default-argument bridge: bits 0..3 of {@code i4} select the defaults
     * {@code minGroup = 3}, {@code maxGroup = 12}, {@code minOccurCount = 5} and
     * {@code excludeCoreDict = true} respectively.
     */
    public /* synthetic */ NewWordFindEngine(int i, int i2, int i3, boolean z, int i4, DefaultConstructorMarker defaultConstructorMarker) {
        this((i4 & 1) != 0 ? 3 : i, (i4 & 2) != 0 ? 12 : i2, (i4 & 4) != 0 ? 5 : i3, (i4 & 8) != 0 ? true : z);
    }

    /** Creates an engine with all default settings. */
    public NewWordFindEngine() {
        this(0, 0, 0, false, 15, null);
    }
}
