package com.mayabot.nlp.segment.plugins.atom;

import com.mayabot.nlp.algorithm.collection.dat.DoubleArrayTrieStringIntMap;
import com.mayabot.nlp.algorithm.collection.dat.FastDatCharSet;
import com.mayabot.nlp.common.injector.Singleton;
import com.mayabot.nlp.segment.Nature;
import com.mayabot.nlp.segment.WordSplitAlgorithm;
import com.mayabot.nlp.segment.common.BaseSegmentComponent;
import com.mayabot.nlp.segment.common.String2;
import com.mayabot.nlp.segment.wordnet.Wordnet;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import kotlin.Metadata;
import kotlin.jvm.internal.Intrinsics;
import org.jetbrains.annotations.NotNull;

/* compiled from: AtomSplitAlgorithm.kt */
@Singleton
@Metadata(mv = {1, 4, 1}, bv = {1, 0, 3}, k = 1, d1 = {"��8\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0010\u0002\n��\n\u0002\u0018\u0002\n��\b\u0007\u0018��2\u00020\u00012\u00020\u0002B\r\u0012\u0006\u0010\u0003\u001a\u00020\u0004¢\u0006\u0002\u0010\u0005J\u0010\u0010\u0010\u001a\u00020\u00112\u0006\u0010\u0012\u001a\u00020\u0013H\u0016R\u000e\u0010\u0006\u001a\u00020\u0007X\u0082\u0004¢\u0006\u0002\n��R\u0011\u0010\b\u001a\u00020\t¢\u0006\b\n��\u001a\u0004\b\n\u0010\u000bR\u0016\u0010\f\u001a\n \u000e*\u0004\u0018\u00010\r0\rX\u0082\u0004¢\u0006\u0002\n��R\u0016\u0010\u000f\u001a\n \u000e*\u0004\u0018\u00010\r0\rX\u0082\u0004¢\u0006\u0002\n��¨\u0006\u0014"}, d2 = {"Lcom/mayabot/nlp/segment/plugins/atom/AtomSplitAlgorithm;", "Lcom/mayabot/nlp/segment/common/BaseSegmentComponent;", "Lcom/mayabot/nlp/segment/WordSplitAlgorithm;", "templateProvider", "Lcom/mayabot/nlp/segment/plugins/atom/AtomSplitAlgorithmTemplateProvider;", "(Lcom/mayabot/nlp/segment/plugins/atom/AtomSplitAlgorithmTemplateProvider;)V", "chineseNumSet", "Lcom/mayabot/nlp/algorithm/collection/dat/FastDatCharSet;", "dat", "Lcom/mayabot/nlp/algorithm/collection/dat/DoubleArrayTrieStringIntMap;", "getDat", "()Lcom/mayabot/nlp/algorithm/collection/dat/DoubleArrayTrieStringIntMap;", "emailPattern", "Ljava/util/regex/Pattern;", "kotlin.jvm.PlatformType", "xPattern", "fill", "", "wordnet", "Lcom/mayabot/nlp/segment/wordnet/Wordnet;", "mynlp"})
/* loaded from: input_file:com/mayabot/nlp/segment/plugins/atom/AtomSplitAlgorithm.class */
/*
 * Word-split step that recognises "atomic" tokens by character-class templates.
 *
 * The input characters are rewritten into a template alphabet ('N', 'A', 'Z', everything
 * else unchanged), the template is matched against a trie loaded from the template
 * provider, and each hit is written into the Wordnet with a Nature tag chosen by the
 * trie value. Two regular expressions run over the same template as a supplement.
 */
public final class AtomSplitAlgorithm extends BaseSegmentComponent implements WordSplitAlgorithm {

    /** Trie of template strings, loaded from the template provider in the constructor. */
    @NotNull
    private final DoubleArrayTrieStringIntMap dat;

    /** Characters above ASCII that count as numerals; they map to 'Z' in the template. */
    private final FastDatCharSet chineseNumSet;

    /** Applied to the template only when the input contained an '@'. */
    private final Pattern emailPattern;

    /** Applied to the template when a number/type-4 hit starts right where a type-3 hit ended. */
    private final Pattern xPattern;

    /**
     * Creates the algorithm at {@code BaseSegmentComponent.LEVEL2}.
     *
     * @param atomSplitAlgorithmTemplateProvider supplies the template trie via {@code load()}
     */
    public AtomSplitAlgorithm(@NotNull AtomSplitAlgorithmTemplateProvider atomSplitAlgorithmTemplateProvider) {
        super(BaseSegmentComponent.LEVEL2);
        Intrinsics.checkNotNullParameter(atomSplitAlgorithmTemplateProvider, "templateProvider");
        this.dat = atomSplitAlgorithmTemplateProvider.load();
        // Chinese numerals: zero..nine, the alternate "two", the financial (anti-fraud) forms
        // of one..ten, and the units ten / hundred / thousand / ten-thousand / hundred-million.
        // Written as char escapes (same code points as the decompiled int literals).
        this.chineseNumSet = new FastDatCharSet(
                '\u96f6', '\u4e00', '\u4e8c', '\u4e09', '\u56db', '\u4e94', '\u516d',
                '\u4e03', '\u516b', '\u4e5d', '\u4e24', '\u58f9', '\u8d30', '\u53c1',
                '\u8086', '\u4f0d', '\u9646', '\u67d2', '\u634c', '\u7396', '\u62fe',
                '\u5341', '\u767e', '\u5343', '\u4e07', '\u4ebf');
        // Both patterns are written in the template alphabet, not against raw text:
        // '.' has already been rewritten to 'N', so "NA+" covers a ".com"-style suffix.
        this.emailPattern = Pattern.compile("[NA]+@[NA]+NA+");
        this.xPattern = Pattern.compile("A+[N\\-][N\\-A]*");
    }

    /**
     * Returns the template trie loaded at construction time.
     *
     * @return the trie, never {@code null}
     */
    @NotNull
    public final DoubleArrayTrieStringIntMap getDat() {
        return this.dat;
    }

    /**
     * Adds atom candidates (numbers, letter runs, e-mail-like strings, ...) to the wordnet.
     *
     * <p>Does nothing when the text contains no ASCII digit, no ASCII letter and no
     * Chinese numeral.
     *
     * @param wordnet the wordnet whose character array is scanned and which receives the words
     */
    @Override // com.mayabot.nlp.segment.WordSplitAlgorithm
    public void fill(@NotNull Wordnet wordnet) {
        Intrinsics.checkNotNullParameter(wordnet, "wordnet");
        char[] chars = wordnet.getCharArray();
        Intrinsics.checkNotNullExpressionValue(chars, "wordnet.charArray");

        // Cheap pre-scan so plain text skips the copy and the trie walk entirely.
        if (!containsAtomCandidate(chars)) {
            return;
        }

        // Rewrite the text into the template alphabet; unclassified characters stay as-is.
        char[] template = Arrays.copyOf(chars, chars.length);
        boolean foundAt = false;
        for (int i = 0; i < chars.length; i++) {
            char c = chars[i];
            if (c < '{') {
                if (isAsciiDigit(c) || c == '.') {
                    template[i] = 'N';
                } else if (isAsciiLetter(c) || c == '_') {
                    template[i] = 'A';
                } else if (c == '@') {
                    foundAt = true;
                }
            } else if (this.chineseNumSet.contains(c)) {
                template[i] = 'Z';
            }
        }

        // The meaning of each trie value is defined by the template data (not visible here);
        // the comments below only describe what this method does with it.
        DoubleArrayTrieStringIntMap.DATMapLongMatcherInt match = this.dat.matchLong(template, 0);
        int lastType3End = -1;       // offset just past the most recent type-3 hit
        boolean needXPattern = false;
        while (match.next()) {
            int begin = match.getBegin();
            int length = match.getLength();
            switch (match.getValue()) {
                case 0:
                    wordnet.put(begin, length).setAbsWordNatureAndFreq(Nature.t);
                    break;
                case 1:
                    // A lone '.' is templated as 'N' but is not a number on its own.
                    if (length == 1 && chars[begin] == '.') {
                        break;
                    }
                    if (begin == lastType3End) {
                        needXPattern = true;
                    }
                    wordnet.put(begin, length).setAbsWordNatureAndFreq(Nature.m);
                    break;
                case 2:
                    wordnet.put(begin, length).setAbsWordNatureAndFreq(Nature.mq);
                    break;
                case 3:
                    wordnet.put(begin, length).setAbsWordNatureAndFreq(Nature.x);
                    lastType3End = begin + length;
                    break;
                case 4:
                    // Adds no word by itself; only matters when glued to a preceding type-3 hit.
                    if (begin == lastType3End) {
                        needXPattern = true;
                    }
                    break;
                default:
                    break;
            }
        }

        if (foundAt) {
            putPatternMatches(wordnet, this.emailPattern, template);
        }
        if (needXPattern) {
            putPatternMatches(wordnet, this.xPattern, template);
        }
    }

    /**
     * Reports whether the text holds at least one ASCII digit, ASCII letter or Chinese numeral.
     * Stops at the first such character.
     */
    private boolean containsAtomCandidate(char[] chars) {
        for (char c : chars) {
            if (c < '{') {
                // Upper case must be 'A'..'Z'; the former 'a'..'Z' range was empty, so
                // capital-only input was never recognised as containing letters.
                if (isAsciiDigit(c) || isAsciiLetter(c)) {
                    return true;
                }
            } else if (this.chineseNumSet.contains(c)) {
                return true;
            }
        }
        return false;
    }

    /** Puts every match of {@code pattern} over the template into the wordnet as {@code Nature.x}. */
    private static void putPatternMatches(Wordnet wordnet, Pattern pattern, char[] template) {
        Matcher matcher = pattern.matcher(new String2(template));
        while (matcher.find()) {
            int start = matcher.start();
            wordnet.put(start, matcher.end() - start).setAbsWordNatureAndFreq(Nature.x);
        }
    }

    /** True for '0'..'9'. */
    private static boolean isAsciiDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /** True for 'A'..'Z' and 'a'..'z'. */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }
}
