package com.alibaba.alink.operator.common.nlp.bert.tokenizer;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

/* loaded from: input_file:com/alibaba/alink/operator/common/nlp/bert/tokenizer/TokenizationUtils.class */
/**
 * Character-classification and whitespace-splitting helpers used by the BERT tokenizers.
 *
 * <p>All methods are static and operate on Unicode code points (not UTF-16 code units), using
 * the general categories reported by {@link Character#getType(int)}.
 */
public class TokenizationUtils {
    /** General categories treated as control characters by {@link #isControl(int)}. */
    private static final Set<Byte> CONTROL_CATEGORY_SET = new HashSet<>(Arrays.asList(
        Character.CONTROL,
        Character.FORMAT,
        Character.PRIVATE_USE,
        Character.SURROGATE,
        Character.UNASSIGNED));

    /** General categories treated as punctuation by {@link #isPunctuation(int)}. */
    private static final Set<Byte> PUNCTUATION_CATEGORY_SET = new HashSet<>(Arrays.asList(
        Character.CONNECTOR_PUNCTUATION,
        Character.DASH_PUNCTUATION,
        Character.END_PUNCTUATION,
        Character.FINAL_QUOTE_PUNCTUATION,
        Character.INITIAL_QUOTE_PUNCTUATION,
        Character.OTHER_PUNCTUATION,
        Character.START_PUNCTUATION));

    /**
     * Tells whether a code point is a control character.
     *
     * @param i the Unicode code point to classify
     * @return {@code true} if the general category of {@code i} is control, format, private-use,
     *     surrogate or unassigned; {@code false} for tab, line feed and carriage return
     */
    public static boolean isControl(int i) {
        // Tab, LF and CR are technically control characters, but they are handled as whitespace.
        if (i == '\t' || i == '\n' || i == '\r') {
            return false;
        }
        return CONTROL_CATEGORY_SET.contains((byte) Character.getType(i));
    }

    /**
     * Tells whether a code point is whitespace.
     *
     * @param i the Unicode code point to classify
     * @return {@code true} for space, tab, line feed, carriage return, and any code point whose
     *     general category is {@link Character#SPACE_SEPARATOR}
     */
    public static boolean isWhitespace(int i) {
        if (i == ' ' || i == '\t' || i == '\n' || i == '\r') {
            return true;
        }
        return Character.getType(i) == Character.SPACE_SEPARATOR;
    }

    /**
     * Tells whether a code point is punctuation.
     *
     * <p>Every non-alphanumeric printable ASCII character counts as punctuation, including
     * symbols such as {@code $}, {@code ^} and {@code `} whose Unicode category is not a
     * punctuation category.
     *
     * @param i the Unicode code point to classify
     * @return {@code true} if {@code i} is in one of the ASCII ranges {@code !../},
     *     {@code :..@}, {@code [..`}, <code>{..~</code>, or has a Unicode punctuation category
     */
    public static boolean isPunctuation(int i) {
        boolean asciiPunctuation = (i >= '!' && i <= '/')
            || (i >= ':' && i <= '@')
            || (i >= '[' && i <= '`')
            || (i >= '{' && i <= '~');
        if (asciiPunctuation) {
            return true;
        }
        return PUNCTUATION_CATEGORY_SET.contains((byte) Character.getType(i));
    }

    /**
     * Trims the text and splits it on whitespace.
     *
     * <p>A run of consecutive whitespace characters acts as a single separator, so the result
     * never contains empty tokens. Input that is empty after trimming yields an empty array.
     *
     * @param str the text to split
     * @return the whitespace-separated tokens, in order; never {@code null}
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String[] whitespaceTokenize(String str) {
        String trimmed = str.trim();
        // String.split returns the whole input as a single element when nothing matches, so an
        // empty input would otherwise come back as one empty token.
        if (trimmed.isEmpty()) {
            return new String[0];
        }
        // "\\s+" rather than "\\s": splitting on single characters would emit an empty token
        // between each pair of adjacent whitespace characters.
        return trimmed.split("\\s+");
    }
}
