/*
 * Decompiled with CFR 0.152.
 */
package nl.mpi.annot.tools.util;

import java.io.FileInputStream;
import java.io.IOException;

/**
 * Static helpers for two loosely related tasks:
 * <ul>
 *   <li>computing compact bit-set "fingerprints" of a string (character classes,
 *       Unicode areas, hashed bigrams / trigrams / 4-grams, binned length), and</li>
 *   <li>a lenient structural check of UTF-8 byte sequences in a buffer or file.</li>
 * </ul>
 *
 * <p>The UTF-8 check only verifies that lead bytes are followed by the right number of
 * continuation bytes. It accepts the historical 5- and 6-byte forms and does not reject
 * overlong encodings or encoded surrogates.
 */
public class UTF8Validator {
    /** Retained from the original class; not read anywhere in this file. */
    private static final boolean OUTPUT = false;

    /** Expected value of a lead byte after masking with {@link #BITMASKS}; index = number of continuation bytes. */
    private static final int[] BITSEQS = new int[]{
        0b00000000, 0b11000000, 0b11100000, 0b11110000, 0b11111000, 0b11111100};

    /** Mask isolating the length prefix of a lead byte; index = number of continuation bytes. */
    private static final int[] BITMASKS = new int[]{
        0b10000000, 0b11100000, 0b11110000, 0b11111000, 0b11111100, 0b11111110};

    /** Bit pattern (after {@link #CONTMASK}) identifying a continuation byte. */
    private static final int CONTSEQ = 0b10000000;

    /** Mask isolating the two top bits that mark a continuation byte. */
    private static final int CONTMASK = 0b11000000;

    /** 32-bit FNV offset basis (2166136261) expressed as a signed int. */
    private static final int FNV_OFFSET_BASIS = -2128831035;

    /** 32-bit FNV prime. */
    private static final int FNV_PRIME = 16777619;

    /** Number of distinct 4-gram hash slots; 1979 slots fit in 62 ints of 32 bits. */
    private static final int FOURGRAM_SLOTS = 1979;

    /** Size of the read buffer used by {@link #validateFile(String)}. */
    private static final int FILE_CHUNK_SIZE = 1000;

    /**
     * Builds a 31-bit character-class fingerprint of {@code input}.
     *
     * <p>Every character is lower-cased first. Bits set:
     * <ul>
     *   <li>bit 0: a whitespace character was seen (also set by any character whose low
     *       five bits are zero);</li>
     *   <li>bits 1..26: low five bits of the character (for ASCII letters this is the
     *       position in the alphabet);</li>
     *   <li>bit 27: low five bits were 27..31;</li>
     *   <li>bit 28: a digit was seen;</li>
     *   <li>bit 29: a character in U+0080..U+00FF was seen;</li>
     *   <li>bit 30: an ASCII letter was seen.</li>
     * </ul>
     *
     * @param input text to fingerprint; must not be {@code null}
     * @return the combined bit set (never negative)
     */
    public static int fingerprintChars(String input) {
        int retval = 0;
        int len = input.length();
        for (int i = 0; i < len; ++i) {
            char ch = Character.toLowerCase(input.charAt(i));
            if (Character.isWhitespace(ch)) {
                retval |= 1;
            }
            int slot = ch & 0x1F;
            if (slot > 26) {
                retval |= 0x8000000;
            } else {
                retval |= 1 << slot;
            }
            if (Character.isDigit(ch)) {
                retval |= 0x10000000;
            }
            if (ch > 0x7F && ch <= 0xFF) {
                retval |= 0x20000000;
            }
            // ch is already lower-cased, so only the a..z range can match.
            if (ch >= 'a' && ch <= 'z') {
                retval |= 0x40000000;
            }
        }
        return retval;
    }

    /**
     * Builds a bit set recording which Unicode "areas" occur in {@code input}.
     *
     * <p>Each (lower-cased) UTF-16 code unit is mapped to an area number 0..30 by
     * {@link #unicodeArea(char)} and the corresponding bit is set in the result.
     *
     * @param input text to fingerprint; must not be {@code null}
     * @return bit set with bit {@code n} set when some character belongs to area {@code n}
     */
    public static int fingerprintLanguages(String input) {
        int retval = 0;
        int len = input.length();
        for (int i = 0; i < len; ++i) {
            char ch = Character.toLowerCase(input.charAt(i));
            retval |= 1 << unicodeArea(ch);
        }
        return retval;
    }

    /** Returns true when {@code lo <= ch <= hi}. */
    private static boolean inRange(char ch, int lo, int hi) {
        return ch >= lo && ch <= hi;
    }

    /**
     * Maps one UTF-16 code unit to an area number in 0..30.
     *
     * <p>The tests are evaluated strictly in order, so a later "ch &lt;= X" test only sees
     * characters that were not claimed by an earlier range. Area 0 is common whitespace,
     * area 1 collects other control characters and a few special ranges, and area 30 is
     * the fallback for characters no rule claims.
     */
    private static int unicodeArea(char ch) {
        if (ch <= ' ') {
            boolean commonWhitespace = ch == '\t' || ch == '\r' || ch == '\n' || ch == ' ';
            return commonWhitespace ? 0 : 1;
        }
        if (ch <= 0x024F || inRange(ch, 0x1E00, 0x1EFF) || inRange(ch, 0x2C60, 0x2C7F)
                || inRange(ch, 0xA720, 0xA7FF)) {
            return 2;
        }
        if (ch <= 0x02AF || inRange(ch, 0x1D00, 0x1DBF)) {
            return 3;
        }
        if (ch <= 0x036F || inRange(ch, 0x1DC0, 0x1DFF)) {
            // U+02B0..U+02FF reach this branch but are deliberately left in the fallback area.
            return ch >= 0x0300 ? 4 : 30;
        }
        if (ch <= 0x03FF || inRange(ch, 0x1F00, 0x1FFF)) {
            return 4;
        }
        if (ch <= 0x052F || inRange(ch, 0x2DE0, 0x2DFF) || inRange(ch, 0xA640, 0xA69F)) {
            return 5;
        }
        if (ch <= 0x058F) {
            return 6;
        }
        if (ch <= 0x05FF) {
            return 7;
        }
        if (ch <= 0x06FF || inRange(ch, 0x0750, 0x077F)) {
            return 8;
        }
        if (ch <= 0x08FF || inRange(ch, 0x1000, 0x109F)) {
            return 9;
        }
        if (ch <= 0x0DFF) {
            return 10;
        }
        if (ch <= 0x0EFF) {
            return 11;
        }
        if (ch <= 0x0FFF) {
            return 12;
        }
        if (ch <= 0x10FF || inRange(ch, 0x2D00, 0x2D2F)) {
            return 13;
        }
        if (ch <= 0x11FF) {
            return 14;
        }
        if (ch <= 0x139F || inRange(ch, 0x2D80, 0x2DDF)) {
            return 15;
        }
        if (ch <= 0x16FF || inRange(ch, 0x1780, 0x18FF)) {
            return 16;
        }
        if (ch <= 0x177F) {
            return 17;
        }
        if (ch <= 0x1CFF || inRange(ch, 0x2C00, 0x2D7F)) {
            return 18;
        }
        if (ch <= 0x2BFF || inRange(ch, 0x2E00, 0x2E7F)) {
            return 19;
        }
        if (ch <= 0x32FF || inRange(ch, 0x9FA6, 0x9FFF)) {
            return 20;
        }
        if (ch <= 0x4DFF) {
            return 21;
        }
        if (ch <= 0x9FFF) {
            return 22;
        }
        if (ch <= 0xA4CF) {
            return 23;
        }
        if (ch <= 0xABFF) {
            return 24;
        }
        if (ch <= 0xD7FF) {
            return 25;
        }
        if (ch <= 0xDBFF) {
            // High surrogates: classified by which block of supplementary planes they lead into.
            if (inRange(ch, 0xD800, 0xD83F)) {
                return 26;
            }
            if (inRange(ch, 0xD840, 0xD87F)) {
                return 20;
            }
            if (inRange(ch, 0xD880, 0xD8BF)) {
                return 18;
            }
            if (inRange(ch, 0xDB80, 0xDBBF)) {
                return 1;
            }
            return ch >= 0xDBC0 ? 28 : 30;
        }
        if (ch <= 0xDFFF) {
            return 27;
        }
        if (ch <= 0xF8FF) {
            return 28;
        }
        if (ch <= 0xFFEF) {
            return 29;
        }
        return 1;
    }

    /**
     * Builds a 23-int (736-bit) bit set of the character bigrams in {@code input}.
     *
     * <p>Each lower-cased character is reduced to its low five bits (values above 26
     * become 0), and the pair (previous, current) selects bit {@code previous * 27 + current}.
     *
     * @param input text to fingerprint; must not be {@code null}
     * @return a new array of 23 ints; all zero when {@code input} has fewer than 2 characters
     */
    public static int[] fingerprintBigrams(String input) {
        int[] retval = new int[23];
        int previous = 0;
        int len = input.length();
        for (int i = 0; i < len; ++i) {
            char ch = Character.toLowerCase(input.charAt(i));
            int modulo = ch & 0x1F;
            if (modulo > 26) {
                modulo = 0;
            }
            if (i > 0) {
                int slot = previous + modulo;
                retval[slot >> 5] |= 1 << (slot & 0x1F);
            }
            // Pre-scale so the next iteration only has to add its own value.
            previous = modulo * 27;
        }
        return retval;
    }

    /**
     * Builds a 54-int (1728-bit) bit set of the character trigrams in {@code input}.
     *
     * <p>Each lower-cased character is reduced modulo 12 and the triple (a, b, c) selects
     * bit {@code a * 144 + b * 12 + c}.
     *
     * @param input text to fingerprint; must not be {@code null}
     * @return a new array of 54 ints; all zero when {@code input} has fewer than 3 characters
     */
    public static int[] fingerprintTrigrams(String input) {
        int[] retval = new int[54];
        int prevprev = 0;
        int previous = 0;
        int len = input.length();
        for (int i = 0; i < len; ++i) {
            char ch = Character.toLowerCase(input.charAt(i));
            int modulo = ch % 12;
            if (i > 1) {
                int slot = prevprev + previous + modulo;
                retval[slot >> 5] |= 1 << (slot & 0x1F);
            }
            // previous already carries a factor 12, so this yields (char at i-1) * 144.
            prevprev = previous * 12;
            previous = modulo * 12;
        }
        return retval;
    }

    /**
     * Builds a 62-int bit set of the character 4-grams in {@code input}.
     *
     * <p>Each lower-cased character is reduced modulo 251; every window of four values is
     * hashed with an FNV-style xor/multiply loop, and the absolute hash modulo 1979 selects
     * the bit to set.
     *
     * @param input text to fingerprint; must not be {@code null}
     * @return a new array of 62 ints; all zero when {@code input} has fewer than 4 characters
     */
    public static int[] fingerprint4grams(String input) {
        int[] retval = new int[62];
        int len = input.length();
        int[] chars = new int[len];
        for (int i = 0; i < len; ++i) {
            chars[i] = Character.toLowerCase(input.charAt(i)) % 251;
        }
        for (int i = 0; i < len - 3; ++i) {
            int fnv = FNV_OFFSET_BASIS;
            for (int k = 0; k < 4; ++k) {
                fnv ^= chars[i + k];
                fnv *= FNV_PRIME;
            }
            // Take the absolute value in long arithmetic: negating Integer.MIN_VALUE as an
            // int stays negative and would produce a negative array index.
            int slot = (int) (Math.abs((long) fnv) % FOURGRAM_SLOTS);
            retval[slot >> 5] |= 1 << (slot & 0x1F);
        }
        return retval;
    }

    /**
     * Maps a string length to a bin number with progressively coarser bins.
     *
     * <p>Lengths below 32 map to themselves; from there on the bin width doubles
     * (2, 4, 8, 16, 32, 64) at 32, 48, 80, 112, 176 and 304; lengths of 432 or more
     * all map to bin 62.
     *
     * @param length the length to bin
     * @return {@code length} itself when below 32, otherwise a bin number in 32..62
     */
    public static int binLength(int length) {
        if (length < 32) {
            return length;
        }
        if (length < 48) {
            return (length - 32) / 2 + 32;
        }
        if (length < 80) {
            return (length - 48) / 4 + 40;
        }
        if (length < 112) {
            return (length - 80) / 8 + 48;
        }
        if (length < 176) {
            return (length - 112) / 16 + 52;
        }
        if (length < 304) {
            return (length - 176) / 32 + 56;
        }
        if (length < 432) {
            return (length - 304) / 64 + 60;
        }
        return 62;
    }

    /**
     * Renders a bit set as lower-case hexadecimal, eight digits per int.
     *
     * @param bits the bit set; the last array element is printed first
     * @return a string of {@code bits.length * 8} hex digits
     */
    public static String hexifyBits(int[] bits) {
        StringBuilder retval = new StringBuilder(bits.length * 8);
        for (int i = bits.length - 1; i >= 0; --i) {
            String part = Integer.toHexString(bits[i]);
            for (int pad = part.length(); pad < 8; ++pad) {
                retval.append('0');
            }
            retval.append(part);
        }
        return retval.toString();
    }

    /**
     * Renders a bit set as a string of '0' and '1', 32 digits per int.
     *
     * @param bits the bit set; the last array element is printed first, each int with its
     *             most significant bit first
     * @return a string of {@code bits.length * 32} binary digits
     */
    public static String stringifyBits(int[] bits) {
        StringBuilder retval = new StringBuilder(bits.length * 32);
        for (int i = bits.length - 1; i >= 0; --i) {
            int word = bits[i];
            // Unsigned shift lets the mask walk from the sign bit down to bit 0 and then reach 0.
            for (int bit = Integer.MIN_VALUE; bit != 0; bit >>>= 1) {
                retval.append((word & bit) != 0 ? '1' : '0');
            }
        }
        return retval.toString();
    }

    /**
     * Checks the first {@code size} bytes of {@code buf} for structurally valid UTF-8.
     *
     * <p>A multi-byte sequence that is still incomplete when the buffer ends is not
     * reported as an error, so that a buffer may be one chunk of a longer stream.
     *
     * @param buf         bytes to check
     * @param size        number of bytes of {@code buf} to examine; must not exceed
     *                    {@code buf.length}
     * @param allowMiddle when {@code true}, continuation bytes at the very start of the
     *                    buffer (before the first ASCII or lead byte) are tolerated,
     *                    because the buffer may begin in the middle of a sequence
     * @return {@code -1} if an invalid byte was found, otherwise the number of
     *         continuation bytes that followed a lead byte (0 for pure ASCII)
     */
    public static int validateUTF8(byte[] buf, int size, boolean allowMiddle) {
        int count = 0;
        int follow = 0;
        for (int pos = 0; pos < size; ++pos) {
            int b = UTF8Validator.unsignedByteToInt(buf[pos]);
            boolean continuation = (b & CONTMASK) == CONTSEQ;
            if (follow > 0) {
                // Inside a multi-byte sequence: only continuation bytes are acceptable.
                if (continuation) {
                    --follow;
                    if (count >= 0) {
                        ++count;
                    }
                } else {
                    follow = 0;
                    count = -1;
                }
                continue;
            }
            if (b < 128) {
                // Plain ASCII: from here on we are synchronised with the stream.
                allowMiddle = false;
                continue;
            }
            follow = continuationBytesFor(b);
            if (follow > 0) {
                // A lead byte also synchronises us; stray continuation bytes after this
                // point are errors even if the buffer was allowed to start mid-sequence.
                allowMiddle = false;
                continue;
            }
            if (!(allowMiddle && continuation)) {
                count = -1;
            }
        }
        return count;
    }

    /**
     * Returns how many continuation bytes must follow lead byte {@code b} (1..5), or 0
     * when {@code b} is not a multi-byte lead byte.
     */
    private static int continuationBytesFor(int b) {
        for (int i = 1; i < BITMASKS.length; ++i) {
            if ((b & BITMASKS[i]) == BITSEQS[i]) {
                return i;
            }
        }
        return 0;
    }

    /**
     * Converts a signed byte to its unsigned value.
     *
     * @param b the byte
     * @return the value of {@code b} in the range 0..255
     */
    public static int unsignedByteToInt(byte b) {
        return b & 0xFF;
    }

    /**
     * Checks whether the named file contains structurally valid UTF-8.
     *
     * <p>The file is read in chunks and each chunk is passed to
     * {@link #validateUTF8(byte[], int, boolean)} with exactly the number of bytes read.
     * The first chunk must start on a character boundary; later chunks may start in the
     * middle of a sequence. State is not carried between chunks, so a sequence that is cut
     * short exactly at a chunk boundary is not detected.
     *
     * @param filename path of the file to check
     * @return {@code true} if no invalid byte was found; {@code false} if one was found or
     *         the file could not be read (a message is then printed to standard output)
     */
    public static boolean validateFile(String filename) {
        boolean valid = true;
        byte[] buf = new byte[FILE_CHUNK_SIZE];
        try (FileInputStream stream = new FileInputStream(filename)) {
            boolean atStart = true;
            int read;
            while ((read = stream.read(buf)) != -1) {
                // Validate only the bytes actually read; the rest of buf holds stale data.
                if (UTF8Validator.validateUTF8(buf, read, !atStart) < 0) {
                    valid = false;
                    break;
                }
                if (read > 0) {
                    atStart = false;
                }
            }
        }
        catch (IOException e) {
            System.out.println("UTF8Validator error for: " + filename);
            valid = false;
        }
        return valid;
    }

    /**
     * Command-line demo: prints all fingerprints of the first argument (or of the word
     * "example" when no argument is given) in binary and in hexadecimal.
     *
     * @param args optional single argument: the text to fingerprint
     */
    public static void main(String[] args) {
        String test = args.length > 0 ? args[0] : "example";
        long unibits = UTF8Validator.fingerprintChars(test);
        int len = UTF8Validator.binLength(test.length());
        int[] uni = new int[]{(int) unibits, (int) (unibits >> 32)};
        int[] bigrams = UTF8Validator.fingerprintBigrams(test);
        int[] trigrams = UTF8Validator.fingerprintTrigrams(test);
        int[] fourgrams = UTF8Validator.fingerprint4grams(test);

        System.out.println("Fingerprinting '" + test + "': Length slot " + len);
        System.out.println("Unigram bits: B'" + UTF8Validator.stringifyBits(uni) + "' (" + unibits + ")");
        System.out.println("Bigram bits: B'" + UTF8Validator.stringifyBits(bigrams) + "'");
        System.out.println("Trigram bits: B'" + UTF8Validator.stringifyBits(trigrams) + "'");
        System.out.println("4-gram bits: B'" + UTF8Validator.stringifyBits(fourgrams) + "'");
        System.out.println();
        System.out.println("Unigrams in '" + test + "': X'" + UTF8Validator.hexifyBits(uni) + "'");
        System.out.println("Bigrams:  X'" + UTF8Validator.hexifyBits(bigrams) + "'");
        System.out.println("Trigrams: X'" + UTF8Validator.hexifyBits(trigrams) + "'");
        System.out.println("4-grams:  X'" + UTF8Validator.hexifyBits(fourgrams) + "'");
    }
}

