/*
 * Decompiled with CFR 0.152.
 */
package nl.mpi.annex.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import nl.mpi.annex.util.UTF8Validator;
import org.apache.log4j.Logger;

/**
 * Table-driven helpers that compose "base character + combining diacritic"
 * sequences into precomposed characters, detect a fixed list of characters
 * that are not in NFC, and undo UTF-8 text that was decoded as single bytes.
 *
 * <p>Only the combinations listed in the tables below are composed; every
 * other pair is passed through unchanged. All methods work on individual
 * UTF-16 code units.
 *
 * <p>{@link #normalizeToNFC(String, String)} updates a shared static counter
 * table without synchronization; the counters are only reported by
 * {@link #main(String[])}.
 */
public class UnicodeNormalizer {
    private static final Logger _logger = Logger.getLogger("ANNEX.UnicodeNormalizer");

    /**
     * Sentinel meaning "no previous character yet". A literal U+FFFF in the
     * input is indistinguishable from this sentinel.
     */
    private static final int START = 0xFFFF;

    /** First and last code unit treated as a combining diacritic. */
    private static final int FIRST_DIACRITIC = 0x0300;
    private static final int LAST_DIACRITIC = 0x036F;
    private static final int DIACRITIC_COUNT = LAST_DIACRITIC - FIRST_DIACRITIC + 1;

    /** Base characters below this value get their own statistics slot. */
    private static final int DIRECT_SLOTS = 0x1100;

    /** Higher base characters share one statistics slot per 256-character page. */
    private static final int PAGE_SLOTS = 256;

    /**
     * Usage statistics, indexed by [diacritic - U+0300][slot]. Slot 0 holds the
     * total for the diacritic, slots 1..0x10FF count individual base
     * characters, and slots 0x1100.. count base characters per page.
     */
    private static final int[][] count = new int[DIACRITIC_COUNT][DIRECT_SLOTS + PAGE_SLOTS];

    /*
     * Composition tables. Element 0 lists the supported base characters; the
     * result for the base character at position p of element 0 is element
     * p + 1. Non-ASCII characters are written as Unicode escapes so the tables
     * do not depend on the encoding used to compile this file.
     */
    private static final String[] grave300 = {
        "aeiouAEIOU yYnNwW\u00fc\u00f6\u00fc",
        "\u00e0", "\u00e8", "\u00ec", "\u00f2", "\u00f9",
        "\u00c0", "\u00c8", "\u00cc", "\u00d2", "\u00d9",
        "`",
        "\u1ef3", "\u1ef2", "\u01f9", "\u01f8", "\u1e81", "\u1e80",
        "\u01dc", "\u00f6\u0300", "\u00fc\u0300"
    };
    private static final String[] acute301 = {
        "aeiouAEIOU yYnNmMgGrR\u0113\u014d\u0169",
        "\u00e1", "\u00e9", "\u00ed", "\u00f3", "\u00fa",
        "\u00c1", "\u00c9", "\u00cd", "\u00d3", "\u00da",
        "\u00b4",
        "\u00fd", "\u00dd", "\u0144", "\u0143", "\u1e3f", "\u1e3e",
        "\u01f5", "\u01f4", "\u0155", "\u0154",
        "\u1e17", "\u1e53", "\u1e79"
    };
    private static final String[] circum302 = {
        "aeiouAEIOU sS",
        "\u00e2", "\u00ea", "\u00ee", "\u00f4", "\u00fb",
        "\u00c2", "\u00ca", "\u00ce", "\u00d4", "\u00db",
        "^",
        "\u015d", "\u015c"
    };
    private static final String[] tilde303 = {
        "aeiouAEIOU nN",
        "\u00e3", "\u1ebd", "\u0129", "\u00f5", "\u0169",
        "\u00c3", "\u1ebc", "\u0128", "\u00d5", "\u0168",
        "~",
        "\u00f1", "\u00d1"
    };
    private static final String[] bar304 = {
        "aeiouAEIOU \u00f6\u00d6\u00f5\u0438\u0418\u0443\u0423",
        "\u0101", "\u0113", "\u012b", "\u014d", "\u016b",
        "\u0100", "\u0112", "\u012a", "\u014c", "\u016a",
        "\u00af",
        "\u022b", "\u022a", "\u022d",
        "\u04e3", "\u04e2", "\u04ef", "\u04ee"
    };
    private static final String[] umlaut308 = {
        "aeiou AEIOU\u0415\u0435\u00f2\u00f9",
        "\u00e4", "\u00eb", "\u00ef", "\u00f6", "\u00fc",
        "\u00a8",
        "\u00c4", "\u00cb", "\u00cf", "\u00d6", "\u00dc",
        "\u0401", "\u0451",
        "\u00f2\u0308", "\u00f9\u0308"
    };
    private static final String[] caron30c = {
        "aeiouAEIOU cnsCNSj",
        "\u01ce", "\u011b", "\u01d0", "\u01d2", "\u01d4",
        "\u01cd", "\u011a", "\u01cf", "\u01d1", "\u01d3",
        "\u02c7",
        "\u010d", "\u0148", "\u0161", "\u010c", "\u0147", "\u0160",
        "\u01f0"
    };
    private static final String[] doublegrave30f = {
        "aeiou ",
        "\u0201", "\u0205", "\u0209", "\u020d", "\u0215",
        "\u2036"
    };
    private static final String[] ogonek328 = {
        "aeiouAEIOU \u00ec\u00f4\u00ed\u00f3",
        "\u0105", "\u0119", "\u012f", "\u01eb", "\u0173",
        "\u0104", "\u0118", "\u012e", "\u01ea", "\u0172",
        "\u02db",
        "\u012f\u0300", "\u01eb\u0302", "\u012f\u0301", "\u01eb\u0301"
    };
    private static final String[] underbar331 = {
        "BbDdKkLlNnRrTtZzh ",
        "\u1e06", "\u1e07", "\u1e0e", "\u1e0f", "\u1e34", "\u1e35",
        "\u1e3a", "\u1e3b", "\u1e48", "\u1e49", "\u1e5e", "\u1e5f",
        "\u1e6e", "\u1e6f", "\u1e94", "\u1e95", "\u1e96",
        "_"
    };

    /**
     * Returns the composition table for a combining diacritic.
     *
     * @param diacritic the combining character
     * @return the matching table, or {@code null} if there is none
     */
    private static String[] tableFor(int diacritic) {
        switch (diacritic) {
            case 0x0300: {
                return grave300;
            }
            case 0x0301: {
                return acute301;
            }
            case 0x0302: {
                return circum302;
            }
            case 0x0303: {
                return tilde303;
            }
            case 0x0304: {
                return bar304;
            }
            case 0x0308: {
                return umlaut308;
            }
            case 0x030C: {
                return caron30c;
            }
            case 0x030F: {
                return doublegrave30f;
            }
            case 0x0328: {
                return ogonek328;
            }
            case 0x0331: {
                return underbar331;
            }
            default: {
                return null;
            }
        }
    }

    /**
     * Looks up the hand-written rules for pairs that the tables do not cover.
     *
     * <p>NOTE(review): the rules involving U+0334 (tilde overlay) rewrite the
     * sequence instead of merely composing it; they look data-specific, so
     * confirm the intent before changing them.
     *
     * @param character the base character
     * @param diacritic the combining character that follows it
     * @return the replacement text, or {@code null} if no rule applies
     */
    private static String combineSpecial(int character, int diacritic) {
        switch (character) {
            case ' ': {
                switch (diacritic) {
                    case 0x0324: {
                        return "\u201e";
                    }
                    case 0x030A: {
                        return "\u00b0";
                    }
                    case 0x030B: {
                        return "\u201c";
                    }
                    case 0x0320: {
                        return "_";
                    }
                    case 0x0329: {
                        return "\u201a";
                    }
                    case 0x0334: {
                        return "~";
                    }
                    default: {
                        return null;
                    }
                }
            }
            case 'a': {
                if (diacritic == 0x0325) {
                    return "\u1e01";
                }
                if (diacritic == 0x0334) {
                    return "\u00e3\u0334";
                }
                return null;
            }
            case 'e': {
                return diacritic == 0x0330 ? "\u1e1b" : null;
            }
            case 'h': {
                return diacritic == 0x0323 ? "\u1e25" : null;
            }
            case 'o': {
                return diacritic == 0x0306 ? "\u014f" : null;
            }
            case 'u': {
                if (diacritic == 0x0324) {
                    return "\u1e73";
                }
                if (diacritic == 0x0330) {
                    return "\u1e75";
                }
                if (diacritic == 0x030A) {
                    return "\u016f";
                }
                return null;
            }
            case 'N': {
                return diacritic == 0x0334 ? "\u00d1\u0334" : null;
            }
            case 0x0303: {
                return diacritic == 0x0334 ? "\u0334\u0303" : null;
            }
            case 0x0334: {
                return diacritic == 0x0303 ? "\u0334" : null;
            }
            default: {
                return null;
            }
        }
    }

    /**
     * Combines a base character with the combining diacritic that follows it.
     *
     * <p>A diacritic at the very start of the text (base character equal to
     * the start sentinel) is logged and combined with a space instead.
     *
     * @param character the base character, or the start sentinel
     * @param diacritic the combining character
     * @param logTag optional context appended to the log message; may be null
     * @return a non-empty string: the composed form if one is known, otherwise
     *         the base character followed by the diacritic
     */
    private static String combine(int character, int diacritic, String logTag) {
        if (character == START) {
            if (logTag != null) {
                _logger.info("$ Initial combining diacritic: " + UnicodeNormalizer.escapeUni(diacritic) + " " + logTag);
            } else {
                _logger.info("$ Initial combining diacritic: " + UnicodeNormalizer.escapeUni(diacritic));
            }
            character = ' ';
        }
        String[] table = tableFor(diacritic);
        if (table != null) {
            int pos = table[0].indexOf((char) character);
            if (pos >= 0) {
                return table[pos + 1];
            }
        }
        String special = combineSpecial(character, diacritic);
        if (special != null) {
            return special;
        }
        return "" + (char) character + (char) diacritic;
    }

    /**
     * Formats a code unit as a Java-style Unicode escape: a backslash, the
     * letter u and the value in lower-case hex, zero-padded to four digits.
     *
     * @param n the value to format
     * @return the escape text, or {@code "start "} for the value 65535, which
     *         this class uses as its "start of text" sentinel
     */
    public static String escapeUni(int n) {
        if (n == START) {
            return "start ";
        }
        return "\\u" + (n < 0x10 ? "0" : "") + (n < 0x100 ? "0" : "") + (n < 0x1000 ? "0" : "") + Integer.toHexString(n);
    }

    /**
     * Explains why a code unit is on this class's list of non-NFC characters.
     *
     * @param c the code unit to classify
     * @return a short label, or {@code null} if the code unit is not listed
     */
    private static String nonNfcReason(int c) {
        if (c >= 0x0958 && c <= 0x095F) {
            return "devanagari with .";
        }
        if (c >= 0x09DC && c <= 0x09DF) {
            return "bengali with .";
        }
        boolean oddInOxiaRun = c > 0x1F70 && c < 0x1F7F && (c & 1) == 1;
        if (oddInOxiaRun || c == 0x1FBB || c == 0x1FC9 || c == 0x1FCB || c == 0x1FD3
                || c == 0x1FDB || c == 0x1FE3 || c == 0x1FEB) {
            return "greek with oxia";
        }
        // NOTE(review): U+F900..U+FB4F is skipped without being reported; the
        // reason is not visible in this file.
        if (c >= 0xF900 && c <= 0xFB4F) {
            return null;
        }
        switch (c) {
            case 0x0340: {
                return "grave tone";
            }
            case 0x0341: {
                return "acute tone";
            }
            case 0x0343: {
                return "koronis";
            }
            case 0x0344: {
                return "dialytika";
            }
            case 0x0374: {
                return "greek '";
            }
            case 0x037E: {
                return "greek ;";
            }
            case 0x0387: {
                return "greek .";
            }
            case 0x0A33:
            case 0x0A36:
            case 0x0A59:
            case 0x0A5B:
            case 0x0A5E: {
                return "gurmukhi with .";
            }
            case 0x0B5C:
            case 0x0B5D: {
                return "oriya with .";
            }
            case 0x0F43:
            case 0x0F4D:
            case 0x0F52:
            case 0x0F57: {
                return "tibetan composed";
            }
            case 0x0F93:
            case 0x0F9D:
            case 0x0FA2:
            case 0x0FA7:
            case 0x0FAC:
            case 0x0FB9: {
                return "tibetan subjoined";
            }
            case 0x0F73:
            case 0x0F75:
            case 0x0F76:
            case 0x0F78:
            case 0x0F81: {
                return "tibetan sign";
            }
            case 0x1FBE: {
                return "prosgegrammeni";
            }
            case 0x1FEE: {
                return "dialytika oxia";
            }
            case 0x1FEF: {
                return "varia";
            }
            case 0x2000: {
                return "en quad";
            }
            case 0x2001: {
                return "emp quad";
            }
            case 0x2126: {
                return "Ohm";
            }
            case 0x212A: {
                return "Kelvin";
            }
            case 0x212B: {
                return "Angstrom";
            }
            case 0x2329: {
                return "left angle";
            }
            case 0x232A: {
                return "right angle";
            }
            case 0x2ADC: {
                return "forking";
            }
            default: {
                return null;
            }
        }
    }

    /**
     * Scans a string for the first code unit on this class's list of
     * characters that are not in NFC.
     *
     * @param s the text to check; must not be null
     * @return {@code null} if no listed character occurs, otherwise a message
     *         naming the first offending code unit and the reason
     */
    public static String checkNFC(String s) {
        int len = s.length();
        for (int i = 0; i < len; ++i) {
            int c = s.charAt(i);
            String bad = nonNfcReason(c);
            if (bad != null) {
                return "Not NFC, first non-NFC char: " + UnicodeNormalizer.escapeUni(c) + " " + bad;
            }
        }
        return null;
    }

    /**
     * Replaces each "character + combining diacritic" pair (diacritics
     * U+0300..U+036F) by its composed form where this class knows one, and
     * records per-diacritic usage statistics as a side effect.
     *
     * <p>The result of one composition is the base for the next diacritic, so
     * stacked diacritics are composed one after another.
     *
     * @param line the text to normalize; must not be null
     * @param logTag optional context for log messages; may be null
     * @return the normalized text
     */
    public static String normalizeToNFC(String line, String logTag) {
        // The most recent character is held back until we know whether a
        // diacritic follows it.
        int previous = START;
        StringBuilder nfc = new StringBuilder();
        int len = line.length();
        for (int i = 0; i < len; ++i) {
            char ch = line.charAt(i);
            if (ch >= FIRST_DIACRITIC && ch <= LAST_DIACRITIC) {
                int[] stats = count[ch - FIRST_DIACRITIC];
                int slot = previous < DIRECT_SLOTS ? previous : DIRECT_SLOTS + (previous >> 8);
                stats[slot] = stats[slot] + 1;
                stats[0] = stats[0] + 1;
                String combined = UnicodeNormalizer.combine(previous, ch, logTag);
                // Emit everything but the last character, which stays pending.
                int last = combined.length() - 1;
                nfc.append(combined, 0, last);
                previous = combined.charAt(last);
                continue;
            }
            if (previous != START) {
                nfc.append((char) previous);
            }
            previous = ch;
        }
        if (len > 0) {
            nfc.append((char) previous);
        }
        return nfc.toString();
    }

    /**
     * Repairs text whose UTF-8 bytes were each turned into one character.
     *
     * <p>If every character is below U+0100, at least one is U+0080 or above,
     * and the characters taken as bytes pass {@code UTF8Validator}, those
     * bytes are decoded as UTF-8. In every other case the input is returned
     * unchanged.
     *
     * @param input the text to inspect; must not be null
     * @return the decoded text, or {@code input} itself if nothing was changed
     */
    public static String fixDoubleEncode(String input) {
        int len = input.length();
        byte[] asBytes = new byte[len];
        boolean sawHighByte = false;
        boolean sawWideChar = false;
        for (int i = 0; i < len; ++i) {
            char ch = input.charAt(i);
            if (ch < 0x100) {
                asBytes[i] = (byte) ch;
                if (ch >= 0x80) {
                    sawHighByte = true;
                }
            } else {
                sawWideChar = true;
            }
        }
        if (sawWideChar) {
            // Characters above U+00FF cannot be raw bytes: already real Unicode.
            _logger.debug("UNICODE query: " + input);
            return input;
        }
        if (!sawHighByte) {
            // Pure ASCII reads the same either way.
            return input;
        }
        if (UTF8Validator.validateUTF8(asBytes, asBytes.length, false) < 0) {
            return input;
        }
        try {
            String output = new String(asBytes, "UTF-8");
            _logger.warn("DOUBLE UNICODE query decoded to: " + output);
            return output;
        } catch (UnsupportedEncodingException use) {
            _logger.error("No UTF-8 support in this JVM! Unchanged: " + input);
            return input;
        }
    }

    /**
     * Reads lines from standard input, runs each through
     * {@link #fixDoubleEncode(String)}, {@link #normalizeToNFC(String, String)}
     * and {@link #checkNFC(String)}, and prints a summary followed by the
     * combining-diacritic statistics.
     *
     * <p>Standard input is read with the platform default charset.
     *
     * @param args ignored
     * @throws IOException if reading standard input fails
     */
    public static void main(String[] args) throws IOException {
        int nNormalized = 0;
        int nNormal = 0;
        int nNonNormalizable = 0;
        for (int i = 0; i < DIACRITIC_COUNT; ++i) {
            for (int j = 0; j < DIRECT_SLOTS + PAGE_SLOTS; ++j) {
                UnicodeNormalizer.count[i][j] = 0;
            }
        }
        BufferedReader linestream = new BufferedReader(new InputStreamReader(System.in));
        // 1-based number of the line being processed; advanced for every line
        // actually read so that log tags match the input.
        int n = 0;
        String line;
        while ((line = linestream.readLine()) != null) {
            ++n;
            String nfc = UnicodeNormalizer.normalizeToNFC(UnicodeNormalizer.fixDoubleEncode(line), "in stdin line " + n);
            String whyBad = UnicodeNormalizer.checkNFC(nfc);
            if (whyBad != null) {
                System.out.println("Cannot reach NFC! " + whyBad + " Line: " + line);
                ++nNonNormalizable;
            } else if (!nfc.equals(line)) {
                ++nNormalized;
            } else {
                ++nNormal;
            }
        }
        System.out.println(nNormal + " lines already were in NFC normal form, normalized " + nNormalized + " and failed for " + nNonNormalizable);
        System.out.println("Combining diacritic counts for " + (nNormal + nNormalized + nNonNormalizable) + " lines:");
        printCounts();
    }

    /** Prints every non-zero entry of the statistics table to standard output. */
    private static void printCounts() {
        for (int i = 0; i < DIACRITIC_COUNT; ++i) {
            int[] stats = count[i];
            if (stats[0] == 0) {
                continue;
            }
            String diacritic = UnicodeNormalizer.escapeUni(i + FIRST_DIACRITIC);
            System.out.println("  ...  " + diacritic + ": " + stats[0]);
            for (int j = 1; j < DIRECT_SLOTS; ++j) {
                if (stats[j] != 0) {
                    System.out.println(UnicodeNormalizer.escapeUni(j) + " " + diacritic + ": " + stats[j]);
                }
            }
            for (int page = 0; page < PAGE_SLOTS; ++page) {
                int hits = stats[DIRECT_SLOTS + page];
                if (hits != 0) {
                    // Keep only the page part of the escape and mark the rest as "xx".
                    System.out.println(UnicodeNormalizer.escapeUni(page * 256).substring(0, 4) + "xx " + diacritic + ": " + hits);
                }
            }
        }
    }
}

