/*
 * Decompiled with CFR 0.152.
 */
package nl.mpi.annot.tools.data;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import nl.mpi.annot.tools.data.AnnexAnnotation;
import nl.mpi.annot.tools.data.AnnexChatParser;
import nl.mpi.annot.tools.data.AnnexEAFHandler;
import nl.mpi.annot.tools.data.AnnexMediaDescriptor;
import nl.mpi.annot.tools.data.AnnexTier;
import nl.mpi.annot.tools.data.AnnexTranscription;
import nl.mpi.annot.tools.data.CSVParser;
import nl.mpi.annot.tools.data.DataUtil;
import nl.mpi.annot.tools.data.PDFParser;
import nl.mpi.annot.tools.data.ShoeboxParser;
import nl.mpi.annot.tools.data.SubripParser;
import nl.mpi.annot.tools.data.TextgridParser;
import org.apache.log4j.Logger;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

public class AnnexParser {
    /** Trace flag; switched on by {@link #main(String[])} and exposed through {@link #isTraceMode()}. */
    private static boolean _TRACE_PARSERS = false;
    /** Optional Shoebox type file; when non-null it is handed to the ShoeboxParser constructor. */
    private static File _typFile;
    /** Shared SAX parser for EAF input, created lazily on first use. */
    private static SAXParser _eafParser;
    /** Monitor guarding the lazy creation of {@link #_eafParser}. */
    private static Object _eafParserSyncer;
    /** Class logger. */
    private static Logger _logger;
    /**
     * Entity lookup table: a single string of space-separated "name replacement" pairs.
     * The XML/HTML filter searches for {@code " name "} and takes the single character
     * that follows the match as the replacement, so every replacement must be exactly
     * one char and the table must start with a space.
     * <p>
     * Fixed relative to the previous table: {@code icirc} now maps to U+00EE (it was
     * U+00CE, which is {@code Icirc}) and {@code Atilde} now maps to U+00C3 (it was
     * U+00E3, which is {@code atilde}).
     */
    private static final String htmlEntities = " nbsp \u00a0 iacute \u00ed oacute \u00f3 aacute \u00e1 amp & eacute \u00e9 ntilde \u00f1 uacute \u00fa yacute \u00fd iexcl \u00a1 uuml \u00fc ordm \u00ba Ntilde \u00d1 ccedil \u00e7 Ouml \u00d6 iquest \u00bf egrave \u00e8 agrave \u00e0 acirc \u00e2 ocirc \u00f4 ldquo \u201c rdquo \u201d auml \u00e4 ouml \u00f6 icirc \u00ee shy \u00ad ordf \u00aa ucirc \u00fb lsquo \u2018 rsquo \u2019 Iacute \u00cd quot \" hellip \u2026 yuml \u00ff Uuml \u00dc Eacute \u00c9 igrave \u00ec ecirc \u00ea Uacute \u00da Aacute \u00c1 Oacute \u00d3 gt > ograve \u00f2 lt < Ccedil \u00c7 ugrave \u00f9 cedil \u00b8 Auml \u00c4 acute \u00b4 ndash \u2013 mdash \u2014 middot \u00b7 Iuml \u00cf iuml \u00ef euml \u00eb Euml \u00cb apos ' Agrave \u00c0 Egrave \u00c8 Igrave \u00cc Ograve \u00d2 Ugrave \u00d9 Yacute \u00dd Atilde \u00c3 atilde \u00e3 Otilde \u00d5 otilde \u00f5 #45 - #10 \n #9 \t bull \u2022 prime \u2032 Prime \u2033 circ \u02c6 tilde ~ Scaron \u0160 scaron \u0161 laquo \u00ab raquo \u00bb copy \u00a9 deg \u00b0 #13 \r & &";

    /**
     * Reports whether parser tracing is currently switched on.
     *
     * @return the current value of the trace flag
     */
    public static boolean isTraceMode() {
        return AnnexParser._TRACE_PARSERS;
    }

    /**
     * Stores the Shoebox type file that later Shoebox parses (annotation type 2) will use.
     *
     * @param shoeboxTypFile the type file to remember; {@code null} makes Shoebox
     *                       parsing fall back to the single-argument parser constructor
     */
    public static void setTypFile(File shoeboxTypFile) {
        AnnexParser._typFile = shoeboxTypFile;
    }

    /**
     * Parses a local annotation file into {@code transcription}, picking the parser by
     * numeric type code: 0 EAF, 1 CHAT, 2 Shoebox, 3 plain text, 4 HTML (tags stripped,
     * body only), 5 XML (tags stripped), 6 CSV, 7 PDF, 8 SubRip, 9 TextGrid. Code 42 parses
     * nothing and only logs a debug message.
     *
     * @param file           the file to read
     * @param annotationType one of the type codes listed above
     * @param transcription  receives the parsed content
     * @return {@code true} when the type was recognised and parsing threw nothing;
     *         {@code false} for an unknown type or when a SAX, I/O or runtime exception
     *         was caught (the exception is logged, never propagated)
     */
    public static boolean parse(File file, int annotationType, AnnexTranscription transcription) {
        try {
            switch (annotationType) {
                case 0: {
                    AnnexParser.parseEAF(file, transcription);
                    return true;
                }
                case 1: {
                    AnnexChatParser.parse(file, transcription);
                    return true;
                }
                case 2: {
                    // Use the configured type file only when one has been set.
                    ShoeboxParser shoebox = _typFile == null
                            ? new ShoeboxParser(file)
                            : new ShoeboxParser(file, _typFile);
                    shoebox.parse(transcription);
                    return true;
                }
                case 3: {
                    AnnexParser.parseTextLines(DataUtil.readLines(file, DataUtil.getCharsetName(file)), transcription);
                    return true;
                }
                case 4: {
                    AnnexParser.parseTextLines(AnnexParser.filterXML(DataUtil.readLines(file, null), true), transcription);
                    return true;
                }
                case 5: {
                    AnnexParser.parseTextLines(AnnexParser.filterXML(DataUtil.readLines(file, null), false), transcription);
                    return true;
                }
                case 6: {
                    CSVParser.parseCSV(DataUtil.readLines(file, DataUtil.getCharsetName(file)), transcription);
                    return true;
                }
                case 7: {
                    PDFParser.parsePDF(file, transcription);
                    return true;
                }
                case 8: {
                    SubripParser.parseSubRip(DataUtil.readLines(file, DataUtil.getCharsetName(file)), transcription);
                    return true;
                }
                case 9: {
                    TextgridParser.parseTextGrid(DataUtil.readLines(file, DataUtil.getCharsetName(file)), transcription);
                    return true;
                }
                case 42: {
                    _logger.debug("AnnexParser.parse: Magic EOF transcription created: " + transcription.getNodeId());
                    return true;
                }
                default: {
                    return false;
                }
            }
        }
        catch (SAXException saxe) {
            _logger.warn("AnnexParser.parse SAXException, skipped file: " + file.getAbsolutePath() + ": " + saxe);
        }
        catch (IllegalArgumentException iae) {
            _logger.warn("AnnexParser.parse IllegalArgumentException, skipped file: " + file.getAbsolutePath() + ": " + iae);
        }
        catch (IOException ioe) {
            _logger.warn("AnnexParser.parse IOException, skipped file: " + file.getAbsolutePath() + ": " + ioe);
        }
        catch (RuntimeException e) {
            // Unexpected failures are logged at error level together with the stack trace.
            _logger.error("AnnexParser.parse RuntimeException, skipped file: " + file.getAbsolutePath() + ": " + e, e);
        }
        // Only reached after one of the catch blocks above.
        return false;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Parses an EAF file with the shared, lazily created SAX parser and, when the file
     * declared no media, adds two default media descriptors derived from the file name
     * (an MPEG video and a WAV audio track marked as extracted from that video).
     *
     * @param file          the EAF file to parse
     * @param transcription receives the parsed content and, if needed, the default media
     * @throws SAXException if the parser cannot be configured (the
     *                      ParserConfigurationException is attached as cause) or the
     *                      document cannot be parsed
     * @throws IOException  if the file cannot be read
     */
    private static void parseEAF(File file, AnnexTranscription transcription) throws SAXException, IOException {
        // Create the shared parser once; the dedicated monitor serialises the creation.
        synchronized (_eafParserSyncer) {
            if (_eafParser == null) {
                SAXParserFactory saxFactory = SAXParserFactory.newInstance();
                saxFactory.setValidating(false);
                saxFactory.setNamespaceAware(false);
                try {
                    _eafParser = saxFactory.newSAXParser();
                }
                catch (ParserConfigurationException pce) {
                    // Keep the original exception as cause so the stack trace is not lost.
                    throw new SAXException("ParserConfigurationException: " + pce, pce);
                }
            }
        }
        AnnexEAFHandler handler = new AnnexEAFHandler(transcription);
        SAXParser parser = _eafParser;
        // The parser instance is shared, so only one thread may use it at a time.
        synchronized (parser) {
            try {
                parser.parse(file, (DefaultHandler)handler);
            }
            finally {
                // Reset even when parsing failed, so the next caller gets a clean parser.
                parser.reset();
            }
        }
        if (transcription.getMediaDescriptors().isEmpty()) {
            // Derive "<base>." from the file name by cutting at the last dot. Unlike blindly
            // dropping the last three characters, this cannot throw for very short names
            // and yields the same result for the usual three-letter ".eaf" extension.
            String fileName = file.getName();
            int dot = fileName.lastIndexOf('.');
            String name = dot >= 0 ? fileName.substring(0, dot + 1) : fileName + ".";
            AnnexMediaDescriptor video = new AnnexMediaDescriptor();
            video.fileName = name + "mpg";
            video.mimeType = "video/x-mpeg1";
            transcription.getMediaDescriptors().add(video);
            AnnexMediaDescriptor audio = new AnnexMediaDescriptor();
            audio.fileName = name + "wav";
            audio.mimeType = "audio/x-wav";
            audio.extractedFrom = name + "mpg";
            transcription.getMediaDescriptors().add(audio);
        }
    }

    /**
     * Strips markup from the given lines and decodes character entities.
     * <p>
     * In HTML mode the input array is modified first: every line seen while outside the
     * body region that does not contain the text "body" is replaced by an empty string.
     * Afterwards every line is scanned character by character: text between
     * {@code <} and {@code >} is dropped (a tag may span several lines), entities of the
     * form {@code &name;} are decoded, and lines that end up empty are discarded. In
     * non-HTML mode the five predefined XML entities still present in the output are
     * replaced in a final pass.
     *
     * @param lines    the raw lines; modified in place when {@code htmlMode} is true
     * @param htmlMode {@code true} to keep only the body region and skip the final
     *                 XML-entity pass
     * @return the non-empty filtered lines, in their original order
     */
    private static String[] filterXML(String[] lines, boolean htmlMode) {
        // Pass 1 (HTML only): blank lines until one mentions "body"; a line mentioning
        // "/body" ends the region again.
        if (htmlMode) {
            boolean insideBody = false;
            for (int n = 0; n < lines.length; ++n) {
                String lowered = lines[n].toLowerCase();
                if (!insideBody) {
                    if (lowered.contains("body")) {
                        insideBody = true;
                    } else {
                        lines[n] = "";
                    }
                }
                if (insideBody && lowered.contains("/body")) {
                    insideBody = false;
                }
            }
        }

        // Pass 2: drop tags, decode entities, keep non-empty results.
        ArrayList<String> kept = new ArrayList<String>();
        boolean insideTag = false; // deliberately survives line breaks
        for (int n = 0; n < lines.length; ++n) {
            String text = lines[n];
            StringBuilder out = new StringBuilder(text.length() + 16);
            int ampAt = -1; // index of the '&' of an entity in progress, or -1
            for (int pos = 0; pos < text.length(); ++pos) {
                char c = text.charAt(pos);
                boolean entityOpen = ampAt > -1;
                if (c == '&' && !insideTag) {
                    // A new '&' (re)starts entity collection.
                    ampAt = pos;
                } else if (entityOpen && c == ' ') {
                    // A space aborts the entity; the collected text is dropped.
                    if (pos != ampAt + 1) {
                        _logger.info("Entity without ; found: " + text.substring(ampAt, pos));
                    }
                    ampAt = -1;
                } else if (entityOpen && c == ';') {
                    out.append(AnnexParser.decodeEntity(text.substring(ampAt + 1, pos)));
                    ampAt = -1;
                } else if (c == '<') {
                    insideTag = true;
                    if (entityOpen) {
                        _logger.info("Entity without ; before tag found: " + text.substring(ampAt, pos));
                    }
                    ampAt = -1;
                } else if (!insideTag && !entityOpen) {
                    out.append(c);
                }
                if (c == '>') {
                    insideTag = false;
                }
            }
            if (ampAt > -1) {
                _logger.warn("Unfinished entity: " + text.substring(ampAt));
            }
            if (out.length() > 0) {
                kept.add(out.toString());
            }
        }

        // Pass 3 (non-HTML only): replace predefined XML entities that are still present.
        String[] result = new String[kept.size()];
        for (int k = 0; k < result.length; ++k) {
            String cleaned = kept.get(k);
            if (!htmlMode && cleaned.indexOf('&') >= 0) {
                cleaned = cleaned.replace("&quot;", "\"")
                        .replace("&lt;", "<")
                        .replace("&gt;", ">")
                        .replace("&apos;", "'")
                        .replace("&amp;", "&");
            }
            result[k] = cleaned;
        }
        return result;
    }

    /**
     * Decodes one entity name (the text between {@code &} and {@code ;}).
     *
     * @param entityName the entity name without the surrounding {@code &} and {@code ;}
     * @return the replacement character as a string, or the entity re-assembled
     *         verbatim when it is unknown or numerically out of range (a warning is
     *         logged in both of those cases)
     */
    private static String decodeEntity(String entityName) {
        // Named lookup: the replacement is the single character after " name ".
        int hit = htmlEntities.indexOf(" " + entityName + " ");
        if (hit != -1) {
            return String.valueOf(htmlEntities.charAt(hit + entityName.length() + 2));
        }
        String verbatim = "&" + entityName + ";";
        boolean numeric = entityName.matches("^#[0-9]{1,5}$")
                || entityName.toLowerCase().matches("^#x[0-9A-Fa-f]{1,4}$");
        if (!numeric) {
            _logger.warn("Unknown HTML entity: " + entityName);
            return verbatim;
        }
        int code;
        try {
            char marker = entityName.charAt(1);
            if (marker == 'x' || marker == 'X') {
                code = Integer.parseInt(entityName.substring(2), 16);
            } else {
                code = Integer.parseInt(entityName.substring(1), 10);
            }
        }
        catch (NumberFormatException nfe) {
            code = 0;
        }
        // Only values that fit a single non-zero char are accepted.
        if (code < 1 || code > 65535) {
            _logger.warn("Error numerical HTML entity out of range: " + entityName);
            return verbatim;
        }
        return String.valueOf((char)code);
    }

    /**
     * Turns plain text lines into a single tier named "text" and adds it to the
     * transcription.
     * <p>
     * Two layouts are handled. If there are more than 1000 lines and more than 8 of the
     * first 10 look like {@code TC: ... UB: ...} timecode lines, the tier gets type
     * "humanethology_timecodes": the clock advances 40 units per line and an annotation
     * is emitted for starred timecode lines, for lines that are not timecodes, and for
     * the last line. Otherwise every line is split on whitespace and punctuation and
     * each token becomes an annotation that is 300 units long.
     *
     * @param lines         the text lines to convert
     * @param transcription receives the resulting tier
     * @throws IOException declared by the signature; nothing in this method throws it directly
     */
    private static void parseTextLines(String[] lines, AnnexTranscription transcription) throws IOException {
        AnnexTier tier = new AnnexTier("text", "text");
        tier.participant = "unknown";
        tier.hasVirtualTime = true;

        if (lines.length > 1000) {
            Pattern timecodeLine = Pattern.compile("^TC: [0-9:]* UB: [0-9:]*[ *]*$");
            // Probe the first ten lines to recognise a timecode-only file.
            int timecodeHits = 0;
            for (int probe = 0; probe < 10; ++probe) {
                if (timecodeLine.matcher(lines[probe].trim()).matches()) {
                    ++timecodeHits;
                }
            }
            if (timecodeHits > 8) {
                tier.hasVirtualTime = false;
                tier.type = "humanethology_timecodes";
                long segmentStart = 0L;
                long clock = 0L;
                int lastIndex = lines.length - 1;
                for (int row = 0; row < lines.length; ++row) {
                    String trimmed = lines[row].trim();
                    clock += 40L;
                    boolean useful;
                    if (timecodeLine.matcher(trimmed).matches()) {
                        // Plain timecode lines only count when they carry a star marker.
                        useful = trimmed.endsWith("*");
                    } else {
                        _logger.info("Real content in humanethology timecode-only TC UB file: '" + lines[row] + "'");
                        useful = true;
                    }
                    if (useful || row == lastIndex) {
                        AnnexAnnotation annotation = new AnnexAnnotation(trimmed, segmentStart, clock, true, "line: " + (row + 1));
                        annotation.interpolatedTime = true;
                        tier.annotations.add(annotation);
                        segmentStart = clock;
                    }
                }
                transcription.getTiers().add(tier);
                return;
            }
        }

        // Generic text: one annotation per token, laid out back to back on a fake clock.
        Pattern separators = Pattern.compile("[\\s\\!\\.\\,\\;\\?\\:]+");
        long cursor = 0L;
        for (int row = 0; row < lines.length; ++row) {
            String trimmed = lines[row].trim();
            if (trimmed.length() != 0) {
                String[] words = separators.split(trimmed);
                for (int col = 0; col < words.length; ++col) {
                    String word = words[col];
                    if (word.length() != 0) {
                        AnnexAnnotation annotation = new AnnexAnnotation(word, cursor, cursor + 300L, false, row + "/" + col);
                        tier.annotations.add(annotation);
                        cursor = annotation.endTime;
                    }
                }
            }
        }
        transcription.getTiers().add(tier);
    }

    /**
     * Parses an annotation document read from a URL into {@code transcription}.
     * Supported type codes are 0 (EAF), 1 (CHAT), 3 (plain text), 4 (HTML), 5 (XML),
     * 6 (CSV), 8 (SubRip) and 9 (TextGrid); any other code is logged as unsupported.
     * <p>
     * The stream opened from the URL is always closed before returning, including for
     * unsupported types and when parsing fails.
     *
     * @param url            location of the document
     * @param annotationType one of the supported type codes listed above
     * @param transcription  receives the parsed content
     * @return {@code true} when the type is supported and nothing was thrown;
     *         {@code false} otherwise (every exception is logged, never propagated)
     */
    public static boolean parse(URL url, int annotationType, AnnexTranscription transcription) {
        boolean valid = true;
        InputStream stream = null;
        try {
            stream = url.openStream();
            InputSource inputSource = new InputSource(stream);
            // The system id is the URL up to and including its last slash.
            String uri = url.toString();
            int lastSlash = uri.lastIndexOf("/");
            if (lastSlash >= 0) {
                uri = uri.substring(0, lastSlash);
            }
            inputSource.setSystemId(uri + "/");
            switch (annotationType) {
                case 0: {
                    AnnexParser.parseEAF(inputSource, transcription);
                    break;
                }
                case 1: {
                    AnnexChatParser.parse(url, transcription);
                    break;
                }
                case 3: {
                    AnnexParser.parseTextLines(DataUtil.readLines(inputSource, DataUtil.getCharsetName(inputSource)), transcription);
                    break;
                }
                case 4: {
                    AnnexParser.parseTextLines(AnnexParser.filterXML(DataUtil.readLines(inputSource, null), true), transcription);
                    break;
                }
                case 5: {
                    AnnexParser.parseTextLines(AnnexParser.filterXML(DataUtil.readLines(inputSource, null), false), transcription);
                    break;
                }
                case 6: {
                    CSVParser.parseCSV(DataUtil.readLines(inputSource, DataUtil.getCharsetName(inputSource)), transcription);
                    break;
                }
                case 8: {
                    SubripParser.parseSubRip(DataUtil.readLines(inputSource, DataUtil.getCharsetName(inputSource)), transcription);
                    break;
                }
                case 9: {
                    TextgridParser.parseTextGrid(DataUtil.readLines(inputSource, DataUtil.getCharsetName(inputSource)), transcription);
                    break;
                }
                default: {
                    _logger.error("AnnexParser.parse does not support remote URL streams for file format of: " + uri);
                    valid = false;
                    break;
                }
            }
        }
        catch (Exception e) {
            valid = false;
            // Concatenating the URL (instead of calling toString()) keeps this safe for a null url.
            _logger.error("AnnexParser.parse Exception in stream: " + url + ": " + e, e);
        }
        finally {
            // Release the connection on every path; a parser may already have closed it.
            if (stream != null) {
                try {
                    stream.close();
                }
                catch (IOException closeFailure) {
                    _logger.debug("AnnexParser.parse could not close stream: " + url + ": " + closeFailure);
                }
            }
        }
        return valid;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Parses an EAF document from an input source with the shared, lazily created SAX
     * parser. No default media descriptors are added by this variant.
     *
     * @param inputSource   the EAF document to parse
     * @param transcription receives the parsed content
     * @throws SAXException if the parser cannot be configured (the
     *                      ParserConfigurationException is attached as cause) or the
     *                      document cannot be parsed
     * @throws IOException  if the source cannot be read
     */
    private static void parseEAF(InputSource inputSource, AnnexTranscription transcription) throws SAXException, IOException {
        // Create the shared parser once; the dedicated monitor serialises the creation.
        synchronized (_eafParserSyncer) {
            if (_eafParser == null) {
                SAXParserFactory saxFactory = SAXParserFactory.newInstance();
                saxFactory.setValidating(false);
                saxFactory.setNamespaceAware(false);
                try {
                    _eafParser = saxFactory.newSAXParser();
                }
                catch (ParserConfigurationException pce) {
                    // Keep the original exception as cause so the stack trace is not lost.
                    throw new SAXException("ParserConfigurationException: " + pce, pce);
                }
            }
        }
        AnnexEAFHandler handler = new AnnexEAFHandler(transcription);
        SAXParser parser = _eafParser;
        // The parser instance is shared, so only one thread may use it at a time.
        synchronized (parser) {
            try {
                parser.parse(inputSource, (DefaultHandler)handler);
            }
            finally {
                // Reset even when parsing failed, so the next caller gets a clean parser.
                parser.reset();
            }
        }
    }

    /**
     * Command-line test driver: switches tracing on, parses one file and dumps the
     * resulting media descriptors, tiers and annotations to standard output.
     *
     * @param args exactly two values: a type keyword (for example eaf, chat, box, txt,
     *             html, xml, csv, pdf, srt, textgrid) and the file name; with any other
     *             argument count only a usage hint is printed
     */
    public static void main(String[] args) {
        _TRACE_PARSERS = true;
        if (args.length != 2) {
            System.out.println("Give a type and a file name to test-parse a file");
            System.out.println("Types: eaf chat box txt html xml csv");
            return;
        }

        // Keywords are tested from highest to lowest precedence; plain text (3) is the fallback.
        String keyword = args[0].toLowerCase();
        int type;
        if (keyword.contains("textgrid") || keyword.contains("praat")) {
            type = 9;
        } else if (keyword.contains("subrip") || keyword.contains("srt")) {
            type = 8;
        } else if (keyword.contains("pdf")) {
            type = 7;
        } else if (keyword.contains("txt")) {
            type = 3;
        } else if (keyword.contains("csv")) {
            type = 6;
        } else if (keyword.contains("htm")) {
            type = 4;
        } else if (keyword.contains("xml")) {
            type = 5;
        } else if (keyword.contains("box")) {
            type = 2;
        } else if (keyword.startsWith("cha")) {
            type = 1;
        } else if (keyword.startsWith("eaf")) {
            type = 0;
        } else {
            type = 3;
        }

        String fileName = args[1];
        AnnexTranscription transcription = new AnnexTranscription("MPI42#", type, new File(fileName));
        String verdict = transcription.isValid() ? "OKAY" : "FAIL";
        System.out.println("Parse result: " + verdict + " for file: " + fileName);
        System.out.println("Header info:  " + (transcription.getInfo() == null ? "NONE" : transcription.getInfo()));

        ArrayList<AnnexMediaDescriptor> media = transcription.getMediaDescriptors();
        System.out.println("Media: " + (media.size() == 0 ? "NONE" : "" + media.size()));
        for (int m = 0; m < media.size(); ++m) {
            AnnexMediaDescriptor descriptor = media.get(m);
            String mimePart = descriptor.mimeType == null ? "mimetype:n/a" : descriptor.mimeType;
            String sourcePart = descriptor.extractedFrom == null ? "" : " [from: " + descriptor.extractedFrom + "]";
            String offsetPart = descriptor.offset == 0L ? "" : " offset: " + descriptor.offset;
            System.out.println("Media[" + m + "] '" + descriptor.fileName + "' [" + mimePart + "]" + sourcePart + offsetPart);
        }

        ArrayList<AnnexTier> tiers = transcription.getTiers();
        System.out.println("Tiers: " + tiers.size());
        for (int t = 0; t < tiers.size(); ++t) {
            AnnexTier tier = tiers.get(t);
            ArrayList<AnnexAnnotation> annotations = tier.annotations;
            String localePart = tier.defaultLocale != null ? " locale='" + tier.defaultLocale + "'" : "";
            String annotatorPart = tier.annotator != null ? " annotator='" + tier.annotator + "'" : "";
            String timePart = tier.hasVirtualTime ? "[VirtualTime] " : "[RealTime] ";
            String parentPart = tier.parentTier != null ? "[below '" + tier.parentTier.name + "']" : "[TOP]";
            System.out.println("Tier[" + t + "] '" + tier.name + "'" + localePart + annotatorPart
                    + " type='" + tier.type + "' who='" + tier.participant + "' " + timePart + parentPart
                    + " " + annotations.size() + " annot.");
            System.out.println("Slot\tStart\tEnd\tANNO: Value");
            for (int slot = 0; slot < annotations.size(); ++slot) {
                AnnexAnnotation anno = annotations.get(slot);
                if (anno == null) {
                    System.out.println("[[[***null AnnexAnnotation object***]]]");
                } else {
                    String alignedMark = anno.isTimeAligned ? "=" : "";
                    String interpolatedMark = anno.interpolatedTime ? "i" : "";
                    System.out.println(slot + "\t" + anno.beginTime + "\t" + anno.endTime + "\tANNO:"
                            + alignedMark + interpolatedMark + " [" + anno.value + "]");
                }
            }
        }
    }

    // Class initialisation: set up the logger and the lock object; the shared EAF
    // parser starts out absent and is created on first use.
    static {
        _logger = Logger.getLogger(AnnexParser.class.getName());
        _eafParserSyncer = new Object();
        _eafParser = null;
    }
}

