/*
 * Decompiled with CFR 0.152.
 */
package nl.mpi.annot.tools.data;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import nl.mpi.annot.tools.data.AnnexAnnotation;
import nl.mpi.annot.tools.data.AnnexParser;
import nl.mpi.annot.tools.data.AnnexTier;
import nl.mpi.annot.tools.data.AnnexTranscription;
import nl.mpi.annot.tools.data.DataUtil;
import nl.mpi.annot.tools.data.FlexEncReader;
import nl.mpi.annot.tools.data.ShoeboxRecord;
import nl.mpi.annot.tools.data.ShoeboxTiers;

/**
 * Reads a Shoebox/Toolbox text file and fills an {@link AnnexTranscription}
 * with one {@link AnnexTier} per marker found in the records.
 *
 * <p>The parser works in one of two modes:
 * <ul>
 *   <li><b>unknown tiers</b> ({@link #ShoeboxParser(File)}): the record marker
 *       is guessed heuristically from the file itself;</li>
 *   <li><b>known tiers</b> ({@link #ShoeboxParser(File, File)}): the tier
 *       structure (and, for {@code .mkr} files, the per-marker encoding) is
 *       read from a separate meta file.</li>
 * </ul>
 */
public class ShoeboxParser {
    private static final Logger _logger = Logger.getLogger(ShoeboxParser.class.getName());

    /** Name of the record marker (without the leading backslash); defaults to {@code "ref"}. */
    public String REF_TAG = "ref";

    /** Tier structure; replaced whenever a meta/marker file is (re-)read. */
    private ShoeboxTiers tiers;

    /** The Shoebox/Toolbox file being parsed. */
    private final File file;

    /** True when the tier structure came from a meta file rather than from guessing. */
    private final boolean knownTiers;

    /** Marker name to charset name ("UTF-8" or "ISO-8859-1"), filled by {@link #readMarkerFile(File)}. */
    private final HashMap<String, String> _encodingHash = new HashMap<String, String>();

    /**
     * Creates a parser for a file without tier meta data and guesses the
     * record marker from the file content.
     *
     * @param shoeboxFile the Shoebox/Toolbox file to parse
     * @throws IOException if the file cannot be read or no usable record
     *                     marker could be determined
     */
    public ShoeboxParser(File shoeboxFile) throws IOException {
        this.file = shoeboxFile;
        this.tiers = new ShoeboxTiers();
        this.knownTiers = false;
        this.guessRefTag();
    }

    /**
     * Creates a parser whose tier structure is read from a meta file.
     * A file whose name ends in {@code .mkr} is read with
     * {@link #readMarkerFile(File)}; anything else is read with
     * {@link #readMetaFiles(File, File)}.
     *
     * @param shoeboxFile the Shoebox/Toolbox file to parse
     * @param metaFile    the {@code .mkr} or {@code .typ} file describing the tiers
     */
    public ShoeboxParser(File shoeboxFile, File metaFile) {
        this.knownTiers = true;
        this.file = shoeboxFile;
        if (metaFile.getName().endsWith(".mkr")) {
            this.readMarkerFile(metaFile);
        } else {
            this.readMetaFiles(metaFile, null);
        }
    }

    /**
     * Reads a marker ({@code .mkr}) file and rebuilds the tier structure from it.
     *
     * <p>The file is read as UTF-8, line by line. {@code marker:},
     * {@code parent:} and {@code stereotype:} lines only remember their value;
     * a {@code charset:} line completes the current entry and registers it.
     * The remembered values are not reset between entries. An entry whose
     * parent is the literal string {@code "null"} becomes the record marker.
     *
     * <p>I/O errors are logged and otherwise ignored; the tier structure then
     * contains whatever was read up to that point.
     *
     * @param markerFile the marker file to read
     */
    public void readMarkerFile(File markerFile) {
        _logger.log(Level.FINE, "Reading marker file: " + markerFile);
        this.tiers = new ShoeboxTiers();
        // try-with-resources: the reader is closed even when readLine() fails.
        try (BufferedReader read = new BufferedReader(new InputStreamReader(new FileInputStream(markerFile), "UTF-8"))) {
            String line;
            String marker = null;
            String stereotype = null;
            String parent = null;
            while ((line = read.readLine()) != null) {
                if (line.startsWith("marker:")) {
                    marker = line.substring(7).trim();
                    continue;
                }
                if (line.startsWith("parent:")) {
                    parent = line.substring(7).trim();
                    continue;
                }
                if (line.startsWith("stereotype:")) {
                    stereotype = line.substring(11).trim();
                    continue;
                }
                if (!line.startsWith("charset:")) {
                    continue;
                }
                String charset = line.substring(8).trim();
                if (marker == null) {
                    // Without a marker name the entry cannot be registered; using
                    // null here would later break parse() on REF_TAG.equals(...).
                    _logger.log(Level.WARNING, "Ignoring charset line without preceding marker line in: " + markerFile);
                    continue;
                }
                if ("null".equals(parent)) {
                    // The parentless entry is the record marker.
                    this.REF_TAG = marker;
                    this.tiers.put("ref", ShoeboxTiers.TierType.REF);
                    continue;
                }
                ShoeboxTiers.TierType type = ShoeboxTiers.TierType.UNKNOWN;
                if ("Symbolic Subdivision".equals(stereotype)) {
                    type = ShoeboxTiers.TierType.ALIGNED;
                } else if ("Time".equals(stereotype)) {
                    type = ShoeboxTiers.TierType.TIME;
                }
                // Anything that is not explicitly UTF-8 is treated as Latin-1.
                this._encodingHash.put(marker, "UTF-8".equals(charset) ? "UTF-8" : "ISO-8859-1");
                this.tiers.put(marker, type);
                this.tiers.setParent(marker, parent);
                this.tiers.addTimeEvidence(marker, type == ShoeboxTiers.TierType.TIME ? 5 : -5);
            }
        }
        catch (IOException e) {
            _logger.log(Level.SEVERE, "IOException in readMarkerFile: " + e + " for: " + markerFile, e);
        }
        _logger.log(Level.INFO, "typ file tier structure: " + this.tiers + " for: " + markerFile);
    }

    /**
     * Reads a {@code .typ} file and rebuilds the tier structure from it.
     *
     * <p>Tabs are treated as spaces. {@code \mkrRecord} sets the record
     * marker, {@code \+mkr} starts a new marker of unknown type, and the first
     * {@code \mkrOverThis} following it sets that marker's parent.
     *
     * <p>I/O errors are logged and otherwise ignored.
     *
     * @param typFile  the type file to read
     * @param langFile not used by this method
     */
    public void readMetaFiles(File typFile, File langFile) {
        this.tiers = new ShoeboxTiers();
        // try-with-resources: the reader is closed even when readLine() fails.
        try (BufferedReader typ = new BufferedReader(new InputStreamReader(new FileInputStream(typFile), "UTF-8"))) {
            String line;
            String marker = null;
            while ((line = typ.readLine()) != null) {
                line = line.replace('\t', ' ');
                if (line.startsWith("\\mkrRecord ")) {
                    this.REF_TAG = line.substring(11).trim();
                    this.tiers.put("ref", ShoeboxTiers.TierType.REF);
                } else if (line.startsWith("\\+mkr ")) {
                    marker = line.substring(6).trim();
                    this.tiers.put(marker, ShoeboxTiers.TierType.UNKNOWN);
                } else if (line.startsWith("\\mkrOverThis ") && marker != null) {
                    this.tiers.setParent(marker, line.substring(13).trim());
                    // Only the first \mkrOverThis after a \+mkr is honoured.
                    marker = null;
                }
            }
        }
        catch (IOException e) {
            _logger.log(Level.SEVERE, "IOException in readMetaFiles: " + e + " for: " + typFile, e);
        }
        _logger.log(Level.INFO, "typ file tier structure: " + this.tiers + " for: " + typFile);
    }

    /**
     * Heuristically determines the record marker of {@link #file} and stores
     * it in {@link #REF_TAG}.
     *
     * <p>The file is scanned as ISO-8859-1. A "block" starts at a backslash
     * line that follows an empty line. The marker is accepted when the same
     * tag starts two consecutive blocks (after the first block), when a
     * {@code ref} block is seen after the second block, or when a
     * {@code \ref} line occurs anywhere outside a block start.
     *
     * @throws IOException on read errors, on an unsupported "grammar" variant,
     *                     or when no record marker can be found
     */
    private void guessRefTag() throws IOException {
        String badFormat = null;
        int hasRef = 0;
        int lineNumber = 0;
        int blockNumber = 0;
        // try-with-resources closes the reader on every exit path (return, break, exception).
        try (BufferedReader read = new BufferedReader(new InputStreamReader(new FileInputStream(this.file), "ISO-8859-1"))) {
            String line;
            String guessedRefTag = null;
            boolean lastWasEmpty = false;
            while ((line = read.readLine()) != null) {
                line = line.replace('\t', ' ');
                ++lineNumber;
                boolean isRefLine = line.startsWith("\\ref ") || "\\ref".equals(line);
                if (isRefLine) {
                    ++hasRef;
                }
                if (line.trim().length() == 0) {
                    lastWasEmpty = true;
                    continue;
                }
                if (lastWasEmpty && line.startsWith("\\")) {
                    // First marker line after an empty line: start of a new block.
                    lastWasEmpty = false;
                    ++blockNumber;
                    int space = line.indexOf(' ');
                    if (space == -1) {
                        space = line.length();
                    }
                    String tag = line.substring(1, space);
                    if (blockNumber < 2) {
                        // The first block is treated as header material and never used for guessing.
                        if (!tag.equals("ref")) {
                            _logger.log(Level.FINE, "First content block has tag: '" + tag + "' " + (hasRef > 0 ? "[after ref] " : "") + "in: " + this.file);
                        }
                        continue;
                    }
                    if (tag.equals(guessedRefTag) && hasRef < 4) {
                        // Two consecutive blocks start with the same tag.
                        this.REF_TAG = tag;
                        if (!tag.equals("ref")) {
                            _logger.log(Level.INFO, "Heuristically guessed record marker to be: '" + tag + "' " + (hasRef > 0 ? "[after ref] " : "") + "in: " + this.file);
                        }
                        return;
                    }
                    if (tag.equals("ref") && blockNumber > 2) {
                        this.REF_TAG = tag;
                        return;
                    }
                    guessedRefTag = tag;
                    continue;
                }
                lastWasEmpty = false;
                if (lineNumber < 42 && line.startsWith("\\_sh") && (line.toLowerCase().contains("grammar") || line.toLowerCase().contains(" v2.0a gr "))) {
                    badFormat = "Cannot parse 'grammar' (" + line + ") in: " + this.file;
                    break;
                }
                if (!isRefLine) {
                    continue;
                }
                this.REF_TAG = "ref";
                if (line.equals("\\ref") && AnnexParser.isTraceMode()) {
                    _logger.log(Level.FINE, "First record marker ref tag line has no content in: " + this.file);
                }
                return;
            }
        }
        catch (IOException ioe) {
            _logger.log(Level.WARNING, "IOException in guessRefTag: " + ioe + " for: " + this.file);
            throw ioe;
        }
        if (hasRef > 0) {
            // \ref lines were seen, but only at block starts that did not settle the guess.
            _logger.log(Level.INFO, "Short file with " + hasRef + " ref tag" + (hasRef > 1 ? "s" : "") + ": " + lineNumber + " lines, " + blockNumber + " blocks in: " + this.file);
            this.REF_TAG = "ref";
            if (lineNumber > 25) {
                throw new IOException("Too many lines for file without clear structure: " + lineNumber + " (" + blockNumber + " blocks)");
            }
            return;
        }
        if (badFormat != null) {
            _logger.log(Level.WARNING, "Unsupported Shoebox/Toolbox syntax variant: " + badFormat + " in: " + this.file);
            throw new IOException("Unsupported format: " + badFormat);
        }
        _logger.log(Level.WARNING, "No suitable record marker found, not Shoebox? Lines: " + lineNumber + " Blocks: " + blockNumber + " In: " + this.file);
        throw new IOException("Shoebox/Toolbox needs ref or other record markers.");
    }

    /**
     * Parses {@link #file} into the given transcription.
     *
     * <p>Lines before the first record marker are collected as header info,
     * as are {@code \ELANMedia} lines inside records. Every other line is
     * added to the current record. Records are converted to annotations one
     * record late, so that each record can be handed its successor when
     * {@code provideAlternativeTime} is called. Afterwards the parent/child
     * links between tiers are created.
     *
     * @param transcription the transcription to fill
     * @throws IOException on read errors, when a line contains tab stops
     *                     inside its text (unless trace mode is on), or when
     *                     no tiers were found at all
     */
    public void parse(AnnexTranscription transcription) throws IOException {
        try {
            FlexEncReader reader = this.knownTiers
                    ? new FlexEncReader(new FileInputStream(this.file), this._encodingHash)
                    : new FlexEncReader(this.file);
            int emptyCount = 0;
            int emptyLen = 0;
            // try/finally rather than try-with-resources: FlexEncReader is not
            // known to implement AutoCloseable from what is visible here.
            try {
                ShoeboxRecord record = null;
                ShoeboxRecord lastRecord = null;
                boolean inRecords = false;
                String line;
                int lineNumber = 0;
                StringBuilder header = new StringBuilder(96);
                long lastTime = 0L;
                while ((line = reader.readLine()) != null) {
                    ++lineNumber;
                    if (line.length() == 0) {
                        continue;
                    }
                    if (line.trim().length() == 0) {
                        // Counted for the log only; the line is still processed below.
                        ++emptyCount;
                        emptyLen += line.length();
                    }
                    if (line.indexOf('\t') != -1) {
                        if (line.trim().indexOf('\t') != -1 || line.length() < 2) {
                            _logger.log(Level.SEVERE, "$ Expanding tabs using 4 char tabstop size in line " + lineNumber + " of: " + this.file);
                            line = DataUtil.expandTabs(line, 4);
                            if (!AnnexParser.isTraceMode()) {
                                // The reader is closed by the finally block.
                                throw new IOException("Text must not contain tabstop chars in Shoebox/Toolbox");
                            }
                        } else {
                            _logger.log(Level.INFO, "$ Removed trailing tabs (and spaces) in line " + lineNumber + " of: " + this.file);
                            while (line.endsWith("\t") || line.endsWith(" ")) {
                                line = line.substring(0, line.length() - 1);
                            }
                        }
                    }
                    if (this.REF_TAG.equals(ShoeboxRecord.getTierTag(line))) {
                        // A new record starts: flush the record before the previous one.
                        if (lastRecord != null) {
                            lastTime = lastRecord.provideAlternativeTime(record, lastTime);
                            this.createTranscription(lastRecord, transcription);
                        }
                        lastRecord = record;
                        record = new ShoeboxRecord(line, this.tiers, this.knownTiers);
                        inRecords = true;
                        continue;
                    }
                    if (!inRecords) {
                        header.append("ShoeboxHeader = '");
                        header.append(line.trim());
                        header.append("'\n");
                        continue;
                    }
                    if (line.startsWith("\\ELANMedia")) {
                        header.append(ShoeboxRecord.getTierTag(line)).append(" = '");
                        header.append(ShoeboxRecord.getTierContent(line)).append("'\n");
                        continue;
                    }
                    record.put(line, this.file.getCanonicalPath());
                }
                transcription.setInfo(header.toString());
                // Flush the (up to) two records still pending.
                if (lastRecord != null) {
                    lastTime = lastRecord.provideAlternativeTime(record, lastTime);
                    this.createTranscription(lastRecord, transcription);
                }
                if (record != null) {
                    lastTime = record.provideAlternativeTime(null, lastTime);
                    this.createTranscription(record, transcription);
                }
            }
            finally {
                reader.close();
            }
            if (emptyCount > 0) {
                _logger.log(Level.INFO, "$ Whitespace-only lines: " + emptyCount + " (spaces: " + emptyLen + ") in: " + this.file);
            }
        }
        catch (IOException e) {
            _logger.log(Level.SEVERE, "IOException in parse: " + e + " for: " + this.file);
            throw e;
        }
        String assoc = this.createTierAssociations(transcription);
        if (assoc.length() == 0) {
            _logger.log(Level.WARNING, "ShoeboxParser: No tiers found in: " + this.file);
            throw new IOException("File contains no Shoebox tiers at all");
        }
        _logger.log(Level.FINE, "ShoeboxParser: Tiers: " + assoc + " in file: " + this.file);
    }

    /**
     * Splits {@code content} at runs of spaces and adds one annotation per
     * token to {@code tier}. Begin and end time of each token are
     * interpolated from its character position relative to {@code longest}.
     * The last token always ends at {@code endTime}.
     *
     * @param tier      the tier receiving the annotations
     * @param content   the text of the interlinear line
     * @param longest   the width used as 100% for the interpolation
     * @param startTime begin time of the whole line
     * @param endTime   end time of the whole line
     * @throws IllegalArgumentException if a position exceeds {@code longest}
     *                                  or {@code startTime > endTime}
     */
    public void tokenizeTier(AnnexTier tier, String content, int longest, long startTime, long endTime) {
        int position = 0;
        int lastPosition = 0;
        while (content.length() > 0) {
            String token;
            int firstSpace = content.indexOf(' ');
            lastPosition = position;
            if (firstSpace == -1) {
                // Last token: it extends to the end of the line.
                token = content;
                content = "";
                position = longest;
            } else {
                token = content.substring(0, firstSpace);
                position += token.length();
                // Skip the whole run of spaces separating this token from the next.
                int nextWords = firstSpace + 1;
                while (nextWords < content.length() && content.charAt(nextWords) == ' ') {
                    ++nextWords;
                }
                content = content.substring(nextWords);
                position += nextWords - firstSpace;
                if (content.length() == 0) {
                    position = longest;
                }
            }
            // Computed once, outside the try: an IllegalArgumentException from
            // getAlignTime is a real error and must not be mistaken for the
            // annotation constructor rejecting the token.
            long beginTime = this.getAlignTime(lastPosition, longest, startTime, endTime);
            long finishTime = this.getAlignTime(position, longest, startTime, endTime);
            AnnexAnnotation annexAnnot;
            try {
                annexAnnot = new AnnexAnnotation(token, beginTime, finishTime, true, null);
            }
            catch (IllegalArgumentException iae) {
                // Retry with a leading space so that orphan diacritics have a base to combine with.
                token = " " + token;
                annexAnnot = new AnnexAnnotation(token, beginTime, finishTime, true, null);
                String filename;
                try {
                    filename = this.file.getCanonicalPath();
                }
                catch (IOException ioe) {
                    filename = this.file.getPath();
                }
                _logger.log(Level.WARNING, "Added space for orphan diacritics to combine with: " + annexAnnot + " in file: " + filename);
            }
            // Only a token spanning the complete line has non-interpolated times.
            annexAnnot.interpolatedTime = lastPosition != 0 || position != longest;
            tier.addAnnotation(annexAnnot);
        }
    }

    /**
     * Converts one record into annotations. Tags without content and TIME
     * tiers are skipped; aligned tiers are tokenized, all other tiers get a
     * single annotation covering the record.
     *
     * @param record        the record to convert
     * @param transcription the transcription receiving the annotations
     */
    private void createTranscription(ShoeboxRecord record, AnnexTranscription transcription) {
        for (String tag : record.tags()) {
            String content = record.getContent(tag);
            if (content == null) {
                continue;
            }
            ShoeboxTiers.TierType type = record.getTierType(tag);
            if (type == ShoeboxTiers.TierType.TIME) {
                continue;
            }
            AnnexTier tier;
            if (type == ShoeboxTiers.TierType.ALIGNED || ShoeboxRecord.isProbablyAligned(tag, content)) {
                // Never use a width smaller than the line itself.
                int interlinearWidth = Math.max(record.getLongestAlignedTier(), content.length());
                tier = this.getOrCreateTier(transcription, tag);
                this.tokenizeTier(tier, content, interlinearWidth, record.getStartTime(), record.getEndTime());
                continue;
            }
            tier = this.getOrCreateTier(transcription, tag);
            tier.addAnnotation(new AnnexAnnotation(content, record.getStartTime(), record.getEndTime(), true, null));
        }
    }

    /**
     * Links every tier of the transcription to its parent tier and, for
     * ALIGNED tiers, links every annotation to the last parent annotation
     * that does not begin after it.
     *
     * @param transcription the transcription whose tiers are linked
     * @return a short, space separated description of the tier structure for
     *         logging; empty when no tiers are known
     * @throws IOException              if the canonical path of the file
     *                                  cannot be determined for a log message
     * @throws IllegalArgumentException if a parent tier named in the tier
     *                                  structure is missing from the transcription
     */
    private String createTierAssociations(AnnexTranscription transcription) throws IOException {
        StringBuilder assocLog = new StringBuilder(32);
        int parentsFound = 0;
        int parentsUnknown = 0;
        for (String tierName : this.tiers.names(false)) {
            AnnexTier childTier;
            try {
                childTier = this.getTier(transcription, tierName);
            }
            catch (IllegalArgumentException e) {
                _logger.log(Level.SEVERE, "createTierAssociations: tier not found: " + tierName + " in: " + this.file);
                assocLog.append(tierName).append('?');
                continue;
            }
            if (childTier.annotations.size() == 1) {
                if (tierName.startsWith("ELANMedia")) {
                    continue;
                }
                AnnexAnnotation anno = childTier.annotations.get(0);
                _logger.log(Level.INFO, "$ One-annotation-tier: " + tierName + "[" + anno.beginTime + "-" + anno.endTime + "] <" + anno.value + "> in: " + this.file);
                assocLog.append('@');
            }
            String parentName = this.tiers.getParent(tierName);
            if (parentName == null) {
                assocLog.append('[').append(tierName).append("] ");
                childTier.parentTier = null;
                continue;
            }
            AnnexTier parentTier = this.getTier(transcription, parentName);
            childTier.parentTier = parentTier;
            boolean aligned = this.tiers.getType(tierName) == ShoeboxTiers.TierType.ALIGNED;
            if (aligned) {
                assocLog.append("A:");
            }
            assocLog.append(tierName);
            if (!"ref".equals(parentName)) {
                assocLog.append('<').append(parentName);
            }
            assocLog.append(' ');
            if (!aligned) {
                continue;
            }
            childTier.type = "ALIGNED";
            for (int i = 0; i < childTier.annotations.size(); ++i) {
                AnnexAnnotation annot = childTier.annotations.get(i);
                // Last parent annotation that begins at or before this one.
                AnnexAnnotation mother = null;
                for (int j = 0; j < parentTier.annotations.size(); ++j) {
                    AnnexAnnotation next = parentTier.annotations.get(j);
                    if (next.beginTime > annot.beginTime) {
                        break;
                    }
                    mother = next;
                }
                if (mother == null) {
                    ++parentsUnknown;
                    // Report the first ten failures individually, the eleventh as a summary, then stay quiet.
                    if (parentsUnknown <= 10) {
                        _logger.log(Level.WARNING, "Warning: could not find parent element for '" + annot.value + "' in tier " + tierName + " in: " + this.file.getCanonicalPath());
                    } else if (parentsUnknown == 11) {
                        _logger.log(Level.SEVERE, "More than 10 failed searches for parent element in interlinear text in: " + this.file.getCanonicalPath());
                    }
                } else {
                    ++parentsFound;
                }
                annot.refAnnotation = mother;
            }
        }
        if (parentsFound > 0) {
            _logger.log(Level.FINE, "createTierAssociations: Matched " + parentsFound + " annotations in: " + this.file.getCanonicalPath());
        }
        if (parentsUnknown > 0) {
            _logger.log(Level.SEVERE, "createTierAssociations: Failed to match " + parentsUnknown + " annotations in: " + this.file.getCanonicalPath());
        }
        return assocLog.toString().trim();
    }

    /**
     * Returns the tier with the given name, creating and registering a new
     * tier of type {@code "text"} when the transcription has none.
     *
     * @param transcription the transcription to search / extend
     * @param tierName      the tier name
     * @return the existing or newly created tier
     */
    private AnnexTier getOrCreateTier(AnnexTranscription transcription, String tierName) {
        try {
            return this.getTier(transcription, tierName);
        }
        catch (IllegalArgumentException e) {
            AnnexTier tier = new AnnexTier(tierName, "text");
            transcription.getTiers().add(tier);
            return tier;
        }
    }

    /**
     * Looks up a tier by name.
     *
     * @param transcription the transcription to search
     * @param tierName      the tier name
     * @return the first tier with that name
     * @throws IllegalArgumentException if no tier has that name
     */
    private AnnexTier getTier(AnnexTranscription transcription, String tierName) throws IllegalArgumentException {
        for (AnnexTier candidate : transcription.getTiers()) {
            if (candidate.name.equals(tierName)) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("tier not found: " + tierName);
    }

    /**
     * Linearly interpolates a time for character position {@code i} of
     * {@code size} within {@code [startTime, endTime]}.
     *
     * @param i         the character position, {@code 0..size}
     * @param size      the total width; {@code 0} yields {@code startTime}
     * @param startTime begin of the interval
     * @param endTime   end of the interval
     * @return the interpolated time, truncated towards zero
     * @throws IllegalArgumentException if {@code i > size} or
     *                                  {@code startTime > endTime}
     */
    private long getAlignTime(int i, int size, long startTime, long endTime) {
        if (i > size) {
            _logger.log(Level.SEVERE, "getAlignTime: position beyond size: " + i + " > " + size + " [" + startTime + ", " + endTime + "]");
            throw new IllegalArgumentException("Position is beyond size");
        }
        if (startTime > endTime) {
            _logger.log(Level.SEVERE, "getAlignTime: start after end: " + i + " of " + size + " [" + startTime + " > " + endTime + "]");
            throw new IllegalArgumentException("Start time after end time");
        }
        if (size == 0) {
            return startTime;
        }
        double fraction = (double)i / (double)size;
        long duration = endTime - startTime;
        return (long)(fraction * (double)duration) + startTime;
    }

    /**
     * Logs name and annotation count of every tier; debugging aid.
     *
     * @param transcription the transcription to describe
     */
    private void printAnnexTranscription(AnnexTranscription transcription) {
        for (AnnexTier tier : transcription.getTiers()) {
            _logger.log(Level.INFO, "| " + tier.name + ": " + tier.annotations.size());
        }
    }

    /** Prints the command line help. */
    private static void printUsage() {
        System.out.println("ShoeboxParser sbxfile");
        System.out.println("ShoeboxParser sbxfile typfile");
        System.out.println("ShoeboxParser -d directory");
        System.out.println("[typfile is of format .typ or .mkr]");
    }

    /**
     * Command line test driver: parses a single file, a file with its meta
     * file, or (with {@code -d}) every file of a directory.
     *
     * @param args {@code sbxfile}, {@code sbxfile typfile} or {@code -d directory}
     */
    public static void main(String[] args) {
        if (args.length == 0 || args.length > 2) {
            printUsage();
            return;
        }
        if ("-d".equals(args[0])) {
            if (args.length < 2) {
                // "-d" without a directory used to fail with ArrayIndexOutOfBoundsException.
                printUsage();
                return;
            }
            File dir = new File(args[1]);
            // listFiles() returns null when dir is not a directory or cannot be read.
            File[] files = dir.listFiles();
            if (files == null) {
                System.out.println("Not a readable directory: " + dir.getPath());
                return;
            }
            ArrayList<String> errors = new ArrayList<String>();
            for (File candidate : files) {
                try {
                    System.out.println("Parsing " + candidate.getPath());
                    new AnnexTranscription("test", 2, candidate);
                }
                catch (RuntimeException e) {
                    System.out.println("Error in: " + candidate.getPath() + ": " + e);
                    _logger.log(Level.SEVERE, "RuntimeException in main for: " + candidate.getPath() + ": " + e, e);
                    errors.add(candidate.getPath());
                }
            }
            System.out.println("Parsed " + files.length + " files.");
            System.out.println("Exceptions in: " + errors);
        } else if (args.length == 1) {
            new AnnexTranscription("test", 2, new File(args[0]));
        } else {
            new AnnexTranscription("test", new File(args[0]), new File(args[1]));
        }
    }
}

