/*
 * File:     DobesParser.java
 * Project:  MPI Linguistic Application
 * Date:     03 April 2006
 *
 * Copyright (C) 2001-2006  Max Planck Institute for Psycholinguistics
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package mpi.eudico.server.corpora.clomimpl.dobes;

import mpi.eudico.server.util.ServerConfiguration;

import org.xml.sax.AttributeList;
import org.xml.sax.HandlerBase;
import org.xml.sax.InputSource;
import org.xml.sax.SAXParseException;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;

import java.util.Collections;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Vector;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;


/**
 * A parser for DOBES minimal format compliant XML files.
 *
 * <p>The class is a singleton that caches the result of the most recently
 * parsed file. Every CHUNK element supplies a speaker id and a start and end
 * time; each RENDERED_TEXT or TRANSLATION element found after it becomes one
 * annotation record on the tier "&lt;speaker&gt;-RT" or "&lt;speaker&gt;-TR"
 * respectively.</p>
 *
 * <p>{@link #Instance()}, {@link #getTierNames(String)} and
 * {@link #getAnnotationsFor(String, String)} are synchronized, so concurrent
 * callers cannot observe a half parsed file. The SAX callback methods are
 * public only because the handler contract requires it; they are meant to be
 * called by the SAX parser, not by client code.</p>
 *
 * @author Hennie Brugman
 * @version 6-Apr-2001
 */
public class DobesParser extends HandlerBase {
    /** The single instance, created lazily by {@link #Instance()}. */
    private static DobesParser parser;

    private boolean verbose;

    /** The validating SAX parser; null when it could not be created. */
    private SAXParser saxParser;

    /** Name of the file whose content is held in tiers; null if none yet. */
    private String lastParsed;
    private String currentFileName;
    private File xmlFile;
    private boolean parseError;

    /**
     * Maps a tier id (String) to a Vector of annotation records in document
     * order. Each record is a Vector holding start time, end time and text.
     */
    private Hashtable tiers;
    private String currentTierId;
    private String currentAnnotationId;
    private String currentSpeakerId;
    private String currentStart;
    private String currentEnd;

    /** The record of the RENDERED_TEXT / TRANSLATION element being read. */
    private Vector currentRecord;

    /** Character data collected since the last element start; may be null. */
    private StringBuffer content;

    /**
     * Private constructor for DobesParser because the Singleton pattern is
     * applied here. A failure to create the SAX parser is reported on stderr
     * and leaves the instance usable, but every parse will then fail.
     */
    private DobesParser() {
        // start with an empty result so queries never see a null table
        tiers = new Hashtable();
        lastParsed = null;
        verbose = false;

        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setValidating(true);
            saxParser = factory.newSAXParser();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the single incarnation of DobesParser to the caller, creating
     * it on first use.
     *
     * @return the shared DobesParser instance
     */
    public static synchronized DobesParser Instance() {
        if (parser == null) {
            parser = new DobesParser();
        }

        return parser;
    }

    /**
     * Returns the names of the Tiers that are present in the Transcription
     * file.
     *
     * @param fileName the DOBES minimal XML file to read the tiers from
     *
     * @return a new Vector with the tier names (Strings) in natural order;
     *         empty when the file could not be parsed or holds no tiers
     */
    public synchronized Vector getTierNames(String fileName) {
        // make sure that the correct file has been parsed
        parse(fileName);

        Vector tierNames = new Vector(tiers.keySet());
        Collections.sort(tierNames);

        return tierNames;
    }

    /**
     * Returns a Vector with the Annotations for this Tier. Each
     * AnnotationRecord is itself a Vector that contains begin time, end time
     * and text values, in that order, as found in the file.
     *
     * @param tierName the name of the tier, as returned by
     *        {@link #getTierNames(String)}
     * @param fileName the DOBES minimal XML file to read the annotations from
     *
     * @return a new Vector with the annotation records in document order;
     *         empty when the tier is unknown
     */
    public synchronized Vector getAnnotationsFor(String tierName,
        String fileName) {
        // make sure that the correct file has been parsed
        parse(fileName);

        // Hashtable rejects null keys, so treat a null name as unknown tier
        Vector records = (tierName == null) ? null : (Vector) tiers.get(tierName);

        if (records == null) {
            return new Vector();
        }

        // hand out a copy so callers cannot reorder the cached tier
        return new Vector(records);
    }

    /**
     * Parses a DOBES-minimal compliant xml file, unless it is the file that
     * was parsed last. A failure is reported on stdout; whatever was read up
     * to that point stays available and is cached under the file name.
     *
     * @param fileName the DOBES-minimal compliant xml file that must be
     *        parsed.
     */
    private void parse(String fileName) {
        // only parse the same file once
        if ((fileName != null) && fileName.equals(lastParsed)) {
            return;
        }

        // reset the cache and the bookkeeping together so that they can
        // never describe different files
        tiers = new Hashtable();
        lastParsed = fileName;
        currentFileName = fileName;
        currentTierId = null;
        currentAnnotationId = null;
        currentSpeakerId = null;
        currentStart = null;
        currentEnd = null;
        currentRecord = null;
        content = null;

        try {
            if (saxParser == null) {
                throw new IllegalStateException(
                    "SAX parser could not be created");
            }

            xmlFile = new File(fileName);
            saxParser.parse(xmlFile, this);
        } catch (Exception e) {
            printErrorLocationInfo("Fatal(?) Error! " + e.getMessage());
        }
    }

    /**
     * HandlerBase method; clears the error flag at the start of a document.
     */
    public void startDocument() {
        parseError = false;
    }

    /**
     * HandlerBase method; nothing needs to be done at the end of a document.
     */
    public void endDocument() {
    }

    /**
     * HandlerBase method. Remembers speaker and times of a CHUNK element and
     * opens a new annotation record for a RENDERED_TEXT or TRANSLATION
     * element. Other elements (e.g. HEADER) are ignored.
     *
     * @param name the element name
     * @param attributes the attributes of the element
     */
    public void startElement(String name, AttributeList attributes) {
        // text collected so far belongs to a previous element
        content = null;

        if (name.equals("CHUNK")) {
            currentSpeakerId = attributes.getValue("SPEAKER");
            currentStart = attributes.getValue("START");
            currentEnd = attributes.getValue("END");
        } else if (name.equals("RENDERED_TEXT") || name.equals("TRANSLATION")) {
            String suffix = name.equals("RENDERED_TEXT") ? "-RT" : "-TR";
            currentTierId = currentSpeakerId + suffix;

            // the start time is used as annotation id in error reports
            currentAnnotationId = currentStart;

            // create the tier on first use
            Vector tierRecords = (Vector) tiers.get(currentTierId);

            if (tierRecords == null) {
                tierRecords = new Vector();
                tiers.put(currentTierId, tierRecords);
            }

            // create new "AnnotationRecord" with start and end time; the text
            // is appended when the element ends
            currentRecord = new Vector();
            currentRecord.add(currentStart);
            currentRecord.add(currentEnd);
            tierRecords.add(currentRecord);
        }
    }

    /**
     * HandlerBase method. Completes the current annotation record with the
     * collected text when a RENDERED_TEXT or TRANSLATION element ends. The
     * text is null when no character data was seen.
     *
     * @param name the element name
     */
    public void endElement(String name) {
        if (name.equals("RENDERED_TEXT") || name.equals("TRANSLATION")) {
            if (currentRecord != null) {
                currentRecord.add((content == null) ? null : content.toString());
            }
        }
    }

    /**
     * HandlerBase method. Appends the character data verbatim (white space
     * included) to the text collected for the current element; the parser may
     * deliver the text of one element in several calls.
     *
     * @param buf the character buffer
     * @param start index of the first character to use
     * @param length number of characters to use
     */
    public void characters(char[] buf, int start, int length) {
        if (content == null) {
            content = new StringBuffer(length);
        }

        content.append(buf, start, length);
    }

    /**
     * HandlerBase method. Redirects a reference to a DTD to the file with the
     * same base name in the directory dobes/dtd under the corpus directory.
     *
     * @param publicId the public identifier of the entity, not used
     * @param systemId the system identifier of the entity
     *
     * @return an InputSource for the local DTD, or null when the entity is
     *         not a DTD or the local file could not be opened
     */
    public InputSource resolveEntity(String publicId, String systemId) {
        InputSource inputSource = null;

        try {
            // Open an InputSource to a DOBES-minimal DTD
            // The location of the dtd defs is under the corpus directory in the path dobes/dtd.
            if ((systemId != null) && systemId.endsWith(".dtd")) {
                // keep only the part after the last '/', i.e. the file name
                String dtdName = systemId.substring(systemId.lastIndexOf('/') +
                        1);
                String fileName = ServerConfiguration.CORPUS_DIRECTORY +
                    File.separator + "dobes" + File.separator + "dtd" +
                    File.separator + dtdName;

                inputSource = new InputSource(new InputStreamReader(
                            new FileInputStream(fileName), "UTF8"));
            }
        } catch (Exception e) {
            e.printStackTrace();
        }

        return inputSource;
    }

    /**
     * HandlerBase method; reports a recoverable parse error and flags it.
     *
     * @param e the error reported by the parser
     */
    public void error(SAXParseException e) {
        printErrorLocationInfo("Parse error " + e.getMessage());
        parseError = true;
    }

    /**
     * HandlerBase method; reports a fatal parse error and flags it.
     *
     * @param e the error reported by the parser
     */
    public void fatalError(SAXParseException e) {
        printErrorLocationInfo("Fatal Parse Error " + e.getMessage());
        parseError = true;
    }

    private void println(String s) {
        if (verbose) {
            System.out.println(s);
        }
    }

    /**
     * Prints a message together with the file, tier and annotation that were
     * being processed when the problem occurred.
     *
     * @param message the message to print first
     */
    private void printErrorLocationInfo(String message) {
        System.out.println(message);
        System.out.println("Exception for " + currentFileName);
        System.out.println("Tier id " + currentTierId);
        System.out.println("Annotation id " + currentAnnotationId);
    }
}
