/*
 * Decompiled with CFR 0.152.
 */
package nl.mpi.annot.search.lib;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Date;
import java.util.Locale;
import java.util.Properties;
import nl.mpi.annot.search.lib.LuceneIndexWriter;
import nl.mpi.annot.search.lib.SearchClient;
import nl.mpi.annot.search.lib.WritePostgreSQL;
import nl.mpi.annot.tools.data.AnnexFileTypes;
import nl.mpi.annot.tools.data.AnnexTranscription;
import nl.mpi.annot.tools.data.CorpusStructDB;
import nl.mpi.corpusstructure.CorpusStructureDB;
import org.apache.log4j.Logger;

/**
 * Command line ingester: walks all annotation files known to the corpus
 * structure database, parses them, hands them to a {@link WritePostgreSQL}
 * writer thread and, optionally, to a Lucene index.
 *
 * <p>See {@link #usage()} for the supported command line / properties modes.
 */
public class SearchCorpusDB {
    private static final Logger _log = Logger.getLogger("SearchCorpusDB-ingester");
    private static final boolean _showProgress = true;

    /** Files that exist but cannot be read; reset at the start of every {@link #fillDatabase} run. */
    private static ArrayList<String> permissionDenied;
    /** Files listed in the corpus structure DB that do not exist on disk. */
    private static ArrayList<String> notExisting;
    /** Files that parsed fine but contain no tiers. */
    private static ArrayList<String> emptyTranscriptions;
    /** Files that were skipped as too big, failed to parse, or yielded an invalid transcription. */
    private static ArrayList<String> notParsable;
    /** Optional Lucene index; stays {@code null} when no index directory is configured or opening failed. */
    private static LuceneIndexWriter _luceneIndexWriter = null;

    /**
     * Runs the complete ingest: creates the tables, processes every searchable
     * mime type, logs statistics and problem files, and finally swaps the new
     * tables in. All database steps are skipped when the writer is in
     * simulation mode.
     *
     * @param writer the writer providing the database connection
     */
    private static void fillDatabase(WritePostgreSQL writer) {
        long totalDuration = WritePostgreSQL.start_stopwatch();
        if (!writer.isSimulation() && !createTables(writer)) {
            // The error has already been logged; nothing useful can be done without tables.
            return;
        }
        WritePostgreSQL.stop_stopwatch("DB table setup", totalDuration);
        _log.info("Database tables created" + (writer.isSimulation() ? " (simulation)" : ""));
        logLocaleInfo();

        permissionDenied = new ArrayList<String>();
        notExisting = new ArrayList<String>();
        notParsable = new ArrayList<String>();
        emptyTranscriptions = new ArrayList<String>();

        String[] mimetypes = mimeTypesToProcess();
        WritePostgreSQL.Stats[] stats = new WritePostgreSQL.Stats[mimetypes.length];
        for (int i = 0; i < mimetypes.length; ++i) {
            long stm = WritePostgreSQL.start_stopwatch();
            stats[i] = SearchCorpusDB.process(mimetypes[i], writer);
            WritePostgreSQL.stop_stopwatch("read " + stats[i].nFiles + " " + shortFormatName(mimetypes[i]) + " files", stm);
        }
        _log.info("Ready parsing at " + new Date());

        WritePostgreSQL.Stats totalStats = new WritePostgreSQL.Stats();
        for (int i = 0; i < mimetypes.length; ++i) {
            SearchCorpusDB.printStats(stats[i], shortFormatName(mimetypes[i]), totalStats);
        }
        // The second accumulator is a throw-away: the totals must not be added to themselves.
        SearchCorpusDB.printStats(totalStats, "*/*", new WritePostgreSQL.Stats());

        logProblemOverview();
        logProblemFiles("Not existing: ", notExisting);
        logProblemFiles("Permission denied: ", permissionDenied);
        logProblemFiles("Empty Transcriptions: ", emptyTranscriptions);
        logProblemFiles("Not parsable: ", notParsable);

        if (!writer.isSimulation()) {
            swapInNewTables(writer);
        }
        _log.info("Ready!!");
        WritePostgreSQL.stop_stopwatch("all " + totalStats.nFiles + " done", totalDuration);
        if (!writer.isSimulation()) {
            writer.close();
        }
    }

    /**
     * Creates the database tables for this ingest run.
     *
     * @return {@code true} on success, {@code false} if an SQL error occurred (already logged)
     */
    private static boolean createTables(WritePostgreSQL writer) {
        Statement st = null;
        try {
            st = writer.createStatement();
            WritePostgreSQL.initTables(st);
            return true;
        }
        catch (SQLException e) {
            _log.error("Error creating database tables, giving up: " + e, e);
            return false;
        }
        finally {
            closeQuietly(st);
        }
    }

    /**
     * Creates indexes, drops the old data, renames the freshly filled tables
     * and runs ANALYZE on them. SQL errors are logged, not propagated.
     */
    private static void swapInNewTables(WritePostgreSQL writer) {
        Statement st = null;
        try {
            st = writer.createStatement();
            _log.info("Creating indexes, dropping old tables, renaming tables at: " + new Date());
            WritePostgreSQL.createIndexes(st);
            long stw = WritePostgreSQL.start_stopwatch();
            WritePostgreSQL.dropOldData(st);
            WritePostgreSQL.renameTables(st);
            WritePostgreSQL.stop_stopwatch("drop / move tables", stw);
            stw = WritePostgreSQL.start_stopwatch();
            st.execute("ANALYZE search.users");
            st.execute("ANALYZE search.nodes");
            st.execute("ANALYZE search.vpaths");
            st.execute("ANALYZE search.fingerprints");
            st.execute("ANALYZE search.tiers");
            st.execute("ANALYZE search.annotations");
            WritePostgreSQL.stop_stopwatch("analyze tables", stw);
        }
        catch (SQLException e) {
            _log.error("Error swapping database content: " + e, e);
        }
        finally {
            closeQuietly(st);
        }
    }

    /**
     * Closes a statement if one was actually created. A failure to close is
     * only logged: by the time this runs the real work has either succeeded or
     * its own error has been reported.
     */
    private static void closeQuietly(Statement st) {
        if (st == null) {
            // createStatement() itself failed, so there is nothing to close.
            return;
        }
        try {
            st.close();
        }
        catch (SQLException e) {
            _log.debug("Ignoring error while closing statement: " + e, e);
        }
    }

    /** Logs the default locale, its upper-casing behaviour and the relevant system properties. */
    private static void logLocaleInfo() {
        _log.info("Current default locale: " + Locale.getDefault().toString() + " String \"caf\u00e9\".toUpperCase() is: \"" + "caf\u00e9".toUpperCase() + "\", Character: '\u00e9' to '" + Character.toUpperCase('\u00e9') + "'");
        Properties props = new Properties(System.getProperties());
        _log.info("user.language=\"" + props.getProperty("user.language", "") + "\" user.region=\"" + props.getProperty("user.region", "") + "\" file.encoding=\"" + props.getProperty("file.encoding", "") + "\"");
    }

    /** Returns "Unspecified" followed by all formats reported by {@link SearchClient#getSearchableFormats()}. */
    private static String[] mimeTypesToProcess() {
        String[] searchable = SearchClient.getSearchableFormats();
        String[] mimetypes = new String[searchable.length + 1];
        mimetypes[0] = "Unspecified";
        System.arraycopy(searchable, 0, mimetypes, 1, searchable.length);
        return mimetypes;
    }

    /** Strips a leading "text/", then a leading "x-", and a trailing "-text" from a mime type for display. */
    private static String shortFormatName(String mimeType) {
        return mimeType.replaceAll("^text/", "").replaceAll("^x-", "").replaceAll("-text$", "");
    }

    /** Logs the one-line summary of how many files fell into each problem category. */
    private static void logProblemOverview() {
        _log.warn("Overview of problematic files: " + (notExisting.size() > 0 ? "" + notExisting.size() + " missing, " : "[all exist], ") + (permissionDenied.size() > 0 ? "" + permissionDenied.size() + " not readable, " : "[all readable], ") + (emptyTranscriptions.size() > 0 ? "" + emptyTranscriptions.size() + " without transcriptions, " : "[none empty], ") + (notParsable.size() > 0 ? "" + notParsable.size() + " not parseable" : "[all parse okay]"));
    }

    /** Logs the size of one problem list (as an error, if non-empty) followed by each of its entries. */
    private static void logProblemFiles(String label, ArrayList<String> files) {
        if (files.size() > 0) {
            _log.error(label + files.size());
        }
        for (String entry : files) {
            _log.warn(entry);
        }
    }

    /**
     * Processes all files of one annotation format: parses each file, feeds it
     * to the Lucene index (if open) and to a dedicated writer thread, then
     * waits for that thread to finish.
     *
     * @param annotationFormat mime type to select the files by
     * @param writer writer whose connection the per-format writer thread uses
     * @return the statistics for this format, as returned by the writer thread
     */
    private static WritePostgreSQL.Stats process(String annotationFormat, WritePostgreSQL writer) {
        CorpusStructureDB csdb = CorpusStructDB.getCorpusStructureDB();
        String[] format = new String[]{annotationFormat};
        String[] desc = csdb.getDescendants(csdb.getRootNodeId(), 8, format, "ignore", true);
        _log.info(shortFormatName(annotationFormat) + " processing: " + desc.length + " '" + annotationFormat + "' files");
        // Print progress roughly once per percent, but at least for every file when there are few.
        int feedBackStep = Math.max(1, desc.length / 100);
        WritePostgreSQL.Stats stats = new WritePostgreSQL.Stats();
        System.err.print(annotationFormat + ": ");
        _log.debug("writer thread: constructing for " + annotationFormat);
        WritePostgreSQL writerThread = new WritePostgreSQL(writer.getConnection());
        writerThread.setName("Process_" + annotationFormat + "_writer");
        writerThread.start();
        for (int i = 0; i < desc.length; ++i) {
            if (i % feedBackStep == 0) {
                // long arithmetic so the percentage cannot overflow for very large corpora
                System.err.print(100L * i / desc.length + " ");
            }
            ++stats.nFiles;
            String filePath = CorpusStructDB.getFilePathFor(desc[i]);
            File file = new File(filePath);
            if (!file.exists()) {
                notExisting.add(annotationFormat + "  ne  " + filePath);
                continue;
            }
            ++stats.nExist;
            if (!file.canRead()) {
                permissionDenied.add(annotationFormat + "  pd  " + filePath);
                ++stats.nPermissionDenied;
                continue;
            }
            int type = AnnexFileTypes.getTypeFor((String)CorpusStructDB.getMimeTypeFor(desc[i]));
            if (type == -1) {
                type = guessTypeFromFileName(filePath);
                if (type == -1) {
                    _log.error("process: Cannot guess type from file name for: " + filePath);
                } else {
                    _log.info("process: Had to guess type from file name, please fix metadata for: " + filePath);
                }
            }
            // Type 7 is what ".pdf" maps to in the file name guess; it is exempt from the size limit.
            if (type != 7 && file.length() > 10000000L) {
                _log.warn("Too big file, skipped: " + file.length() + " bytes in: " + file.getAbsolutePath());
                markNotParsable(annotationFormat, filePath, stats);
                continue;
            }
            AnnexTranscription transcription = null;
            if (type != -1) {
                try {
                    transcription = new AnnexTranscription(desc[i], type, file);
                }
                catch (RuntimeException e) {
                    _log.error("process: Exception in AnnexTranscription for: " + file + " " + type + " " + desc[i] + ": " + e, e);
                    markNotParsable(annotationFormat, filePath, stats);
                    continue;
                }
            }
            if (transcription == null || !transcription.isValid()) {
                markNotParsable(annotationFormat, filePath, stats);
                continue;
            }
            if (transcription.getTiers().size() == 0) {
                // Recorded for the report only; the transcription is still sent on.
                emptyTranscriptions.add(annotationFormat + "  em  " + filePath);
            }
            if (_luceneIndexWriter != null) {
                try {
                    _luceneIndexWriter.indexTranscription(transcription, file);
                }
                catch (IOException ioe) {
                    _log.error("Failed to add to Lucene index: " + filePath);
                    _log.debug("Lucene indexTranscription call threw IOException: " + ioe, ioe);
                }
            }
            writerThread.sendFileToDb(transcription);
        }
        _log.debug("writer thread: draining queue");
        // NOTE(review): presumably an end-of-input marker for the writer thread - confirm in WritePostgreSQL.
        writerThread.sendFileToDb(new AnnexTranscription("Last@" + annotationFormat, 42, null));
        stats = writerThread.getStats(stats);
        _log.debug("writer thread: fetched stats");
        boolean interrupted = false;
        do {
            try {
                writerThread.join(10L);
            }
            catch (InterruptedException ie) {
                // Keep waiting for the writer, but remember the interrupt for our caller.
                interrupted = true;
            }
            _log.debug("writer thread: waiting for join");
        } while (writerThread.isAlive());
        if (interrupted) {
            // Restored only after the loop: doing it earlier would make every join() throw at once.
            Thread.currentThread().interrupt();
        }
        _log.debug("writer thread: completed");
        System.err.println(annotationFormat + " done.");
        return stats;
    }

    /** Records a file as not parsable, both in the report list and in the statistics. */
    private static void markNotParsable(String annotationFormat, String filePath, WritePostgreSQL.Stats stats) {
        notParsable.add(annotationFormat + "  np  " + filePath);
        ++stats.nNotParsable;
    }

    /**
     * Guesses the numeric file type code from the file name extension
     * (case sensitive), for files whose mime type is not recognised.
     *
     * @return the type code, or -1 if the extension is unknown
     */
    private static int guessTypeFromFileName(String filePath) {
        if (filePath.endsWith(".cha")) {
            return 1;
        }
        if (filePath.endsWith(".txt")) {
            return 3;
        }
        if (filePath.endsWith(".sht") || filePath.endsWith(".tbt")) {
            return 2;
        }
        if (filePath.endsWith(".html") || filePath.endsWith(".htm")) {
            return 4;
        }
        if (filePath.endsWith(".xml") || filePath.endsWith(".rt")) {
            return 5;
        }
        if (filePath.endsWith(".csv")) {
            return 6;
        }
        if (filePath.endsWith(".eaf")) {
            return 0;
        }
        if (filePath.endsWith(".pdf")) {
            return 7;
        }
        if (filePath.endsWith(".srt")) {
            return 8;
        }
        if (filePath.endsWith(".TextGrid")) {
            return 9;
        }
        return -1;
    }

    /**
     * Logs the statistics of one format and adds them to a running total.
     *
     * @param stats the statistics to log
     * @param title label used in the log heading
     * @param totalStats accumulator that every counter of {@code stats} is added to
     */
    private static void printStats(WritePostgreSQL.Stats stats, String title, WritePostgreSQL.Stats totalStats) {
        // Average annotation length scaled by 100, so two decimals can be printed with integer maths.
        long aal = 0L;
        if (stats.nAnnotations > 0L) {
            aal = 100L * stats.annotationsSize / stats.nAnnotations;
        }
        int nIngested = stats.nExist - stats.nPermissionDenied - stats.nEmpty - stats.nNotParsable - stats.nDBProblems;
        _log.info("# statistics for " + title + " files:");
        _log.info("#     files: " + stats.nFiles + "    existing: " + stats.nExist + "   ingested: " + nIngested);
        if (stats.nPermissionDenied + stats.nEmpty + stats.nNotParsable + stats.nDBProblems > 0) {
            _log.info("# " + (stats.nPermissionDenied > 0 ? "not accessible: " + stats.nPermissionDenied : "") + (stats.nEmpty > 0 ? "    empty: " + stats.nEmpty : "") + (stats.nNotParsable > 0 ? "    not parsable: " + stats.nNotParsable : "") + (stats.nDBProblems > 0 ? "    database problem: " + stats.nDBProblems : "") + "    [some errors]");
        }
        _log.info("#     tiers: " + stats.nTiers + "    annotations: " + stats.nAnnotations + "    average annotation length: " + aal / 100L + "." + (aal % 100L < 10L ? "0" : "") + aal % 100L);
        totalStats.nFiles += stats.nFiles;
        totalStats.nExist += stats.nExist;
        totalStats.nPermissionDenied += stats.nPermissionDenied;
        totalStats.nNotParsable += stats.nNotParsable;
        totalStats.nDBProblems += stats.nDBProblems;
        totalStats.nEmpty += stats.nEmpty;
        totalStats.nTiers += stats.nTiers;
        totalStats.nAnnotations += stats.nAnnotations;
        totalStats.annotationsSize += stats.annotationsSize;
    }

    /** Prints the command line help to standard output. */
    private static void usage() {
        String[] lines = new String[]{
            "\nFill searchdb/annex DB using all annotation files found in corpusstructure DB",
            "\nUsage:",
            "java -jar SearchDBIngester.jar corpusdb-server[:port] user password searchdb-server[:port][/searchdbname] user password\n",
            "Sample Usage:",
            "java -jar SearchDBIngester.jar corpushost webuser xxxx localhost annex yyyy\n",
            "Simulation mode:",
            "use 'simulate' as the second (searchdb) user name to work without a searchdb",
            "JDBC URLs used: jdbc:postgresql://...",
            "CORPUSDBSERVER/corpusstructure SEARCHDBSERVER/annex (or .../SEARCHDBNAME)",
            "Properties mode:",
            "java -jar SearchDBIngester.jar SearchDBIngester.properties",
            "Expects the following keys in SearchDBIngester.properties:",
            "corpusstructuredb.host corpusstructuredb.user corpusstructuredb.pass",
            "searchdb.host searchdb.user searchdb.pass",
            "Optional keys:",
            "lucene.indexdir, lucene.incremental, lucene.ngramsize,",
            "lucene.maxnfrequencies, lucene.indexpositions",
            "Set indexdir for Lucene index, default is replace (false), 1- to 5-grams (5),",
            "no n-gram freqs (0, keeps tiername/type freqs), no n-gram positions (false)."
        };
        for (String line : lines) {
            System.out.println(line);
        }
    }

    /**
     * Entry point. With six arguments the connection settings come from the
     * command line; with zero or one argument they are read from a properties
     * file (default {@code SearchDBIngester.properties}). Any other argument
     * count prints the usage text.
     *
     * @param args command line arguments, see {@link #usage()}
     */
    public static void main(String[] args) {
        if (args.length != 6 && args.length > 1) {
            SearchCorpusDB.usage();
            return;
        }
        boolean fromCommandLine = args.length == 6;
        if (fromCommandLine) {
            CorpusStructDB.setCorpusDB("jdbc:postgresql://" + args[0] + "/corpusstructure", args[1], args[2]);
        } else {
            String propertiesFile = args.length == 0 ? "SearchDBIngester.properties" : args[0];
            if (!loadIntoSystemProperties(propertiesFile)) {
                SearchCorpusDB.usage();
                System.out.println("Cannot load " + propertiesFile + " file.");
                return;
            }
            CorpusStructDB.setCorpusDB("jdbc:postgresql://" + System.getProperty("corpusstructuredb.host", "localhost") + "/corpusstructure", System.getProperty("corpusstructuredb.user", "imdiArchive"), System.getProperty("corpusstructuredb.pass", ""));
        }
        String url = fromCommandLine ? args[3] : System.getProperty("searchdb.host", "localhost/annex");
        if (url.contains("//")) {
            System.out.println("Warning: Full JDBC URL specified, better only specify host or host/dbname");
        } else {
            url = url.contains("/") ? "jdbc:postgresql://" + url : "jdbc:postgresql://" + url + "/annex";
        }
        String usr = fromCommandLine ? args[4] : System.getProperty("searchdb.user", "webuser");
        String pwd = fromCommandLine ? args[5] : System.getProperty("searchdb.pass", "");
        WritePostgreSQL writer;
        try {
            writer = new WritePostgreSQL(url, usr, pwd);
        }
        catch (SQLException sqle) {
            System.out.println("Cannot connect to database: " + sqle);
            SearchCorpusDB.usage();
            return;
        }
        // From here on the writer (and possibly the Lucene index) is open:
        // everything runs inside try/finally so both get closed on every exit path.
        try {
            if (!openLuceneIndex()) {
                return;
            }
            SearchCorpusDB.fillDatabase(writer);
        }
        finally {
            try {
                writer.close();
            }
            finally {
                closeLuceneIndex();
            }
        }
    }

    /**
     * Loads the given properties file on top of the current system properties
     * and installs the result as the new system properties.
     *
     * @return {@code false} if the file could not be read
     */
    private static boolean loadIntoSystemProperties(String propertiesFile) {
        Properties p = new Properties(System.getProperties());
        FileInputStream in = null;
        try {
            in = new FileInputStream(propertiesFile);
            p.load(in);
            System.setProperties(p);
            return true;
        }
        catch (IOException ioe) {
            return false;
        }
        finally {
            if (in != null) {
                try {
                    in.close();
                }
                catch (IOException ignored) {
                    // The file was only read; a failing close cannot lose data.
                }
            }
        }
    }

    /**
     * Opens the Lucene index if {@code lucene.indexdir} is set. Failure to
     * open the index is not fatal: ingestion continues without Lucene.
     *
     * @return {@code false} only if a numeric {@code lucene.*} setting is not a valid integer
     */
    private static boolean openLuceneIndex() {
        String indexDir = System.getProperty("lucene.indexdir", "");
        boolean indexUpdate = "true".equals(System.getProperty("lucene.incremental", "false"));
        boolean indexPositions = "true".equals(System.getProperty("lucene.indexpositions", "false"));
        String indexFrequenciesString = System.getProperty("lucene.maxnfrequencies", "0");
        String indexSizeString = System.getProperty("lucene.ngramsize", "5");
        _log.debug("lucene.indexdir: " + indexDir);
        if (indexDir.length() == 0) {
            return true;
        }
        int indexSize;
        int indexFrequencies;
        try {
            indexSize = Integer.parseInt(indexSizeString);
            indexFrequencies = Integer.parseInt(indexFrequenciesString);
        }
        catch (NumberFormatException nfe) {
            System.err.println("Invalid number in lucene.ngramsize (\"" + indexSizeString + "\") or lucene.maxnfrequencies (\"" + indexFrequenciesString + "\"), giving up.");
            _log.error("Invalid numeric Lucene setting: " + nfe, nfe);
            return false;
        }
        _log.debug("lucene.incremental: " + indexUpdate);
        _log.debug("lucene.indexpositions: " + indexPositions);
        _log.debug("lucene.maxnfrequencies: " + indexFrequencies);
        _log.debug("lucene.ngramsize: " + indexSize);
        try {
            _luceneIndexWriter = new LuceneIndexWriter(new File(indexDir), indexUpdate, indexSize, indexPositions, indexFrequencies);
            System.err.println("Opened Lucene index at: " + indexDir + " for " + (indexUpdate ? "update" : "replace") + " at N-Gram size " + indexSize + " with" + (indexPositions ? "" : "out") + " positions," + (indexFrequencies < 0 ? " no" : "") + " frequencies" + (indexFrequencies > 0 ? " up to " + indexFrequencies + "-grams" : ""));
        }
        catch (IOException ioe) {
            System.err.println("Cannot open Lucene index at: " + indexDir + " - continuing without Lucene!");
            _luceneIndexWriter = null;
            _log.error("Failed to open Lucene index at: " + indexDir + " Exception: " + ioe);
        }
        return true;
    }

    /** Closes the Lucene index if one is open; a failure is logged and reported on stderr. */
    private static void closeLuceneIndex() {
        if (_luceneIndexWriter == null) {
            return;
        }
        try {
            _log.info("Closing Lucene index...");
            _luceneIndexWriter.close();
            _log.info("Closed Lucene index.");
        }
        catch (IOException ioe) {
            _log.error("Failed to close the Lucene index, updates there lost! Exception: " + ioe);
            System.err.println("Error while closing Lucene index!");
        }
    }
}

