/*
 * Decompiled with CFR 0.152.
 */
package nl.mpi.annex.search;

import java.io.File;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import nl.mpi.annex.data.AnnexAnnotation;
import nl.mpi.annex.data.AnnexTier;
import nl.mpi.annex.data.AnnexTranscription;
import nl.mpi.annex.search.SearchClient;
import nl.mpi.annex.util.AnnexUtil;
import nl.mpi.annex.util.UTF8Validator;
import nl.mpi.annex.util.UnicodeNormalizer;
import nl.mpi.corpusstructure.CorpusStructureDB;
import org.apache.log4j.Logger;

public class SearchCorpusDB
extends Thread {
    /** log4j logger; each constructor assigns a role-specific logger name. */
    private volatile Logger _log = null;
    // NOTE(review): presumably a compile-time switch for storing annotation times as longs;
    // initTables currently hard-codes INTEGER time columns -- confirm.
    private static final boolean _storeLongTimes = false;
    // NOTE(review): looks like the JDBC batch size; ingest flushes on a literal 1000 -- confirm.
    private static final int BATCHSIZE = 1000;
    private final boolean _showProgress = true;
    /** Report lines for files that exist but cannot be read; filled by process(). */
    private ArrayList<String> permissionDenied;
    /** Report lines for files that do not exist on disk; filled by process(). */
    private ArrayList<String> notExisting;
    /** Report lines for transcriptions without any tier; filled by process(). */
    private ArrayList<String> emptyTranscriptions;
    /** Report lines for files that are too big or could not be parsed; filled by process(). */
    private ArrayList<String> notParsable;
    /**
     * Statistics accumulated by the writer thread. run() keeps the thread alive until getStats()
     * resets this field to null from the producer thread, so it must be volatile: Thread.sleep()
     * in the polling loop gives no visibility guarantee on its own.
     */
    private volatile Stats _writerStats = null;
    /**
     * Hand-over queue between the producer (sendFileToDb) and the writer thread (run). run() sets
     * it to null once the EOF marker has been consumed and getStats() polls for exactly that from
     * another thread, hence volatile. The volatile write also publishes the final counter values
     * in _writerStats to the thread that observes the null.
     */
    private volatile BlockingQueue<AnnexTranscription> _ingestQueue = null;
    /** JDBC connection; stays null when running in simulation mode. */
    private Connection _con = null;
    /** When true, files are read and checked but no database statement is executed. */
    private boolean _simulate = false;

    public SearchCorpusDB(String url, String usr, String pwd) throws SQLException {
        this._log = Logger.getLogger((String)"SearchCorpusDB-ingester");
        this._log.debug((Object)"SearchCorpusDB ingester created");
        if (usr == null || url == null || pwd == null) {
            this._log.error((Object)"SearchCorpusDB Constructor recieved null strings as parameters for username, url, and/or password.");
            this._simulate = true;
        }
        boolean bl = this._simulate = this._simulate || usr.equals("simulate");
        if (!this._simulate) {
            this._con = DriverManager.getConnection(url, usr, pwd);
        }
    }

    public SearchCorpusDB(Connection dbcon) {
        this._log = Logger.getLogger((String)"SearchCorpusDB-writer");
        this._log.debug((Object)"SearchCorpusDB writer created");
        this._writerStats = new Stats();
        this._ingestQueue = new ArrayBlockingQueue<AnnexTranscription>(42, true);
        this._con = dbcon;
        if (this._con == null) {
            this._simulate = true;
        }
    }

    /**
     * Main loop of the writer thread.
     *
     * Polls the ingest queue, stores every transcription via ingest() and folds the returned
     * numbers into the writer statistics. The loop ends when a transcription of type 42 is
     * dequeued; process() enqueues such an element as the end-of-input marker. Afterwards the
     * queue reference is set to null, which is the signal getStats() waits for, and the thread
     * stays alive until getStats() has collected the statistics and reset them to null.
     */
    public void run() {
        while (true) {
            AnnexTranscription transcription = null;
            try {
                transcription = this._ingestQueue.poll();
                if (transcription == null) {
                    // Queue is empty: back off briefly before polling again.
                    SearchCorpusDB.sleep(5L);
                }
            }
            catch (InterruptedException ie) {
                // Deliberately ignored: only the EOF marker ends this loop.
            }
            if (transcription == null) {
                // Nothing was dequeued. Without this guard the type check below would
                // dereference null and kill the writer thread on the first empty poll.
                continue;
            }
            if (transcription.getType() == 42) {
                this._log.debug("writer thread: EOF encountered: " + transcription.getNodeId());
                int itemsPastEof = this._ingestQueue.size();
                if (itemsPastEof > 0) {
                    this._log.error("Internal error: Dropped " + itemsPastEof + " items after the EOF marker");
                }
                break;
            }
            Stats st = this.ingest(transcription, false);
            if (st == null) {
                // ingest() reports a failed store by returning null.
                String filePath = AnnexUtil.getFilePathFor(transcription.getNodeId());
                this._log.error("Database problem with: " + filePath);
                ++this._writerStats.nDBProblems;
                continue;
            }
            if (st.nEmpty > 0) {
                ++this._writerStats.nEmpty;
                continue;
            }
            this._writerStats.nTiers += st.nTiers;
            this._writerStats.nAnnotations += st.nAnnotations;
            this._writerStats.annotationsSize += st.annotationsSize;
        }
        // Nulling the queue tells getStats() that the counters are final.
        this._ingestQueue.clear();
        this._ingestQueue = null;
        this._log.debug("writer thread: waiting for getStats to be called");
        while (this._writerStats != null) {
            try {
                SearchCorpusDB.sleep(5L);
            }
            catch (InterruptedException interruptedException) {
                // Ignored: keep waiting until getStats() has taken the statistics.
            }
        }
        this._log.debug("writer thread: done.");
    }

    /**
     * Waits until the writer thread has released its queue, then adds the writer's counters to
     * the given statistics object and clears the writer statistics, which lets run() terminate.
     *
     * @param input statistics collected by the caller; modified in place
     * @return the same {@code input} object with the writer's counters added
     */
    public Stats getStats(Stats input) {
        if (this._ingestQueue != null) {
            this._log.debug("writer thread waiting for final stats to become available");
        }
        for (;;) {
            if (this._ingestQueue == null) {
                break;
            }
            try {
                SearchCorpusDB.sleep(5L);
            }
            catch (InterruptedException ignored) {
                // keep waiting for the writer to finish
            }
        }
        this._log.debug("writer thread: queue empty");
        Stats writer = this._writerStats;
        input.nFiles += writer.nFiles;
        input.nExist += writer.nExist;
        input.nPermissionDenied += writer.nPermissionDenied;
        input.nNotParsable += writer.nNotParsable;
        input.nDBProblems += writer.nDBProblems;
        input.nEmpty += writer.nEmpty;
        input.nTiers += writer.nTiers;
        input.nAnnotations += writer.nAnnotations;
        input.annotationsSize += writer.annotationsSize;
        // Signals run() that the statistics have been collected.
        this._writerStats = null;
        return input;
    }

    /**
     * Hands a transcription to the writer thread. Blocks while the bounded ingest queue is full
     * and retries when the wait is interrupted, so the element is always enqueued on return.
     *
     * Only valid on an instance created with the Connection constructor; the other constructor
     * does not create the queue.
     *
     * @param transcription the element to enqueue; process() uses an element of type 42 as the
     *                      end-of-input marker
     */
    public void sendFileToDb(AnnexTranscription transcription) {
        boolean queued = false;
        while (!queued) {
            try {
                this._ingestQueue.put(transcription);
                queued = true;
            }
            catch (InterruptedException ignored) {
                // Interrupted while waiting for free space: try again, the element must not be lost.
            }
        }
    }

    /**
     * Closes the database connection. Does nothing in simulation mode, where no connection is
     * used. A failure to close is logged and otherwise ignored, so this method never throws
     * SQLException.
     */
    public void close() {
        if (this._simulate) {
            return;
        }
        try {
            this._con.close();
        }
        catch (SQLException e) {
            // Best effort: nothing useful can be done, but do not hide the problem completely.
            this._log.warn("Error closing database connection: " + e, e);
        }
    }

    /**
     * Creates the staging tables (search.new_*) that a fresh ingest writes into, dropping any
     * left-over staging tables first, and seeds search.new_prog_data with the 'indices' row
     * holding the id counters (both 0).
     *
     * The widths of the n-gram BIT columns are taken from the length of the bit strings that
     * UTF8Validator produces for a sample input.
     *
     * @param st statement used to execute the DDL
     * @throws SQLException if any statement other than CREATE SCHEMA fails
     */
    private void initTables(Statement st) throws SQLException {
        try {
            st.executeUpdate("CREATE SCHEMA search");
        }
        catch (SQLException ignored) {
            // Ignored on purpose -- presumably the schema already exists.
        }
        String[] staleStagingDrops = new String[]{
            "DROP TABLE IF EXISTS search.new_annotations CASCADE",
            "DROP TABLE IF EXISTS search.new_tiers CASCADE",
            "DROP TABLE IF EXISTS search.new_vpaths CASCADE",
            "DROP TABLE IF EXISTS search.new_prog_data CASCADE"
        };
        for (String drop : staleStagingDrops) {
            st.executeUpdate(drop);
        }
        st.executeUpdate("CREATE TABLE search.new_vpaths (node_id TEXT NOT NULL, vpath TEXT NOT NULL )");

        int bigramWidth = UTF8Validator.stringifyBits(UTF8Validator.fingerprintBigrams("bogus")).length();
        int trigramWidth = UTF8Validator.stringifyBits(UTF8Validator.fingerprintTrigrams("bogus")).length();
        int fourgramWidth = UTF8Validator.stringifyBits(UTF8Validator.fingerprint4grams("bogus")).length();
        StringBuilder tiersDdl = new StringBuilder();
        tiersDdl.append("CREATE TABLE search.new_tiers (tier_id INTEGER NOT NULL, tier_name TEXT NOT NULL, ");
        tiersDdl.append("tier_type TEXT NOT NULL, default_locale TEXT, annotator TEXT, participant TEXT NOT NULL, ");
        tiersDdl.append("n_annotations INTEGER NOT NULL, ref_tier_id INTEGER NOT NULL, ");
        tiersDdl.append("transcription_type INTEGER NOT NULL, node_id TEXT NOT NULL, ");
        tiersDdl.append("aligned_annotations INTEGER NOT NULL, unigram_bits INTEGER, ");
        tiersDdl.append("bigram_bits BIT(").append(bigramWidth).append("), ");
        tiersDdl.append("trigram_bits BIT(").append(trigramWidth).append("), ");
        tiersDdl.append("fourgram_bits BIT(").append(fourgramWidth).append(") )");
        st.executeUpdate(tiersDdl.toString());

        // Both time columns share one SQL type.
        String timeType = "INTEGER";
        StringBuilder annotationsDdl = new StringBuilder();
        annotationsDdl.append("CREATE TABLE search.new_annotations (ann_id INTEGER NOT NULL, ann_position INTEGER NOT NULL, ");
        annotationsDdl.append("begin_time ").append(timeType).append(" NOT NULL, ");
        annotationsDdl.append("end_time ").append(timeType).append(" NOT NULL, ");
        annotationsDdl.append("ann_tier_id INTEGER NOT NULL, ");
        annotationsDdl.append("annotation TEXT NOT NULL )");
        st.executeUpdate(annotationsDdl.toString());

        st.executeUpdate("CREATE TABLE search.new_prog_data (label TEXT, tier_id INTEGER, ann_id INTEGER)");
        st.executeUpdate("INSERT INTO search.new_prog_data (label, tier_id, ann_id) VALUES ('indices', 0, 0)");
    }

    /**
     * Builds the indexes on the staging tables (search.new_annotations, search.new_tiers and
     * search.new_vpaths) and reports the time spent on each group through the stopwatch helpers.
     *
     * @param st statement used to execute the DDL
     * @throws SQLException if an index cannot be created
     */
    private void createIndexes(Statement st) throws SQLException {
        long started;

        this._log.debug("creating annotation id index....");
        started = this.start_stopwatch();
        st.executeUpdate("CREATE INDEX new_ann_id_index ON search.new_annotations( ann_id )");
        this.stop_stopwatch("anno id index", started);

        this._log.debug("creating annotation tier id index....");
        started = this.start_stopwatch();
        st.executeUpdate("CREATE INDEX new_ann_tier_id_index ON search.new_annotations( ann_tier_id )");
        this.stop_stopwatch("anno tier id index", started);

        String[] tierIndexes = new String[]{
            "CREATE INDEX new_node_id_index ON search.new_tiers( node_id )",
            "CREATE INDEX new_tier_id_index ON search.new_tiers( tier_id )",
            "CREATE INDEX new_tier_name_index ON search.new_tiers( tier_name )",
            "CREATE INDEX new_tier_type_index ON search.new_tiers( tier_type )",
            "CREATE INDEX new_default_locale_index ON search.new_tiers( default_locale )",
            "CREATE INDEX new_participant_index ON search.new_tiers( participant )",
            "CREATE INDEX new_ref_tier_id_index ON search.new_tiers( ref_tier_id )",
            "CREATE INDEX new_transcription_type_index ON search.new_tiers( transcription_type )"
        };
        this._log.debug("creating tier indexes....");
        started = this.start_stopwatch();
        for (String ddl : tierIndexes) {
            st.executeUpdate(ddl);
        }
        this.stop_stopwatch("tier indexes", started);

        String[] vpathIndexes = new String[]{
            "CREATE INDEX new_vpaths_node_index ON search.new_vpaths( node_id )",
            "CREATE INDEX new_vpaths_vpath_index ON search.new_vpaths( vpath )"
        };
        started = this.start_stopwatch();
        this._log.debug("creating node vpath indexes...");
        for (String ddl : vpathIndexes) {
            st.executeUpdate(ddl);
        }
        this.stop_stopwatch("node vpath indexes", started);
    }

    /**
     * Removes the live search tables (annotations, tiers, vpaths, prog_data) and their indexes
     * so that the staging tables can be renamed into place. Every statement uses IF EXISTS.
     *
     * @param st statement used to execute the DDL
     * @throws SQLException if a drop statement fails
     */
    private void dropOldData(Statement st) throws SQLException {
        this._log.info("Dropping old data...");
        // Executed strictly in this order: each table first, then the indexes that belonged to it.
        String[] drops = new String[]{
            "DROP TABLE IF EXISTS search.annotations CASCADE",
            "DROP INDEX IF EXISTS search.ann_id_index CASCADE",
            "DROP INDEX IF EXISTS search.ann_tier_id_index CASCADE",
            "DROP TABLE IF EXISTS search.tiers CASCADE",
            "DROP INDEX IF EXISTS search.node_id_index CASCADE",
            "DROP INDEX IF EXISTS search.tier_id_index CASCADE",
            "DROP INDEX IF EXISTS search.tier_name_index CASCADE",
            "DROP INDEX IF EXISTS search.tier_type_index CASCADE",
            "DROP INDEX IF EXISTS search.default_locale_index CASCADE",
            "DROP INDEX IF EXISTS search.participant_index CASCADE",
            "DROP INDEX IF EXISTS search.ref_tier_id_index CASCADE",
            "DROP INDEX IF EXISTS search.transcription_type_index CASCADE",
            "DROP TABLE IF EXISTS search.vpaths CASCADE",
            "DROP INDEX IF EXISTS search.vpaths_vpath_index CASCADE",
            "DROP INDEX IF EXISTS search.vpaths_node_index CASCADE",
            "DROP TABLE IF EXISTS search.prog_data CASCADE"
        };
        for (String sql : drops) {
            st.executeUpdate(sql);
        }
    }

    /**
     * Promotes the staging tables and indexes to their live names by stripping the "new_"
     * prefix (search.new_annotations becomes search.annotations, and so on).
     *
     * @param st statement used to execute the DDL
     * @throws SQLException if a rename fails
     */
    private void renameTables(Statement st) throws SQLException {
        // Executed strictly in this order: each table is followed by its own indexes.
        String[] renames = new String[]{
            "ALTER TABLE search.new_annotations RENAME TO annotations",
            "ALTER INDEX search.new_ann_id_index RENAME TO ann_id_index",
            "ALTER INDEX search.new_ann_tier_id_index RENAME TO ann_tier_id_index",
            "ALTER TABLE search.new_tiers RENAME TO tiers",
            "ALTER INDEX search.new_node_id_index RENAME TO node_id_index",
            "ALTER INDEX search.new_tier_id_index RENAME TO tier_id_index",
            "ALTER INDEX search.new_tier_name_index RENAME TO tier_name_index",
            "ALTER INDEX search.new_tier_type_index RENAME TO tier_type_index",
            "ALTER INDEX search.new_default_locale_index RENAME TO default_locale_index",
            "ALTER INDEX search.new_participant_index RENAME TO participant_index",
            "ALTER INDEX search.new_ref_tier_id_index RENAME TO ref_tier_id_index",
            "ALTER INDEX search.new_transcription_type_index RENAME TO transcription_type_index",
            "ALTER TABLE search.new_vpaths RENAME TO vpaths",
            "ALTER INDEX search.new_vpaths_node_index RENAME TO vpaths_node_index",
            "ALTER INDEX search.new_vpaths_vpath_index RENAME TO vpaths_vpath_index",
            "ALTER TABLE search.new_prog_data RENAME TO prog_data"
        };
        for (String sql : renames) {
            st.executeUpdate(sql);
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Runs a complete ingest: creates the staging tables, processes all files of every
     * searchable format, logs statistics and the lists of problematic files, then builds the
     * indexes, replaces the live tables with the staging tables and finally closes the
     * connection.
     *
     * In simulation mode all database steps are skipped. If the staging tables cannot be
     * created the method logs the error and returns without processing any file; an error
     * while swapping the tables is logged and the method continues to its end.
     */
    private void fillDatabase() {
        long totalDuration = this.start_stopwatch();
        if (!this._simulate) {
            Statement setupSt = null;
            try {
                setupSt = this._con.createStatement();
                this.initTables(setupSt);
            }
            catch (SQLException e) {
                this._log.error("Error creating database tables, giving up: " + e, e);
                return;
            }
            finally {
                // createStatement() itself may have failed, in which case there is nothing to close.
                if (setupSt != null) {
                    try {
                        setupSt.close();
                    }
                    catch (SQLException closeError) {
                        this._log.debug("Ignoring error while closing statement: " + closeError);
                    }
                }
            }
        }
        this.stop_stopwatch("DB table setup", totalDuration);
        this._log.info("Database tables created" + (this._simulate ? " (simulation)" : ""));
        this.permissionDenied = new ArrayList<String>();
        this.notExisting = new ArrayList<String>();
        this.notParsable = new ArrayList<String>();
        this.emptyTranscriptions = new ArrayList<String>();

        String[] mimetypes = SearchClient.getSearchableFormats();
        Stats[] stats = new Stats[mimetypes.length];
        String[] shortTypes = new String[mimetypes.length];
        for (int i = 0; i < mimetypes.length; ++i) {
            long readStart = this.start_stopwatch();
            stats[i] = this.process(mimetypes[i]);
            // Short label for log output, e.g. without a leading "text/" or "x-".
            shortTypes[i] = mimetypes[i].replaceAll("^text/", "").replaceAll("^x-", "").replaceAll("-text$", "");
            this.stop_stopwatch("read " + stats[i].nFiles + " " + shortTypes[i] + " files", readStart);
        }
        this._log.info("Ready parsing at " + new Date());

        Stats totalStats = new Stats();
        for (int i = 0; i < mimetypes.length; ++i) {
            this.printStats(stats[i], shortTypes[i], totalStats);
        }
        this.printStats(totalStats, "*/*", new Stats());

        this._log.warn("Overview of problematic files: "
                + (this.notExisting.size() > 0 ? "" + this.notExisting.size() + " missing, " : "[all exist], ")
                + (this.permissionDenied.size() > 0 ? "" + this.permissionDenied.size() + " not readable, " : "[all readable], ")
                + (this.emptyTranscriptions.size() > 0 ? "" + this.emptyTranscriptions.size() + " without transcriptions, " : "[none empty], ")
                + (this.notParsable.size() > 0 ? "" + this.notParsable.size() + " not parseable" : "[all parse okay]"));
        this.logProblemFiles("Not existing", this.notExisting);
        this.logProblemFiles("Permission denied", this.permissionDenied);
        this.logProblemFiles("Empty Transcriptions", this.emptyTranscriptions);
        this.logProblemFiles("Not parsable", this.notParsable);

        if (!this._simulate) {
            Statement swapSt = null;
            try {
                swapSt = this._con.createStatement();
                this._log.info("Creating indexes, dropping old tables, renaming tables at: " + new Date());
                this.createIndexes(swapSt);
                long stw = this.start_stopwatch();
                this.dropOldData(swapSt);
                this.renameTables(swapSt);
                this.stop_stopwatch("drop / move tables", stw);
                stw = this.start_stopwatch();
                swapSt.execute("ANALYZE search.vpaths");
                swapSt.execute("ANALYZE search.tiers");
                swapSt.execute("ANALYZE search.annotations");
                this.stop_stopwatch("analyze tables", stw);
            }
            catch (SQLException e) {
                this._log.error("Error swapping database content: " + e, e);
            }
            finally {
                if (swapSt != null) {
                    try {
                        swapSt.close();
                    }
                    catch (SQLException closeError) {
                        this._log.debug("Ignoring error while closing statement: " + closeError);
                    }
                }
            }
        }
        this._log.info("Ready!!");
        this.stop_stopwatch("all " + totalStats.nFiles + " done", totalDuration);
        // Closes the connection unless running in simulation mode.
        this.close();
    }

    /**
     * Logs one category of problematic files: the number of entries at error level followed by
     * every entry at warn level. Logs nothing when the list is empty.
     *
     * @param label category name used as prefix of the count line
     * @param files report lines collected for this category
     */
    private void logProblemFiles(String label, ArrayList<String> files) {
        if (files.isEmpty()) {
            return;
        }
        this._log.error(label + ": " + files.size());
        for (String entry : files) {
            this._log.warn(entry);
        }
    }

    /**
     * Reads all corpus files of one annotation format and streams them to a freshly started
     * writer thread, which stores them in the database.
     *
     * Files that are missing, unreadable, larger than 10,000,000 bytes or not parsable are
     * skipped and recorded in the matching problem list. Transcriptions without tiers are
     * recorded as empty but still handed to the writer. Progress is printed to System.err.
     *
     * @param annotationFormat mime type of the files to process
     * @return the statistics of this run, including the counters gathered by the writer thread
     */
    private Stats process(String annotationFormat) {
        CorpusStructureDB csdb = AnnexUtil.getCorpusStructureDB();
        String[] format = new String[]{annotationFormat};
        // NOTE(review): the meaning of the arguments 8, "ignore" and true is defined by
        // CorpusStructureDB.getDescendants -- confirm against its documentation.
        String[] desc = csdb.getDescendants(csdb.getRootNodeId(), 8, format, "ignore", true);
        String shortFormat = annotationFormat.replaceAll("^text/", "").replaceAll("^x-", "").replaceAll("-text$", "");
        this._log.info(shortFormat + " processing: " + desc.length + " '" + annotationFormat + "' files");
        // Print progress roughly once per percent, but at least once per file for small sets.
        int feedBackStep = Math.max(1, desc.length / 100);
        Stats stats = new Stats();
        System.err.print(annotationFormat + ": ");
        this._log.debug("writer thread: constructing for " + annotationFormat);
        SearchCorpusDB writerThread = new SearchCorpusDB(this._con);
        writerThread.setName("Process_" + annotationFormat + "_writer");
        writerThread.start();
        for (int i = 0; i < desc.length; ++i) {
            if (i % feedBackStep == 0) {
                // long arithmetic: 100 * i overflows int beyond about 21 million files.
                System.err.print(100L * i / desc.length + " ");
            }
            ++stats.nFiles;
            String filePath = AnnexUtil.getFilePathFor(desc[i]);
            File file = new File(filePath);
            if (!file.exists()) {
                this.notExisting.add(annotationFormat + "  ne  " + filePath);
                continue;
            }
            ++stats.nExist;
            if (!file.canRead()) {
                this.permissionDenied.add(annotationFormat + "  pd  " + filePath);
                ++stats.nPermissionDenied;
                continue;
            }
            if (file.length() > 10000000L) {
                this._log.warn("Too big file, skipped: " + file.length() + " bytes in: " + file.getAbsolutePath());
                this.notParsable.add(annotationFormat + "  np  " + filePath);
                ++stats.nNotParsable;
                continue;
            }
            AnnexTranscription transcription;
            int type = -1;
            try {
                type = AnnexUtil.getTypeFor(desc[i]);
                transcription = new AnnexTranscription(desc[i], type, file);
            }
            catch (RuntimeException e) {
                this._log.error("process: Exception in AnnexTranscription for: " + file + " " + type + " " + desc[i] + ": " + e, e);
                this.notParsable.add(annotationFormat + "  np  " + filePath);
                ++stats.nNotParsable;
                continue;
            }
            if (!transcription.isValid()) {
                this.notParsable.add(annotationFormat + "  np  " + filePath);
                ++stats.nNotParsable;
                continue;
            }
            if (transcription.getTiers().size() == 0) {
                // Recorded here for the report; the writer thread counts it as empty as well.
                this.emptyTranscriptions.add(annotationFormat + "  em  " + filePath);
            }
            writerThread.sendFileToDb(transcription);
        }
        this._log.debug("writer thread: draining queue");
        // An element of type 42 is the end-of-input marker that makes the writer leave its loop.
        writerThread.sendFileToDb(new AnnexTranscription("Last@" + annotationFormat, 42, null));
        stats = writerThread.getStats(stats);
        this._log.debug("writer thread: fetched stats");
        do {
            try {
                writerThread.join(10L);
            }
            catch (InterruptedException ie) {
                // Ignored: keep waiting until the writer thread has ended.
            }
            this._log.debug("writer thread: waiting for join");
        } while (writerThread.isAlive());
        this._log.debug("writer thread: completed");
        System.err.println(annotationFormat + " done.");
        return stats;
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    private Stats ingest(AnnexTranscription transcription, boolean inCurrent) {
        Stats stats = new Stats();
        HashMap<AnnexTier, Integer> ids = new HashMap<AnnexTier, Integer>();
        boolean acm = false;
        int nTiers = transcription.getTiers().size();
        if (nTiers == 0) {
            ++stats.nEmpty;
            return stats;
        }
        String prog_dataTable = "search.new_prog_data";
        if (inCurrent) {
            prog_dataTable = "search.prog_data";
        }
        PreparedStatement stVPaths = null;
        Statement stAnnotation = null;
        Statement stTier = null;
        String filePath = AnnexUtil.getFilePathFor(transcription.getNodeId());
        try {
            AnnexTier tier;
            int i;
            Statement st = null;
            ResultSet rs = null;
            int tierId = -1;
            int annotationId = -1;
            if (!this._simulate) {
                acm = this._con.getAutoCommit();
                this._con.setAutoCommit(false);
                st = this._con.createStatement();
                rs = st.executeQuery("SELECT tier_id, ann_id FROM " + prog_dataTable + " WHERE label = 'indices'");
                rs.next();
                tierId = rs.getInt("tier_id");
                annotationId = rs.getInt("ann_id");
                stAnnotation = this._con.prepareStatement("INSERT INTO " + (inCurrent ? "search.annotations" : "search.new_annotations") + " (ann_id, ann_position, begin_time, end_time, ann_tier_id, annotation)" + " VALUES(?, ?, ?, ?, ?, ?)");
                stTier = this._con.prepareStatement("INSERT INTO " + (inCurrent ? "search.tiers" : "search.new_tiers") + " (tier_id, tier_name, tier_type, default_locale, annotator, participant, n_annotations," + " ref_tier_id, transcription_type, node_id, aligned_annotations, unigram_bits, bigram_bits, trigram_bits, fourgram_bits )" + " VALUES (?, ?, ?, ?, ?, ?, ?,   ?, ?, ?, ?, ?, ?::bit(" + UTF8Validator.stringifyBits(UTF8Validator.fingerprintBigrams("bogus")).length() + "), ?::bit(" + UTF8Validator.stringifyBits(UTF8Validator.fingerprintTrigrams("bogus")).length() + "), ?::bit(" + UTF8Validator.stringifyBits(UTF8Validator.fingerprint4grams("bogus")).length() + ") )");
                stVPaths = this._con.prepareStatement("INSERT INTO search.new_vpaths ( node_id, vpath ) VALUES ( ?, ? )");
            }
            String[] vpaths = AnnexUtil.getCorpusStructureDB().getCorpusNode(transcription.getNodeId()).getVPaths();
            for (i = 0; i < vpaths.length; ++i) {
                if (this._simulate) continue;
                stVPaths.clearParameters();
                stVPaths.setString(1, transcription.getNodeId());
                stVPaths.setString(2, vpaths[i]);
                stVPaths.executeUpdate();
            }
            stats.nTiers = nTiers;
            for (i = 0; i < nTiers; ++i) {
                tier = transcription.getTiers().get(i);
                ids.put(tier, tierId++);
            }
            if (!this._simulate) {
                rs = st.executeQuery("SELECT tier_id, ann_id FROM " + prog_dataTable + " WHERE label = 'indices'");
                rs.next();
                tierId = rs.getInt("tier_id");
                annotationId = rs.getInt("ann_id");
            }
            for (i = 0; i < nTiers; ++i) {
                tier = transcription.getTiers().get(i);
                int ref_tier_id = -1;
                if (tier.parentTier != null) {
                    if (ids.get(tier.parentTier) != null) {
                        ref_tier_id = (Integer)ids.get(tier.parentTier);
                    } else {
                        this._log.warn((Object)("Ignored bad parent tier reference: " + tier.name + " ==> " + tier.parentTier.name + " in: " + filePath));
                    }
                }
                int nAnnotations = tier.annotations.size();
                int unigram_bits = 0;
                int[] bigram_bits = UTF8Validator.fingerprintBigrams("bogus");
                for (int j = 0; j < bigram_bits.length; ++j) {
                    bigram_bits[j] = 0;
                }
                int[] trigram_bits = UTF8Validator.fingerprintTrigrams("bogus");
                for (int j = 0; j < trigram_bits.length; ++j) {
                    trigram_bits[j] = 0;
                }
                int[] fourgram_bits = UTF8Validator.fingerprint4grams("bogus");
                for (int j = 0; j < fourgram_bits.length; ++j) {
                    fourgram_bits[j] = 0;
                }
                stats.nAnnotations += (long)nAnnotations;
                int nAlignedAnnotations = 0;
                for (int j = 0; j < nAnnotations; ++j) {
                    AnnexAnnotation annotation = tier.annotations.get(j);
                    if (annotation != null && annotation.isTimeAligned) {
                        ++nAlignedAnnotations;
                    }
                    String anno = annotation.value;
                    stats.annotationsSize += (long)anno.length();
                    if (nAnnotations > 1) {
                        anno = " " + anno + " ";
                    }
                    unigram_bits |= UTF8Validator.fingerprintChars(annotation.value);
                    int[] bigram_bits_anno = UTF8Validator.fingerprintBigrams(annotation.value);
                    for (int k = 0; k < bigram_bits.length; ++k) {
                        int n = k;
                        bigram_bits[n] = bigram_bits[n] | bigram_bits_anno[k];
                    }
                    int[] trigram_bits_anno = UTF8Validator.fingerprintTrigrams(annotation.value);
                    for (int k = 0; k < trigram_bits.length; ++k) {
                        int n = k;
                        trigram_bits[n] = trigram_bits[n] | trigram_bits_anno[k];
                    }
                    int[] fourgram_bits_anno = UTF8Validator.fingerprint4grams(annotation.value);
                    for (int k = 0; k < fourgram_bits.length; ++k) {
                        int n = k;
                        fourgram_bits[n] = fourgram_bits[n] | fourgram_bits_anno[k];
                    }
                }
                if (tier.type == null) {
                    this._log.error((Object)("Tier without type in: " + filePath));
                    tier.type = "unknown";
                }
                if (tier.participant == null) {
                    this._log.error((Object)("Tier without participant info in: " + filePath));
                    tier.participant = "unknown";
                }
                if (nAnnotations == 0 || nAnnotations == 1 && tier.annotations.get((int)0).value.trim().length() == 0) {
                    this._log.debug((Object)("Empty tier: " + tier.name + " in: " + filePath));
                }
                int nulCharLines = 0;
                int nonNormalLines = 0;
                for (int j = 0; j < nAnnotations; ++j) {
                    int endTime;
                    String normalized;
                    String denormal;
                    AnnexAnnotation annotation = tier.annotations.get(j);
                    if (annotation.value.length() > 2600) {
                        this._log.warn((Object)("Annotation longer than 2600 characters, length " + annotation.value.length() + " in " + nAnnotations + "-annotation tier '" + tier.name + "'[" + j + "] in: " + filePath));
                    }
                    if (annotation.value.indexOf(0) != -1) {
                        ++nulCharLines;
                        annotation.value = annotation.value.replace('\u0000', ' ');
                    }
                    if ((denormal = UnicodeNormalizer.checkNFC(normalized = UnicodeNormalizer.normalizeToNFC(annotation.value, " in tier '" + tier.name + "'[" + j + "] in: " + filePath))) != null || !normalized.equals(annotation.value)) {
                        if (denormal != null) {
                            this._log.warn((Object)("Annotation cannot be normalized to NFC: " + denormal + " in tier '" + tier.name + "'[" + j + "] in: " + filePath));
                        }
                        ++nonNormalLines;
                        annotation.value = normalized;
                    }
                    if (this._simulate) continue;
                    stAnnotation.clearParameters();
                    stAnnotation.setInt(1, annotationId++);
                    stAnnotation.setInt(2, j);
                    int startTime = annotation.beginTime < Integer.MIN_VALUE ? Integer.MIN_VALUE : Integer.MAX_VALUE;
                    int n = endTime = annotation.endTime < Integer.MIN_VALUE ? Integer.MIN_VALUE : Integer.MAX_VALUE;
                    if (annotation.beginTime >= Integer.MIN_VALUE && annotation.beginTime <= Integer.MAX_VALUE) {
                        startTime = (int)annotation.beginTime;
                    } else {
                        this._log.error((Object)("Bad annotation start time: " + annotation.beginTime + " to " + annotation.endTime + " for '" + annotation.value + "' in " + filePath));
                    }
                    if (annotation.endTime >= Integer.MIN_VALUE && annotation.endTime <= Integer.MAX_VALUE) {
                        endTime = (int)annotation.endTime;
                    } else {
                        this._log.error((Object)("Bad annotation end time: " + annotation.beginTime + " to " + annotation.endTime + " for '" + annotation.value + "' in " + filePath));
                    }
                    stAnnotation.setInt(3, startTime);
                    stAnnotation.setInt(4, endTime);
                    stAnnotation.setInt(5, tierId);
                    stAnnotation.setString(6, annotation.value);
                    stAnnotation.addBatch();
                    if (j % 1000 != 0 && j != nAnnotations - 1) continue;
                    stAnnotation.executeBatch();
                    stAnnotation.clearBatch();
                }
                if (nulCharLines > 0) {
                    this._log.warn((Object)("Had to replace NUL chars in " + nulCharLines + " lines for tier: " + tier.name + " in: " + filePath));
                }
                if (nonNormalLines > 0) {
                    this._log.debug((Object)("Normalized to NFC in " + nonNormalLines + " lines for tier: " + tier.name + " in: " + filePath));
                }
                String bigramBitString = UTF8Validator.stringifyBits(bigram_bits);
                String trigramBitString = UTF8Validator.stringifyBits(trigram_bits);
                String fourgramBitString = UTF8Validator.stringifyBits(fourgram_bits);
                if (!this._simulate) {
                    stTier.clearParameters();
                    stTier.setInt(1, tierId);
                    stTier.setString(2, tier.name);
                    stTier.setString(3, tier.type);
                    stTier.setString(4, tier.defaultLocale);
                    stTier.setString(5, tier.annotator);
                    stTier.setString(6, tier.participant);
                    stTier.setInt(7, nAnnotations);
                    stTier.setInt(8, ref_tier_id);
                    stTier.setInt(9, transcription.getType());
                    stTier.setString(10, transcription.getNodeId());
                    stTier.setInt(11, nAlignedAnnotations);
                    if (unigram_bits == 0) {
                        stTier.setNull(12, 4);
                    } else {
                        stTier.setInt(12, unigram_bits);
                    }
                    if (bigramBitString.indexOf(49) == -1) {
                        stTier.setNull(13, 12);
                    } else {
                        stTier.setString(13, bigramBitString);
                    }
                    if (trigramBitString.indexOf(49) == -1) {
                        stTier.setNull(14, 12);
                    } else {
                        stTier.setString(14, trigramBitString);
                    }
                    if (fourgramBitString.indexOf(49) == -1) {
                        stTier.setNull(15, 12);
                    } else {
                        stTier.setString(15, fourgramBitString);
                    }
                    stTier.executeUpdate();
                }
                ++tierId;
            }
            if (!this._simulate) {
                st.executeUpdate("UPDATE " + prog_dataTable + " SET " + "tier_id = " + tierId + ", " + "ann_id = " + annotationId + " WHERE label = 'indices'");
                this._con.commit();
                this._con.setAutoCommit(acm);
            }
        }
        catch (SQLException e) {
            this._log.error((Object)("ingest SQLException: " + e + "in: " + filePath), (Throwable)e);
            try {
                this._con.rollback();
                this._con.setAutoCommit(acm);
            }
            catch (SQLException ex) {
                this._log.error((Object)("ingest: failed rollback: " + ex.toString()));
                this._log.error((Object)"Not making new database tables active: Missed rollbacks!");
                this._log.error((Object)"Old database still active. Please fix problem and retry.");
                System.exit(1);
            }
            Stats ex = null;
            return ex;
        }
        catch (RuntimeException re) {
            this._log.error((Object)("ingest RuntimeException:" + re + "in: " + filePath), (Throwable)re);
            try {
                this._con.rollback();
                this._con.setAutoCommit(acm);
            }
            catch (SQLException ex) {
                this._log.error((Object)("ingest: failed rollback: " + ex.toString()));
                this._log.error((Object)"Not making new database tables active: Missed rollbacks!");
                this._log.error((Object)"Old database still active. Please fix problem and retry.");
                System.exit(1);
            }
            Stats stats2 = null;
            return stats2;
        }
        finally {
            if (stTier != null) {
                try {
                    stTier.close();
                }
                catch (SQLException e) {}
            }
            if (stAnnotation != null) {
                try {
                    stAnnotation.close();
                }
                catch (SQLException e) {}
            }
        }
        return stats;
    }

    /**
     * Logs a summary of one {@link Stats} record at INFO level and then adds
     * its counters onto a running total.
     *
     * @param stats      the counters to report; read only
     * @param title      label inserted into the "statistics for ... files" header line
     * @param totalStats accumulator; every counter of {@code stats} is added to it
     *                   after the report has been logged
     */
    private void printStats(Stats stats, String title, Stats totalStats) {
        // Average annotation length scaled by 100 so two decimals can be printed
        // using integer arithmetic only; stays 0 when there are no annotations.
        final long avgTimes100 = stats.nAnnotations > 0L
                ? 100L * stats.annotationsSize / stats.nAnnotations
                : 0L;
        final int nFailed = stats.nPermissionDenied + stats.nEmpty + stats.nNotParsable + stats.nDBProblems;
        final int nIngested = stats.nExist - nFailed;

        this._log.info("# statistics for " + title + " files:");
        this._log.info("#     files: " + stats.nFiles + "    existing: " + stats.nExist + "   ingested: " + nIngested);

        if (nFailed > 0) {
            // Only the failure categories that actually occurred are listed.
            final StringBuilder problems = new StringBuilder("# ");
            if (stats.nPermissionDenied > 0) {
                problems.append("not accessible: ").append(stats.nPermissionDenied);
            }
            if (stats.nEmpty > 0) {
                problems.append("    empty: ").append(stats.nEmpty);
            }
            if (stats.nNotParsable > 0) {
                problems.append("    not parsable: ").append(stats.nNotParsable);
            }
            if (stats.nDBProblems > 0) {
                problems.append("    database problem: ").append(stats.nDBProblems);
            }
            problems.append("    [some errors]");
            this._log.info(problems.toString());
        }

        final long avgWhole = avgTimes100 / 100L;
        final long avgFraction = avgTimes100 % 100L;
        final String fractionPad = avgFraction < 10L ? "0" : "";
        this._log.info("#     tiers: " + stats.nTiers + "    annotations: " + stats.nAnnotations + "    average annotation length: " + avgWhole + "." + fractionPad + avgFraction);

        // Fold this record into the running totals.
        totalStats.nFiles += stats.nFiles;
        totalStats.nExist += stats.nExist;
        totalStats.nPermissionDenied += stats.nPermissionDenied;
        totalStats.nNotParsable += stats.nNotParsable;
        totalStats.nDBProblems += stats.nDBProblems;
        totalStats.nEmpty += stats.nEmpty;
        totalStats.nTiers += stats.nTiers;
        totalStats.nAnnotations += stats.nAnnotations;
        totalStats.annotationsSize += stats.annotationsSize;
    }

    /**
     * Takes a wall-clock reading to be handed to
     * {@link #stop_stopwatch(String, long)} later.
     *
     * @return the current value of {@link System#currentTimeMillis()}
     */
    private long start_stopwatch() {
        final long nowMillis = System.currentTimeMillis();
        return nowMillis;
    }

    /**
     * Logs, at INFO level, how long a task took as {@code h:mm:ss} together
     * with the current wall-clock date.
     *
     * @param what      description of the finished task, inserted into the log line
     * @param startTime millisecond timestamp as returned by {@link #start_stopwatch()}
     */
    private void stop_stopwatch(String what, long startTime) {
        final long millisPerSecond = 1000L;
        final long millisPerMinute = 60000L;
        final long millisPerHour = 3600000L;

        // 499 ms are added before the integer divisions below truncate.
        final long elapsed = System.currentTimeMillis() + 499L - startTime;

        final long hours = elapsed / millisPerHour;
        final long withinHour = elapsed % millisPerHour;
        final long minutes = withinHour / millisPerMinute;
        final long withinMinute = withinHour % millisPerMinute;
        final long seconds = withinMinute / millisPerSecond;

        // Minutes and seconds are zero-padded to two digits; hours are not.
        final String minutesText = (minutes < 10L ? "0" : "") + minutes;
        final String secondsText = (seconds < 10L ? "0" : "") + seconds;

        this._log.info("Done: " + what + ", duration: " + hours + ":" + minutesText + ":" + secondsText + " \tWall time: " + new Date());
    }

    /**
     * Prints the command-line help text to standard output, one
     * {@code println} call per entry of the table below.
     */
    private static void usage() {
        final String[] helpLines = {
            "\nFill searchdb/annex DB using all annotation files found in corpusstructure DB",
            "\nUsage:",
            "java -jar SearchDBIngester.jar corpusdb-server[:port] user password searchdb-server[:port][/searchdbname] user password\n",
            "Sample Usage:",
            "java -jar SearchDBIngester.jar lux08 webuser xxxx localhost albertr yyyy\n",
            "Simulation mode:",
            "use 'simulate' as the second (searchdb) user name to work without a searchdb",
            "JDBC URLs used: jdbc:postgresql://...",
            "CORPUSDBSERVER/corpusstructure SEARCHDBSERVER/annex (or .../SEARCHDBNAME)",
        };
        for (int i = 0; i < helpLines.length; ++i) {
            System.out.println(helpLines[i]);
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Command-line entry point: validates the six arguments, configures the
     * corpus-structure database through {@code AnnexUtil.setCorpusDB}, opens
     * the search database and runs {@code fillDatabase()}.
     *
     * <p>Expected arguments, in order: corpusdb server, corpusdb user,
     * corpusdb password, searchdb server (optionally {@code server/dbname}),
     * searchdb user, searchdb password.
     *
     * <p>NOTE(review): every argument error exits with status 0, and a failed
     * connection simply returns; confirm whether callers rely on that before
     * changing it.
     *
     * @param args the command-line arguments described above
     */
    public static void main(String[] args) {
        final String jdbcPrefix = "jdbc:postgresql://";

        if (args.length < 6) {
            SearchCorpusDB.usage();
            System.exit(0);
        }

        final String corpusServer = args[0];
        if (corpusServer.contains("/")) {
            System.out.println("The first argument must be only the corpusdb server name, no JDBC URL");
            SearchCorpusDB.usage();
            System.exit(0);
        }
        AnnexUtil.setCorpusDB(jdbcPrefix + corpusServer + "/corpusstructure", args[1], args[2]);

        final String searchTarget = args[3];
        if (searchTarget.contains("//")) {
            System.out.println("Must specify searchdb server name or 'server/dbname' but no full JDBC URL");
            SearchCorpusDB.usage();
            System.exit(0);
        }
        // A bare server name gets the default database "annex"; "server/dbname" is used as given.
        final String url = searchTarget.contains("/")
                ? jdbcPrefix + searchTarget
                : jdbcPrefix + searchTarget + "/annex";

        SearchCorpusDB ingester;
        try {
            ingester = new SearchCorpusDB(url, args[4], args[5]);
        }
        catch (SQLException sqle) {
            System.out.println("Cannot connect to database: " + sqle);
            SearchCorpusDB.usage();
            return;
        }

        try {
            ingester.fillDatabase();
        }
        finally {
            // Runs whether or not fillDatabase() completed normally.
            ingester.close();
        }
    }

    /**
     * Prepares a statement on this ingester's database connection.
     *
     * <p>NOTE(review): the connection field is only assigned when not in
     * simulation mode, so calling this while simulating presumably fails with
     * a {@link NullPointerException} - confirm callers guard against that.
     *
     * @param statement the SQL text to prepare
     * @return the statement returned by {@link Connection#prepareStatement(String)}
     * @throws SQLException if the connection rejects the statement
     */
    public PreparedStatement getPreparedStatement(String statement) throws SQLException {
        final Connection connection = this._con;
        return connection.prepareStatement(statement);
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Ingests the transcription file belonging to one node into the search
     * database. Does nothing in simulation mode.
     *
     * <p>The node must not already have rows in {@code search.tiers}; remove
     * it first with {@link #removeNode(String)}.
     *
     * @param nodeId identifier of the node to add
     * @throws SQLException if the node is already present, its file path is
     *                      unknown or the file does not exist, the
     *                      transcription is not valid, the ingest reports a
     *                      database problem, or a JDBC call fails
     */
    public void addNode(String nodeId) throws SQLException {
        if (this._simulate) {
            return;
        }
        PreparedStatement ps = null;
        ResultSet rs = null;
        try {
            ps = this.getPreparedStatement("SELECT node_id FROM search.tiers WHERE node_id = ?");
            ps.setString(1, nodeId);
            rs = ps.executeQuery();
            if (rs.next()) {
                throw new SQLException("Already in DB, remove before re-adding: " + nodeId);
            }
        }
        finally {
            // Close the result set before its statement, and nest the closes so
            // that a failure in the first still releases the second.
            try {
                if (rs != null) {
                    rs.close();
                }
            }
            finally {
                if (ps != null) {
                    ps.close();
                }
            }
        }
        String filePath = AnnexUtil.getFilePathFor(nodeId);
        // Guard against a missing path so the caller gets the declared
        // SQLException instead of a NullPointerException from new File(null).
        if (filePath == null) {
            throw new SQLException("File does not exist for nodeID: " + nodeId + " => " + filePath);
        }
        File file = new File(filePath);
        if (!file.exists()) {
            throw new SQLException("File does not exist for nodeID: " + nodeId + " => " + filePath);
        }
        int type = AnnexUtil.getTypeFor(nodeId);
        AnnexTranscription transcription = new AnnexTranscription(nodeId, type, file);
        if (!transcription.isValid()) {
            throw new SQLException("Invalid transcription for nodeID: " + nodeId + " => " + filePath);
        }
        // ingest(...) signals a database problem by returning null.
        Stats stats = this.ingest(transcription, true);
        if (stats == null) {
            throw new SQLException("Database problem for nodeID: " + nodeId + " => " + filePath);
        }
    }

    /**
     * Deletes everything stored for one node: first the rows of
     * {@code search.annotations} belonging to each of the node's tiers, then
     * the node's rows in {@code search.tiers}. Does nothing in simulation
     * mode.
     *
     * <p>The deletes run in a single transaction. On any failure the
     * transaction is rolled back and the original exception is propagated.
     * The connection's previous auto-commit setting is restored afterwards,
     * matching what the ingest code does.
     *
     * @param nodeId identifier of the node to remove
     * @throws SQLException if a query, delete, or commit fails
     */
    public void removeNode(String nodeId) throws SQLException {
        if (this._simulate) {
            return;
        }
        final boolean previousAutoCommit = this._con.getAutoCommit();
        this._con.setAutoCommit(false);
        PreparedStatement ps = null;
        // Must be a PreparedStatement: setInt(...) and the no-arg
        // executeUpdate() do not exist on plain Statement.
        PreparedStatement deleteAnnotations = null;
        ResultSet rs = null;
        boolean committed = false;
        try {
            ps = this.getPreparedStatement("SELECT tier_id FROM search.tiers WHERE node_id = ?");
            ps.setString(1, nodeId);
            rs = ps.executeQuery();
            deleteAnnotations = this.getPreparedStatement("DELETE FROM search.annotations WHERE ann_tier_id = ?");
            while (rs.next()) {
                int tierId = rs.getInt("tier_id");
                deleteAnnotations.setInt(1, tierId);
                deleteAnnotations.executeUpdate();
            }
            deleteAnnotations.close();
            deleteAnnotations = null;
            rs.close();
            rs = null;
            ps.close();
            ps = null;
            ps = this.getPreparedStatement("DELETE FROM search.tiers WHERE node_id = ?");
            ps.setString(1, nodeId);
            ps.executeUpdate();
            this._con.commit();
            committed = true;
        }
        finally {
            try {
                // Nested so that one failing close() cannot skip the others.
                try {
                    if (rs != null) {
                        rs.close();
                    }
                }
                finally {
                    try {
                        if (deleteAnnotations != null) {
                            deleteAnnotations.close();
                        }
                    }
                    finally {
                        if (ps != null) {
                            ps.close();
                        }
                    }
                }
            }
            finally {
                try {
                    // Reached without a commit only when an exception (checked
                    // or unchecked) is propagating: undo the partial deletes.
                    if (!committed) {
                        this._con.rollback();
                    }
                    // Restore auto-commit only after the transaction has ended;
                    // if rollback() threw, this line is skipped, because
                    // re-enabling auto-commit mid-transaction would commit it.
                    this._con.setAutoCommit(previousAutoCommit);
                }
                catch (SQLException cleanupFailure) {
                    // Log instead of throwing so the original failure is not masked.
                    this._log.error((Object)("removeNode: failed to roll back or restore auto-commit for " + nodeId + ": " + cleanupFailure), (Throwable)cleanupFailure);
                }
            }
        }
    }

    static {
        try {
            Class.forName("org.postgresql.Driver");
        }
        catch (ClassNotFoundException e) {
            System.out.println("org.postgresql.Driver not found - provide JARs, see Class-Path header if running via -jar");
            Logger.getLogger((String)"SearchCorpusDB").error((Object)"org.postgresql.Driver not found - provide JARs, see Class-Path header if running via -jar");
        }
    }

    /**
     * Mutable bundle of ingest counters. All fields start at zero; they are
     * reported and summed into a running total by {@code printStats}.
     */
    private static class Stats {
        // --- file-level counters ---
        /** Reported as "files". */
        public int nFiles;
        /** Reported as "existing". */
        public int nExist;

        // --- failure categories; printStats subtracts these from nExist to get "ingested" ---
        /** Reported as "not accessible". */
        public int nPermissionDenied;
        /** Reported as "empty". */
        public int nEmpty;
        /** Reported as "not parsable". */
        public int nNotParsable;
        /** Reported as "database problem". */
        public int nDBProblems;

        // --- content counters ---
        /** Reported as "tiers". */
        public int nTiers;
        /** Reported as "annotations". */
        public long nAnnotations;
        /**
         * Summed annotation size; divided by {@link #nAnnotations} to report
         * the average annotation length. Unit (chars vs. bytes) is not visible
         * here - TODO confirm.
         */
        public long annotationsSize;

        /** Private: intended to be created by the enclosing class only. */
        private Stats() {}
    }
}

