/*
* Copyright 2009-2011 Collaborative Research Centre SFB 632
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package annis.administration;
import annis.dao.autogenqueries.QueriesGenerator;
import annis.examplequeries.ExampleQuery;
import annis.exceptions.AnnisException;
import annis.model.QueryNode;
import annis.ql.parser.QueryData;
import annis.security.UserConfig;
import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.google.common.io.Files;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.sql.DataSource;
import org.apache.commons.dbcp2.BasicDataSource;
import org.apache.commons.dbcp2.DelegatingConnection;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.filefilter.DirectoryFileFilter;
import org.apache.commons.io.filefilter.FileFileFilter;
import org.apache.commons.lang3.StringUtils;
import org.codehaus.jackson.map.AnnotationIntrospector;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.SerializationConfig;
import org.codehaus.jackson.xc.JaxbAnnotationIntrospector;
import org.postgresql.PGConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;
import org.springframework.core.io.WritableResource;
import org.springframework.dao.DataAccessException;
import org.springframework.jdbc.core.ResultSetExtractor;
import org.springframework.jdbc.core.RowMapper;
import org.springframework.jdbc.core.namedparam.MapSqlParameterSource;
import org.springframework.jdbc.core.simple.ParameterizedSingleColumnRowMapper;
import org.springframework.jdbc.datasource.DataSourceUtils;
import org.springframework.transaction.annotation.Isolation;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
/**
 * Administrative tasks for the ANNIS PostgreSQL database, e.g. initializing
 * the database schema and importing corpora in the ANNIS import format.
 */
public class AdministrationDao extends AbstractAdminstrationDao
{
private static final Logger log = LoggerFactory.getLogger(
AdministrationDao.class);
// if this is true, the staging area is created from temporary tables and dropped after the import
private boolean temporaryStagingArea;
private DeleteCorpusDao deleteCorpusDao;
private boolean hackDistinctLeftRightToken;
/**
 * Searches for texts which are empty or contain only whitespace. If such a
 * text is found and no document browser configuration is defined for the
 * corpus, a configuration which disables document browsing is stored in the
 * corpus properties file.
 *
 * @param toplevelCorpusName The name of the corpus whose texts are analyzed.
 */
private void analyzeTextTable(String toplevelCorpusName)
{
List<String> rawTexts = getQueryDao().getRawText(toplevelCorpusName);
// pattern for detecting texts which contain only whitespace
final Pattern WHITESPACE_MATCHER = Pattern.compile("^\\s+$");
for (String s : rawTexts)
{
if (s != null && WHITESPACE_MATCHER.matcher(s).matches())
{
// deactivate document browsing if no document browser configuration exists
if (getQueryDao().getDocBrowserConfiguration(toplevelCorpusName) == null)
{
// should exist anyway
Properties corpusConf;
try
{
corpusConf = getQueryDao().
getCorpusConfiguration(toplevelCorpusName);
}
catch (FileNotFoundException ex)
{
log.error(
"not found a corpus configuration, so skip analyzing the text table",
ex);
return;
}
// disable document browsing if it has not been explicitly switched on by
// the user in the corpus.properties
boolean hasKey = corpusConf.containsKey("browse-documents");
boolean isActive = Boolean.parseBoolean(corpusConf.getProperty(
"browse-documents"));
if (!(hasKey && isActive))
{
log.info("disable document browser");
corpusConf.put("browse-documents", "false");
getQueryDao().setCorpusConfiguration(toplevelCorpusName, corpusConf);
}
// once disabled don't search in further texts
return;
}
}
}
}
public ImportStatus initImportStatus()
{
return new CorpusAdministration.ImportStatsImpl();
}
public enum EXAMPLE_QUERIES_CONFIG
{
IF_MISSING, TRUE, FALSE
}
/**
 * Controls whether example queries are generated automatically when no
 * example_queries.tab file is found.
 */
private EXAMPLE_QUERIES_CONFIG generateExampleQueries;
private String schemaVersion;
/**
* A mapping for file-endings to mime types.
*/
private Map<String, String> mimeTypeMapping;
private Map<String, String> tableInsertSelect;
private Map<String, String> tableInsertFrom;
// all files have to carry this suffix.
private String annisFileSuffix = ".annis";
/**
 * Optional tab file for example queries. If this file does not exist, a dummy
 * file from the resource folder is used.
 */
private static final String EXAMPLE_QUERIES_TAB = "example_queries";
/**
* The name of the file and the relation containing the resolver information.
*/
private static final String FILE_RESOLVER_VIS_MAP = "resolver_vis_map";
// tables imported from bulk files
// DO NOT CHANGE THE ORDER OF THIS LIST! Doing so may cause foreign key failures during import.
private final String[] importedTables =
{
"corpus", "corpus_annotation",
"text", "node", "node_annotation",
"component", "rank", "edge_annotation",
FILE_RESOLVER_VIS_MAP, EXAMPLE_QUERIES_TAB
};
private final String[] tablesToCopyManually =
{
"corpus", "corpus_annotation",
"text",
FILE_RESOLVER_VIS_MAP,
EXAMPLE_QUERIES_TAB,
"corpus_stats",
"media_files"
};
// tables created during import
private final String[] createdTables =
{
"corpus_stats",
"media_files",
"nodeidmapping"
};
private final ObjectMapper jsonMapper = new ObjectMapper();
private QueriesGenerator queriesGenerator;
/**
* Called when Spring configuration finished
*/
public void init()
{
AnnotationIntrospector introspector = new JaxbAnnotationIntrospector();
jsonMapper.setAnnotationIntrospector(introspector);
// the json should be as compact as possible in the database
jsonMapper.configure(SerializationConfig.Feature.INDENT_OUTPUT,
false);
}
///// Subtasks of creating the database
protected void dropDatabase(String database)
{
log.debug("dropping possible existing database");
closeAllConnections(database);
getJdbcTemplate().execute("DROP DATABASE IF EXISTS " + database);
}
protected void dropUser(String username)
{
log.debug("dropping possible existing user");
getJdbcTemplate().execute("DROP USER IF EXISTS " + username);
}
protected void createUser(String username, String password)
{
log.info("creating user: " + username);
getJdbcTemplate().execute("CREATE USER " + username + " PASSWORD '"
+ password
+ "'");
}
protected void createDatabase(String database, String owner)
{
log.info("creating database: " + database
+ " OWNER " + owner + " ENCODING = 'UTF8'");
getJdbcTemplate().execute("CREATE DATABASE " + database
+ " OWNER " + owner + " ENCODING = 'UTF8'");
}
protected void installPlPgSql()
{
log.info("installing stored procedure language plpgsql");
try
{
getJdbcTemplate().execute("CREATE LANGUAGE plpgsql");
}
catch (Exception ex)
{
log.warn("plpqsql was already installed: " + ex.getMessage());
}
}
protected void createFunctionUniqueToplevelCorpusName()
{
log.info("creating trigger function: unique_toplevel_corpus_name");
executeSqlFromScript("unique_toplevel_corpus_name.sql");
}
protected void createSchema()
{
log.info("creating ANNIS database schema (" + getSchemaVersion() + ")");
executeSqlFromScript("schema.sql");
// update schema version
getJdbcTemplate().execute(
"DELETE FROM repository_metadata WHERE \"name\"='schema-version'");
getJdbcTemplate().execute("INSERT INTO repository_metadata "
+ "VALUES ('schema-version', '"
+ StringUtils.replace(getSchemaVersion(), "'", "''") + "');");
}
protected void createSchemaIndexes()
{
log.info(
"creating ANNIS database schema indexes (" + getDatabaseSchemaVersion()
+ ")");
executeSqlFromScript("schemaindex.sql");
}
protected void populateSchema()
{
log.info("populating the schemas with default values");
bulkloadTableFromResource("resolver_vis_map",
new FileSystemResource(new File(getScriptPath(),
FILE_RESOLVER_VIS_MAP + annisFileSuffix)));
// update the sequence
executeSqlFromScript("update_resolver_sequence.sql");
log.info(
"creating immutable functions for extracting annotations");
executeSqlFromScript("functions_get.sql");
}
/**
 * Get the real schema name and version as used by the database.
 *
 * @return The schema version from the database or an empty string if it could
 * not be determined.
 */
@Transactional(readOnly = true, propagation = Propagation.REQUIRED)
public String getDatabaseSchemaVersion()
{
try
{
List<Map<String, Object>> result = getJdbcTemplate().
queryForList(
"SELECT \"value\" FROM repository_metadata WHERE \"name\"='schema-version'");
String schema
= result.size() > 0 ? (String) result.get(0).get("value") : "";
return schema;
}
catch (DataAccessException ex)
{
String error = "Wrong database schema (too old to get the exact number), "
+ "please initialize the database.";
log.error(error);
}
return "";
}
public boolean checkDatabaseSchemaVersion() throws AnnisException
{
String dbSchemaVersion = getDatabaseSchemaVersion();
if (getSchemaVersion() != null && !getSchemaVersion().equalsIgnoreCase(
dbSchemaVersion))
{
String error = "Wrong database schema \"" + dbSchemaVersion
+ "\", please initialize the database.";
log.error(error);
throw new AnnisException(error);
}
return true;
}
public void initializeDatabase(String host, String port, String database,
String user, String password, String defaultDatabase, String superUser,
String superPassword, boolean useSSL, String pgSchema)
{
// connect as super user to the default database to create new user and database
if (superPassword != null)
{
log.info("Creating Annis database and user.");
getDataSource().setInnerDataSource(createDataSource(host, port,
defaultDatabase, superUser, superPassword, useSSL, pgSchema));
createDatabaseAndUser(database, user, password);
}
// switch to new database as new user for the rest of the initialization procedure
getDataSource().setInnerDataSource(createDataSource(host, port, database,
user, password, useSSL, pgSchema));
//
if (pgSchema != null && !"public".equalsIgnoreCase(pgSchema))
{
pgSchema = pgSchema.toLowerCase().replaceAll("[^a-z0-9]", "_");
log.info("creating PostgreSQL schema {}", pgSchema);
// we have to create a schema before we can use it
try
{
getJdbcTemplate().execute("CREATE SCHEMA " + pgSchema + ";");
}
catch (DataAccessException ex)
{
// ignore if the schema already exists
log.info("schema " + pgSchema + " already exists");
}
}
setupDatabase();
}
private void createDatabaseAndUser(String database,
String user, String password)
{
dropDatabase(database);
dropUser(user);
createUser(user, password);
createDatabase(database, user);
installPlPgSql();
}
private void setupDatabase()
{
createFunctionUniqueToplevelCorpusName();
createSchema();
createSchemaIndexes();
populateSchema();
}
private BasicDataSource createDataSource(File dbProperties)
throws IOException, URISyntaxException
{
BasicDataSource result;
Properties props = new Properties();
try (InputStream is = new FileInputStream(dbProperties))
{
props.load(is);
String rawJdbcURL = props.getProperty("datasource.url").trim();
rawJdbcURL = StringUtils.removeStart(rawJdbcURL, "jdbc:");
URI jdbcURL = new URI(rawJdbcURL);
result = createDataSource(
jdbcURL.getHost(),
"" + jdbcURL.getPort(),
jdbcURL.getPath().substring(1), // remove the "/" at the beginning
props.getProperty("datasource.username"),
props.getProperty("datasource.password"),
"true".equalsIgnoreCase(props.getProperty("datasource.ssl")),
props.getProperty("datasource.schema"));
}
return result;
}
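/* A minimal sketch of the "database.properties" file parsed above; the keys
   are the ones read by this method, the values are hypothetical:

     datasource.url=jdbc:postgresql://localhost:5432/annis
     datasource.username=annis
     datasource.password=secret
     datasource.ssl=false
     datasource.schema=public
*/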
private BasicDataSource createDataSource(String host, String port,
String database,
String user, String password, boolean useSSL,
String schema)
{
String url = "jdbc:postgresql://" + host + ":" + port + "/" + database;
// DriverManagerDataSource is deprecated
// return new DriverManagerDataSource("org.postgresql.Driver", url, user, password);
BasicDataSource result = new BasicDataSource();
result.setUrl(url);
if (useSSL)
{
result.setConnectionProperties("ssl=true");
}
result.setUsername(user);
result.setPassword(password);
result.setValidationQuery("SELECT 1;");
result.setAccessToUnderlyingConnectionAllowed(true);
if (schema == null)
{
schema = "public";
}
result.setConnectionInitSqls(Arrays.asList(
"SET search_path TO \"$user\"," + schema));
result.setDriverClassName("org.postgresql.Driver");
return result;
}
/**
* Reads the ANNIS files from a corpus directory and imports the corpus.
*
* @param path Specifies the path to the corpus which should be imported.
* @param aliasName An alias name for this corpus. Can be null.
* @param overwrite If set to true conflicting top level corpora are deleted.
* @param waitForOtherTasks If true wait for other tasks to finish, if false
* abort.
*
* @return true if successful
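*
* <p>
* A minimal usage sketch (how the DAO instance is obtained and the corpus
* path are hypothetical, not part of this class):</p>
*
* <pre>
* AdministrationDao admin = ...; // usually wired via Spring
* boolean ok = admin.importCorpus("/corpora/mycorpus", null, false, true);
* </pre>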
*/
@Transactional(readOnly = false, propagation = Propagation.REQUIRES_NEW,
isolation = Isolation.READ_COMMITTED)
public boolean importCorpus(String path,
String aliasName,
boolean overwrite,
boolean waitForOtherTasks)
{
// check schema version first
checkDatabaseSchemaVersion();
if (!lockRepositoryMetadataTable(waitForOtherTasks))
{
log.error("Another import is currently running");
return false;
}
// explicitly unset any timeout
getJdbcTemplate().update("SET statement_timeout TO 0");
ANNISFormatVersion annisFormatVersion = getANNISFormatVersion(path);
if (annisFormatVersion == ANNISFormatVersion.V3_3)
{
return importVersion4(path, aliasName, overwrite, annisFormatVersion);
}
else if (annisFormatVersion == ANNISFormatVersion.V3_1 || annisFormatVersion
== ANNISFormatVersion.V3_2)
{
return importVersion3(path, aliasName, overwrite, annisFormatVersion);
}
log.error("Unknown ANNIS import format version");
return false;
}
private boolean importVersion4(String path, String aliasName,
boolean overwrite,
ANNISFormatVersion version)
{
this.annisFileSuffix = ".annis";
createStagingAreaV33(temporaryStagingArea);
bulkImport(path, version);
String toplevelCorpusName = getTopLevelCorpusFromTmpArea();
// remove conflicting top level corpora when overwrite is set to true.
if (overwrite)
{
deleteCorpusDao.checkAndRemoveTopLevelCorpus(toplevelCorpusName);
}
else
{
checkTopLevelCorpus();
}
applyConstraints();
createStagingAreaIndexes(version);
fixResolverVisMapTable(toplevelCorpusName, tableInStagingArea(
FILE_RESOLVER_VIS_MAP));
analyzeStagingTables();
addDocumentNameMetaData();
Offsets offsets = calculateOffsets();
long corpusID = getNewToplevelCorpusID(offsets);
createNodeIdMapping();
importBinaryData(path, toplevelCorpusName);
extendStagingText(corpusID);
extendStagingExampleQueries(corpusID);
analyzeAutoGeneratedQueries(corpusID);
computeCorpusStatistics(path);
analyzeStagingTables();
insertCorpus(corpusID, offsets);
computeCorpusPath(corpusID);
createAnnotations(corpusID);
createAnnoCategory(corpusID);
// create the new facts table partition
createFacts(corpusID, version, offsets);
if(hackDistinctLeftRightToken)
{
adjustDistinctLeftRightToken(corpusID);
}
if (temporaryStagingArea)
{
dropStagingArea();
}
// create empty corpus properties file
if (getQueryDao().getCorpusConfigurationSave(toplevelCorpusName) == null)
{
log.info("creating new corpus.properties file");
getQueryDao().setCorpusConfiguration(toplevelCorpusName, new Properties());
}
analyzeFacts(corpusID);
analyzeTextTable(toplevelCorpusName);
generateExampleQueries(corpusID);
if (aliasName != null && !aliasName.isEmpty())
{
addCorpusAlias(corpusID, aliasName);
}
return true;
}
private boolean importVersion3(String path, String aliasName,
boolean overwrite,
ANNISFormatVersion version)
{
this.annisFileSuffix = ".tab";
createStagingAreaV32(temporaryStagingArea);
bulkImport(path, version);
String toplevelCorpusName = getTopLevelCorpusFromTmpArea();
// remove conflicting top level corpora when overwrite is set to true.
if (overwrite)
{
deleteCorpusDao.checkAndRemoveTopLevelCorpus(toplevelCorpusName);
}
else
{
checkTopLevelCorpus();
}
createStagingAreaIndexes(version);
fixResolverVisMapTable(toplevelCorpusName, tableInStagingArea(
FILE_RESOLVER_VIS_MAP));
computeTopLevelCorpus();
analyzeStagingTables();
computeLeftTokenRightToken();
removeUnecessarySpanningRelations();
addUniqueNodeNameAppendix();
adjustRankPrePost();
adjustTextId();
addDocumentNameMetaData();
Offsets offsets = calculateOffsets();
long corpusID = getNewToplevelCorpusID(offsets);
createNodeIdMapping();
importBinaryData(path, toplevelCorpusName);
extendStagingText(corpusID);
extendStagingExampleQueries(corpusID);
analyzeAutoGeneratedQueries(corpusID);
computeRealRoot();
computeLevel();
computeCorpusStatistics(path);
computeSpanFromSegmentation();
applyConstraints();
analyzeStagingTables();
insertCorpus(corpusID, offsets);
computeCorpusPath(corpusID);
createAnnotations(corpusID);
createAnnoCategory(corpusID);
// create the new facts table partition
createFacts(corpusID, version, offsets);
if(hackDistinctLeftRightToken)
{
adjustDistinctLeftRightToken(corpusID);
}
if (temporaryStagingArea)
{
dropStagingArea();
}
// create empty corpus properties file
if (getQueryDao().getCorpusConfigurationSave(toplevelCorpusName) == null)
{
log.info("creating new corpus.properties file");
getQueryDao().setCorpusConfiguration(toplevelCorpusName, new Properties());
}
analyzeFacts(corpusID);
analyzeTextTable(toplevelCorpusName);
generateExampleQueries(corpusID);
if (aliasName != null && !aliasName.isEmpty())
{
addCorpusAlias(corpusID, aliasName);
}
return true;
}
///// Subtasks of importing a corpus
protected void dropIndexes()
{
log.info("dropping indexes");
for (String index : listIndexesOnTables(allTables()))
{
log.debug("dropping index: " + index);
getJdbcTemplate().execute("DROP INDEX " + index);
}
}
void createStagingAreaV33(boolean useTemporary)
{
log.info("creating staging area for import format version 3.3");
MapSqlParameterSource args = makeArgs().addValue(":tmp", useTemporary
? "TEMPORARY" : "UNLOGGED");
executeSqlFromScript("staging_area.sql", args);
}
void createStagingAreaV32(boolean useTemporary)
{
log.info("creating staging area for import format version 3.1/3.2");
MapSqlParameterSource args = makeArgs().addValue(":tmp", useTemporary
? "TEMPORARY" : "UNLOGGED");
executeSqlFromScript("staging_area_v32.sql", args);
}
/**
 * Reads the tab-separated bulk files from the filesystem, taking only files
 * with the current ANNIS file suffix into account. The import is
 * straightforward except for {@link #FILE_RESOLVER_VIS_MAP} and
 * {@link #EXAMPLE_QUERIES_TAB}, which this method handles specially:
 *
 * <ul>
 *
 * <li>{@link #FILE_RESOLVER_VIS_MAP}: For backwards compatibility the columns
 * must be counted, since newer files contain one additional column for the
 * visibility behaviour of visualizers.</li>
 *
 * <li>{@link #EXAMPLE_QUERIES_TAB}: Takes into account the state of
 * {@link #generateExampleQueries}.</li>
 *
 * </ul>
 *
 * @param path The path to the directory containing the ANNIS files.
 * @param version The detected ANNIS import format version.
 */
void bulkImport(String path, ANNISFormatVersion version)
{
log.info("bulk-loading data");
for (String table : importedTables)
{
if (table.equalsIgnoreCase(FILE_RESOLVER_VIS_MAP))
{
importResolverVisMapTable(path, table);
}
// check if the example queries file exists; if not, queries may be generated later.
else if (table.equalsIgnoreCase(EXAMPLE_QUERIES_TAB))
{
File f = new File(path, table + annisFileSuffix);
if (f.exists())
{
log.info(table + annisFileSuffix + " file exists");
bulkloadTableFromResource(tableInStagingArea(table),
new FileSystemResource(f));
if (generateExampleQueries == (EXAMPLE_QUERIES_CONFIG.IF_MISSING))
{
generateExampleQueries = EXAMPLE_QUERIES_CONFIG.FALSE;
}
}
else
{
if (generateExampleQueries == EXAMPLE_QUERIES_CONFIG.IF_MISSING)
{
generateExampleQueries = EXAMPLE_QUERIES_CONFIG.TRUE;
}
log.info(table + annisFileSuffix + " file not found");
}
}
else if (table.equalsIgnoreCase("node"))
{
bulkImportNode(path, version);
}
else
{
bulkloadTableFromResource(tableInStagingArea(table),
new FileSystemResource(new File(path, table + annisFileSuffix)));
}
}
}
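/* For reference, a corpus directory is expected to contain one bulk file per
   entry of the "importedTables" array, e.g. for the 3.3 format:

     corpus.annis, corpus_annotation.annis, text.annis, node.annis,
     node_annotation.annis, component.annis, rank.annis,
     edge_annotation.annis, resolver_vis_map.annis, example_queries.annis

   (the 3.1/3.2 formats use the same names with a ".tab" suffix). */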
private void bulkImportNode(String path, ANNISFormatVersion version)
{
// check column number by reading first line
File nodeTabFile = new File(path, "node" + annisFileSuffix);
try (BufferedReader reader
= new BufferedReader(new InputStreamReader(
new FileInputStream(nodeTabFile), "UTF-8"));)
{
String firstLine = reader.readLine();
int columnNumber = firstLine == null ? 13
: StringUtils.splitPreserveAllTokens(firstLine, '\t').length;
if (version == ANNISFormatVersion.V3_3 || version
== ANNISFormatVersion.V3_2)
{
// new node table with segmentations
// no special handling needed
bulkloadTableFromResource(tableInStagingArea("node"),
new FileSystemResource(nodeTabFile));
}
else if (version == ANNISFormatVersion.V3_1)
{
getJdbcTemplate().execute("DROP TABLE IF EXISTS _tmpnode;");
// old node table without segmentations
// create temporary table for bulk import
getJdbcTemplate().execute(
"CREATE TEMPORARY TABLE _tmpnode"
+ "\n(\n"
+ "id bigint,\n"
+ "text_ref integer,\n"
+ "corpus_ref integer,\n"
+ "namespace varchar,\n"
+ "name varchar,\n"
+ "\"left\" integer,\n"
+ "\"right\" integer,\n"
+ "token_index integer,\n"
+ "continuous boolean,\n"
+ "span varchar\n"
+ ");");
bulkloadTableFromResource("_tmpnode",
new FileSystemResource(nodeTabFile));
log.info("copying nodes from temporary helper table into staging area");
getJdbcTemplate().execute(
"INSERT INTO " + tableInStagingArea("node") + "\n"
+ " SELECT id, text_ref, corpus_ref, namespace AS layer, name, \"left\", "
+ "\"right\", token_index, "
+ "NULL AS seg_name, NULL AS seg_left, NULL AS seg_left, continuous, "
+ "span\n"
+ "FROM _tmpnode");
}
else
{
throw new RuntimeException("Illegal number of columns in node"
+ annisFileSuffix + ", "
+ "should be 13 or 10 but was " + columnNumber);
}
}
catch (IOException ex)
{
log.error(null, ex);
}
}
void createStagingAreaIndexes(ANNISFormatVersion version)
{
log.info("creating indexes for staging area");
if (version == ANNISFormatVersion.V3_3)
{
executeSqlFromScript("indexes_staging_v33.sql");
}
else
{
executeSqlFromScript("indexes_staging_v32.sql");
}
}
void computeTopLevelCorpus()
{
log.info("computing top-level corpus");
executeSqlFromScript("toplevel_corpus.sql");
}
void importBinaryData(String path, String toplevelCorpusName)
{
log.info("importing all binary data from ExtData");
File extData = new File(path + "/ExtData");
if (extData.canRead() && extData.isDirectory())
{
// import toplevel corpus media files
File[] topFiles = extData.listFiles((FileFilter) FileFileFilter.FILE);
for (File data : topFiles)
{
String extension = FilenameUtils.getExtension(data.getName());
try
{
if (mimeTypeMapping.containsKey(extension))
{
log.info("import " + data.getCanonicalPath() + " to staging area");
// search for corpus_ref
String sqlScript
= "SELECT id FROM _corpus WHERE top_level IS TRUE LIMIT 1";
long corpusID = getJdbcTemplate().queryForLong(sqlScript);
importSingleFile(data.getCanonicalPath(), toplevelCorpusName,
corpusID);
}
else
{
log.warn(
"not importing " + data.getCanonicalPath()
+ " since file type is unknown");
}
}
catch (IOException ex)
{
log.error("no canonical path given", ex);
}
}
// get each subdirectory (which corresponds to a document name)
File[] documents = extData.listFiles(
(FileFilter) DirectoryFileFilter.DIRECTORY);
for (File doc : documents)
{
if (doc.isDirectory() && doc.canRead())
{
File[] dataFiles = doc.listFiles((FileFilter) FileFileFilter.FILE);
for (File data : dataFiles)
{
String extension = FilenameUtils.getExtension(data.getName());
try
{
if (mimeTypeMapping.containsKey(extension))
{
log.info(
"import " + data.getCanonicalPath() + " to staging area");
// search for corpus_ref
String sqlScript
= "SELECT id FROM _corpus WHERE \"name\" = ? LIMIT 1";
long corpusID = getJdbcTemplate().queryForLong(sqlScript, doc.
getName());
importSingleFile(data.getCanonicalPath(), toplevelCorpusName,
corpusID);
}
else
{
log.
warn(
"not importing " + data.getCanonicalPath()
+ " since file type is unknown");
}
}
catch (IOException ex)
{
log.error("no canonical path given", ex);
}
}
}
}
}
}
/**
 * Imports a single binary file.
 *
 * @param file Specifies the file to be imported.
 * @param toplevelCorpusName The name of the toplevel corpus.
 * @param corpusRef The ID of the corpus the file is assigned to.
 */
private void importSingleFile(String file, String toplevelCorpusName,
long corpusRef)
{
BinaryImportHelper preStat = new BinaryImportHelper(file, getRealDataDir(),
toplevelCorpusName,
corpusRef, mimeTypeMapping);
getJdbcTemplate().execute(BinaryImportHelper.SQL, preStat);
}
/**
* Updates the example queries table in the staging area. The final toplevel
* corpus must already be computed.
*
* @param toplevelID The final top level corpus id.
*
*/
void extendStagingExampleQueries(long toplevelID)
{
log.info("extending _example_queries");
executeSqlFromScript("extend_staging_example_queries.sql",
makeArgs().addValue(":id", toplevelID));
}
void extendStagingText(long toplevelID)
{
log.info("extending _text");
executeSqlFromScript("extend_staging_text.sql", makeArgs().addValue(":id",
toplevelID));
}
void computeLeftTokenRightToken()
{
log.info("computing values for struct.left_token and struct.right_token");
executeSqlFromScript("left_token_right_token.sql");
// re-analyze node since we added new columns
log.info("analyzing node");
getJdbcTemplate().execute("ANALYZE " + tableInStagingArea("node"));
}
void computeRealRoot()
{
log.info("computing real root for rank");
executeSqlFromScript("root.sql");
}
void computeLevel()
{
log.info(
"computing values for rank.level (dominance and pointing relations)");
executeSqlFromScript("level.sql");
log.info("computing values for rank.level (coverage)");
executeSqlFromScript("level_coverage.sql");
}
void computeCorpusStatistics(String path)
{
File f = new File(path);
String absolutePath = path;
try
{
absolutePath = f.getCanonicalPath();
}
catch (IOException ex)
{
log.error(
"Something went really wrong when calculating the canonical path", ex);
}
log.info("computing statistics for top-level corpus");
MapSqlParameterSource args = makeArgs().addValue(":path", absolutePath);
executeSqlFromScript("corpus_stats.sql", args);
}
void computeCorpusPath(long corpusID)
{
MapSqlParameterSource args = makeArgs().addValue(":id", corpusID);
log.info("computing path information of the corpus tree for corpus with ID "
+ corpusID);
executeSqlFromScript("compute_corpus_path.sql", args);
}
void addDocumentNameMetaData()
{
log.info("add the document name as metadata");
executeSqlFromScript("adddocmetadata.sql");
}
protected void adjustRankPrePost()
{
log.info("updating pre and post order in _rank");
executeSqlFromScript("adjustrankprepost.sql");
log.info("analyzing _rank");
getJdbcTemplate().execute("ANALYZE " + tableInStagingArea("rank"));
}
protected void adjustTextId()
{
log.info("updating id in _text and text_ref in _node");
executeSqlFromScript("adjusttextid.sql");
log.info("analyzing _node and _text");
getJdbcTemplate().execute("ANALYZE " + tableInStagingArea("text"));
getJdbcTemplate().execute("ANALYZE " + tableInStagingArea("node"));
}
protected void addUniqueNodeNameAppendix()
{
// first check if this is actually necessary
log.info("check if node names are unique");
getJdbcTemplate().execute(
"ALTER TABLE _node ADD COLUMN unique_name_appendix varchar;");
List<Integer> checkDuplicate = getJdbcTemplate().queryForList(
"SELECT COUNT(*) from _node GROUP BY \"name\", corpus_ref HAVING COUNT(*) > 1 LIMIT 1",
Integer.class);
if (checkDuplicate.isEmpty())
{
log.info("node names are unique, no update necessary");
}
else
{
log.info("add an unique node name appendix");
executeSqlFromScript("unique_node_name_appendix.sql");
}
}
/**
 * Creates a mapping from the node IDs of the staging area to properly sorted
 * global node IDs.
 */
void createNodeIdMapping()
{
log.info("creating node ID mapping (for properly sorted IDs)");
executeSqlFromScript("node_id_mapping.sql");
}
/*
private long getRecentCorpusID()
{
int numOfEntries = getJdbcTemplate().queryForObject(
"SELECT COUNT(*) from corpus_stats", Integer.class);
long recentCorpusId = 0;
if (numOfEntries > 0)
{
recentCorpusId = getJdbcTemplate().queryForObject(
"SELECT max(id) FROM corpus_stats", Long.class);
log.debug("the id from recently imported corpus: {}", recentCorpusId);
}
return recentCorpusId;
}
*/
long getNewToplevelCorpusID(Offsets offsets)
{
log.info("query for the new corpus ID");
long maxLocalID = getJdbcTemplate().queryForObject(
"SELECT MAX(id) FROM _corpus WHERE top_level IS TRUE", Long.class);
long result = maxLocalID + offsets.getCorpusID();
log.info("new corpus ID is " + result);
return result;
}
void computeSpanFromSegmentation()
{
log.info("computing span value for segmentation nodes");
executeSqlFromScript("span_from_segmentation.sql");
}
void applyConstraints()
{
log.info("activating relational constraints");
executeSqlFromScript("constraints.sql");
}
private Offsets calculateOffsets()
{
log.info("querying ID offsets");
long offsetCorpusID = getJdbcTemplate().queryForObject(
"SELECT COALESCE((SELECT max(max_corpus_id)+1 FROM corpus_stats),0)",
Long.class);
long offsetCorpusPost = getJdbcTemplate().queryForObject(
"SELECT COALESCE((SELECT max(max_corpus_post)+1 FROM corpus_stats),0)",
Long.class);
long offsetNodeID = getJdbcTemplate().queryForObject(
"SELECT COALESCE((SELECT max(max_node_id)+1 FROM corpus_stats),0)",
Long.class);
return new Offsets(offsetCorpusID, offsetCorpusPost, offsetNodeID);
}
void insertCorpus(long corpusID, Offsets offsets)
{
MapSqlParameterSource args = offsets.makeArgs()
.addValue(":id", corpusID);
log.info("moving corpus from staging area to main db");
for (String table : tablesToCopyManually)
{
log.debug("moving table \"{}\" from staging area to main db", table);
StringBuilder sql = new StringBuilder();
String predefinedFrom
= tableInsertFrom == null ? null : tableInsertFrom.get(table);
String predefinedSelect
= tableInsertSelect == null ? null : tableInsertSelect.get(table);
if (predefinedFrom != null || predefinedSelect != null)
{
if (predefinedFrom == null)
{
predefinedFrom = predefinedSelect;
}
sql.append("INSERT INTO ");
sql.append(table);
sql.append(" ( ");
sql.append(predefinedSelect);
sql.append(" ) (SELECT ");
sql.append(predefinedFrom);
sql.append(" FROM ");
sql.append(tableInStagingArea(table)).append(")");
}
else
{
sql.append("INSERT INTO ");
sql.append(table);
sql.append(" (SELECT * FROM ");
sql.append(tableInStagingArea(table)).append(")");
}
executeSql(sql.toString(), args);
}
}
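/* For illustration, the generated statements have one of two shapes
   (hypothetical column list "id, name"):

     -- with a predefined select/from mapping for the table:
     INSERT INTO corpus ( id, name ) (SELECT id, name FROM _corpus)

     -- without a mapping:
     INSERT INTO corpus (SELECT * FROM _corpus)
*/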
void dropStagingArea()
{
log.info("dropping staging area");
// tables must be dropped in reverse order
List<String> tables = importedAndCreatedTables();
Collections.reverse(tables);
for (String table : tables)
{
getJdbcTemplate().execute("DROP TABLE " + tableInStagingArea(table));
}
}
void dropMaterializedTables()
{
log.info("dropping materialized tables");
getJdbcTemplate().execute("DROP TABLE facts");
}
void analyzeStagingTables()
{
log.info("analyzing staging area");
for (String t : importedTables)
{
log.debug("analyzing " + t);
getJdbcTemplate().execute("ANALYZE " + tableInStagingArea(t));
}
}
void createAnnotations(long corpusID)
{
MapSqlParameterSource args = makeArgs().addValue(":id", corpusID);
log.info("creating annotations table for corpus with ID " + corpusID);
executeSqlFromScript("annotations.sql", args);
log.info("indexing annotations table for corpus with ID " + corpusID);
executeSqlFromScript("indexes_annotations.sql", args);
}
void createAnnoCategory(long corpusID)
{
MapSqlParameterSource args = makeArgs().addValue(":id", corpusID);
log.
info("creating annotation category table for corpus with ID " + corpusID);
executeSqlFromScript("annotation_category.sql", args);
}
void analyzeFacts(long corpusID)
{
log.info("analyzing facts table for corpus with ID " + corpusID);
getJdbcTemplate().execute("ANALYZE facts_" + corpusID);
}
/**
* Update the statistics for the "facts" table as a whole.
*/
@Transactional(propagation = Propagation.REQUIRED)
public void analyzeParentFacts()
{
log.info("analyzing parent facts table");
// explicitly unset any timeout. Since this function might be called independently
// of the import process we have to unset it manually here.
getJdbcTemplate().update("SET statement_timeout TO 0");
getJdbcTemplate().execute("ANALYZE facts");
}
void adjustDistinctLeftRightToken(long corpusID)
{
/* HACK:
adjust the left/right_token value to the average maximal left/right_token
value per corpus/text on import to enhance the planner selectivity estimations.
*/
log.info("adjusting statistical information for left_token and right_token columns");
int adjustedLeft = getJdbcTemplate().queryForObject("SELECT avg(maxleft)::integer\n"
+ "FROM\n" +"( SELECT max(left_token) maxleft FROM _node GROUP BY corpus_ref, text_ref ) AS m", Integer.class);
int adjustedRight = getJdbcTemplate().queryForObject("SELECT avg(maxright)::integer\n"
+ "FROM\n" +"( SELECT max(right_token) maxright FROM _node GROUP BY corpus_ref, text_ref ) AS m", Integer.class);
getJdbcTemplate().execute("ALTER TABLE facts_" + corpusID + " ALTER COLUMN left_token SET (n_distinct=" + adjustedLeft + ")");
getJdbcTemplate().execute("ALTER TABLE facts_" + corpusID + " ALTER COLUMN right_token SET (n_distinct=" + adjustedRight + ")");
}
void createFacts(long corpusID, ANNISFormatVersion version, Offsets offsets)
{
MapSqlParameterSource args = offsets.makeArgs()
.addValue(":id", corpusID);
log.info("creating materialized facts table for corpus with ID " + corpusID);
String defaultStatTargetRaw =
getJdbcTemplate().queryForObject("SHOW default_statistics_target", String.class);
// this is the minimal value
int selectedStatTarget = 250;
if(defaultStatTargetRaw != null)
{
try
{
int defaultStatTargetConfig = Integer.parseInt(defaultStatTargetRaw);
// make sure the sample size is not less than the default one
selectedStatTarget = Math.max(selectedStatTarget, defaultStatTargetConfig);
}
catch(NumberFormatException ex)
{
log.warn("Could not parse the \"default_statistics_target\" PostgreSQL parameter.");
}
}
args.addValue(":stat_target", selectedStatTarget);
if (version == ANNISFormatVersion.V3_3)
{
executeSqlFromScript("facts.sql", args);
}
else
{
executeSqlFromScript("facts_v32.sql", args);
}
log.info("indexing the new facts table (general indexes)");
executeSqlFromScript("indexes.sql", args);
log.info("indexing the new facts table (edge related indexes)");
executeSqlFromScript("indexes_edge.sql", args);
}
void removeUnecessarySpanningRelations()
{
log.info("setting \"continuous\" to a correct value");
executeSqlFromScript("set_continuous.sql");
// re-analyze node since we added new columns
log.info("analyzing node");
getJdbcTemplate().execute("ANALYZE " + tableInStagingArea("node"));
log.info("removing unnecessary span relations");
executeSqlFromScript("remove_span_relations.sql");
}
///// Other sub tasks
public List<Long> listToplevelCorpora()
{
String sql = "SELECT id FROM corpus WHERE top_level = 'y'";
return getJdbcTemplate().query(sql, ParameterizedSingleColumnRowMapper.
newInstance(Long.class));
}
/**
* Delete files not used by this instance in the data directory.
*/
@Transactional(readOnly = true)
public void cleanupData()
{
List<String> allFilesInDatabaseList = getJdbcTemplate().queryForList(
"SELECT filename FROM media_files AS m", String.class);
File dataDir = getRealDataDir();
Set<File> allFilesInDatabase = new HashSet<>();
for (String singleFileName : allFilesInDatabaseList)
{
allFilesInDatabase.add(new File(dataDir, singleFileName));
}
log.info("Cleaning up the data directory");
// go through each file of the folder and check if it is not included
File[] childFiles = dataDir.listFiles();
if (childFiles != null)
{
for (File f : childFiles)
{
if (f.isFile() && !allFilesInDatabase.contains(f))
{
if (!f.delete())
{
log.warn("Could not delete {}", f.getAbsolutePath());
}
}
}
}
}
public List<Map<String, Object>> listCorpusStats()
{
return getJdbcTemplate().queryForList(
"SELECT * FROM corpus_info ORDER BY name");
}
/**
 * Lists the corpora using the connection information of a given
 * "database.properties" file.
 *
 * @param databaseProperties The file containing the connection information.
 * @return The corpus statistics, one map per corpus.
 */
public List<Map<String, Object>> listCorpusStats(File databaseProperties)
{
List<Map<String, Object>> result = new LinkedList<>();
DataSource origDataSource = getDataSource().getInnerDataSource();
try
{
if (databaseProperties != null)
{
getDataSource().setInnerDataSource(createDataSource(databaseProperties));
}
result = getJdbcTemplate().queryForList(
"SELECT * FROM corpus_info ORDER BY name");
}
catch (IOException | URISyntaxException | DataAccessException ex)
{
if (databaseProperties == null)
{
log.error("Could not query corpus list", ex);
}
else
{
log.error(
"Could not query corpus list for the file " + databaseProperties.
getAbsolutePath(), ex);
}
}
finally
{
getDataSource().setInnerDataSource(origDataSource);
}
return result;
}
public List<String> listUsedIndexes()
{
log.info("retrieving list of used indexes");
return listIndexDefinitions(true);
}
public List<String> listUnusedIndexes()
{
log.info("retrieving list of unused indexes");
return listIndexDefinitions(false);
}
/**
 * Provides a multimap where the keys are the aliases and the values are the
 * corpus names.
 *
 * @param databaseProperties The file containing the connection information.
 * @return The alias-to-corpus-name mapping.
 */
public Multimap<String, String> listCorpusAlias(File databaseProperties)
{
Multimap<String, String> result = TreeMultimap.create();
DataSource origDataSource = getDataSource().getInnerDataSource();
try
{
if (databaseProperties != null)
{
getDataSource().setInnerDataSource(createDataSource(databaseProperties));
}
result = getJdbcTemplate().query(
"SELECT a.alias AS alias, c.name AS corpus\n"
+ "FROM corpus_alias AS a, corpus AS c\n" + "WHERE\n"
+ " a.corpus_ref = c.id",
new ResultSetExtractor<Multimap<String, String>>()
{
@Override
public Multimap<String, String> extractData(ResultSet rs) throws
SQLException,
DataAccessException
{
Multimap<String, String> data = TreeMultimap.create();
while (rs.next())
{
// alias -> corpus name
data.put(rs.getString(1), rs.getString(2));
}
return data;
}
});
}
catch (IOException | URISyntaxException | DataAccessException ex)
{
if (databaseProperties == null)
{
log.error(
"Could not query corpus list", ex);
}
else
{
log.error(
"Could not query corpus list for the file " + databaseProperties.
getAbsolutePath(), ex);
}
}
finally
{
getDataSource().setInnerDataSource(origDataSource);
}
return result;
}
@Transactional(readOnly = true)
public UserConfig retrieveUserConfig(final String userName)
{
String sql = "SELECT * FROM user_config WHERE id=?";
UserConfig config = getJdbcTemplate().query(sql, new Object[]
{
userName
},
new ResultSetExtractor<UserConfig>()
{
@Override
public UserConfig extractData(ResultSet rs) throws SQLException,
DataAccessException
{
// default to empty config
UserConfig c = new UserConfig();
if (rs.next())
{
try
{
c = jsonMapper.readValue(rs.getString("config"),
UserConfig.class);
}
catch (IOException ex)
{
log.
error(
"Could not parse JSON that is stored in database (user configuration)",
ex);
}
}
return c;
}
});
return config;
}
@Transactional(readOnly = false)
public void storeUserConfig(String userName, UserConfig config)
{
String sqlUpdate = "UPDATE user_config SET config=?::json WHERE id=?";
String sqlInsert = "INSERT INTO user_config(id, config) VALUES(?,?)";
try
{
String jsonVal = jsonMapper.writeValueAsString(config);
// if no row was affected there is no entry yet and we should create one
if (getJdbcTemplate().update(sqlUpdate, jsonVal, userName) == 0)
{
getJdbcTemplate().update(sqlInsert, userName, jsonVal);
}
}
catch (IOException ex)
{
log.error("Cannot serialize user config JSON for database.", ex);
}
}
@Transactional(readOnly = false)
public void deleteUserConfig(String userName)
{
getJdbcTemplate().update("DELETE FROM user_config WHERE id=?", userName);
}
public void addCorpusAlias(long corpusID, String alias)
{
getJdbcTemplate().update(
"INSERT INTO corpus_alias (alias, corpus_ref)\n"
+ "VALUES(\n"
+ " ?, \n"
+ " ?\n"
+ ");",
alias, corpusID);
}
public void addCorpusAlias(String corpusName, String alias)
{
getJdbcTemplate().update(
"INSERT INTO corpus_alias (alias, corpus_ref)\n"
+ "SELECT ? AS alias, c.id\n"
+ "FROM corpus AS c WHERE c.top_level AND c.name=? LIMIT 1;",
alias, corpusName);
}
///// Helpers
private List<String> importedAndCreatedTables()
{
List<String> tables = new ArrayList<>();
tables.addAll(Arrays.asList(importedTables));
tables.addAll(Arrays.asList(createdTables));
return tables;
}
private List<String> allTables()
{
List<String> tables = new ArrayList<>();
tables.addAll(Arrays.asList(importedTables));
tables.addAll(Arrays.asList(createdTables));
//tables.addAll(Arrays.asList(materializedTables));
return tables;
}
private ParameterizedSingleColumnRowMapper<String> stringRowMapper()
{
return ParameterizedSingleColumnRowMapper.newInstance(String.class);
}
// executes an SQL script from $ANNIS_HOME/scripts
@Transactional(propagation = Propagation.MANDATORY)
public PreparedStatement executeSqlFromScript(String script)
{
return executeSqlFromScript(script, null);
}
// bulk-loads a table from a resource
private void bulkloadTableFromResource(String table, Resource resource)
{
log.debug("bulk-loading data from '" + resource.getFilename()
+ "' into table '" + table + "'");
String sql = "COPY \"" + table
+ "\" FROM STDIN WITH DELIMITER E'\t' NULL AS 'NULL'";
try
{
// retrieve the currently open connection if running inside a transaction
Connection originalCon = DataSourceUtils.getConnection(getDataSource());
Connection con = originalCon;
if (con instanceof DelegatingConnection)
{
DelegatingConnection<?> delCon = (DelegatingConnection<?>) con;
con = delCon.getInnermostDelegate();
}
Preconditions.checkState(con instanceof PGConnection,
"bulk-loading only works with a PostgreSQL JDBC connection");
// Postgres JDBC4 8.4 driver now supports the copy API
PGConnection pgCon = (PGConnection) con;
pgCon.getCopyAPI().copyIn(sql, resource.getInputStream());
DataSourceUtils.releaseConnection(originalCon, getDataSource());
}
catch (SQLException e)
{
throw new DatabaseAccessException(e);
}
catch (IOException e)
{
throw new FileAccessException(e);
}
}
@Transactional(readOnly = false)
public void restoreTableFromResource(String table, Resource resource)
{
// this is only a public wrapper function providing the transaction
bulkloadTableFromResource(table, resource);
}
@Transactional(readOnly = true)
public void dumpTableToResource(String table, WritableResource resource)
{
log.debug("dumping data to '" + resource.getFilename()
+ "' from table '" + table + "'");
String sql = "COPY \"" + table
+ "\" TO STDOUT WITH DELIMITER E'\t' NULL AS 'NULL'";
try
{
// retrieve the currently open connection if running inside a transaction
Connection originalCon = DataSourceUtils.getConnection(getDataSource());
Connection con = originalCon;
if (con instanceof DelegatingConnection)
{
DelegatingConnection<?> delCon = (DelegatingConnection<?>) con;
con = delCon.getInnermostDelegate();
}
Preconditions.checkState(con instanceof PGConnection,
"bulk-loading only works with a PostgreSQL JDBC connection");
// Postgres JDBC4 8.4 driver now supports the copy API
PGConnection pgCon = (PGConnection) con;
pgCon.getCopyAPI().copyOut(sql, resource.getOutputStream());
DataSourceUtils.releaseConnection(originalCon, getDataSource());
}
catch (SQLException e)
{
throw new DatabaseAccessException(e);
}
catch (IOException e)
{
throw new FileAccessException(e);
}
}
// get a list of indexes on the imported and created tables which are not
// auto-created by PostgreSQL (namely primary key and unique constraints);
// exploits the fact that the index has the same name as the constraint
private List<String> listIndexesOnTables(List<String> tables)
{
String sql
= ""
+ "SELECT indexname "
+ "FROM pg_indexes "
+ "WHERE tablename IN (" + StringUtils.repeat("?", ",", tables.size())
+ ") "
+ "AND lower(indexname) NOT IN "
+ " (SELECT lower(conname) FROM pg_constraint WHERE contype in ('p', 'u'))";
return getJdbcTemplate().query(sql, tables.toArray(), stringRowMapper());
}
private List<String> listIndexDefinitions(boolean used)
{
return listIndexDefinitions(used, allTables());
}
/*
 * Returns the CREATE INDEX statements for all indexes on the ANNIS tables
 * that are not auto-created by PostgreSQL (primary keys and unique
 * constraints).
 *
 * @param used If true, return used indexes. If false, return unused indexes
 * (scan count is 0).
 */
public List<String> listIndexDefinitions(boolean used, List<String> tables)
{
String scansOp = used ? "!=" : "=";
String sql
= "SELECT pg_get_indexdef(x.indexrelid) AS indexdef "
+ "FROM pg_index x, pg_class c "
+ "WHERE x.indexrelid = c.oid "
+ "AND c.relname IN ( " + StringUtils.repeat("?", ",", tables.size())
+ ") "
+ "AND pg_stat_get_numscans(x.indexrelid) " + scansOp + " 0";
return getJdbcTemplate().query(sql, tables.toArray(), stringRowMapper());
}
public List<String> listIndexDefinitions(String... tables)
{
String sql = ""
+ "SELECT pg_get_indexdef(x.indexrelid) AS indexdef "
+ "FROM pg_index x, pg_class c, pg_indexes i "
+ "WHERE x.indexrelid = c.oid "
+ "AND c.relname = i.indexname "
+ "AND i.tablename IN ( " + StringUtils.repeat("?", ",", tables.length)
+ " )";
return getJdbcTemplate().query(sql, tables,
new ParameterizedSingleColumnRowMapper<String>());
}
public List<String> listUsedIndexes(String... tables)
{
String sql = ""
+ "SELECT pg_get_indexdef(x.indexrelid) AS indexdef "
+ "FROM pg_index x, pg_class c, pg_indexes i "
+ "WHERE x.indexrelid = c.oid "
+ "AND c.relname = i.indexname "
+ "AND i.tablename IN ( " + StringUtils.repeat("?", ",", tables.length)
+ " ) "
+ "AND pg_stat_get_numscans(x.indexrelid) != 0";
return getJdbcTemplate().query(sql, tables,
new ParameterizedSingleColumnRowMapper<String>());
}
public boolean resetStatistics()
{
try
{
getJdbcTemplate().queryForList("SELECT pg_stat_reset()");
return true;
}
catch (DataAccessException e)
{
return false;
}
}
/**
* Retrieves the name of the top level corpus in the corpus.tab file.
*
* <p>
* At this point, the tab files must be in the staging area.</p>
*
* @return The name of the toplevel corpus or null if no top level corpus is
* found.
*/
private String getTopLevelCorpusFromTmpArea()
{
String sql = "SELECT name FROM " + tableInStagingArea("corpus")
+ " WHERE type='CORPUS'\n"
+ "AND pre = (SELECT min(pre) FROM " + tableInStagingArea("corpus")
+ ")\n"
+ "AND post = (SELECT max(post) FROM " + tableInStagingArea("corpus")
+ ")";
return getJdbcTemplate().query(sql, new ResultSetExtractor<String>()
{
@Override
public String extractData(ResultSet rs) throws SQLException,
DataAccessException
{
if (rs.next())
{
return rs.getString("name");
}
else
{
return null;
}
}
});
}
///// Getter / Setter
public boolean isTemporaryStagingArea()
{
return temporaryStagingArea;
}
public void setTemporaryStagingArea(boolean temporaryStagingArea)
{
this.temporaryStagingArea = temporaryStagingArea;
}
/**
 * Get the name and version of the schema this {@link AdministrationDao} is
 * configured to work with.
 *
 * @return The configured schema version.
 */
public String getSchemaVersion()
{
return schemaVersion;
}
public void setSchemaVersion(String schemaVersion)
{
this.schemaVersion = schemaVersion;
}
public Map<String, String> getMimeTypeMapping()
{
return mimeTypeMapping;
}
public void setMimeTypeMapping(Map<String, String> mimeTypeMapping)
{
this.mimeTypeMapping = mimeTypeMapping;
}
public Map<String, String> getTableInsertSelect()
{
return tableInsertSelect;
}
public void setTableInsertSelect(Map<String, String> tableInsertSelect)
{
this.tableInsertSelect = tableInsertSelect;
}
public Map<String, String> getTableInsertFrom()
{
return tableInsertFrom;
}
public void setTableInsertFrom(Map<String, String> tableInsertFrom)
{
this.tableInsertFrom = tableInsertFrom;
}
private void readOldResolverVisMapFormat(File resolver_vis_tab)
{
StringBuilder sb = new StringBuilder();
sb.append("CREATE TABLE tmp_resolver_vis_map ");
sb.append("( ");
sb.append("\"corpus\" varchar, ");
sb.append("\"version\" varchar, ");
sb.append("\"namespace\" varchar, ");
sb.append("\"element\" varchar, ");
sb.append("\"vis_type\" varchar NOT NULL, ");
sb.append("\"display_name\" varchar NOT NULL, ");
sb.append("\"order\" integer default '0', ");
sb.append("\"mappings\" varchar");
sb.append(");");
getJdbcTemplate().execute(sb.toString());
bulkloadTableFromResource("tmp_resolver_vis_map",
new FileSystemResource(resolver_vis_tab));
sb = new StringBuilder();
sb.append("INSERT INTO ");
sb.append(tableInStagingArea(FILE_RESOLVER_VIS_MAP));
sb.append("\n\t");
sb.append(" (");
sb.append("corpus, ");
sb.append("version, ");
sb.append("namespace, ");
sb.append("element, ");
sb.append("vis_type, ");
sb.append("display_name, ");
sb.append("\"order\", ");
sb.append("mappings");
sb.append(")");
sb.append("\n");
sb.append("SELECT tmp.corpus, ");
sb.append("tmp.version, ");
sb.append("tmp.namespace, ");
sb.append("tmp.element, ");
sb.append("tmp.vis_type, ");
sb.append("tmp.display_name, ");
sb.append("tmp.\"order\", ");
sb.append("tmp.mappings");
sb.append("\n\t");
sb.append("FROM tmp_resolver_vis_map AS tmp; ");
getJdbcTemplate().execute(sb.toString());
getJdbcTemplate().execute("DROP TABLE tmp_resolver_vis_map;");
}
/**
 * Imports either the old or the new version of the resolver_vis_map table
 * file. The new version has an additional column for the visibility status of
 * the visualization.
 *
 * @param path The path to the directory containing the ANNIS files.
 * @param table The name of the resolver_vis_map table in the database.
 */
private void importResolverVisMapTable(String path, String table)
{
try
{
// count cols for detecting old resolver_vis_map table format
File resolver_vis_tab = new File(path, table + annisFileSuffix);
if (!resolver_vis_tab.isFile())
{
return;
}
String firstLine;
try (BufferedReader bReader = new BufferedReader(
new InputStreamReader(new FileInputStream(resolver_vis_tab), "UTF-8")))
{
firstLine = bReader.readLine();
}
int cols = 9; // default number
if (firstLine != null)
{
String[] entries = firstLine.split("\t");
cols = entries.length;
log.debug("the first row: {} amount of cols: {}", entries, cols);
}
switch (cols)
{
// old format
case 8:
readOldResolverVisMapFormat(resolver_vis_tab);
break;
// new format
case 9:
bulkloadTableFromResource(tableInStagingArea(table),
new FileSystemResource(new File(path, table + annisFileSuffix)));
break;
default:
log.error("invalid amount of cols");
throw new RuntimeException();
}
}
catch (IOException | FileAccessException e)
{
log.error("could not read {}", table, e);
}
}
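/* For reference, the old 8-column format contains exactly the columns created
   in readOldResolverVisMapFormat() above (corpus, version, namespace, element,
   vis_type, display_name, "order", mappings); the new 9-column format adds a
   column for the visibility status of the visualization. */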
/**
* Removes any unwanted entries from the resolver_vis_map table
*
* @param toplevelCorpus
* @param table
*/
private void fixResolverVisMapTable(String toplevelCorpus, String table)
{
log.info("checking resolver_vis_map for errors");
// delete all entries that reference a different corpus than the imported one
int invalidRows = getJdbcTemplate().update("DELETE FROM " + table
+ " WHERE corpus <> ?", toplevelCorpus);
if (invalidRows > 0)
{
log.warn("there were " + invalidRows
+ " rows in the resolver_vis_map that referenced the wrong corpus");
}
}
/**
* Generates example queries if no example queries tab file is defined by the
* user.
*/
private void generateExampleQueries(long corpusID)
{
// this flag is configured in the annis.properties file.
if (generateExampleQueries == EXAMPLE_QUERIES_CONFIG.TRUE)
{
queriesGenerator.generateQueries(corpusID);
}
}
/**
* @return the generateExampleQueries
*/
public EXAMPLE_QUERIES_CONFIG isGenerateExampleQueries()
{
return generateExampleQueries;
}
/**
* @param generateExampleQueries the generateExampleQueries to set
*/
public void setGenerateExampleQueries(
EXAMPLE_QUERIES_CONFIG generateExampleQueries)
{
this.generateExampleQueries = generateExampleQueries;
}
/**
 * Counts nodes and operators of the AQL example queries and writes the
 * results back to the staging area.
 *
 * @param corpusID The ID of the corpus whose example queries are analyzed.
 */
private void analyzeAutoGeneratedQueries(long corpusID)
{
// read the example queries from the staging area
List<ExampleQuery> exampleQueries = getJdbcTemplate().query(
"SELECT * FROM _" + EXAMPLE_QUERIES_TAB, new RowMapper<ExampleQuery>()
{
@Override
public ExampleQuery mapRow(ResultSet rs, int i) throws SQLException
{
ExampleQuery eQ = new ExampleQuery();
eQ.setExampleQuery(rs.getString("example_query"));
return eQ;
}
});
// count the nodes of the AQL queries
countExampleQueryNodes(exampleQueries);
// fetch the operators; ">*" must precede ">" so the longer operator is matched
getOperators(exampleQueries, "\\.(\\*)?|\\>\\*|\\>|_i_");
writeAmountOfNodesBack(exampleQueries);
}
/**
* Maps example queries to integer, which represents the amount of nodes of
* the aql query.
*
*/
private void countExampleQueryNodes(List<ExampleQuery> exampleQueries)
{
for (ExampleQuery eQ : exampleQueries)
{
QueryData query = getQueryDao().parseAQL(eQ.getExampleQuery(), null);
int count = 0;
for (List<QueryNode> qNodes : query.getAlternatives())
{
count += qNodes.size();
}
eQ.setNodes(count);
}
}
/**
* Writes the counted nodes and the used operators back to the staging area.
*
*/
private void writeAmountOfNodesBack(List<ExampleQuery> exampleQueries)
{
String sqlTemplate = "UPDATE _" + EXAMPLE_QUERIES_TAB + " SET nodes=?, used_ops=CAST(? AS text[]) WHERE example_query=?;";
for (ExampleQuery eQ : exampleQueries)
{
getJdbcTemplate().update(sqlTemplate, eQ.getNodes(), eQ.getUsedOperators(), eQ.getExampleQuery());
}
}
/**
 * Fetches the operators used in {@link ExampleQuery#getExampleQuery()} with a
 * given regex and sets the used-operators property of each member.
 *
 * @param exQueries The example queries to analyze.
 * @param regex The regex to search for operators.
 */
private void getOperators(List<ExampleQuery> exQueries, String regex)
{
Pattern opsRegex = Pattern.compile(regex);
for (ExampleQuery eQ : exQueries)
{
List<String> ops = new ArrayList<>();
Matcher m = opsRegex.matcher(eQ.getExampleQuery().replaceAll("\\s", ""));
while (m.find())
{
ops.add(m.group());
}
eQ.setUsedOperators("{" + StringUtils.join(ops, ",") + "}");
}
}
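/* Example (hypothetical query): for  "der" . pos="NN"  the whitespace-stripped
   form is  "der".pos="NN"  and the matcher yields the single operator ".",
   which setUsedOperators() stores as the PostgreSQL array literal {.} so it
   can be cast to text[] when written back to the staging area. */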
/**
* @return the queriesGenerator
*/
public QueriesGenerator getQueriesGenerator()
{
return queriesGenerator;
}
/**
* @param queriesGenerator the queriesGenerator to set
*/
public void setQueriesGenerator(
QueriesGenerator queriesGenerator)
{
this.queriesGenerator = queriesGenerator;
}
public DeleteCorpusDao getDeleteCorpusDao()
{
return deleteCorpusDao;
}
public void setDeleteCorpusDao(DeleteCorpusDao deleteCorpusDao)
{
this.deleteCorpusDao = deleteCorpusDao;
}
public boolean isHackDistinctLeftRightToken()
{
return hackDistinctLeftRightToken;
}
public void setHackDistinctLeftRightToken(boolean hackDistinctLeftRightToken)
{
this.hackDistinctLeftRightToken = hackDistinctLeftRightToken;
}
/**
 * Checks whether a corpus with the same name as the top level corpus in the
 * corpus.tab file already exists. If this is the case an exception is thrown
 * and the import is aborted.
 *
 * @throws ConflictingCorpusException
 */
private void checkTopLevelCorpus() throws ConflictingCorpusException
{
String corpusName = getTopLevelCorpusFromTmpArea();
if (existConflictingTopLevelCorpus(corpusName))
{
String msg
= "There already exists a top level corpus with the name: " + corpusName;
throw new ConflictingCorpusException(msg);
}
}
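/**
 * Detects the ANNIS import format version of a corpus directory: an
 * "annis.version" file containing "3.3" selects {@link ANNISFormatVersion#V3_3};
 * otherwise the column count of "node.tab" decides (13 columns:
 * {@link ANNISFormatVersion#V3_2}, 10 columns: {@link ANNISFormatVersion#V3_1}).
 */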
private ANNISFormatVersion getANNISFormatVersion(String path)
{
File pathDir = new File(path);
if (pathDir.isDirectory())
{
// check for existance of "annis.version" file
File versionFile = new File(pathDir, "annis.version");
if (versionFile.isFile() && versionFile.exists())
{
try
{
// read the first line
String firstLine = Files.readFirstLine(versionFile, Charsets.UTF_8);
if ("3.3".equals(firstLine.trim()))
{
return ANNISFormatVersion.V3_3;
}
}
catch (IOException ex)
{
log.warn("Could not read annis.version file", ex);
}
}
else
{
// we have to distinguish between 3.1 and 3.2
File nodeTab = new File(pathDir, "node.tab");
if (nodeTab.isFile() && nodeTab.exists())
{
try
{
String firstLine = Files.readFirstLine(nodeTab, Charsets.UTF_8);
List<String> cols = Splitter.on('\t').splitToList(firstLine);
if (cols.size() == 13)
{
return ANNISFormatVersion.V3_2;
}
else if (cols.size() == 10)
{
return ANNISFormatVersion.V3_1;
}
}
catch (IOException ex)
{
log.warn("Could not read node.tab file", ex);
}
}
}
}
return ANNISFormatVersion.UNKNOWN;
}
public static class ConflictingCorpusException extends AnnisException
{
public ConflictingCorpusException(String msg)
{
super(msg);
}
}
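/**
 * Holds the ID offsets which shift the IDs of the staging area into the value
 * range of the main database (corpus ID, corpus post order and node ID), as
 * computed by {@link #calculateOffsets()}.
 */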
public static class Offsets
{
private final long corpusID;
private final long corpusPost;
private final long nodeID;
public Offsets(long corpusID, long corpusPost, long nodeID)
{
this.corpusID = corpusID;
this.corpusPost = corpusPost;
this.nodeID = nodeID;
}
public long getCorpusID()
{
return corpusID;
}
public long getCorpusPost()
{
return corpusPost;
}
public MapSqlParameterSource makeArgs()
{
return new MapSqlParameterSource()
.addValue(":offset_corpus_id", corpusID)
.addValue(":offset_corpus_post", corpusPost)
.addValue(":offset_node_id", nodeID);
}
}
}