/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.tallison.lucene.syns; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; /** * This runs the three indexing steps in order. * 1) create the ngram+1 indices * 2) create the context index * 3) create the final word vector index */ class BuildIndices { public static void main(String[] args) throws IOException { Path sourceLuceneIndexPath = Paths.get(args[0]); String contentField = args[1]; Path indexRoot = Paths.get(args[2]); SyntacticSynsConfig synsConfig = new SyntacticSynsConfig(indexRoot); boolean deleteIntermediateIndices = false; //parameters //column index in the csv file that should be treated as the narrative column int narrInd = 8; //maximum ngram for the target size synsConfig.setMaxKeyPhraseLength(3); //minimum number of times that the key phrase must occur synsConfig.setMinKeyPhraseTermFrequency(5); //this should weed out stop words from being targets int maxTargetCount = 10000; //minimum number of times that the target + a given context must occur int minTargetContextCount = 1; /* //add target pre and post stop sets */ //build NgramIndex NGramIndexBuilder ngramBuilder = new NGramIndexBuilder(); ngramBuilder.execute(sourceLuceneIndexPath, contentField, synsConfig); System.out.println("finished building ngram index"); //build context index ContextIndexBuilder contextBuilder = new ContextIndexBuilder(); // contextBuilder.execute(synsConfig); System.out.println("finished building context index"); //build syns index SynsIndexBuilder indexer = new SynsIndexBuilder(synsConfig); indexer.execute(); /*if (deleteIntermediateIndices){ FileUtil.rmdir(synsConfig.getNGramIndex()); FileUtil.rmdir(synsConfig.getContextIndex()); } */ System.out.println("Done with all three steps!"); } }