/*
* Carrot2 project.
*
* Copyright (C) 2002-2010, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.examples;
import java.io.File;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.carrot2.core.Document;
import org.carrot2.examples.clustering.ClusteringDataFromLucene;
/**
 * Creates a Lucene index on disk based on {@link SampleDocumentData}.
 * <p>
 * Usage: pass a single argument, the path of the index directory to create. The
 * program refuses to run (exit status <code>-2</code>) if that path already exists
 * and prints a usage line (exit status <code>-1</code>) if the argument count is wrong.
 *
 * @see ClusteringDataFromLucene
 */
public class CreateLuceneIndex
{
    /**
     * Command-line entry point.
     *
     * @param args exactly one element: the directory in which to create the index
     * @throws Exception if opening the directory, creating the writer, adding a
     *             document or closing either resource fails
     */
    public static void main(String [] args)
        throws Exception
    {
        if (args.length != 1)
        {
            System.out.println("Args: index-dir");
            System.exit(-1);
        }

        File indexDir = new File(args[0]);
        if (indexDir.exists())
        {
            System.out.println("Index directory already exists: " + indexDir.getAbsolutePath());
            System.exit(-2);
        }

        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);

        /*
         * Both the directory and the writer hold resources (the writer also holds the
         * index write lock), so release them in finally blocks even when indexing
         * fails half way through. The writer does not close a directory that was
         * handed to it, hence the separate close() call for the directory.
         */
        final FSDirectory directory = FSDirectory.open(indexDir);
        try
        {
            final IndexWriter writer = new IndexWriter(directory, analyzer, true,
                MaxFieldLength.UNLIMITED);
            try
            {
                for (Document d : SampleDocumentData.DOCUMENTS_DATA_MINING)
                {
                    writer.addDocument(toLuceneDocument(d));
                }
            }
            finally
            {
                writer.close();
            }
        }
        finally
        {
            directory.close();
        }
    }

    /**
     * Converts a Carrot2 document into a Lucene document with a searchable
     * "fullContent" field and "title", "url" and "snippet" fields for clustering.
     *
     * @param d the Carrot2 document to convert
     * @return a new Lucene document built from <code>d</code>'s title, summary and
     *         content URL
     */
    private static org.apache.lucene.document.Document toLuceneDocument(Document d)
    {
        final org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();

        // Analyzed for searching, but not stored: the same text is stored as "snippet".
        doc.add(new Field("fullContent", d.getSummary(), Store.NO, Index.ANALYZED));

        // Stored only, not indexed.
        doc.add(new Field("title", d.getTitle(), Store.YES, Index.NO));
        doc.add(new Field("snippet", d.getSummary(), Store.YES, Index.NO));
        doc.add(new Field("url", d.getContentUrl(), Store.YES, Index.NO));

        return doc;
    }
}