LuceneRdfManager.java example

Explorer
java_practical_semantic_web-master
package com.knowledgebooks.rdf.implementation;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;


/**
 * Wrapper class for an embedded Lucene index to support free text search of all
 * information sources that have been added.
 * <p/>
 * Each RDF triple is stored as one Lucene document with the stored fields
 * {@code subject}, {@code predicate} and {@code object}. Only {@code object}
 * is indexed (analyzed), so it is the only field a query can match against.
 * The index is kept in the {@code lucene_index} directory below the data
 * store root passed to the constructor.
 * <p/>
 * Every call opens its own writer or reader and closes it again before
 * returning, so no Lucene resources are held between calls.
 * <p/>
 * Copyright 2002-2008 by Mark Watson. All rights reserved.
 * <p/>
 * This software is not public domain. It can be legally
 * used under either of the following licenses:
 * <p/>
 * 1. KnowledgeBooks.com Non Commercial Royality Free License<br/>
 * 2. KnowledgeBooks.com Commercial Use License
 * <p/>
 * see www.knowledgebooks.com for details
 */
public class LuceneRdfManager {
  /** Path suffix appended to the data store root to locate the index directory. */
  private static final String INDEX_DIR_SUFFIX = "/lucene_index";

  /** Maximum number of hits returned by {@link #searchIndex(String)}. */
  private static final int MAX_HITS = 10;

  /** Number of values in one result row: subject, predicate, object. */
  private static final int TRIPLE_SIZE = 3;

  private final String dataStoreFileRoot;

  /**
   * Creates a manager for the index stored below the given root directory.
   * Nothing is opened or created on disk until a triple is added or a search
   * is run.
   *
   * @param data_store_file_root root directory of the data store; the index
   *                             lives in its {@code lucene_index} sub-directory
   * @throws java.lang.Exception declared for backward compatibility with
   *                             existing callers; this constructor does not
   *                             throw it itself
   */
  public LuceneRdfManager(String data_store_file_root) throws Exception {
    this.dataStoreFileRoot = data_store_file_root;
  }

  /**
   * Does nothing: each operation closes the resources it opens, so there is
   * no state to release here. Kept so existing callers continue to compile.
   *
   * @throws java.io.IOException never thrown by this implementation
   */
  public void close() throws IOException {
  }

  /**
   * Appends one triple to the index as a new document, creating the index if
   * it does not exist yet.
   *
   * @param subject   triple subject; stored but not searchable, must not be null
   * @param predicate triple predicate; stored but not searchable, must not be null
   * @param object    triple object; stored and analyzed for free text search,
   *                  must not be null
   * @throws org.apache.lucene.index.CorruptIndexException
   *                             if the existing index is corrupt
   * @throws java.io.IOException if the index cannot be opened or written
   */
  public void addTripleToIndex(String subject, String predicate, String object) throws IOException {
    // Build the document before opening the writer so that a rejected
    // argument cannot leave an open writer behind.
    Document doc = new Document();
    doc.add(new Field("subject", subject, Field.Store.YES, Field.Index.NO));
    doc.add(new Field("predicate", predicate, Field.Store.YES, Field.Index.NO));
    doc.add(new Field("object", object, Field.Store.YES, Field.Index.ANALYZED));

    FSDirectory directory = FSDirectory.open(indexDir());
    try {
      // Ask Lucene whether an index is present instead of testing for the
      // directory: an existing but empty directory still needs create=true.
      boolean create = !IndexReader.indexExists(directory);
      IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(Version.LUCENE_CURRENT),
          create, IndexWriter.MaxFieldLength.LIMITED);
      try {
        writer.addDocument(doc);
        // NOTE(review): optimizing after every single triple is costly for
        // bulk loads; kept because the original behaviour did the same.
        writer.optimize();
      } finally {
        // Always close, even on failure, so the writer is not leaked.
        writer.close();
      }
    } finally {
      directory.close();
    }
  }

  /**
   * Runs a free text query against the {@code object} field and returns the
   * best matching triples.
   *
   * @param search_query query string in Lucene query parser syntax
   * @return at most {@value #MAX_HITS} rows ordered by descending score; each
   *         row is a list holding subject, predicate and object in that
   *         order. The list is empty when nothing matches.
   * @throws org.apache.lucene.queryParser.ParseException
   *                             if the query string cannot be parsed
   * @throws java.io.IOException if the index cannot be opened or read
   */
  public List<List<String>> searchIndex(String search_query) throws ParseException, IOException {
    List<List<String>> ret = new ArrayList<List<String>>();
    FSDirectory directory = FSDirectory.open(indexDir());
    try {
      IndexReader reader = IndexReader.open(directory, true);
      try {
        Searcher searcher = new IndexSearcher(reader);

        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
        QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "object", analyzer);
        Query query = parser.parse(search_query);

        TopScoreDocCollector collector = TopScoreDocCollector.create(MAX_HITS, false);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        for (ScoreDoc hit : hits) {
          Document doc = searcher.doc(hit.doc);
          List<String> triple = new ArrayList<String>(TRIPLE_SIZE);
          triple.add(doc.get("subject"));
          triple.add(doc.get("predicate"));
          triple.add(doc.get("object"));
          ret.add(triple);
        }
      } finally {
        // Close on every path, including a ParseException from the parser.
        reader.close();
      }
    } finally {
      directory.close();
    }
    return ret;
  }

  /** Returns the directory that holds the Lucene index for this data store. */
  private File indexDir() {
    return new File(dataStoreFileRoot + INDEX_DIR_SUFFIX);
  }
}