/* * This file is part of anycook. The new internet cookbook * Copyright (C) 2014 Jan Graßegger * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see [http://www.gnu.org/licenses/]. */ package de.anycook.db.lucene; import de.anycook.conf.Configuration; import de.anycook.db.mysql.DBGetRecipe; import de.anycook.db.mysql.DBRecipe; import de.anycook.recipe.Recipe; import de.anycook.recipe.step.Step; import de.anycook.recipe.step.Steps; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.DateTools.Resolution; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.IntField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.store.NIOFSDirectory; import java.io.IOException; import java.nio.file.Paths; import java.sql.SQLException; import java.util.Date; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; import java.util.Set; public class FulltextIndex { private static FulltextIndex singleton = null; private final Logger logger; private final Analyzer analyzer = new NGramAnalyzer(); private final Directory index; //private final IndexWriterConfig indexWriterConfig; private FulltextIndex() throws IOException { logger = LogManager.getLogger(getClass()); String indexPath = Configuration.getInstance().getFullTextIndexPath(); index = new NIOFSDirectory(Paths.get(indexPath)); } public static FulltextIndex init() throws IOException { if (singleton == null) { singleton = new FulltextIndex(); LogManager.getLogger(FulltextIndex.class).info("created new instance of FulltextIndex"); } return singleton; } private IndexWriterConfig createIndexWriterConfig() { return new IndexWriterConfig(analyzer); } public void addRecipe(String recipeName) throws SQLException, DBRecipe.RecipeNotFoundException, IOException { recipeName = recipeName.toLowerCase(Locale.GERMAN); if (checkIfRecipeExists(recipeName)) { removeRecipe(recipeName); } try (DBGetRecipe dbGetRecipe = new DBGetRecipe()) { Recipe recipe = dbGetRecipe.get(recipeName); int id = dbGetRecipe.getActiveIdfromRecipe(recipeName); String date = DateTools.dateToString(new Date(), Resolution.DAY); List<Step> steps = Steps.loadRecipeSteps(recipeName); StringBuilder stepText = new StringBuilder(); for (Step step : steps) { stepText.append(" ").append(step.getText()); } try (IndexWriter writer = new IndexWriter(index, createIndexWriterConfig())) { Document doc = new Document(); doc.add(new TextField("title", recipe.getName(), Field.Store.YES)); doc.add(new TextField("description", recipe.getDescription() == null ? "" : recipe .getDescription(), Field.Store.YES)); doc.add(new TextField("steps", stepText.toString(), Field.Store.YES)); doc.add(new IntField("version_id", id, Field.Store.YES)); doc.add(new TextField("date", date, Field.Store.YES)); writer.addDocument(doc); writer.commit(); logger.info("added " + recipeName + " to index"); } catch (CorruptIndexException | LockObtainFailedException e) { throw new IOException(e); } } } public void clearIndex() throws IOException { try (IndexWriter writer = new IndexWriter(index, createIndexWriterConfig())) { writer.deleteAll(); writer.commit(); logger.info("cleared index"); } catch (CorruptIndexException e) { throw new IOException(e); } } public void addAllRecipes() throws SQLException, IOException { try (DBGetRecipe dbGetRecipe = new DBGetRecipe(); IndexWriter writer = new IndexWriter(index, createIndexWriterConfig())) { writer.deleteAll(); for (String recipeName : dbGetRecipe.getAllActiveRecipeNames()) { try { Document document = generateRecipeDoc(recipeName); writer.addDocument(document); } catch (DBRecipe.RecipeNotFoundException e) { //nope } } writer.commit(); logger.info("successfully build fulltext index"); } } public Document generateRecipeDoc(String recipeName) throws SQLException, DBRecipe.RecipeNotFoundException { try (DBGetRecipe dbGetRecipe = new DBGetRecipe()) { Recipe recipe = dbGetRecipe.get(recipeName); List<Step> steps = Steps.loadRecipeSteps(recipeName); StringBuilder stepText = new StringBuilder(); for (Step step : steps) { stepText.append(" ").append(step.getText()); } int id = dbGetRecipe.getActiveIdfromRecipe(recipeName); String date = DateTools.dateToString(new Date(), Resolution.DAY); Document doc = new Document(); doc.add(new TextField("title", recipe.getName(), Field.Store.YES)); doc.add(new TextField("description", recipe.getDescription() == null ? "" : recipe.getDescription(), Field.Store.YES)); doc.add(new TextField("steps", stepText.toString(), Field.Store.YES)); doc.add(new IntField("version_id", id, Field.Store.YES)); doc.add(new TextField("date", date, Field.Store.YES)); return doc; } } public void removeRecipe(String recipeName) throws IOException { try (IndexWriter writer = new IndexWriter(index, createIndexWriterConfig())) { writer.deleteDocuments(new Term("title", recipeName)); writer.commit(); logger.info("removed " + recipeName + " from index"); } catch (CorruptIndexException e) { throw new IOException(e); } } private boolean checkIfRecipeExists(String recipeName) throws IOException { try (IndexReader reader = DirectoryReader.open(index)) { IndexSearcher searcher = new IndexSearcher(reader); MultiFieldQueryParser.parse(new String[]{recipeName}, new String[]{"title"}, analyzer); Query query = new QueryParser("title", analyzer).parse(recipeName); TopDocs topdocs = searcher.search(query, 1); if (topdocs.totalHits > 0) { return true; } } catch (CorruptIndexException | ParseException e) { throw new IOException(e); } return false; } public Set<String> search(String q) throws IOException { Set<String> recipes = new LinkedHashSet<>(); String fields[] = new String[]{"description", "steps"}; logger.debug(String.format("searching for %s", q)); try (IndexReader reader = DirectoryReader.open(index)) { int hitsPerPage = 1000; IndexSearcher searcher = new IndexSearcher(reader); Query query = new MultiFieldQueryParser(fields, analyzer).parse(q); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, null); searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; for (ScoreDoc hit : hits) { Document d = searcher.doc(hit.doc); recipes.add(d.get("title")); } } catch (CorruptIndexException | ParseException e) { logger.error(e); } logger.debug(String.format("found %d results", recipes.size())); return recipes; } public Set<String> search(Set<String> queries) throws IOException { Set<String> recipeNames = new HashSet<>(); for (String term : queries) { recipeNames.addAll(search(term)); } return recipeNames; } }