package com.epam.wilma.message.search.lucene.index;
/*==========================================================================
Copyright 2013-2017 EPAM Systems
This file is part of Wilma.
Wilma is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Wilma is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Wilma. If not, see <http://www.gnu.org/licenses/>.
===========================================================================*/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import com.epam.wilma.message.search.domain.exception.SystemException;
import com.epam.wilma.message.search.lucene.helper.TermFactory;
import com.epam.wilma.message.search.lucene.index.helper.BufferedReaderFactory;
import com.epam.wilma.message.search.lucene.index.helper.DocumentFactory;
import com.epam.wilma.message.search.lucene.index.helper.FileInputStreamFactory;
/**
* Class for adding files to Lucene index using {@link IndexWriter}.
* @author Tamas_Bihari
*
*/
@Component
public class FileIndexer {
private final Logger logger = LoggerFactory.getLogger(FileIndexer.class);
@Value("#{fieldName}")
private String fieldName;
@Autowired
private IndexWriter indexWriter;
@Autowired
private DocumentFactory documentFactory;
@Autowired
private BufferedReaderFactory bufferedReaderFactory;
@Autowired
private TermFactory termFactory;
@Autowired
private FileInputStreamFactory fileInputStreamFactory;
/**
* Adds a file to index with {@link IndexWriter}.
* @param file will be indexed by the function
*/
public void indexFile(final File file) {
FileInputStream fis = getInputStream(file);
Document doc = documentFactory.createDocument();
// Add the path of the file as a field named "path". Use a field that is indexed (i.e. searchable), but don't tokenize
// the field into separate words and don't index term frequency or positional information:
Field pathField = new StringField(fieldName, file.getAbsolutePath(), Field.Store.YES);
doc.add(pathField);
// Add the last modified date of the file a field named "modified".
// Use a LongField that is indexed (i.e. efficiently filterable with NumericRangeFilter).
doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
// Add the contents of the file to a field named "contents".
// If that's not the case searching for special characters will fail.
BufferedReader bufferedReader;
try {
bufferedReader = bufferedReaderFactory.createReader(fis);
doc.add(new Field("contents", bufferedReader, TextField.TYPE_NOT_STORED));
addDocument(file, doc);
fis.close();
} catch (IOException e) {
logger.error(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
}
}
private FileInputStream getInputStream(final File file) {
FileInputStream fis;
try {
fis = fileInputStreamFactory.createFileInputStream(file);
} catch (FileNotFoundException fnfe) {
throw new SystemException(fnfe.getMessage());
}
return fis;
}
private void addDocument(final File file, final Document doc) throws IOException {
if (indexWriter.getConfig().getOpenMode() == OpenMode.CREATE) {
indexWriter.addDocument(doc);
} else {
Term term = termFactory.createTerm(fieldName, file.getPath());
indexWriter.updateDocument(term, doc);
}
}
}