package doser.entitydisambiguation.modknowledgebase;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParserBase;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import doser.lucene.analysis.DoserIDAnalyzer;
public class NewDocumentOrUpdateOperator extends AbstractKnowledgebaseOperator {
private final KBModifications action;
private final String docPrimKeyField;
private final Map<String, String> hashMap;
private final String uri;
public NewDocumentOrUpdateOperator(final String path, final Analyzer analyzer,
final String uri, final Map<String, String> hash,
final String docPrimKeyField, final KBModifications action) {
super(path, analyzer);
this.uri = uri;
this.hashMap = hash;
this.docPrimKeyField = docPrimKeyField;
this.action = action;
}
@Override
public void modifyIndex(final IndexWriter writer,
final IndexSearcher searcher) throws ModifyKnowledgeBaseException {
final QueryParser qp = new QueryParser(this.docPrimKeyField, new DoserIDAnalyzer());
try {
final TopDocs top = searcher.search(
qp.parse(QueryParserBase.escape(this.uri)), 1);
final ScoreDoc[] scores = top.scoreDocs;
final Document doc = new Document();
if (scores.length == 0) {
// Create Document first
final Map<String, String> hash = KnowledgeBaseEntryCreation
.createKnowledgeBaseEntryOutOfDbPediaURI(this.uri);
for (final Map.Entry<String, String> entry : hash.entrySet()) {
String key = entry.getKey();
final String value = entry.getValue();
if (key.contains("_")) {
key = key.replaceAll("_[\\d]", "");
}
if (key.equalsIgnoreCase(docPrimKeyField)) {
doc.add(new StringField(key, value, Field.Store.YES));
} else {
doc.add(new TextField(key, value, Field.Store.YES));
}
}
writer.addDocument(doc);
writer.commit();
} else {
final int docNr = scores[0].doc;
final Document currentDoc = searcher.getIndexReader().document(
docNr);
// BugFix create new Document and copy Fields.
final List<IndexableField> fields = currentDoc.getFields();
for (final IndexableField field : fields) {
if (field.name().equalsIgnoreCase(docPrimKeyField)) {
doc.add(new StringField(field.name(), field
.stringValue(), Field.Store.YES));
} else {
doc.add(new TextField(field.name(),
field.stringValue(), Field.Store.YES));
}
}
}
// Update Document
for (final Map.Entry<String, String> subentry : this.hashMap
.entrySet()) {
final IndexableField field = doc.getField(subentry.getKey());
if (field == null) {
throw new ModifyKnowledgeBaseException(
"UpdateField no found", null);
}
final List<Document> docListToAdd = new LinkedList<Document>();
docListToAdd.add(doc);
if (this.action.equals(KBModifications.OVERRIDEFIELD)) {
doc.removeFields(subentry.getKey());
String[] newentries = generateSeperatedFieldStrings(subentry
.getValue());
for (int i = 0; i < newentries.length; i++) {
doc.add(new TextField(subentry.getKey(), newentries[i],
Field.Store.YES));
}
} else if (this.action
.equals(KBModifications.UPDATERELATEDLABELS)) {
doc.removeFields(subentry.getKey());
doc.add(UpdateKnowledgeBaseEntryOperator.updateOccurrences(
subentry.getValue(), field, "surroundinglabels"));
writer.updateDocuments(new Term(this.docPrimKeyField),
docListToAdd);
} else if (this.action
.equals(KBModifications.UPDATEOCCURRENCES)) {
doc.removeFields(subentry.getKey());
IndexableField f = UpdateKnowledgeBaseEntryOperator
.updateOccurrences(subentry.getValue(), field,
"occurrences");
doc.add(f);
writer.updateDocuments(new Term(this.docPrimKeyField,
this.uri), docListToAdd);
}
}
} catch (final IOException e) {
throw new ModifyKnowledgeBaseException(
"IOException in IndexSearcher", e);
} catch (ParseException e) {
throw new ModifyKnowledgeBaseException("QueryParser Exception", e);
}
}
// private void updateCachingOccurrences(int docId,
// IndexableField f) {
// HashMap<Integer, Integer> hash = new HashMap<Integer, Integer>();
// String str = f.stringValue();
// final String[] split = str.split(";;;");
// for (final String element : split) {
// final String[] splitter = element.split(":::");
// int check = 1;
// try {
// check = Integer.valueOf(splitter[1]);
// hash.put(splitter[0].hashCode(), check);
// } catch (final NumberFormatException e) {
// Logger.getRootLogger().error(e.getStackTrace());
// }
// }
// HashMapUpdateInformation updateInfos = new HashMapUpdateInformation(
// UpdateTypes.Occurrences, docId, hash);
// this.setChanged();
// notifyObservers(updateInfos);
// }
/**
* Verschiedene Einträge werden in HTML formulare mit
getrennt
* (neue Zeile). Jede Zeile soll ein eigenes Field im Index darstellen
*
* @param the
* whole string
* @return field content array
*/
private String[] generateSeperatedFieldStrings(String str) {
String[] splitter = str.split("
");
return splitter;
}
}