package doser.entitydisambiguation.modknowledgebase;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParserBase;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import doser.lucene.analysis.DoserIDAnalyzer;
public class UpdateKnowledgeBaseEntryOperator extends
AbstractKnowledgebaseOperator {
private static String buildStringFromHashMap(final Map<String, Integer> hash) {
final StringBuffer buffer = new StringBuffer("");
for (final Map.Entry<String, Integer> entry : hash.entrySet()) {
buffer.append(entry.getKey() + ":::" + entry.getValue() + ";;;");
}
final String str = buffer.toString();
return str.substring(0, str.length() - 3);
}
public static IndexableField updateOccurrences(final String occurrence,
final IndexableField field, final String fieldname) {
IndexableField res = null;
final String fieldString = field.stringValue();
final HashMap<String, Integer> hash = new HashMap<String, Integer>();
if ((fieldString != null) && !fieldString.equalsIgnoreCase("")) {
final String[] split = fieldString.split(";;;");
for (final String element : split) {
final String[] splitter = element.split(":::");
int check = 1;
try {
check = Integer.valueOf(splitter[1]);
hash.put(splitter[0], check);
} catch (final NumberFormatException e) {
res = field;
}
}
if (hash.containsKey(occurrence)) {
Integer amount = hash.get(occurrence);
hash.put(occurrence, ++amount);
} else {
hash.put(occurrence, 1);
}
final String value = buildStringFromHashMap(hash);
res = new TextField(fieldname, value, Field.Store.YES);
} else if ((fieldString != null) && fieldString.equalsIgnoreCase("")) {
res = new TextField(fieldname, occurrence + ":::1", Field.Store.YES);
}
return res;
}
private final KBModifications action;
private final Map<String, HashMap<String, String>> attributes;
private final String docPrimaryKey;
public UpdateKnowledgeBaseEntryOperator(final String path,
final Analyzer analyzer,
final Map<String, HashMap<String, String>> hash,
final String docPrimaryKey, final KBModifications action) {
super(path, analyzer);
this.attributes = hash;
this.docPrimaryKey = docPrimaryKey;
this.action = action;
}
@Override
public void modifyIndex(final IndexWriter writer,
final IndexSearcher searcher) throws ModifyKnowledgeBaseException {
for (final Map.Entry<String, HashMap<String, String>> entry : this.attributes
.entrySet()) {
final String key = entry.getKey();
final HashMap<String, String> hash = entry.getValue();
final QueryParser qp = new QueryParser(this.docPrimaryKey,
new DoserIDAnalyzer());
try {
final TopDocs top = searcher.search(
qp.parse(QueryParserBase.escape(key)), 1);
final ScoreDoc[] scores = top.scoreDocs;
if (scores.length > 0) {
final Document doc = new Document();
final Document currentDoc = searcher.getIndexReader()
.document(scores[0].doc);
// BugFix create new Document und copy Fields.
final List<IndexableField> fields = currentDoc.getFields();
for (final IndexableField field : fields) {
if (field.stringValue() != null) {
if (field.name().equalsIgnoreCase(docPrimaryKey)) {
doc.add(new StringField(field.name(), field
.stringValue(), Field.Store.YES));
} else {
doc.add(new TextField(field.name(), field
.stringValue(), Field.Store.YES));
}
}
}
final List<Document> docListToAdd = new LinkedList<Document>();
docListToAdd.add(doc);
for (final Map.Entry<String, String> subentry : hash
.entrySet()) {
final IndexableField field = doc.getField(subentry
.getKey());
if (field == null) {
throw new ModifyKnowledgeBaseException(
"UpdateField no found", null);
}
if (this.action.equals(KBModifications.OVERRIDEFIELD)) {
doc.removeFields(subentry.getKey());
String[] newentries = generateSeperatedFieldStrings(subentry
.getValue());
for (int i = 0; i < newentries.length; i++) {
doc.add(new TextField(subentry.getKey(),
newentries[i], Field.Store.YES));
}
} else if (this.action
.equals(KBModifications.UPDATERELATEDLABELS)) {
doc.removeFields(subentry.getKey());
doc.add(updateOccurrences(subentry.getValue(),
field, "surroundinglabels"));
} else if (this.action
.equals(KBModifications.UPDATEOCCURRENCES)) {
doc.removeFields(subentry.getKey());
IndexableField f = updateOccurrences(
subentry.getValue(), field, "occurrences");
doc.add(f);
}
}
writer.updateDocuments(new Term(this.docPrimaryKey, key),
docListToAdd);
} else {
throw new ModifyKnowledgeBaseException(
"Document not found", null);
}
} catch (final IOException e) {
throw new ModifyKnowledgeBaseException(
"IOException in IndexSearcher", e);
} catch (ParseException e) {
throw new ModifyKnowledgeBaseException("Queryparser Exception",
e);
}
}
}
// private void updateCachingOccurrences(int docId,
// IndexableField f) {
// HashMap<Integer, Integer> hash = new HashMap<Integer, Integer>();
// String str = f.stringValue();
// final String[] split = str.split(";;;");
// for (final String element : split) {
// final String[] splitter = element.split(":::");
// int check = 1;
// try {
// check = Integer.valueOf(splitter[1]);
// hash.put(splitter[0].hashCode(), check);
// } catch (final NumberFormatException e) {
// Logger.getRootLogger().error(e.getStackTrace());
// }
// }
// HashMapUpdateInformation updateInfos = new HashMapUpdateInformation(
// UpdateTypes.Occurrences, docId, hash);
// this.setChanged();
// notifyObservers(updateInfos);
// }
/**
* Verschiedene Einträge werden in HTML formulare mit
getrennt
* (neue Zeile). Jede Zeile soll ein eigenes Field im Index darstellen
*
* @param the
* whole string
* @return field content array
*/
private String[] generateSeperatedFieldStrings(String str) {
String[] splitter = str.split("
");
return splitter;
}
}