/* Copyright 2012 Tim Garrett, Mothsoft LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mothsoft.alexis.engine.predictive;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.LinkedHashMap;
import java.util.Map;
import com.mothsoft.alexis.domain.Document;
import com.mothsoft.alexis.domain.DocumentAssociation;
import com.mothsoft.alexis.domain.DocumentNamedEntity;
import com.mothsoft.alexis.domain.DocumentTerm;
public class OpenNLPMaxentContextBuilder {
private static final String HOUR_OF_DAY_FORMAT = "HOUR_OF_DAY=%d";
private static final String QUARTER_OF_HOUR_FORMAT = "QTR_OF_HOUR=%d";
private static final String DAY_OF_WEEK_FORMAT = "DAY_OF_WEEK=%d";
private static final String DAY_OF_MONTH_FORMAT = "DAY_OF_MONTH=%d";
private static final String ASSOC_FORMAT = "%s:%s";
/** Build a context map from a document */
public static Map<String, Integer> buildContext(final Document document) {
final Map<String, Integer> contextMap = new LinkedHashMap<String, Integer>(512);
for (final DocumentTerm dt : document.getDocumentTerms()) {
final String value = dt.getTerm().getValueLowercase();
putAndIncrement(contextMap, value, dt.getCount());
}
for (final DocumentAssociation association : document.getDocumentAssociations()) {
final String value = String.format(ASSOC_FORMAT, association.getA().getValueLowercase(), association.getB()
.getValueLowercase());
putAndIncrement(contextMap, value, association.getAssociationCount());
}
for (final DocumentNamedEntity name : document.getNamedEntities()) {
final String value = name.getName();
putAndIncrement(contextMap, value, name.getCount());
}
// build time-oriented context features
final GregorianCalendar calendar = new GregorianCalendar();
calendar.setTime(document.getCreationDate());
final String hourOfDay = String.format(HOUR_OF_DAY_FORMAT, calendar.get(Calendar.HOUR_OF_DAY));
putAndIncrement(contextMap, hourOfDay, 1);
final String quarterOfHour = String.format(QUARTER_OF_HOUR_FORMAT,
getQuarterOfHour(calendar.get(Calendar.MINUTE)));
putAndIncrement(contextMap, quarterOfHour, 1);
final String dayOfWeek = String.format(DAY_OF_WEEK_FORMAT, calendar.get(Calendar.DAY_OF_WEEK));
putAndIncrement(contextMap, dayOfWeek, 1);
final String dayOfMonth = String.format(DAY_OF_MONTH_FORMAT, calendar.get(Calendar.DAY_OF_MONTH));
putAndIncrement(contextMap, dayOfMonth, 1);
return contextMap;
}
/**
* Qtr of hour: [0, 14] = 0, [15, 29] = 1, [30, 44] = 2, [45, 59] = 3
*
* @param i
* @return
*/
private static int getQuarterOfHour(final int minute) {
if (minute <= 14) {
return 0;
} else if (minute <= 29) {
return 1;
} else if (minute <= 44) {
return 2;
} else {
return 3;
}
}
/**
* Append the context from 'document' to the supplied context map
* 'contextMap'
*/
public static void append(final Map<String, Integer> contextMap, final Document document) {
final Map<String, Integer> newMap = buildContext(document);
for (final Map.Entry<String, Integer> newEntry : newMap.entrySet()) {
final String key = newEntry.getKey();
final Integer value = newEntry.getValue();
putAndIncrement(contextMap, key, value);
}
}
public static void buildContextArrays(final Map<String, Integer> contextMap, final String[] context,
final float[] values) {
int i = 0;
for (final Map.Entry<String, Integer> ith : contextMap.entrySet()) {
context[i] = ith.getKey();
values[i] = ith.getValue();
i++;
}
}
private static void putAndIncrement(final Map<String, Integer> map, final String value, final int count) {
int newCount = count;
if (map.containsKey(value)) {
newCount += map.get(value);
}
map.put(value, newCount);
}
}