package io.lumify.gdelt;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
/**
 * Parses tab-delimited GDELT records into {@link GDELTEvent} instances.
 *
 * <p>The column position of every field is read once, at construction time, from the
 * classpath resource {@code CSV.header.fieldids.txt} (one {@code name<TAB>index} pair per
 * line). Setter methods of {@link GDELTEvent} that carry a {@link GDELTField} annotation are
 * discovered once as well; {@link #parseLine(String)} then copies each mapped column into the
 * event through the matching setter.
 *
 * <p>All instance state is populated in the constructor and only read afterwards.
 */
public class GDELTParser {
    /** Classpath resource (relative to this class) holding the field-name to column-index table. */
    private static final String HEADER_RESOURCE = "CSV.header.fieldids.txt";

    /** Field name (as used in {@link GDELTField#name()}) to zero-based column index. */
    private final Map<String, Integer> headerMapping = new HashMap<String, Integer>();

    /** Annotated setters of {@link GDELTEvent}, resolved once instead of once per parsed line. */
    private final List<Method> gdeltMethods;

    /**
     * Creates a parser, loading the header mapping and resolving the annotated setters.
     *
     * @throws IllegalStateException if the header resource is missing or malformed
     * @throws RuntimeException if the header resource cannot be read
     */
    public GDELTParser() {
        loadHeaderMapping(HEADER_RESOURCE);
        gdeltMethods = Collections.unmodifiableList(getGDELTMethods(GDELTEvent.class));
    }

    /**
     * Reads {@code name<TAB>index} pairs from the given classpath resource into
     * {@link #headerMapping}. Blank lines are ignored.
     *
     * @param fileName resource name, resolved relative to this class
     * @throws IllegalStateException if the resource does not exist or a line is malformed
     * @throws RuntimeException if reading the resource fails
     */
    private void loadHeaderMapping(String fileName) {
        InputStream is = GDELTParser.class.getResourceAsStream(fileName);
        if (is == null) {
            // getResourceAsStream signals "not found" with null rather than an exception.
            throw new IllegalStateException("Header mapping resource not found on classpath: " + fileName);
        }
        // Decode with a fixed charset so the result does not depend on the platform default.
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(is, java.nio.charset.Charset.forName("UTF-8")));
        try {
            String line;
            int lineNumber = 0;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.trim().length() == 0) {
                    continue;
                }
                String[] fields = line.split("\\t");
                if (fields.length < 2) {
                    throw new IllegalStateException("Malformed header mapping in " + fileName
                            + " at line " + lineNumber + ": expected <name><TAB><index> but was '" + line + "'");
                }
                try {
                    headerMapping.put(fields[0].trim(), Integer.parseInt(fields[1].trim()));
                } catch (NumberFormatException e) {
                    throw new IllegalStateException("Malformed column index in " + fileName
                            + " at line " + lineNumber + ": '" + fields[1] + "'", e);
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                // Best effort: the mapping has already been read (or a failure is propagating).
                System.err.println("Failed to close reader: " + e.toString());
            }
        }
    }

    /**
     * Converts one tab-delimited GDELT record into a {@link GDELTEvent}.
     *
     * <p>For every annotated setter the mapped column is looked up, trimmed and converted to the
     * setter's parameter type. Empty columns are skipped unless the field is marked as required.
     *
     * @param gdeltTabDelimitedLine a single record with columns separated by tab characters
     * @return the populated event
     * @throws ParseException if a mapped column is missing from the record, a required column is
     *         empty, a value cannot be converted to the setter's type, or the setter's type is
     *         not supported; the error offset carries the column index
     * @throws IllegalStateException if an annotated field has no entry in the header mapping
     * @throws NullPointerException if {@code gdeltTabDelimitedLine} is {@code null}
     * @throws RuntimeException if the setter cannot be invoked or itself throws
     */
    public GDELTEvent parseLine(String gdeltTabDelimitedLine) throws ParseException {
        if (gdeltTabDelimitedLine == null) {
            throw new NullPointerException("gdeltTabDelimitedLine must not be null");
        }
        // Limit -1 keeps trailing empty columns; the default split would drop them and make
        // records ending in empty optional fields look truncated.
        String[] columns = gdeltTabDelimitedLine.split("\\t", -1);
        GDELTEvent event = new GDELTEvent();
        for (Method method : gdeltMethods) {
            GDELTField annotation = method.getAnnotation(GDELTField.class);
            Integer mappedIndex = headerMapping.get(annotation.name());
            if (mappedIndex == null) {
                // Avoid an anonymous NullPointerException from auto-unboxing.
                throw new IllegalStateException("No header mapping found for field " + annotation.name());
            }
            int index = mappedIndex.intValue();
            if (index < 0 || index >= columns.length) {
                throw new ParseException("Annotated field index is beyond source column array index", index);
            }
            String column = columns[index].trim();
            if (column.length() == 0) {
                if (annotation.required()) {
                    throw new ParseException(annotation.name() + " field is required", index);
                }
                continue;
            }
            Object value = convertColumn(method.getParameterTypes()[0], column, annotation, index);
            try {
                method.invoke(event, value);
            } catch (InvocationTargetException e) {
                throw new RuntimeException(e);
            } catch (IllegalAccessException e) {
                throw new RuntimeException(e);
            }
        }
        return event;
    }

    /**
     * Converts a trimmed, non-empty column value to the given setter parameter type.
     *
     * @param type the setter's parameter type
     * @param column the trimmed, non-empty column text
     * @param annotation the field annotation, consulted for the date format
     * @param index the column index, used as the error offset of any {@link ParseException}
     * @return the converted value, boxed where the target type is primitive
     * @throws ParseException if the value cannot be converted or the type is not supported
     */
    private Object convertColumn(Class<?> type, String column, GDELTField annotation, int index)
            throws ParseException {
        if (type.equals(String.class)) {
            return column;
        }
        if (type.equals(Date.class)) {
            String formatString = annotation.dateFormat();
            // Annotation element values are never null, so an unset format shows up as empty.
            if (formatString == null || formatString.length() == 0) {
                throw new ParseException("Date type requires dateFormat annotation parameter", index);
            }
            // SimpleDateFormat is not thread-safe, so a fresh instance is used per value.
            return new SimpleDateFormat(formatString).parse(column);
        }
        try {
            if (type.equals(Integer.class) || type.equals(Integer.TYPE)) {
                return Integer.valueOf(column);
            }
            if (type.equals(Double.class) || type.equals(Double.TYPE)) {
                return Double.valueOf(column);
            }
        } catch (NumberFormatException e) {
            // Report malformed numbers through the declared checked exception.
            ParseException parseException = new ParseException(annotation.name() + " value '" + column
                    + "' is not a valid " + type.getSimpleName(), index);
            parseException.initCause(e);
            throw parseException;
        }
        if (type.equals(Boolean.class) || type.equals(Boolean.TYPE)) {
            // Only the literal "1" is treated as true.
            return Boolean.valueOf(column.equals("1"));
        }
        throw new ParseException(type + " is not supported", index);
    }

    /**
     * Collects the public methods of the given class that carry a {@link GDELTField} annotation
     * and makes them accessible for reflective invocation.
     *
     * @param clazz the event class to inspect
     * @return the annotated methods, in the order returned by {@link Class#getMethods()}
     */
    private List<Method> getGDELTMethods(Class<GDELTEvent> clazz) {
        List<Method> annotated = new ArrayList<Method>();
        for (Method method : clazz.getMethods()) {
            if (method.getAnnotation(GDELTField.class) != null) {
                method.setAccessible(true);
                annotated.add(method);
            }
        }
        return annotated;
    }
}