package mil.nga.giat.geowave.format.gdelt;
import java.io.File;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.commons.lang3.tuple.Pair;
import org.geotools.feature.AttributeTypeBuilder;
import org.geotools.feature.simple.SimpleFeatureTypeBuilder;
import org.opengis.feature.simple.SimpleFeatureType;
import com.vividsolutions.jts.geom.Point;
/**
 * Static helpers for working with GDELT event data: date parsing, column and
 * attribute-name constants, construction of the event
 * {@link SimpleFeatureType}, latitude/longitude extraction from a split
 * record, and input-file name validation.
 */
public class GDELTUtils
{
    /**
     * Per-thread "yyyyMMdd" formatter. {@link SimpleDateFormat} is not
     * thread-safe, so every thread lazily receives its own instance.
     */
    private static final ThreadLocal<DateFormat> dateFormat = new ThreadLocal<DateFormat>() {
        @Override
        protected DateFormat initialValue() {
            return new SimpleDateFormat(
                    "yyyyMMdd");
        }
    };

    /** Lower-cased file name made of eight digits followed by ".export.csv.zip". */
    private static final Pattern EXPORT_CSV_ZIP_NAME = Pattern.compile("\\d{8}\\.export\\.csv\\.zip");

    /** Lower-cased file name made of four to six digits followed by ".zip". */
    private static final Pattern NUMERIC_ZIP_NAME = Pattern.compile("\\d{4,6}\\.zip");

    /**
     * Parses a date written as "yyyyMMdd".
     *
     * NOTE(review): the formatter is left in SimpleDateFormat's default
     * lenient mode and uses the JVM default time zone, so out-of-range
     * fields roll over instead of failing and the resulting instant depends
     * on the host configuration. Confirm this is what callers expect.
     *
     * @param source
     *            the text to parse
     * @return the parsed date
     * @throws ParseException
     *             if the beginning of {@code source} cannot be parsed
     */
    public static Date parseDate(
            final String source )
            throws ParseException {
        return dateFormat.get().parse(
                source);
    }

    /*
     * NOTE(review): SOURCE_URL_COLUMN_ID (57) is not a valid index for a
     * record that has only GDELT_MIN_COLUMNS (57) columns, so the source URL
     * column is presumably optional - verify against the caller.
     */
    public static final int GDELT_MIN_COLUMNS = 57;
    public static final int GDELT_MAX_COLUMNS = 58;

    /** Name given to the feature type built by {@link #createGDELTEventDataType(boolean)}. */
    public static final String GDELT_EVENT_FEATURE = "gdeltevent";

    // "Core" fields: always part of the feature type. *_ATTRIBUTE values are
    // attribute names; *_COLUMN_ID values are column positions (the lat/lon
    // ones are used as zero-based array indices by parseLatLon).
    public static final String GDELT_GEOMETRY_ATTRIBUTE = "geometry";

    public static final String GDELT_EVENT_ID_ATTRIBUTE = "eventid";
    public static final int GDELT_EVENT_ID_COLUMN_ID = 0;

    public static final String GDELT_TIMESTAMP_ATTRIBUTE = "Timestamp";
    public static final int GDELT_TIMESTAMP_COLUMN_ID = 1;

    public static final String GDELT_LATITUDE_ATTRIBUTE = "Latitude";
    public static final String GDELT_LONGITUDE_ATTRIBUTE = "Longitude";

    public static final int GDELT_ACTION_GEO_TYPE_COLUMN_ID = 49;
    private static final int GDELT_ACTION_LATITUDE_COLUMN_ID = 53;
    private static final int GDELT_ACTION_LONGITUDE_COLUMN_ID = 54;

    public static final String ACTOR_1_NAME_ATTRIBUTE = "actor1Name";
    public static final int ACTOR_1_NAME_COLUMN_ID = 6;

    public static final String ACTOR_2_NAME_ATTRIBUTE = "actor2Name";
    public static final int ACTOR_2_NAME_COLUMN_ID = 16;

    public static final String ACTION_COUNTRY_CODE_ATTRIBUTE = "countryCode";
    public static final int ACTION_COUNTRY_CODE_COLUMN_ID = 51;

    public static final String SOURCE_URL_ATTRIBUTE = "sourceUrl";
    public static final int SOURCE_URL_COLUMN_ID = 57;

    // "Supplemental" fields: only added to the feature type on request.
    public static final String ACTOR_1_COUNTRY_CODE_ATTRIBUTE = "actor1CountryCode";
    public static final int ACTOR_1_COUNTRY_CODE_COLUMN_ID = 37;

    public static final String ACTOR_2_COUNTRY_CODE_ATTRIBUTE = "actor2CountryCode";
    public static final int ACTOR_2_COUNTRY_CODE_COLUMN_ID = 44;

    public static final String NUM_MENTIONS_ATTRIBUTE = "numMentions";
    public static final int NUM_MENTIONS_COLUMN_ID = 31;

    public static final String NUM_SOURCES_ATTRIBUTE = "numSources";
    public static final int NUM_SOURCES_COLUMN_ID = 32;

    public static final String NUM_ARTICLES_ATTRIBUTE = "numArticles";
    public static final int NUM_ARTICLES_COLUMN_ID = 33;

    public static final String AVG_TONE_ATTRIBUTE = "avgTone";
    public static final int AVG_TONE_COLUMN_ID = 34;

    /**
     * Builds the feature type named {@link #GDELT_EVENT_FEATURE}.
     *
     * The core attributes are added first, in this order: geometry (Point),
     * event id (Integer), timestamp (Date), latitude and longitude (Double)
     * - all non-nillable - followed by the nillable String attributes actor
     * 1 name, actor 2 name, action country code and source URL.
     *
     * @param includeSupplementalFields
     *            when true, additionally appends the nillable actor 1/2
     *            country codes (String) and the non-nillable mention, source
     *            and article counts (Integer) and average tone (Double)
     * @return the newly built feature type
     */
    public static SimpleFeatureType createGDELTEventDataType(
            final boolean includeSupplementalFields ) {
        final SimpleFeatureTypeBuilder simpleFeatureTypeBuilder = new SimpleFeatureTypeBuilder();
        simpleFeatureTypeBuilder.setName(GDELT_EVENT_FEATURE);

        final AttributeTypeBuilder attributeTypeBuilder = new AttributeTypeBuilder();

        // Attribute order defines the schema layout; keep it stable.
        addAttribute(simpleFeatureTypeBuilder, attributeTypeBuilder, Point.class, false, GDELT_GEOMETRY_ATTRIBUTE);
        addAttribute(simpleFeatureTypeBuilder, attributeTypeBuilder, Integer.class, false, GDELT_EVENT_ID_ATTRIBUTE);
        addAttribute(simpleFeatureTypeBuilder, attributeTypeBuilder, Date.class, false, GDELT_TIMESTAMP_ATTRIBUTE);
        addAttribute(simpleFeatureTypeBuilder, attributeTypeBuilder, Double.class, false, GDELT_LATITUDE_ATTRIBUTE);
        addAttribute(simpleFeatureTypeBuilder, attributeTypeBuilder, Double.class, false, GDELT_LONGITUDE_ATTRIBUTE);
        addAttribute(simpleFeatureTypeBuilder, attributeTypeBuilder, String.class, true, ACTOR_1_NAME_ATTRIBUTE);
        addAttribute(simpleFeatureTypeBuilder, attributeTypeBuilder, String.class, true, ACTOR_2_NAME_ATTRIBUTE);
        addAttribute(simpleFeatureTypeBuilder, attributeTypeBuilder, String.class, true, ACTION_COUNTRY_CODE_ATTRIBUTE);
        addAttribute(simpleFeatureTypeBuilder, attributeTypeBuilder, String.class, true, SOURCE_URL_ATTRIBUTE);

        if (includeSupplementalFields) {
            addAttribute(simpleFeatureTypeBuilder, attributeTypeBuilder, String.class, true, ACTOR_1_COUNTRY_CODE_ATTRIBUTE);
            addAttribute(simpleFeatureTypeBuilder, attributeTypeBuilder, String.class, true, ACTOR_2_COUNTRY_CODE_ATTRIBUTE);
            addAttribute(simpleFeatureTypeBuilder, attributeTypeBuilder, Integer.class, false, NUM_MENTIONS_ATTRIBUTE);
            addAttribute(simpleFeatureTypeBuilder, attributeTypeBuilder, Integer.class, false, NUM_SOURCES_ATTRIBUTE);
            addAttribute(simpleFeatureTypeBuilder, attributeTypeBuilder, Integer.class, false, NUM_ARTICLES_ATTRIBUTE);
            addAttribute(simpleFeatureTypeBuilder, attributeTypeBuilder, Double.class, false, AVG_TONE_ATTRIBUTE);
        }

        return simpleFeatureTypeBuilder.buildFeatureType();
    }

    /**
     * Appends one attribute descriptor with the given binding, nillability
     * and name to the feature type under construction.
     */
    private static void addAttribute(
            final SimpleFeatureTypeBuilder featureTypeBuilder,
            final AttributeTypeBuilder attributeTypeBuilder,
            final Class<?> binding,
            final boolean nillable,
            final String attributeName ) {
        featureTypeBuilder.add(attributeTypeBuilder.binding(
                binding).nillable(
                nillable).buildDescriptor(
                attributeName));
    }

    /**
     * Extracts the latitude and longitude from a split record, reading the
     * values at indices 53 (latitude) and 54 (longitude).
     *
     * @param vals
     *            the column values of one record
     * @return a (latitude, longitude) pair, or {@code null} when
     *         {@code vals} is {@code null}, is too short to contain both
     *         columns, or either value is {@code null} or blank
     * @throws NumberFormatException
     *             if a non-blank value is not a parsable double
     */
    public static Pair<Double, Double> parseLatLon(
            final String[] vals ) {
        // A truncated record carries no coordinates; report that the same
        // way as blank columns instead of failing on the array access.
        final int lastRequiredIndex = Math.max(
                GDELT_ACTION_LATITUDE_COLUMN_ID,
                GDELT_ACTION_LONGITUDE_COLUMN_ID);
        if ((vals == null) || (vals.length <= lastRequiredIndex)) {
            return null;
        }

        final String latString = vals[GDELT_ACTION_LATITUDE_COLUMN_ID];
        final String lonString = vals[GDELT_ACTION_LONGITUDE_COLUMN_ID];
        if (isNullOrBlank(latString) || isNullOrBlank(lonString)) {
            return null;
        }

        final Double lat = Double.parseDouble(latString);
        final Double lon = Double.parseDouble(lonString);
        return Pair.of(
                lat,
                lon);
    }

    /** Returns true when the value is null or empty after trimming. */
    private static boolean isNullOrBlank(
            final String value ) {
        return (value == null) || value.trim().isEmpty();
    }

    /**
     * Checks whether a file's name looks like a supported input archive. The
     * name is lower-cased (English locale) and must match, in full, either
     * eight digits followed by ".export.csv.zip" or four to six digits
     * followed by ".zip". Only the name is inspected; the file is not
     * opened.
     *
     * @param file
     *            the candidate file
     * @return true if the name matches one of the two patterns; false
     *         otherwise, including when {@code file} is {@code null}
     */
    public static boolean validate(
            final File file ) {
        if (file == null) {
            return false;
        }
        final String name = file.getName().toLowerCase(
                Locale.ENGLISH);
        return EXPORT_CSV_ZIP_NAME.matcher(
                name).matches() || NUMERIC_ZIP_NAME.matcher(
                name).matches();
    }
}