package io.lumify.gdelt;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * Tests for {@link GDELTParser}.
 *
 * <p>Before every test a single tab-separated sample line is parsed into a
 * {@link GDELTEvent}; the individual tests then check the values exposed by the event
 * and by its two {@link GDELTActor} views against the values embedded in that line.
 */
@RunWith(JUnit4.class)
public class GDELTParserTest {

    /** Pattern used to build the expected values for the date getters. */
    private static final String DATE_PATTERN = "yyyyMMdd";

    /** Source URL that ends the sample line; kept in one place so fixture and expectation cannot drift apart. */
    private static final String SOURCE_URL = "http://www.thisismoney.co.uk/news/article-2675900/Student-loans-firm-shamed-axing-fake-legal-threats-Company-admits-sending-300-000-graduates-letters-past-decade.html?ITO=1490&ns_campaign=1490/RK=0";

    /**
     * Tab-separated sample line handed to the parser in {@link #setUp()}. It is split over
     * several literals purely for readability; the concatenation is a compile-time constant.
     */
    private static final String SAMPLE_LINE =
            "303291233\t20040703\t200407\t2004\t2004.5014\t"
            // values asserted through the actor 1 getters
            + "EDU\tSTUDENT\tUSA\tALQ\tara\tALE\tHIN\tEDU\tRAD\tMOD\t"
            // values asserted through the actor 2 getters
            + "BUS\tCOMPANY\tUSA\tALT\tart\tALA\tHIT\tBUS\tDAR\tDOM\t"
            + "1\t040\t040\t04\t1\t1.0\t5\t1\t5\t2.24358974358974\t"
            // values asserted through the actor 1 / actor 2 / action geo getters
            + "5\tStaffordshire, Staffordshire, United Kingdom\tUK\tUKM9\t52.8333\t-2\t-2608511\t"
            + "6\tLondon, United Kingdom\tUK2\tUKM92\t62.8333\t-3\t-3608511\t"
            + "7\tChelsea, United Kingdom\tUK3\tUKM93\t72.8333\t-4\t-4608511\t"
            + "20140701\t"
            + SOURCE_URL;

    /** Classpath resource (resolved relative to this class) read by the large-file test. */
    private static final String LARGE_FILE_RESOURCE = "20140701.export.txt";

    private GDELTParser parser;
    private GDELTEvent event;

    /**
     * Creates a fresh parser and parses {@link #SAMPLE_LINE} into {@link #event}.
     *
     * @throws ParseException if the parser rejects the sample line
     */
    @Before
    public void setUp() throws ParseException {
        this.parser = new GDELTParser();
        this.event = this.parser.parseLine(SAMPLE_LINE);
    }

    /**
     * Checks the flat getters of the parsed event against the values in the sample line.
     *
     * @throws Exception if building an expected date fails
     */
    @Test
    public void testParseEventDataFields() throws Exception {
        assertEquals("303291233", event.getGlobalEventId());
        assertEquals(new SimpleDateFormat(DATE_PATTERN).parse("20040703"), event.getDateOfOccurrence());

        assertEquals("EDU", event.getActor1Code());
        assertEquals("STUDENT", event.getActor1Name());
        assertEquals("USA", event.getActor1CountryCode());
        assertEquals("ALQ", event.getActor1KnownGroupCode());
        assertEquals("ara", event.getActor1EthnicCode());
        assertEquals("ALE", event.getActor1Religion1Code());
        assertEquals("HIN", event.getActor1Religion2Code());
        assertEquals("EDU", event.getActor1Type1Code());
        assertEquals("RAD", event.getActor1Type2Code());
        assertEquals("MOD", event.getActor1Type3Code());

        assertEquals("BUS", event.getActor2Code());
        assertEquals("COMPANY", event.getActor2Name());
        assertEquals("USA", event.getActor2CountryCode());
        assertEquals("ALT", event.getActor2KnownGroupCode());
        assertEquals("art", event.getActor2EthnicCode());
        assertEquals("ALA", event.getActor2Religion1Code());
        assertEquals("HIT", event.getActor2Religion2Code());
        assertEquals("BUS", event.getActor2Type1Code());
        assertEquals("DAR", event.getActor2Type2Code());
        assertEquals("DOM", event.getActor2Type3Code());

        assertTrue(event.isRootEvent());
        assertEquals("040", event.getEventCode());
        assertEquals("040", event.getEventBaseCode());
        assertEquals("04", event.getEventRootCode());
        assertEquals(1, event.getQuadClass());
        assertEquals(1.0f, event.getGoldsteinScale(), 0.01);
        assertEquals(5, event.getNumMentions());
        assertEquals(1, event.getNumSources());
        assertEquals(5, event.getNumArticles());
        assertEquals(2.24358974358974, event.getAverageTone(), 0.01);

        assertEquals(5, event.getActor1GeoType());
        assertEquals("Staffordshire, Staffordshire, United Kingdom", event.getActor1GeoFullName());
        assertEquals("UK", event.getActor1GeoCountryCode());
        assertEquals("UKM9", event.getActor1GeoADM1Code());
        assertEquals(52.8333, event.getActor1GeoLatitude(), 0.01);
        assertEquals(-2, event.getActor1GeoLongitude(), 0.01);
        assertEquals("-2608511", event.getActor1GeoFeatureId());

        assertEquals(6, event.getActor2GeoType());
        assertEquals("London, United Kingdom", event.getActor2GeoFullName());
        assertEquals("UK2", event.getActor2GeoCountryCode());
        assertEquals("UKM92", event.getActor2GeoADM1Code());
        assertEquals(62.8333, event.getActor2GeoLatitude(), 0.01);
        assertEquals(-3, event.getActor2GeoLongitude(), 0.01);
        assertEquals("-3608511", event.getActor2GeoFeatureId());

        assertEquals(7, event.getActionGeoType());
        assertEquals("Chelsea, United Kingdom", event.getActionGeoFullName());
        assertEquals("UK3", event.getActionGeoCountryCode());
        assertEquals("UKM93", event.getActionGeoADM1Code());
        assertEquals(72.8333, event.getActionGeoLatitude(), 0.01);
        assertEquals(-4, event.getActionGeoLongitude(), 0.01);
        assertEquals("-4608511", event.getActionGeoFeatureId());

        assertEquals(new SimpleDateFormat(DATE_PATTERN).parse("20140701"), event.getDateAdded());
        assertEquals(SOURCE_URL, event.getSourceUrl());
    }

    /** Checks the {@link GDELTActor} returned by {@code getActor1()} against the sample line. */
    @Test
    public void testActor1() {
        GDELTActor actor = event.getActor1();
        assertEquals("EDU", actor.getCode());
        assertEquals("STUDENT", actor.getName());
        assertEquals("USA", actor.getCountryCode());
        assertEquals("ALQ", actor.getKnownGroupCode());
        assertEquals("ara", actor.getEthnicCode());
        assertEquals("ALE", actor.getReligion1Code());
        assertEquals("HIN", actor.getReligion2Code());
        assertEquals("EDU", actor.getType1Code());
        assertEquals("RAD", actor.getType2Code());
        assertEquals("MOD", actor.getType3Code());
    }

    /** Checks the {@link GDELTActor} returned by {@code getActor2()} against the sample line. */
    @Test
    public void testActor2() {
        GDELTActor actor = event.getActor2();
        assertEquals("BUS", actor.getCode());
        assertEquals("COMPANY", actor.getName());
        assertEquals("USA", actor.getCountryCode());
        assertEquals("ALT", actor.getKnownGroupCode());
        assertEquals("art", actor.getEthnicCode());
        assertEquals("ALA", actor.getReligion1Code());
        assertEquals("HIT", actor.getReligion2Code());
        assertEquals("BUS", actor.getType1Code());
        assertEquals("DAR", actor.getType2Code());
        assertEquals("DOM", actor.getType3Code());
    }

    /**
     * Feeds every line of the bundled export file through the parser. The test makes no
     * assertions on the parsed events; it passes as long as no line makes the parser throw.
     *
     * @throws ParseException if the parser rejects a line
     * @throws IOException    if reading the resource fails
     */
    @Ignore // Remove the annotation when making changes to parsing code. The test takes a long time to run.
    @Test
    public void testParsingLargeFile() throws ParseException, IOException {
        InputStream is = GDELTParserTest.class.getResourceAsStream(LARGE_FILE_RESOURCE);
        // getResourceAsStream returns null for a missing resource; fail with a clear message
        // instead of an anonymous NullPointerException from the reader below.
        assertTrue("Test resource not found on classpath: " + LARGE_FILE_RESOURCE, is != null);

        // NOTE(review): the reader uses the platform default charset - confirm the fixture's
        // encoding and pin it explicitly if the test must be platform independent.
        BufferedReader reader = new BufferedReader(new InputStreamReader(is));
        String line;
        try {
            while ((line = reader.readLine()) != null) {
                parser.parseLine(line);
            }
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                // Best effort: a failed close must not mask the outcome of the parsing loop.
                System.err.println("Failed to close reader: " + e.toString());
            }
        }
    }
}