package org.wikibrain.wikidata; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.time.DateUtils; import org.junit.Test; import org.wikibrain.core.lang.Language; import org.wikibrain.core.lang.LanguageInfo; import org.wikibrain.core.model.RawPage; import org.wikibrain.parser.WpParseException; import org.wikibrain.parser.xml.PageXmlParser; import org.wikibrain.utils.WpIOUtils; import java.io.*; import java.text.ParseException; import java.util.Date; import static org.junit.Assert.*; /** * @author Shilad Sen */ public class TestWikidataParser { @Test public void testWikidataRawRecord() throws IOException, WpParseException { String json = WpIOUtils.resourceToString("/testPage.json"); WikidataParser parser = new WikidataParser(); WikidataEntity entity = parser.parse(json); assertEquals(entity.getType(), WikidataEntity.Type.ITEM); assertEquals(entity.getId(), 157); assertEquals(entity.getLabels().get(Language.ES), "Fran\u00e7ois Hollande"); assertEquals(entity.getDescriptions().get(Language.EN), "24th President of the French Republic"); WikidataStatement stm = entity.getStatements().get(0); assertEquals(stm.getProperty().getId(), 40); assertEquals(stm.getValue().getType(), WikidataValue.Type.ITEM); assertEquals(stm.getValue().getItemValue(), 16783695); } @Test public void testDump() throws IOException, WpParseException { File tmp = File.createTempFile("wikibrain", "dump.json.bz2"); try { tmp.deleteOnExit(); InputStream in = TestWikidataParser.class.getResourceAsStream("/testDump.json.bz2"); OutputStream out = new FileOutputStream(tmp); IOUtils.copy(in, out); IOUtils.closeQuietly(in); IOUtils.closeQuietly(out); WikidataDumpParser parser = new WikidataDumpParser(tmp); int numItems = 0; int numProperties = 0; for (WikidataEntity record : parser) { if (record.getType() == WikidataEntity.Type.ITEM) { numItems++; } else if (record.getType() == WikidataEntity.Type.PROPERTY) { numProperties++; } } assertEquals(400, numItems); assertEquals(1304, numProperties); } finally { tmp.delete(); } } }