package org.wikibrain.wikidata; import com.google.gson.JsonElement; import com.google.gson.JsonParser; import org.mockito.Mockito; import gnu.trove.map.TIntIntMap; import gnu.trove.map.hash.TIntIntHashMap; import gnu.trove.set.TIntSet; import gnu.trove.set.hash.TIntHashSet; import org.apache.commons.collections.IteratorUtils; import org.apache.commons.io.FileUtils; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import org.wikibrain.core.dao.DaoException; import org.wikibrain.core.dao.MetaInfoDao; import org.wikibrain.core.dao.UniversalPageDao; import org.wikibrain.core.dao.sql.MetaInfoSqlDao; import org.wikibrain.core.dao.sql.TestDaoUtil; import org.wikibrain.core.dao.sql.WpDataSource; import org.wikibrain.core.lang.Language; import org.wikibrain.core.lang.LanguageSet; import org.wikibrain.utils.WpIOUtils; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.net.URISyntaxException; import java.net.URL; import java.util.*; import static org.junit.Assert.*; /** * @author Shilad Sen */ public class TestWikidataDao { public static Language EN = Language.getByLangCode("en"); public static File cacheFile; public static File dbDir; @BeforeClass public static void createDb() throws IOException, DaoException, ClassNotFoundException, URISyntaxException { dbDir = File.createTempFile("dao", "cache"); dbDir.delete(); dbDir.mkdirs(); cacheFile = File.createTempFile("dao", "cache"); cacheFile.delete(); cacheFile.mkdirs(); // Add all the entity ids we need as values for SOME key URL url = TestWikidataDao.class.getResource("/testDump.json.bz2"); TIntIntMap map = new TIntIntHashMap(); BufferedReader reader = WpIOUtils.openBufferedReader(new File(url.toURI())); while (true) { String line = reader.readLine(); if (line == null) { break; } if (!line.contains("{")) { continue; } line = line.trim(); if (line.endsWith(",")) { line = line.substring(0, line.length()-1); } JsonElement element = new JsonParser().parse(line.trim()); String id = element.getAsJsonObject().get("id").getAsString(); if (id.startsWith("Q")) { int i = Integer.valueOf(id.substring(1)); map.put(i,i); } } reader.close(); Map<Language, TIntIntMap> concepts = new HashMap<Language, TIntIntMap>(); for (Language lang : LanguageSet.ALL){ concepts.put(lang,map); } UniversalPageDao upDao = Mockito.mock(UniversalPageDao.class); Mockito.when(upDao.getAllUnivToLocalIdsMap(LanguageSet.ALL)).thenReturn(concepts); WpDataSource ds = TestDaoUtil.getWpDataSource(dbDir); MetaInfoDao md = new MetaInfoSqlDao(ds); md.beginLoad(); WikidataSqlDao wd = new WikidataSqlDao(ds, null, null); wd.beginLoad(); WikidataDumpLoader loader = new WikidataDumpLoader(wd, md, upDao, LanguageSet.ALL); loader.load(new File(url.toURI())); wd.endLoad(); md.endLoad(); } @AfterClass public static void deleteDb() throws IOException { FileUtils.deleteDirectory(dbDir); FileUtils.deleteDirectory(cacheFile); } @Test public void testProps() throws DaoException, IOException, ClassNotFoundException { WpDataSource ds = TestDaoUtil.getWpDataSource(dbDir); WikidataDao wd = new WikidataSqlDao(ds, null, null); Map<Integer, WikidataEntity> props = wd.getProperties(); assertEquals(props.size(), 1304); assertTrue(props.containsKey(127)); WikidataEntity entity = wd.getProperty(127); assertEquals(127, entity.getId()); assertEquals(WikidataEntity.Type.PROPERTY, entity.getType()); assertEquals("owned by", entity.getLabels().get(EN)); assertEquals("propietario", entity.getLabels().get(Language.ES)); assertEquals("owner of the subject", entity.getDescriptions().get(EN)); assertTrue(entity.getAliases().get(Language.getByLangCode("cs")).contains("majitel")); assertEquals(0, entity.getStatements().size()); } @Test public void testItem() throws DaoException, IOException, ClassNotFoundException { WpDataSource ds = TestDaoUtil.getWpDataSource(dbDir); WikidataDao wd = new WikidataSqlDao(ds, null, null); WikidataEntity entity = wd.getItem(23); assertEquals(23, entity.getId()); assertEquals(WikidataEntity.Type.ITEM, entity.getType()); assertEquals("George Washington", entity.getLabels().get(Language.getByLangCode("en"))); assertEquals("\u0414\u0436\u043e\u0440\u0434\u0436 \u0412\u0430\u0448\u0438\u043d\u0433\u0442\u043e\u043d", entity.getLabels().get(Language.getByLangCode("ru"))); assertEquals("American politician, 1st president of the United States (in office from 1789 to 1797)", entity.getDescriptions().get(Language.getByLangCode("en"))); assertTrue(entity.getAliases().get(Language.getByLangCode("ta")).contains("\u0b9c\u0bcb\u0bb0\u0bcd\u0b9c\u0bcd \u0bb5\u0bca\u0bb7\u0bbf\u0b99\u0bcd\u0b9f\u0ba9\u0bcd")); assertEquals(67, entity.getStatements().size()); Map<String, List<WikidataStatement>> statements = entity.getStatementsInLanguage(Language.getByLangCode("en")); assertEquals(2, statements.get("award received").size()); TIntSet ids = new TIntHashSet(); for (WikidataStatement st : statements.get("award received")) { assertEquals(166, st.getProperty().getId()); assertEquals("award received", st.getProperty().getLabels().get(EN)); assertEquals(WikidataValue.Type.ITEM, st.getValue().getType()); ids.add(st.getValue().getItemValue()); } assertEquals(new TIntHashSet(new int[] {3519573, 721743}), ids); } @Test public void testLocalStatements() throws DaoException, IOException, ClassNotFoundException { WpDataSource ds = TestDaoUtil.getWpDataSource(dbDir); WikidataDao wd = new WikidataSqlDao(ds, null, null); Map<String, List<LocalWikidataStatement>> statements = wd.getLocalStatements(EN, WikidataEntity.Type.ITEM, 23); assertEquals(57, statements.keySet().size()); for (String prop : statements.keySet()) { System.out.println("property " + prop + " has statements:"); for (LocalWikidataStatement st : statements.get(prop)) { System.out.println("\t" + st.getFullStatement()); } } List<LocalWikidataStatement> occupations = statements.get("occupation"); assertEquals(4, occupations.size()); for (LocalWikidataStatement lws : occupations) { assertEquals("George Washington occupation unknown", lws.getFullStatement()); } } @Test public void testSearchForValue() throws Exception { WpDataSource ds = TestDaoUtil.getWpDataSource(dbDir); WikidataDao wd = new WikidataSqlDao(ds, null, null); List<WikidataStatement> stats = IteratorUtils.toList( wd.get(new WikidataFilter.Builder().withValue(WikidataValue.forString("11928912p")).build() ).iterator()); assertEquals(1, stats.size()); stats = IteratorUtils.toList(wd.getByValue("BnF identifier", WikidataValue.forString("11928912p")).iterator()); assertEquals(1, stats.size()); stats = IteratorUtils.toList(wd.getByValue(wd.getProperty(268), WikidataValue.forString("11928912p")).iterator()); assertEquals(1, stats.size()); stats = IteratorUtils.toList( wd.get(new WikidataFilter.Builder().withValue(WikidataValue.forItem(142)).build() ).iterator()); assertEquals(34, stats.size()); stats = IteratorUtils.toList(wd.getByValue("country of citizenship", WikidataValue.forItem(142)).iterator()); assertEquals(6, stats.size()); } @Test public void testGeoCoordinates() throws Exception { WpDataSource ds = TestDaoUtil.getWpDataSource(dbDir); WikidataDao wd = new WikidataSqlDao(ds, null, null); WikidataFilter filter = (new WikidataFilter.Builder()).withPropertyId(625).build(); List<WikidataStatement> stats = IteratorUtils.toList(wd.get(filter).iterator()); assertEquals(190, stats.size()); } }