/* * Carrot2 project. * * Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński. * All rights reserved. * * Refer to the full license file "carrot2.LICENSE" * in the root folder of the repository checkout or at: * http://www.carrot2.org/carrot2.LICENSE */ package org.carrot2.source.pubmed; import java.io.ByteArrayInputStream; import java.util.ArrayList; import org.carrot2.util.tests.CarrotTestCase; import org.junit.Test; import org.xml.sax.InputSource; import org.xml.sax.XMLReader; import org.carrot2.shaded.guava.common.collect.Lists; import org.carrot2.shaded.guava.common.io.Resources; public class PubMedIdSearchHandlerTest extends CarrotTestCase { static ArrayList<String> PMID_SET = Lists.newArrayList( "24470789", "24469991", "24468157", "24463729", "24462788", "24462051", "24458711", "24454330", "24445216", "24445045", "24445044", "24444538", "24438079", "24429541", "24424058", "24404629", "24397951", "24394536", "24388434", "24387274", "24385344", "24380833", "24379686", "24377894", "24374005", "24370063", "24369828", "24368111", "24367585", "24366220", "24364133", "24362355", "24358329", "24346072", "24345049", "24342780", "24340336", "24338487", "24333965", "24333561", "24333157", "24330576", "24325088", "24312343", "24303175", "24298777", "24290571", "24287404", "24283215", "24281397", "24275089", "24267039", "24266602", "24263037", "24259558", "24247152", "24234673", "24229349", "24225358", "24220118", "24219891", "24202306", "24196354", "24194732", "24176230", "24172199", "24171041", "24170386", "24163396", "24163066", "24148349", "24148164", "24140661", "24139973", "24124374", "24123166", "24117264", "24116221", "24107491", "24106600", "24101673", "24098813", "24098551", "24098525", "24084690", "24081993", "24080184", "24075193", "24072533", "24072531", "24069865", "24066597", "24063548", "24060958", "24050894", "24050803", "24047636", "24044965", "24044579", "24036126", "24030551", "24029787", "24019901", "24015695", "24015188", "24014826", "24014675", "24009777", "24005471", "24000103", "23999430", "23999279", "23994198", "23986204", "23984293", "23981691", "23981577", "23974119", "23973955", "23970917", "23964081", "23954796", "23954368", "23948693", "23945609", "23939686", "23939398", "23936994", "23934850", "23929677", "23928364", "23926027", "23923985", "23922128", "23921137", "23913707", "23911141", "23906130", "23902937", "23902780", "23895132", "23894285", "23887640", "23885014", "23884889", "23884883", "23881149", "23876511", "23875703", "23874490"); @Test public void testIdHandler() throws Exception { final XMLReader reader = PubMedDocumentSource.newXmlReader(); PubMedIdSearchHandler searchHandler = new PubMedIdSearchHandler(); reader.setContentHandler(searchHandler); byte [] bytes = Resources.toByteArray(Resources.getResource(PubMedIdSearchHandlerTest.class, "ids.xml")); reader.parse(new InputSource(new ByteArrayInputStream(bytes))); assertThat(searchHandler.getMatchCount()).isEqualTo(4561L); assertThat(searchHandler.getPubMedPrimaryIds()).isEqualTo(PMID_SET); } }