package eu.dnetlib.iis.wf.ingest.pmc.metadata;
import static eu.dnetlib.iis.wf.ingest.pmc.metadata.AssertExtractedDocumentMetadata.assertAffiliation;
import static eu.dnetlib.iis.wf.ingest.pmc.metadata.AssertExtractedDocumentMetadata.assertAuthor;
import static org.junit.Assert.assertEquals;
import java.io.File;
import java.util.List;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.junit.Before;
import org.junit.Test;
import org.xml.sax.XMLReader;
import com.google.common.collect.Maps;
import eu.dnetlib.iis.ingest.pmc.metadata.schemas.Affiliation;
import eu.dnetlib.iis.ingest.pmc.metadata.schemas.Author;
import eu.dnetlib.iis.ingest.pmc.metadata.schemas.ExtractedDocumentMetadata;
/**
* @author madryk
*/
public class ArticleMetaXmlHandlerTest {
private final static String XML_BASE_PATH = "src/test/resources/eu/dnetlib/iis/wf/ingest/pmc/metadata/data/articlemeta";
private ArticleMetaXmlHandler articleMetaXmlHandler;
private SAXParser saxParser;
private ExtractedDocumentMetadata.Builder metaBuilder;
@Before
public void init() throws Exception {
// initializing sax parser
SAXParserFactory saxFactory = SAXParserFactory.newInstance();
saxFactory.setValidating(false);
saxParser = saxFactory.newSAXParser();
XMLReader reader = saxParser.getXMLReader();
reader.setFeature("http://xml.org/sax/features/validation", false);
reader.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
// initializing metadata builder with required fields
metaBuilder = ExtractedDocumentMetadata.newBuilder();
metaBuilder.setId("some-id");
metaBuilder.setText("");
metaBuilder.setExternalIdentifiers(Maps.newHashMap());
metaBuilder.setEntityType("");
articleMetaXmlHandler = new ArticleMetaXmlHandler(metaBuilder);
}
//------------------------ TESTS --------------------------
@Test
public void testAuthorsWithXRefAffiliation() throws Exception {
// given
File xmlFile = new File(XML_BASE_PATH + "/authors_with_aff_refs.xml");
// execute
saxParser.parse(xmlFile, articleMetaXmlHandler);
ExtractedDocumentMetadata metadata = metaBuilder.build();
// assert
List<Affiliation> affiliations = metadata.getAffiliations();
assertEquals(2, affiliations.size());
assertAffiliation(affiliations.get(0), "Graduate School of Bioscience and Biotechnology, Tokyo Institute of Technology",
"Nagatsuta-cho, Midori-ku, Yokohama 226-8501", "JP", "Japan",
"Graduate School of Bioscience and Biotechnology, Tokyo Institute of Technology, Nagatsuta-cho, Midori-ku, Yokohama 226-8501, Japan");
assertAffiliation(affiliations.get(1), "Graduate School of Information Science, Nagoya University",
"Furo-cho, Chikusa-ku, Nagoya 464-8601", "JP", "Japan",
"Graduate School of Information Science, Nagoya University, Furo-cho, Chikusa-ku, Nagoya 464-8601, Japan");
List<Author> authors = metadata.getAuthors();
assertEquals(2, authors.size());
assertAuthor(authors.get(0), "Azuma, Yusuke", 0);
assertAuthor(authors.get(1), "Ota, Motonori", 1);
}
@Test
public void testAuthorsWithAffiliationInContrib() throws Exception {
// given
File xmlFile = new File(XML_BASE_PATH + "/authors_with_aff_in_contrib.xml");
// execute
saxParser.parse(xmlFile, articleMetaXmlHandler);
ExtractedDocumentMetadata metadata = metaBuilder.build();
// assert
List<Affiliation> affiliations = metadata.getAffiliations();
assertEquals(2, affiliations.size());
assertAffiliation(affiliations.get(0), "National Center for Biotechnology Information, National Library of Medicine, NIH",
"8600 Rockville Pike, Bethesda, MD", "US", "USA",
"National Center for Biotechnology Information, National Library of Medicine, NIH, 8600 Rockville Pike, Bethesda, MD, USA");
assertAffiliation(affiliations.get(1), "Consolidated Safety Services",
"10335 Democracy Lane, Suite 202, Fairfax, VA", "US", "USA",
"Consolidated Safety Services, 10335 Democracy Lane, Suite 202, Fairfax, VA, USA");
List<Author> authors = metadata.getAuthors();
assertEquals(2, authors.size());
assertAuthor(authors.get(0), "Tanabe, Lorraine", 0);
assertAuthor(authors.get(1), "Thom, Lynne H.", 1);
}
@Test
public void testAuthorsWithAffiliationInContribGroup() throws Exception {
// given
File xmlFile = new File(XML_BASE_PATH + "/authors_with_aff_in_contrib_group.xml");
// execute
saxParser.parse(xmlFile, articleMetaXmlHandler);
ExtractedDocumentMetadata metadata = metaBuilder.build();
// assert
List<Affiliation> affiliations = metadata.getAffiliations();
assertEquals(2, affiliations.size());
assertAffiliation(affiliations.get(0), "National Center for Biotechnology Information, National Library of Medicine, NIH",
"8600 Rockville Pike, Bethesda, MD", "US", "USA",
"National Center for Biotechnology Information, National Library of Medicine, NIH, 8600 Rockville Pike, Bethesda, MD, USA");
assertAffiliation(affiliations.get(1), "Consolidated Safety Services",
"10335 Democracy Lane, Suite 202, Fairfax, VA", "US", "USA",
"Consolidated Safety Services, 10335 Democracy Lane, Suite 202, Fairfax, VA, USA");
List<Author> authors = metadata.getAuthors();
assertEquals(4, authors.size());
assertAuthor(authors.get(0), "Tanabe, Lorraine", 0);
assertAuthor(authors.get(1), "Xie, Natalie", 0);
assertAuthor(authors.get(2), "Thom, Lynne H.", 1);
assertAuthor(authors.get(3), "Matten, Wayne", 1);
}
@Test
public void testAuthorsWithEncodedCharacters() throws Exception {
// given
File xmlFile = new File(XML_BASE_PATH + "/authors_with_encoded_characters.xml");
// execute
saxParser.parse(xmlFile, articleMetaXmlHandler);
ExtractedDocumentMetadata metadata = metaBuilder.build();
// assert
List<Author> authors = metadata.getAuthors();
assertEquals(3, authors.size());
assertAuthor(authors.get(0), "Ramírez-Romero, Miguel A.");
assertAuthor(authors.get(1), "González, Víctor");
assertAuthor(authors.get(2), "Dávila, Guillermo");
}
}