/*
* Copyright (c) 2006-2014 by Public Library of Science
*
* http://plos.org
* http://ambraproject.org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* You may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ambraproject.service.article;
import org.ambraproject.action.BaseTest;
import org.ambraproject.util.DocumentBuilderFactoryCreator;
import org.springframework.beans.factory.annotation.Autowired;
import org.testng.annotations.Test;
import org.w3c.dom.Document;
import java.io.File;
import java.net.URL;
import java.util.AbstractMap;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
/**
 * Tests for {@link AIArticleClassifier}: extraction of elements and sections from article XML,
 * assembly of the text sent for categorization, and parsing of {@code <TERM>} vector elements.
 * Article fixtures are read from the {@code articles/} directory on the system classpath.
 *
 * @author Alex Kudlick
 * Date: 7/3/12
 */
public class ArticleClassifierTest extends BaseTest {

  /** Classpath location of the article used by most tests in this class. */
  private static final String MAIN_ARTICLE = "articles/pone.0048915.xml";

  @Autowired
  protected AIArticleClassifier articleClassifier;

  /**
   * Parses an article XML fixture from the system classpath.
   *
   * @param resourcePath classpath-relative path of the fixture, e.g. {@code articles/pone.0048915.xml}
   * @return the parsed DOM document
   * @throws Exception if the fixture cannot be located, read, or parsed
   */
  private static Document loadArticle(String resourcePath) throws Exception {
    URL resource = ClassLoader.getSystemResource(resourcePath);
    // getSystemResource returns null for a missing resource; report the path instead of
    // letting the toURI() call below die with an anonymous NullPointerException.
    assertTrue(resource != null, "Test resource not found on classpath: " + resourcePath);
    return DocumentBuilderFactoryCreator.createFactory()
        .newDocumentBuilder()
        .parse(new File(resource.toURI()));
  }

  /**
   * Asserts that parsing {@code vectorElement} yields the given path/number pair.
   *
   * @param vectorElement a raw {@code <TERM>...</TERM>} string
   * @param expectedPath  the category path expected as the entry key
   * @param expectedValue the integer expected as the entry value (in the fixtures below this is
   *                      the number in the first parenthesised group after the {@code |})
   * @throws Exception propagated from {@link AIArticleClassifier#parseVectorElement}
   */
  private static void assertParsesTo(String vectorElement, String expectedPath, int expectedValue)
      throws Exception {
    assertEquals(AIArticleClassifier.parseVectorElement(vectorElement),
        new AbstractMap.SimpleImmutableEntry<String, Integer>(expectedPath, expectedValue));
  }

  /**
   * A missing element must leave the buffer untouched and report {@code false}; existing
   * elements must be appended and report {@code true}.
   */
  @Test
  public void testAppendElementIfExists() throws Exception {
    Document article = loadArticle(MAIN_ARTICLE);

    StringBuilder sb = new StringBuilder();
    assertFalse(articleClassifier.appendElementIfExists(sb, article, "elementThatShouldntExist"));
    assertTrue(sb.toString().isEmpty(), sb.toString());

    assertTrue(articleClassifier.appendElementIfExists(sb, article, "article-title"));
    String s = sb.toString();
    assertTrue(s.startsWith("Maternal Deprivation Exacerbates the Response to a High Fat Diet"), s);

    sb = new StringBuilder();
    assertTrue(articleClassifier.appendElementIfExists(sb, article, "abstract"));
    s = sb.toString().trim();
    assertTrue(s.startsWith(
        "Maternal deprivation (MD) during neonatal life has diverse long-term effects"), s);
  }

  /**
   * A missing section must leave the buffer untouched and report {@code false}; existing
   * sections must be appended, starting with the section title.
   */
  @Test
  public void testAppendSectionIfExists() throws Exception {
    Document article = loadArticle(MAIN_ARTICLE);

    StringBuilder sb = new StringBuilder();
    assertFalse(articleClassifier.appendSectionIfExists(sb, article, "sectionThatShouldntExist"));
    assertTrue(sb.toString().isEmpty(), sb.toString());

    assertTrue(articleClassifier.appendSectionIfExists(sb, article, "Materials and Methods"));
    String s = sb.toString().trim();
    assertTrue(s.startsWith("Materials and Methods"), s);

    sb = new StringBuilder();
    assertTrue(articleClassifier.appendSectionIfExists(sb, article, "Results"));
    s = sb.toString().trim();
    assertTrue(s.startsWith("Results"), s);
  }

  /**
   * The categorization content must be substantial for a range of article shapes, not just
   * for a conventionally structured research article.
   */
  @Test
  public void testGetCategorizationContent() throws Exception {
    // Arbitrary minimum number of characters that we should be sending for categorization.
    // This should be longer than the article title.
    int threshold = 500;

    Document article = loadArticle(MAIN_ARTICLE);
    String content = articleClassifier.getCategorizationContent(article);
    assertTrue(content.length() > threshold,
        "pone.0048915: content length " + content.length() + " not above " + threshold);

    // Editorial without an abstract, materials/methods, or results section.
    article = loadArticle("articles/pntd.0001008.xml");
    content = articleClassifier.getCategorizationContent(article);
    assertTrue(content.length() > threshold,
        "pntd.0001008: content length " + content.length() + " not above " + threshold);

    // Research article with non-standard section titles.
    article = loadArticle("articles/pone.0040598.xml");
    content = articleClassifier.getCategorizationContent(article);
    // Call it good if we have material that's at least twice as long as the abstract.
    int abstractLength
        = article.getElementsByTagName("abstract").item(0).getTextContent().length();
    assertTrue(content.length() > abstractLength * 2,
        "pone.0040598: content length " + content.length()
            + " not above twice the abstract length " + abstractLength);

    // Article with a very short, one-sentence "TOC" abstract that we don't even
    // display in ambra.
    article = loadArticle("articles/pbio.0020302.xml");
    content = articleClassifier.getCategorizationContent(article);
    assertTrue(content.length() > threshold,
        "pbio.0020302: content length " + content.length() + " not above " + threshold);
  }

  /**
   * Well-formed {@code <TERM>} elements must parse to a (category path, number) entry;
   * a term whose path is not absolute must be rejected with {@code null}.
   */
  @Test
  public void testParseVectorElement() throws Exception {
    assertParsesTo(
        "<TERM>/Biology and life sciences/Computational biology/Computational neuroscience/Single neuron function|(5) neuron*(5)</TERM>",
        "/Biology and life sciences/Computational biology/Computational neuroscience/Single neuron function",
        5);
    assertParsesTo(
        "<TERM>/Medicine and health sciences/Anesthesiology/Anesthesia|(5) anesthesia(5)</TERM>",
        "/Medicine and health sciences/Anesthesiology/Anesthesia",
        5);
    assertParsesTo(
        "<TERM>/Medicine and health sciences/Geriatrics/Frailty|(19) frailty(18) frail*(1)</TERM>",
        "/Medicine and health sciences/Geriatrics/Frailty",
        19);
    assertParsesTo(
        "<TERM>/Biology and life sciences/Anatomy/Head/Face/Nose|(311) nose(311)</TERM>",
        "/Biology and life sciences/Anatomy/Head/Face/Nose",
        311);
    assertParsesTo(
        "<TERM>/People and places/Demography|(7) demographics(7)</TERM>",
        "/People and places/Demography",
        7);
    assertParsesTo(
        "<TERM>/Medicine and health sciences/Neurology/Cognitive neurology|(2) cognit*(2)</TERM>",
        "/Medicine and health sciences/Neurology/Cognitive neurology",
        2);

    // Whitespace after the opening tag and after the pipe is expected to be ignored.
    assertParsesTo(
        "<TERM> /Medicine and health sciences/Neurology/Cognitive neurology| (67) cognit*(2)</TERM>",
        "/Medicine and health sciences/Neurology/Cognitive neurology",
        67);

    // This appears to be a bug in the AI server--it sometimes does not return an
    // absolute path to a top-level category. In these cases, the returned value
    // should be discarded.
    assertNull(AIArticleClassifier.parseVectorElement(
        "<TERM>Background noise (acoustics)|(1) background noise(1)</TERM>"));
  }
}