/*
* This file is part of CoAnSys project.
* Copyright (c) 2012-2015 ICM-UW
*
* CoAnSys is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* CoAnSys is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with CoAnSys. If not, see <http://www.gnu.org/licenses/>.
*/
package pl.edu.icm.coansys.nlmextraction;
import java.io.File;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import org.testng.annotations.Test;
import pl.edu.icm.coansys.models.DocumentProtos;
import pl.edu.icm.coansys.models.constants.ProtoConstants;
import pl.edu.icm.oozierunner.OozieRunner;
/**
 * Integration test that executes {@link OozieRunner#run()} and inspects the
 * directory it returns.
 *
 * <p>Every file below that directory whose name starts with {@code part-} is
 * read as a Hadoop {@link SequenceFile} of {@link Text} keys and
 * {@link BytesWritable} values. Each value is parsed as a
 * {@link DocumentProtos.Media} message, which must have the NLM media type and
 * whose UTF-8 content must contain {@link #TITLE_STRING}.
 */
public class TestIT {

    /** Title element that the content of every record read by this test must contain. */
    private static final String TITLE_STRING = "<article-title>Eco-friendly methods of protecting flax against weeds</article-title>";

    /**
     * Runs the workflow through {@link OozieRunner} and verifies all
     * {@code part-*} files found (recursively) in the returned directory.
     *
     * @throws Exception if running the workflow or reading its output fails
     */
    @Test
    public void testTest1() throws Exception {
        OozieRunner or = new OozieRunner();
        File workflowOutputData = or.run();

        assertTrue(workflowOutputData.exists(),
                "Output path does not exist: " + workflowOutputData);
        assertTrue(workflowOutputData.isDirectory(),
                "Output path is not a directory: " + workflowOutputData);

        // File.listFiles() returns null on an I/O error; report that as a
        // test failure instead of a NullPointerException.
        File[] topLevelEntries = workflowOutputData.listFiles();
        assertTrue(topLevelEntries != null && topLevelEntries.length > 0,
                "Output directory is empty or unreadable: " + workflowOutputData);

        // One Configuration is enough for all readers; it is not modified here.
        Configuration conf = new Configuration();
        int records = 0;
        for (File f : FileUtils.listFiles(workflowOutputData, null, true)) {
            if (f.isFile() && f.getName().startsWith("part-")) {
                records += verifyPartFile(conf, f);
            }
        }
        assertTrue(records > 0,
                "No records found in any part-* file under " + workflowOutputData);
    }

    /**
     * Reads every record of a single sequence file and asserts that it holds
     * an NLM media entry containing {@link #TITLE_STRING}.
     *
     * <p>The reader is closed in a {@code finally} block so that a failing
     * assertion or a read/parse error does not leak the open file.
     *
     * @param conf Hadoop configuration used to open the reader
     * @param partFile local sequence file to verify
     * @return number of records read from {@code partFile}
     * @throws Exception if the file cannot be opened, read or parsed
     */
    private static int verifyPartFile(Configuration conf, File partFile) throws Exception {
        Path path = new Path("file://" + partFile.getAbsolutePath());
        SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
        try {
            // key and value are reused; next() refills them on every iteration.
            Text key = new Text();
            BytesWritable value = new BytesWritable();
            int recordsInFile = 0;
            while (reader.next(key, value)) {
                // copyBytes() returns only the valid bytes of the value buffer.
                DocumentProtos.Media media = DocumentProtos.Media.parseFrom(value.copyBytes());
                assertEquals(media.getMediaType(), ProtoConstants.mediaTypeNlm);
                String nlmString = media.getContent().toStringUtf8();
                assertTrue(nlmString.contains(TITLE_STRING),
                        "Expected title element not found in a record of " + partFile);
                recordsInFile++;
            }
            return recordsInFile;
        } finally {
            reader.close();
        }
    }
}