/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package di.uniba.it.tri.extractor;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringReader;

/**
 * Extract textual content from a paper in the AAN corpus.
 *
 * <p>The paper is read line by line and everything before the first line that
 * mentions {@code "References"} or {@code "Bibliography"} is kept; that line
 * and all following lines are discarded.
 *
 * @author pierpaolo
 */
public class AANExtractor implements Extractor {

    /**
     * Reads a plain-text paper and returns its content up to (and excluding)
     * the first line containing {@code "References"} or {@code "Bibliography"}.
     *
     * <p>The match is a case-sensitive substring test on the whole line, so any
     * line that merely mentions one of these words also ends the extraction.
     * Every retained line is terminated with a single {@code '\n'}, regardless
     * of the line separator used in the file. The file is decoded by
     * {@link FileReader} using the JVM default charset.
     *
     * @param txtfile the text file to read
     * @return a reader over the retained text; empty if the file is empty or
     *         its first line already contains one of the marker words
     * @throws IOException if the file cannot be opened or read
     */
    @Override
    public StringReader extract(File txtfile) throws IOException {
        StringBuilder sb = new StringBuilder();
        // try-with-resources closes the file even when readLine() throws.
        try (BufferedReader reader = new BufferedReader(new FileReader(txtfile))) {
            String line;
            // readLine() returning null is the only reliable end-of-stream signal;
            // ready() merely reports whether a read would block.
            while ((line = reader.readLine()) != null) {
                if (line.contains("References") || line.contains("Bibliography")) {
                    // Start of the bibliography section: drop it and the rest.
                    break;
                }
                sb.append(line).append("\n");
            }
        }
        return new StringReader(sb.toString());
    }
}