package tbx2rdf.experiments;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;

/**
 * Extracts fragments of IATE per domain.
 *
 * <p>Reads an N-Triples dump line by line and copies to the output file every
 * line that belongs to a run opened by a subject-field triple whose value is
 * {@code "1211"}. A run ends at the next subject-field triple carrying any
 * other value.
 *
 * @author vrodriguez
 */
public class IATEExtractor {

    /** Predicate IRI (in N-Triples syntax) that marks a subject-field triple. */
    private static final String SUBJECT_FIELD_PREDICATE =
            "<http://tbx2rdf.lider-project.eu/tbx#subjectField>";

    /** Subject-field triple fragment that switches copying on. */
    private static final String TARGET_SUBJECT_FIELD =
            SUBJECT_FIELD_PREDICATE + " \"1211\"";

    /**
     * Runs the extraction on the hard-coded input and output files and prints
     * two numbers to standard output: the total number of lines read, then the
     * number of lines written.
     *
     * <p>An {@link IOException} is reported with its stack trace; the counters
     * reached so far are still printed afterwards.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String inputPath = "D:\\data\\iate\\iate.nt";
        String outputPath = "D:\\data\\iate\\iate1211.nt";
        int linesRead = 0;
        int linesWritten = 0;

        // try-with-resources closes (and flushes) every stream even when
        // reading or writing fails half-way. The input is opened first so a
        // missing input file does not create or truncate the output file.
        try (FileInputStream fis = new FileInputStream(new File(inputPath));
             BufferedReader br = new BufferedReader(new InputStreamReader(fis, "UTF-8"));
             FileOutputStream fos = new FileOutputStream(new File(outputPath));
             BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"))) {

            boolean recording = false;
            String line;
            while ((line = br.readLine()) != null) {
                // Only subject-field triples toggle the state: the target
                // value turns copying on, any other value turns it off.
                // Lines without the predicate leave the state untouched.
                if (line.contains(SUBJECT_FIELD_PREDICATE)) {
                    recording = line.contains(TARGET_SUBJECT_FIELD);
                }
                if (recording) {
                    bw.write(line);
                    // Fixed "\n" terminator, independent of the platform.
                    bw.write("\n");
                    linesWritten++;
                }
                linesRead++;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        System.out.println(linesRead);
        System.out.println(linesWritten);
    }
}