import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.regex.Pattern;

/**
 * Extracts SMS message bodies from an XML corpus file into a plain-text file.
 *
 * <p>The input file name is fixed ({@code smsCorpus_en_2014.09.06_all.xml}) and is resolved
 * against the current working directory. The input is read one line at a time; each line is
 * split on the literal tags {@code <text>} and {@code </text>}, and when the split yields more
 * than one fragment, the second fragment is written as its own line to {@code corpus.txt}.
 * Lines that yield zero or one fragment produce no output.
 *
 * @author Jasmin Suljkic
 * @author Jonatan Asketorp
 */
public class smsCorpusToText {

    /** Name of the XML corpus file read from the current working directory. */
    private static final String INPUT_FILE = "smsCorpus_en_2014.09.06_all.xml";

    /** Name of the plain-text file written to the current working directory. */
    private static final String OUTPUT_FILE = "corpus.txt";

    /** Matches either the opening or the closing text tag; compiled once rather than per line. */
    private static final Pattern TEXT_TAG = Pattern.compile("(<text>)|(</text>)");

    /**
     * Reads the corpus file line by line and writes the extracted text fragments to the
     * output file, one per line. Any {@link IOException} is reported via its stack trace
     * and the program returns normally.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // NOTE(review): FileReader/FileWriter use the platform default charset here;
        // confirm the corpus encoding and pass an explicit charset if it differs.
        // try-with-resources closes both streams (flushing the writer) even when
        // reading or writing fails part-way through.
        try (BufferedReader reader = new BufferedReader(new FileReader(INPUT_FILE));
             BufferedWriter writer = new BufferedWriter(new FileWriter(OUTPUT_FILE))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Same semantics as String.split(regex): trailing empty fragments are dropped,
                // so an empty <text></text> element yields no fragments and is skipped.
                String[] fragments = TEXT_TAG.split(line);
                if (fragments.length > 1) {
                    // fragments[1] is the text following the first tag on the line.
                    writer.write(fragments[1]);
                    writer.newLine();
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}