package com.berryworks.edireader.util;
import java.io.*;
/**
 * Extracts code values and their descriptions from an HTML page and writes
 * them as pipe-delimited records.
 * <p>
 * The page is scanned line by line:
 * <ul>
 * <li>a line starting with {@link #H1} sets the current code name (the heading
 * text up to the first {@code '-'});</li>
 * <li>a line starting with {@link #TR_TD} carries a code value, and the line
 * that follows it carries the description after {@link #TD}; a description may
 * continue over several lines until the next {@code '<'}.</li>
 * </ul>
 * Each record is printed as {@code codename|value|description|}.
 * <p>
 * An instance is single-use: {@link #run()} closes the underlying reader when
 * it finishes.
 */
public class HtmlExtractCodes implements Runnable {
    public static final String H1 = "<h1>";
    public static final String TR_TD = "<tr><td align=right valign=top><b>";
    public static final String TD = "<td>";

    /** Input directory used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_DIRECTORY =
            "/Users/mayberry/Documents/X12_February_2009/X12_February_2009/de/";
    /** Output file used by {@link #main(String[])} when no second argument is given. */
    private static final String DEFAULT_OUTPUT = "codes.tmp";

    private final BufferedReader reader;
    private final PrintStream output;
    /** Name of the file being read; used only in diagnostics. */
    private final String sourceName;

    /**
     * Opens the given HTML file for extraction.
     *
     * @param file   HTML file to read
     * @param output destination for the extracted records; not closed by this class
     * @throws RuntimeException if the file does not exist or cannot be opened
     */
    public HtmlExtractCodes(File file, PrintStream output) {
        if (!file.exists()) {
            throw new RuntimeException("Cannot find " + file.getName());
        }
        try {
            reader = new BufferedReader(new FileReader(file));
        } catch (FileNotFoundException e) {
            // Keep the cause so the underlying reason (permissions, directory, ...) is visible.
            throw new RuntimeException("Cannot read " + file.getName(), e);
        }
        this.output = output;
        this.sourceName = file.getName();
    }

    /**
     * Reads the whole file, printing one record per code row, then closes the
     * reader. Malformed or truncated rows are skipped rather than aborting the
     * run; an I/O error is reported on {@code System.err} and ends the run.
     */
    @Override
    public void run() {
        String codename = "unknown";
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(H1)) {
                    codename = parseCodeName(line);
                    continue;
                }
                if (!line.startsWith(TR_TD)) {
                    continue;
                }
                final String particularCodeValue = textUpToTag(line, TR_TD.length());
                final String description = readDescription();
                if (description == null) {
                    // Truncated file or a row without a description cell: nothing to emit.
                    // At end of file the next readLine() returns null and ends the loop.
                    continue;
                }
                output.println(codename + "|" + particularCodeValue + "|" + description + "|");
            }
        } catch (IOException e) {
            // Best effort: report and stop this file so a batch run can continue with the next one.
            System.err.println("Error reading " + sourceName + ": " + e);
        } finally {
            closeReader();
        }
    }

    /** Returns the heading text after {@code <h1>} up to the first '-', or up to the next tag when there is no '-'. */
    private static String parseCodeName(String line) {
        final String heading = line.substring(H1.length());
        int end = heading.indexOf('-');
        if (end < 0) {
            end = heading.indexOf('<');
        }
        if (end < 0) {
            end = heading.length();
        }
        return heading.substring(0, end).trim();
    }

    /** Returns the text of {@code line} from {@code start} up to the next '<', or to the end of the line if there is none. */
    private static String textUpToTag(String line, int start) {
        int end = line.indexOf('<', start);
        if (end < 0) {
            end = line.length();
        }
        return line.substring(start, end);
    }

    /**
     * Reads the description cell that follows a code row, joining continuation
     * lines until the closing tag is found.
     *
     * @return the trimmed description, or {@code null} if the file ended before
     *         the description line or that line contains no {@code <td>}
     */
    private String readDescription() throws IOException {
        final String first = reader.readLine();
        if (first == null) {
            return null;
        }
        final int cellStart = first.indexOf(TD);
        if (cellStart < 0) {
            return null;
        }
        final int start = cellStart + TD.length();
        final StringBuilder cell = new StringBuilder(first);
        int end = cell.indexOf("<", start);
        while (end < 0) {
            final String continued = reader.readLine();
            if (continued == null) {
                // File ended inside the description: keep what was collected.
                end = cell.length();
                break;
            }
            cell.append(continued);
            end = cell.indexOf("<", start);
        }
        return cell.substring(start, end).trim();
    }

    /** Closes the reader, ignoring a failure to close since all reading is already done. */
    private void closeReader() {
        try {
            reader.close();
        } catch (IOException ignored) {
            // Nothing useful can be done about a failed close of a read-only stream.
        }
    }

    /**
     * Extracts codes from every {@code .HTM} file in a directory into one output file.
     *
     * @param args optional: {@code args[0]} is the input directory and
     *             {@code args[1]} the output file; the built-in defaults are
     *             used for any argument that is absent
     * @throws RuntimeException if the directory cannot be found or listed, or
     *                          the output file cannot be opened
     */
    public static void main(String args[]) {
        final String directoryName = (args != null && args.length > 0) ? args[0] : DEFAULT_DIRECTORY;
        final String outputName = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

        File directory = new File(directoryName);
        if (!(directory.exists() && directory.isDirectory())) {
            throw new RuntimeException("Cannot find directoryName " + directoryName);
        }
        File[] htmlFiles = directory.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.endsWith(".HTM");
            }
        });
        if (htmlFiles == null) {
            // listFiles returns null (not an empty array) when the directory cannot be read.
            throw new RuntimeException("Cannot list files in " + directoryName);
        }

        final PrintStream printStream;
        try {
            printStream = new PrintStream(new FileOutputStream(new File(outputName)));
        } catch (FileNotFoundException e) {
            throw new RuntimeException("Unable to write to output file", e);
        }
        try {
            for (File htmlFile : htmlFiles) {
                new HtmlExtractCodes(htmlFile, printStream).run();
            }
            // PrintStream never throws on write failure; checkError is the only way to notice one.
            if (printStream.checkError()) {
                System.err.println("Error while writing to " + outputName);
            }
        } finally {
            printStream.close();
        }
    }
}