package plume; import java.io.*; import java.util.*; import java.util.regex.*; import java.nio.CharBuffer; /** * Clean a BibTeX file by removing text outside BibTeX entries. * * Remove each non-empty line that is not in a BibTeX entry, except retain * any line that starts with "%". * * Arguments are the names of the original files. Cleaned copies of those * files are written in the CURRENT DIRECTORY. Therefore, this should be * run in a different directory from where the argument files are, to avoid * overwriting them. */ // The implementation uses regular expressions rather than a BibTeX parser, // because BibTeX parsers generally do not preserve formatting, such as // indentation, delimiter characters, and order of fields. And, the ones I // looked at were not very well-documented. // The implementation cannot use EntryReader to iterate through the file // because the @ line does not necessarily follow a blank line -- there // might be a comment line before it. But, EntryReader requires that its // "long entries" start after a blank line. (That can be considered an // EntryReader bug, or at least inflexibility in its interface.) public class BibtexClean { private static Pattern entry_end = Pattern.compile("^[ \t]*(?i)(year[ \t]*=[ \t]*[12][0-9][0-9][0-9][ \t]*)?[)}]"); private static Pattern stringDef = Pattern.compile("^@(?i)string(\\{.*\\}|\\(.*\\))$"); public static void main(String[] args) { for (String filename : args) { File in = new File(filename); PrintWriter out; try { out = new PrintWriter(UtilMDE.bufferedFileWriter(in.getName())); // in current directory } catch (IOException e) { System.err.println("Unable to write " + in.getName()); System.exit(2); throw new Error("This can't happen"); // for definite assignment check } EntryReader er; try { er = new EntryReader(filename); } catch (IOException e) { System.err.println("Unable to read " + in); System.exit(2); throw new Error("This can't happen"); // for definite assignment check } for (String line : er) { if (line.equals("") || line.startsWith("%")) { out.println(line); } else if (line.startsWith("@")) { if (stringDef.matcher(line).matches()) { out.println(line); } else { out.println(line); while (er.hasNext() && ((line = er.next()) != null)) { out.println(line); if (entry_end.matcher(line).lookingAt()) { break; } else if (line.equals("")) { System.err.printf("%s:%d: unterminated entry%n", er.getFileName(), er.getLineNumber()); break; } } } } } out.close(); } } }