/* * Created on Mar 19, 2003 * * @author henkel@cs.colorado.edu * */ package bibtex.parser; import java.io.IOException; import java.io.Reader; import java.util.LinkedList; import bibtex.dom.BibtexAbstractValue; import bibtex.dom.BibtexEntry; import bibtex.dom.BibtexFile; /** * The parser will parse the bibtex into a basic AST. Have a look at the * different Expanders defined in the bibtex.expansions package if you need more * than that. * * @author henkel */ public final class BibtexParser { /** * @param throwAllParseExceptions * Setting this to true means that all exceptions will be thrown * immediately. Otherwise, the parser will skip over things it * can't parse and you can use getExceptions to retrieve the * exceptions later. */ public BibtexParser(boolean throwAllParseExceptions) { this.throwAllParseExceptions = throwAllParseExceptions; } private PseudoLexer lexer; private BibtexFile bibtexFile; private LinkedList exceptions; private boolean throwAllParseExceptions; private int multipleFieldValuesPolicy = BibtexMultipleFieldValuesPolicy.KEEP_FIRST; /** * Returns the list of non-fatal exceptions that occured during parsing. * Usually, these occur while parsing an entry. Usually, the remainder of * the entry will be treated as part of a comment - thus the following entry * will be parsed again. * * @return List */ public ParseException[] getExceptions() { if (exceptions == null) return new ParseException[0]; ParseException[] result = new ParseException[exceptions.size()]; exceptions.toArray(result); return result; } /** * Parses the input into bibtexFile - don't forget to check getExceptions() * afterwards (if you don't use throwAllParseExceptions which you can * configure in the constructor)... * * @param bibtexFile * @param input * @throws ParseException * @throws IOException */ public void parse(BibtexFile bibtexFile, Reader input) throws ParseException, IOException { assert bibtexFile != null : "bibtexFile parameter may not be null."; assert input != null : "input parameter may not be null."; this.lexer = new PseudoLexer(input); this.bibtexFile = bibtexFile; this.exceptions = new LinkedList(); while (true) { PseudoLexer.Token token = lexer.scanTopLevelCommentOrAtOrEOF(); switch (token.choice) { case 0: // top level comment bibtexFile.addEntry(bibtexFile.makeToplevelComment(token.content)); break; case 1: // @ sign if (throwAllParseExceptions) parseEntry(); else { try { parseEntry(); } catch (ParseException parseException) { exceptions.add(parseException); } } break; case 2: // EOF return; } } } private final static char[] EXCEPTION_SET_NAMES = new char[] { '"', '#', '%', '\'', '(', ')', ',', '=', '{', '}' }; private final static String[] ENTRY_TYPES = new String[] { "string", "preamble", "article", "book", "booklet", "conference", "inbook", "incollection", "inproceedings", "manual", "mastersthesis", "misc", "phdthesis", "proceedings", "techreport", "unpublished", "periodical" // not // really // standard // but // commonly // used. }; /** * */ private void parseEntry() throws ParseException, IOException { String entryType = lexer.scanEntryTypeName().toLowerCase(); final int bracketChoice = lexer.scanAlternatives(new char[] { '{', '(' }, false); if (entryType.equals("string")) { String stringName = lexer.scanLiteral(EXCEPTION_SET_NAMES, true, true); lexer.scan('='); BibtexAbstractValue value = parseValue(); bibtexFile.addEntry(bibtexFile.makeMacroDefinition(stringName, value)); } else if (entryType.equals("preamble")) { BibtexAbstractValue value = parseValue(); bibtexFile.addEntry(bibtexFile.makePreamble(value)); } else { // all others lexer.skipWhitespace(); String bibkey = (lexer.currentInputChar() == ',') ? "" : lexer.scanLiteral(new char[] { ',' }, true, true); final BibtexEntry entry = bibtexFile.makeEntry(entryType, bibkey); bibtexFile.addEntry(entry); while (true) { lexer.enforceNoEof("',' or corresponding closing bracket", true); //System.out.println("---------->'"+lexer.currentInputChar()+"'"); if (lexer.currentInputChar() == ',') { lexer.scan(','); lexer.enforceNoEof("'}' or [FIELDNAME]", true); if (lexer.currentInputChar() == '}') break; String fieldName = lexer.scanLiteral(EXCEPTION_SET_NAMES, true, true); lexer.scan('='); BibtexAbstractValue value = parseValue(); switch (this.multipleFieldValuesPolicy) { case BibtexMultipleFieldValuesPolicy.KEEP_FIRST: if (entry.getFieldValue(fieldName) == null) entry.setField(fieldName, value); break; case BibtexMultipleFieldValuesPolicy.KEEP_LAST: entry.setField(fieldName, value); break; case BibtexMultipleFieldValuesPolicy.KEEP_ALL: entry.addFieldValue(fieldName,value); break; default: assert false : "this should not happen."; } } else break; } } if (bracketChoice == 0) lexer.scan('}'); else lexer.scan(')'); } private static boolean isNumber(String string) { for (int i = 0; i < string.length(); i++) { char c = string.charAt(i); if (c < '0' || '9' < c) return false; } return true; } /** * */ private BibtexAbstractValue parseValue() throws ParseException, IOException { lexer.enforceNoEof("[STRING] or [STRINGREFERENCE] or [NUMBER]", true); char inputCharacter = lexer.currentInputChar(); BibtexAbstractValue result; if (inputCharacter == '"') { result = parseQuotedString(); } else if (inputCharacter == '{') { result = parseBracketedString(); } else { String stringContent = lexer.scanLiteral(EXCEPTION_SET_NAMES, false, true).trim(); if (isNumber(stringContent)) result = bibtexFile.makeString(stringContent); else result = bibtexFile.makeMacroReference(stringContent); } lexer.enforceNoEof("'#' or something else", true); if (lexer.currentInputChar() == '#') { lexer.scan('#'); return bibtexFile.makeConcatenatedValue(result, parseValue()); } else { return result; } } /** * @return BibtexAbstractValue */ private BibtexAbstractValue parseBracketedString() throws ParseException, IOException { StringBuffer buffer = new StringBuffer(); lexer.scanBracketedString(buffer, false); return bibtexFile.makeString(buffer.toString()); } /** * @return BibtexAbstractValue */ private BibtexAbstractValue parseQuotedString() throws IOException, ParseException { return bibtexFile.makeString(lexer.scanQuotedString()); } /** * In bibtex files, fields can have multiple values - this constant * determines how to deal with them. Check out BibtexMultipleFieldValuesPolicy for * possible values. The default is BibtexMultipleValuesPolicy.KEEP_FIRST. * * @see BibtexMultipleFieldValuesPolicy * @param multipleFieldValuesPolicy * The multipleFieldValuesPolicy to set. */ public void setMultipleFieldValuesPolicy(int multipleFieldValuesPolicy) { assert multipleFieldValuesPolicy == BibtexMultipleFieldValuesPolicy.KEEP_ALL || multipleFieldValuesPolicy == BibtexMultipleFieldValuesPolicy.KEEP_FIRST || multipleFieldValuesPolicy == BibtexMultipleFieldValuesPolicy.KEEP_LAST : "multipleFieldValuesPolicy parameter must be one of the constants defined in BibtexMultiplefieldValuesPolicy."; this.multipleFieldValuesPolicy = multipleFieldValuesPolicy; } }