/* * Created on Mar 27, 2003 * * @author henkel@cs.colorado.edu * */ package bibtex.expansions; import java.util.LinkedList; import java.util.List; import bibtex.dom.*; import bibtex.dom.BibtexString; /** * @author henkel */ final class BibtexPersonListParser { static final class StringIterator { private final char[] chars; private int pos; StringIterator(String string) { chars = string.toCharArray(); pos = 0; } char next() { return chars[pos++]; } char current() { return chars[pos]; } void step() { pos++; } void skipWhiteSpace() { while (pos < chars.length && Character.isWhitespace(chars[pos])) pos++; } boolean hasNext() { return pos + 1 < chars.length; } } public static BibtexPersonList parse(BibtexString personList,String entryKey) throws PersonListParserException { String content = personList.getContent(); String[] tokens = tokenize(content); BibtexPersonList result = personList.getOwnerFile().makePersonList(); if (tokens.length == 0) { return result; } int begin = 0; for (int i = 0; i < tokens.length; i++) { if (tokens[i].toLowerCase().equals(AND) && begin < i) { result.add(makePerson(tokens, begin, i, content, entryKey, personList.getOwnerFile())); begin = i + 1; } } if (begin < tokens.length) result.add(makePerson(tokens, begin, tokens.length, content, entryKey, personList.getOwnerFile())); return result; } private static boolean firstCharAtBracelevel0IsLowerCase(final String string) { StringIterator stringIt =new StringIterator(string); if (Character.isLowerCase(stringIt.current())) return true; while (stringIt.hasNext()) { stringIt.skipWhiteSpace(); if (Character.isLowerCase(stringIt.current())) return true; if (Character.isUpperCase(stringIt.current())) return false; if (stringIt.current() == '{') { stringIt.step(); stringIt.skipWhiteSpace(); if (stringIt.current() == '\\') { scanCommandOrAccent: while (true) { stringIt.step(); stringIt.skipWhiteSpace(); if (Character.isLetter(stringIt.current())) { while (stringIt.hasNext() && stringIt.current() != '{' && !Character.isWhitespace(stringIt.current()) && stringIt.current() != '}') stringIt.step(); stringIt.skipWhiteSpace(); if (!stringIt.hasNext()) return false; if (stringIt.current() == '}') return false; if (stringIt.current() == '{') { stringIt.step(); stringIt.skipWhiteSpace(); if (!stringIt.hasNext()) return false; if (Character.isLowerCase(stringIt.current())) return true; if (Character.isUpperCase(stringIt.current())) return false; if (stringIt.current() == '\\') { continue scanCommandOrAccent; } } } else { while(stringIt.hasNext() && !Character.isLetter(stringIt.current())){ stringIt.step(); } if(!stringIt.hasNext()) return false; if(Character.isLowerCase(stringIt.current())) return true; if(Character.isUpperCase(stringIt.current())) return false; return false; } } } else { // brace level 1 int braces = 1; while (braces > 0) { if (!stringIt.hasNext()) return false; else if (stringIt.current() == '{') braces++; else if (stringIt.current() == '}') braces--; stringIt.step(); } // back at brace level 0 } } else stringIt.step(); } return false; } private static String getString(String[] tokens, int beginIndex, int endIndex) { if (!(beginIndex < endIndex)) return null; assert beginIndex >= 0; assert endIndex >= 0; StringBuffer result = new StringBuffer(); for (int i = beginIndex; i < endIndex; i++) { if (tokens[i] == MINUS) { if (i == beginIndex || i == endIndex - 1) continue; result.append('-'); continue; } if (i > beginIndex && tokens[i - 1] != MINUS) result.append(' '); result.append(tokens[i]); } return result.toString(); } private static BibtexPerson makePerson(String[] tokens, int begin, int end, String fullEntry, String entryKey, BibtexFile factory) throws PersonListParserException { if (tokens[begin].equals("others")) { return factory.makePerson(null, null, null, null, true); } else if (tokens[end - 1] == COMMA) throw new PersonListParserException("Name ends with comma: '" + fullEntry + "' - in '"+entryKey+"'"); else { int numberOfCommas = 0; for (int i = begin; i < end; i++) { if (tokens[i] == COMMA) numberOfCommas++; } if (numberOfCommas == 0) { int lastNameBegin = end - 1; while (true) { if (lastNameBegin - 1 >= begin && !firstCharAtBracelevel0IsLowerCase(tokens[lastNameBegin - 1])) { lastNameBegin -= 1; } else if (lastNameBegin - 2 >= begin && tokens[lastNameBegin - 1] == MINUS && !firstCharAtBracelevel0IsLowerCase(tokens[lastNameBegin - 2])) { lastNameBegin -= 2; } else break; } int firstLowerCase = -1; for (int i = begin; i < end; i++) { if (tokens[i] == MINUS) continue; if (firstCharAtBracelevel0IsLowerCase(tokens[i])) { firstLowerCase = i; break; } } final String last, first, lineage, preLast; if (lastNameBegin == begin || firstLowerCase == -1) { //there is no preLast part lastNameBegin = end - 1; while (lastNameBegin - 2 >= begin && tokens[lastNameBegin - 1] == MINUS && !firstCharAtBracelevel0IsLowerCase(tokens[lastNameBegin - 2])) lastNameBegin -= 2; last = getString(tokens, lastNameBegin, end); first = getString(tokens, begin, lastNameBegin); lineage = null; preLast = null; } else { last = getString(tokens, lastNameBegin, end); first = getString(tokens, begin, firstLowerCase); lineage = null; preLast = getString(tokens, firstLowerCase, lastNameBegin); } if (last == null) throw new PersonListParserException("Found an empty last name in '" + fullEntry + "' in '"+entryKey+"'."); return factory.makePerson(first, preLast, last, lineage, false); } else if (numberOfCommas == 1 || numberOfCommas == 2) { if (numberOfCommas == 1) { int commaIndex = -1; for (int i = begin; i < end; i++) { if (tokens[i] == COMMA) { commaIndex = i; break; } } final int preLastBegin = begin; int preLastEnd = begin; for (int i = preLastEnd; i < commaIndex; i++) { if (tokens[i] == MINUS) continue; if (firstCharAtBracelevel0IsLowerCase(tokens[i])) { preLastEnd = i + 1; } } if (preLastEnd == commaIndex && preLastEnd > preLastBegin) { preLastEnd--; } final String preLast = getString(tokens, preLastBegin, preLastEnd); final String last = getString(tokens, preLastEnd, commaIndex); final String first = getString(tokens, commaIndex + 1, end); if (last == null) throw new PersonListParserException("Found an empty last name in '" + fullEntry + "' in '"+entryKey+"'."); return factory.makePerson(first, preLast, last, null, false); } else { // 2 commas ... int firstComma = -1; int secondComma = -1; for (int i = begin; i < end; i++) { if (tokens[i] == COMMA) { if (firstComma == -1) { firstComma = i; } else { secondComma = i; break; } } } final int preLastBegin = begin; int preLastEnd = begin; for (int i = preLastEnd; i < firstComma; i++) { if (tokens[i] == MINUS) continue; if (firstCharAtBracelevel0IsLowerCase(tokens[i])) { preLastEnd = i + 1; } } if (preLastEnd == firstComma && preLastEnd > preLastBegin) { preLastEnd--; } final String preLast = getString(tokens, preLastBegin, preLastEnd); final String last = getString(tokens, preLastEnd, firstComma); String lineage = getString(tokens, firstComma + 1, secondComma); String first = getString(tokens, secondComma + 1, end); if (first == null && lineage != null) { String tmp = lineage; lineage = first; first = tmp; } if (last == null) throw new PersonListParserException("Found an empty last name in '" + fullEntry + "' in '"+entryKey+"'."); return factory.makePerson(first, preLast, last, lineage, false); } } else { throw new PersonListParserException("Too many commas in '" + fullEntry + "' in '"+entryKey+"'."); } } } private static final String COMMA = ",".intern(); private static final String AND = "and".intern(); private static final String MINUS = "-".intern(); /** * * * @param stringContent * @return String[] */ private static String[] tokenize(String stringContent) { int numberOfOpenBraces = 0; int tokenBegin = 0; stringContent = stringContent + " "; // make sure the last character is whitespace ;-) LinkedList tokens = new LinkedList(); // just some strings ... for (int currentPos = 0; currentPos < stringContent.length(); currentPos++) { switch (stringContent.charAt(currentPos)) { case '{': numberOfOpenBraces++; break; case '}': if(numberOfOpenBraces>0){ numberOfOpenBraces--; } else{ if (tokenBegin <= currentPos - 1) { String potentialToken = stringContent.substring(tokenBegin, currentPos).trim(); if (!potentialToken.equals("")) { tokens.add(potentialToken); } } tokenBegin = currentPos + 1; } break; case ',': if (numberOfOpenBraces == 0) { if (tokenBegin <= currentPos - 1) { String potentialToken = stringContent.substring(tokenBegin, currentPos).trim(); if (!potentialToken.equals("")) { tokens.add(potentialToken); } } tokens.add(COMMA); tokenBegin = currentPos + 1; } default: char currentChar = stringContent.charAt(currentPos); if (Character.isWhitespace(currentChar) || (currentChar == '~') || (currentChar == '-')) { if (numberOfOpenBraces == 0 && tokenBegin <= currentPos) { String potentialToken = stringContent.substring(tokenBegin, currentPos).trim(); if (!potentialToken.equals("")) { tokens.add(potentialToken); if (currentChar == '-') tokens.add(MINUS); } tokenBegin = currentPos + 1; } } } } String[] result = new String[tokens.size()]; tokens.toArray(result); return result; } private static boolean isEqual(String str1, String str2) { if (str1 == null) return str2 == null; return str1.equals(str2); } private static boolean isEqual(BibtexPersonListParserTests.Test test, BibtexPerson person) { return isEqual(test.getFirst(), person.getFirst()) && isEqual(test.getLast(), person.getLast()) && isEqual(test.getLineage(), person.getLineage()) && isEqual(test.getPreLast(), person.getPreLast()); } private static void dumpCompare(BibtexPersonListParserTests.Test test, BibtexPerson person) { System.out.println("test.first=\"" + test.getFirst() + "\" person.first=\"" + person.getFirst() + "\""); System.out.println("test.last=\"" + test.getLast() + "\" person.last=\"" + person.getLast() + "\""); System.out.println("test.preLast=\"" + test.getPreLast() + "\" person.preLast=\"" + person.getPreLast() + "\""); System.out.println("test.lineage=\"" + test.getLineage() + "\" person.lineage=\"" + person.getLineage() + "\""); } public static void main(String args[]) { BibtexPersonListParserTests.Test [] tests = BibtexPersonListParserTests.tests; for (int i = 0; i < tests.length; i++) { BibtexFile file = new BibtexFile(); BibtexString string = file.makeString(tests[i].getString()); List personList; try { personList = parse(string,"testCase"+i).getList(); } catch (PersonListParserException e) { e.printStackTrace(); continue; } assert personList.size() == 1; BibtexPerson person = (BibtexPerson) personList.get(0); if (!isEqual(tests[i], person)) { System.err.println("\ntest failed:"); System.err.println(tests[i].getString()); dumpCompare(tests[i], person); } } } }