/* * $Id$ * * Copyright (c) 2004-2005 by the TeXlapse Team. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html */ package net.sourceforge.texlipse.bibparser; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import net.sourceforge.texlipse.bibparser.analysis.DepthFirstAdapter; import net.sourceforge.texlipse.bibparser.node.ABibeBibEntry; import net.sourceforge.texlipse.bibparser.node.ABibstreBibEntry; import net.sourceforge.texlipse.bibparser.node.ABibtaskBibEntry; import net.sourceforge.texlipse.bibparser.node.ABibtex; import net.sourceforge.texlipse.bibparser.node.AConcat; import net.sourceforge.texlipse.bibparser.node.AEntryDef; import net.sourceforge.texlipse.bibparser.node.AEntrybraceEntry; import net.sourceforge.texlipse.bibparser.node.AEntryparenEntry; import net.sourceforge.texlipse.bibparser.node.AIdValOrSid; import net.sourceforge.texlipse.bibparser.node.AKeyvalDecl; import net.sourceforge.texlipse.bibparser.node.ANumValOrSid; import net.sourceforge.texlipse.bibparser.node.AStrbraceStringEntry; import net.sourceforge.texlipse.bibparser.node.AStrparenStringEntry; import net.sourceforge.texlipse.bibparser.node.AValueBValOrSid; import net.sourceforge.texlipse.bibparser.node.AValueQValOrSid; import net.sourceforge.texlipse.bibparser.node.TIdentifier; import net.sourceforge.texlipse.bibparser.node.TStringLiteral; import net.sourceforge.texlipse.bibparser.node.Token; import net.sourceforge.texlipse.model.ParseErrorMessage; import net.sourceforge.texlipse.model.ReferenceEntry; import org.eclipse.core.resources.IMarker; /** * Retrieves the BibTeX entries from the AST. * * This class is a visitor, that is applied on the AST that is a result of parsing a * BibTeX-file. See <a href="http://www.sablecc.org">http://www.sablecc.org</a> for * more information on the structure of the AST and the visitors. * * @author Oskar Ojala */ public final class EntryRetriever extends DepthFirstAdapter { private final class EntryText { Token token; Set<String> definedFields; public EntryText(Token t, Set<String> df) { token = t; definedFields = df; } } private List<ParseErrorMessage> warnings = new ArrayList<ParseErrorMessage>(); private List<ParseErrorMessage> tasks = new ArrayList<ParseErrorMessage>(); // type: ParseErrorMessage private List<ReferenceEntry> entries = new ArrayList<ReferenceEntry>(); private ReferenceEntry currEntry; private StringBuffer currEntryInfo; private Token currEntryType; private String currField; private String crossref; /** * Currently defined fields for an entry */ private Set<String> currDefinedFields; /** * All defined keys -- can be used for testing whether a key is unique */ private Map<String,Integer> allDefinedKeys; private static final Map<String, String> predefAbbrevs = new HashMap<String, String>(); private Map<String, String> abbrevs; private Map<String, List<EntryText>> crossrefs; // String->List(EntryText) /** * A list of required fields for the different BibTeX entries */ private static final Map<String, ArrayList<String>> requiredFieldsPerType = new HashMap<String, ArrayList<String>>(); static { predefAbbrevs.put("jan", "January"); predefAbbrevs.put("feb", "February"); predefAbbrevs.put("mar", "March"); predefAbbrevs.put("apr", "April"); predefAbbrevs.put("may", "May"); predefAbbrevs.put("jun", "June"); predefAbbrevs.put("jul", "July"); predefAbbrevs.put("aug", "August"); predefAbbrevs.put("sep", "September"); predefAbbrevs.put("oct", "October"); predefAbbrevs.put("nov", "November"); predefAbbrevs.put("dec", "December"); String[] article = {"author", "title", "journal", "year"}; String[] book = {"title", "publisher", "year"}; String[] booklet = {"title"}; String[] conference = {"author", "title", "booktitle", "year"}; String[] inbook = {"title", "publisher", "year"}; String[] incollection = {"author", "title", "booktitle", "publisher", "year"}; String[] inproceedings = {"author", "title", "booktitle", "year"};; String[] manual = {"title"}; String[] mastersthesis = {"author", "title", "school", "year"}; String[] phdthesis = {"author", "title", "school", "year"}; String[] techreport = {"author", "title", "institution", "year"}; String[] proceedings = {"title", "year"}; String[] unpublished = {"author", "title", "note"}; requiredFieldsPerType.put("article", new ArrayList<String>(Arrays.asList(article))); requiredFieldsPerType.put("book", new ArrayList<String>(Arrays.asList(book))); requiredFieldsPerType.put("booklet", new ArrayList<String>(Arrays.asList(booklet))); requiredFieldsPerType.put("conference", new ArrayList<String>(Arrays.asList(conference))); requiredFieldsPerType.put("inbook", new ArrayList<String>(Arrays.asList(inbook))); requiredFieldsPerType.put("incollection", new ArrayList<String>(Arrays.asList(incollection))); requiredFieldsPerType.put("inproceedings", new ArrayList<String>(Arrays.asList(inproceedings))); requiredFieldsPerType.put("manual", new ArrayList<String>(Arrays.asList(manual))); requiredFieldsPerType.put("mastersthesis", new ArrayList<String>(Arrays.asList(mastersthesis))); requiredFieldsPerType.put("phdthesis", new ArrayList<String>(Arrays.asList(phdthesis))); requiredFieldsPerType.put("techreport", new ArrayList<String>(Arrays.asList(techreport))); requiredFieldsPerType.put("proceedings", new ArrayList<String>(Arrays.asList(proceedings))); requiredFieldsPerType.put("unpublished", new ArrayList<String>(Arrays.asList(unpublished))); } public EntryRetriever() { this.currDefinedFields = new HashSet<String>(); this.allDefinedKeys = new HashMap<String, Integer>(); this.abbrevs = new HashMap<String, String>(predefAbbrevs); this.crossrefs = new HashMap<String, List<EntryText>>(); } /** * @return The entries as a list of <code>ReferenceEntry</code>s */ public List<ReferenceEntry> getEntries() { return entries; } /** * @return A list of warnings in the file */ public List<ParseErrorMessage> getWarnings() { return warnings; } /** * @return A list of task markers in the file */ public List<ParseErrorMessage> getTasks() { return tasks; } /** * Finish the parse by setting all remaining warnings */ public void finishParse() { // Set warnings for unfulfilled cross references Set<Map.Entry<String, List<EntryText>>> keys = crossrefs.entrySet(); for (Iterator<Map.Entry<String, List<EntryText>>> iter = keys.iterator(); iter.hasNext();) { Map.Entry<String, List<EntryText>> mapping = iter.next(); List<EntryText> crefs = mapping.getValue(); for (Iterator<EntryText> iter2 = crefs.iterator(); iter2.hasNext();) { EntryText et = iter2.next(); setMissingWarnings(et.token, et.definedFields); warnings.add(new ParseErrorMessage(et.token.getLine(), et.token.getPos() - 1, et.token.getText().length(), "Cross reference " + mapping.getKey() + " does not exist", IMarker.SEVERITY_WARNING)); } } } public void inABibtex(ABibtex node) { } public void outABibtex(ABibtex node) { } public void inABibeBibEntry(ABibeBibEntry node) { } public void inABibstreBibEntry(ABibstreBibEntry node) { } public void inABibtaskBibEntry(ABibtaskBibEntry node) { } public void outABibeBibEntry(ABibeBibEntry node) { } public void outABibstreBibEntry(ABibstreBibEntry node) { } public void outABibtaskBibEntry(ABibtaskBibEntry node) { int start = node.getTaskcomment().getText().indexOf("TODO"); String taskText = node.getTaskcomment().getText().substring(start + 4).trim(); tasks.add(new ParseErrorMessage(node.getTaskcomment().getLine(), node.getTaskcomment().getPos(), taskText.length(), taskText, IMarker.SEVERITY_INFO)); } private void inAnAbbrev(TIdentifier tid, TStringLiteral tsl) { if (abbrevs.put(tid.getText(), tsl.getText()) != null) { warnings.add(new ParseErrorMessage(tid.getLine(), tid.getPos() - 1, tid.getText().length(), "String key " + tid.getText() + " is not unique", IMarker.SEVERITY_WARNING)); } } public void inAStrbraceStringEntry(AStrbraceStringEntry node) { inAnAbbrev(node.getIdentifier(), node.getStringLiteral()); } public void outAStrbraceStringEntry(AStrbraceStringEntry node) { } public void inAStrparenStringEntry(AStrparenStringEntry node) { inAnAbbrev(node.getIdentifier(), node.getStringLiteral()); } public void outAStrparenStringEntry(AStrparenStringEntry node) { } private void inBibtexEntry(TIdentifier tid) { currEntry = new ReferenceEntry(tid.getText()); currEntry.startLine = tid.getLine(); currEntryInfo = new StringBuffer(); Integer x=allDefinedKeys.put(currEntry.key, currEntry.startLine); if (x != null) { warnings.add(new ParseErrorMessage(currEntry.startLine, tid.getPos() - 1, currEntry.key.length(), "BibTex key " + currEntry.key + " is not unique: also defined in line "+x, IMarker.SEVERITY_WARNING)); } } private void setMissingWarnings(Token t, Set<String> fields) { List<String> reqFieldList = requiredFieldsPerType.get(t.getText()); if (reqFieldList != null) { if (!fields.containsAll(reqFieldList)) { for (Iterator<String> iter = reqFieldList.iterator(); iter.hasNext();) { String reqField = (String) iter.next(); if (!fields.contains(reqField)) { // FIXME key warnings.add(new ParseErrorMessage(t.getLine(), t.getPos()-1, t.getText().length(), t + currEntry.key + " is missing required field " + reqField, IMarker.SEVERITY_WARNING)); } } } } } private void outBibtexEntry(Token endToken) { if (currEntry.author == null) { currEntry.author = "-"; } if (currEntry.year == null) { currEntry.year = "-"; } if (currEntry.journal == null) { currEntry.journal = "-"; } currEntry.info = currEntryInfo.toString(); currEntry.endLine = endToken.getLine(); entries.add(currEntry); // TODO useless -- uses the wrong token //currEntry.endLine = node.getIdentifier().getLine(); if (crossref != null) { List<EntryText> crefs = crossrefs.get(crossref); if (crefs == null) { crefs = new ArrayList<EntryText>(); } crefs.add(new EntryText(currEntryType, new HashSet<String>(currDefinedFields))); crossrefs.put(crossref, crefs); crossref = null; } else { setMissingWarnings(currEntryType, currDefinedFields); } if (crossrefs.containsKey(currEntry.key)) { List<EntryText> crefs = crossrefs.remove(currEntry.key); for (Iterator<EntryText> iter = crefs.iterator(); iter.hasNext();) { EntryText et = iter.next(); et.definedFields.addAll(currDefinedFields); setMissingWarnings(et.token, et.definedFields); } } currDefinedFields.clear(); } /** * Called when entering a bibliography entry, starts * forming an entry for the entry list * * @param node an <code>AEntry</code> value */ public void inAEntrybraceEntry(AEntrybraceEntry node) { inBibtexEntry(node.getIdentifier()); } /** * Called when exiting a bibliography entry, adds the formed * entry into the entry list * * @param node an <code>AEntry</code> value */ public void outAEntrybraceEntry(AEntrybraceEntry node) { outBibtexEntry(node.getRBrace()); } public void inAEntryparenEntry(AEntryparenEntry node) { inBibtexEntry(node.getIdentifier()); } public void outAEntryparenEntry(AEntryparenEntry node) { outBibtexEntry(node.getRParen()); } public void inAEntryDef(AEntryDef node) { } /** * Handles the type of the bibliography entry * * @param node an <code>AEntryDef</code> value */ public void outAEntryDef(AEntryDef node) { currEntryInfo.append(node.getEntryName().getText().substring(1)); currEntryInfo.append('\n'); currEntryType = node.getEntryName(); currEntryType.setText(currEntryType.getText().substring(1).toLowerCase()); } public void inAKeyvalDecl(AKeyvalDecl node) { currField = node.getIdentifier().getText().toLowerCase(); currEntryInfo.append(currField); currEntryInfo.append(": "); if (!currDefinedFields.add(currField)) { warnings.add(new ParseErrorMessage(node.getIdentifier().getLine(), node.getIdentifier().getPos() - 1, currField.length(), "Field " + currField + " appears more than once in entry " + currEntry.key, IMarker.SEVERITY_WARNING)); } } public void outAKeyvalDecl(AKeyvalDecl node) { currEntryInfo.append('\n'); } public void inAConcat(AConcat node) { } public void outAConcat(AConcat node) { } public void inAValueBValOrSid(AValueBValOrSid node) { } public void inAValueQValOrSid(AValueQValOrSid node) { } public void outAValueBValOrSid(AValueBValOrSid node) { outAValueValOrSid(node.getStringLiteral().getText(), node.getStringLiteral()); } public void outAValueQValOrSid(AValueQValOrSid node) { TStringLiteral tsl = node.getStringLiteral(); if (tsl != null) { outAValueValOrSid(tsl.getText(), tsl); } else { warnings.add(new ParseErrorMessage(currEntry.startLine, 1, currEntryType.getText().length(), currField + " is empty in " + currEntry.key, IMarker.SEVERITY_WARNING)); } } private void outAValueValOrSid(String text, Token tsl) { String fieldValue = text.replaceAll("\\s+", " "); currEntryInfo.append(fieldValue); if ("author".equals(currField) || "editor".equals(currField)) { currEntry.author = fieldValue; } else if ("journal".equals(currField)) { currEntry.journal = fieldValue; } else if ("year".equals(currField)) { currEntry.year = fieldValue; } else if ("crossref".equals(currField)) { crossref = fieldValue; } // Test for empty fields if (fieldValue.equalsIgnoreCase("")) { warnings.add(new ParseErrorMessage(tsl.getLine(), tsl.getPos(), 0, currField + " is empty in " + currEntry.key, IMarker.SEVERITY_WARNING)); } } public void inANumValOrSid(ANumValOrSid node) { } public void outANumValOrSid(ANumValOrSid node) { outAValueValOrSid(node.getNumber().getText(), node.getNumber()); } public void inAIdValOrSid(AIdValOrSid node) { } public void outAIdValOrSid(AIdValOrSid node) { TIdentifier tid = node.getIdentifier(); String expansion = (String) abbrevs.get(tid.getText()); if (expansion != null) { outAValueValOrSid(expansion, tid); } else { warnings.add(new ParseErrorMessage(tid.getLine(), tid.getPos()-1, tid.getText().length(), "The abbreviation " + tid.getText() + " is undefined", IMarker.SEVERITY_WARNING)); } } }