package net.sf.jabref.imports; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.regex.Pattern; import net.sf.jabref.*; /** * Importer for the Refer/Endnote format. * modified to use article number for pages if pages are missing (some * journals, e.g., Physical Review Letters, don't use pages anymore) * * check here for details on the format * http://www.ecst.csuchico.edu/~jacobsd/bib/formats/endnote.html */ public class EndnoteImporter extends ImportFormat { /** * Return the name of this import format. */ public String getFormatName() { return "Refer/Endnote"; } /* * (non-Javadoc) * @see net.sf.jabref.imports.ImportFormat#getCLIId() */ public String getCLIId() { return "refer"; } /** * Check whether the source is in the correct format for this importer. */ public boolean isRecognizedFormat(InputStream stream) throws IOException { // Our strategy is to look for the "%A *" line. BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream)); Pattern pat1 = Pattern.compile("%A .*"), pat2 = Pattern.compile("%E .*"); String str; while ((str = in.readLine()) != null){ if (pat1.matcher(str).matches() || pat2.matcher(str).matches()) return true; } return false; } /** * Parse the entries in the source, and return a List of BibtexEntry * objects. */ public List<BibtexEntry> importEntries(InputStream stream) throws IOException { ArrayList<BibtexEntry> bibitems = new ArrayList<BibtexEntry>(); StringBuffer sb = new StringBuffer(); BufferedReader in = new BufferedReader(ImportFormatReader.getReaderDefaultEncoding(stream)); String ENDOFRECORD = "__EOREOR__"; String str; boolean first = true; while ((str = in.readLine()) != null){ str = str.trim(); // if(str.equals("")) continue; if (str.indexOf("%0") == 0){ if (first){ first = false; }else{ sb.append(ENDOFRECORD); } sb.append(str); }else sb.append(str); sb.append("\n"); } String[] entries = sb.toString().split(ENDOFRECORD); HashMap<String, String> hm = new HashMap<String, String>(); String author = "", Type = "", editor = "", artnum = ""; for (int i = 0; i < entries.length; i++){ hm.clear(); author = ""; Type = ""; editor = ""; artnum = ""; boolean IsEditedBook = false; String[] fields = entries[i].trim().substring(1).split("\n%"); //String lastPrefix = ""; for (int j = 0; j < fields.length; j++){ if (fields[j].length() < 3) continue; /* * Details of Refer format for Journal Article and Book: * * Generic Ref Journal Article Book Code Author %A Author Author Year %D * Year Year Title %T Title Title Secondary Author %E Series Editor * Secondary Title %B Journal Series Title Place Published %C City * Publisher %I Publisher Volume %V Volume Volume Number of Volumes %6 * Number of Volumes Number %N Issue Pages %P Pages Number of Pages * Edition %7 Edition Subsidiary Author %? Translator Alternate Title %J * Alternate Journal Label %F Label Label Keywords %K Keywords Keywords * Abstract %X Abstract Abstract Notes %O Notes Notes */ String prefix = fields[j].substring(0, 1); String val = fields[j].substring(2); if (prefix.equals("A")){ if (author.equals("")) author = val; else author += " and " + val; }else if (prefix.equals("E")){ if (editor.equals("")) editor = val; else editor += " and " + val; }else if (prefix.equals("T")) hm.put("title", val); else if (prefix.equals("0")){ if (val.indexOf("Journal") == 0) Type = "article"; else if ((val.indexOf("Book Section") == 0)) Type = "incollection"; else if ((val.indexOf("Book") == 0)) Type = "book"; else if (val.indexOf("Edited Book") == 0) { Type = "book"; IsEditedBook = true; }else if (val.indexOf("Conference") == 0) // Proceedings Type = "inproceedings"; else if (val.indexOf("Report") == 0) // Techreport Type = "techreport"; else if (val.indexOf("Review") == 0) Type = "article"; else if (val.indexOf("Thesis") == 0) Type = "phdthesis"; else Type = "misc"; // }else if (prefix.equals("7")) hm.put("edition", val); else if (prefix.equals("C")) hm.put("address", val); else if (prefix.equals("D")) hm.put("year", val); else if (prefix.equals("8")) hm.put("date", val); else if (prefix.equals("J")){ // "Alternate journal. Let's set it only if no journal // has been set with %B. if (hm.get("journal") == null) hm.put("journal", val); }else if (prefix.equals("B")){ // This prefix stands for "journal" in a journal entry, and // "series" in a book entry. if (Type.equals("article")) hm.put("journal", val); else if (Type.equals("book") || Type.equals("inbook")) hm.put( "series", val); else /* if (Type.equals("inproceedings")) */ hm.put("booktitle", val); }else if (prefix.equals("I")) { if (Type.equals("phdthesis")) hm.put("school", val); else hm.put("publisher", val); } // replace single dash page ranges (23-45) with double dashes (23--45): else if (prefix.equals("P")) hm.put("pages", val.replaceAll("([0-9]) *- *([0-9])","$1--$2")); else if (prefix.equals("V")) hm.put("volume", val); else if (prefix.equals("N")) hm.put("number", val); else if (prefix.equals("U")) hm.put("url", val); else if (prefix.equals("R")) { String doi = val; if (doi.startsWith("doi:")) doi = doi.substring(4); hm.put("doi", doi); } else if (prefix.equals("O")) { // Notes may contain Article number if (val.startsWith("Artn")) { String[] tokens = val.split("\\s"); artnum = tokens[1]; } else { hm.put("note", val); } } else if (prefix.equals("K")) hm.put("keywords", val); else if (prefix.equals("X")) hm.put("abstract", val); else if (prefix.equals("9")){ //Util.pr(val); if (val.indexOf("Ph.D.") == 0) Type = "phdthesis"; if (val.indexOf("Masters") == 0) Type = "mastersthesis"; }else if (prefix.equals("F")) hm.put(BibtexFields.KEY_FIELD, Util .checkLegalKey(val)); } // For Edited Book, EndNote puts the editors in the author field. // We want them in the editor field so that bibtex knows it's an edited book if (IsEditedBook && editor.equals("")) { editor = author; author = ""; } //fixauthorscomma if (!author.equals("")) hm.put("author", fixAuthor(author)); if (!editor.equals("")) hm.put("editor", fixAuthor(editor)); //if pages missing and article number given, use the article number if (((hm.get("pages") == null) || hm.get("pages").equals("-")) && !artnum.equals("")) hm.put("pages", artnum); BibtexEntry b = new BibtexEntry(BibtexFields.DEFAULT_BIBTEXENTRY_ID, Globals .getEntryType(Type)); // id assumes an existing database so don't // create one here b.setField(hm); //if (hm.isEmpty()) if (b.getAllFields().size() > 0) bibitems.add(b); } return bibitems; } /** * We must be careful about the author names, since they can be presented differently * by different sources. Normally each %A tag brings one name, and we get the authors * separated by " and ". This is the correct behaviour. * One source lists the names separated by comma, with a comma at the end. We can detect * this format and fix it. * @param s The author string * @return The fixed author string */ private String fixAuthor(String s) { int index = s.indexOf(" and "); if (index >= 0) return AuthorList.fixAuthor_lastNameFirst(s); // Look for the comma at the end: index = s.lastIndexOf(","); if (index == s.length()-1) { String mod = s.substring(0, s.length()-1).replaceAll(", ", " and "); return AuthorList.fixAuthor_lastNameFirst(mod); } else return AuthorList.fixAuthor_lastNameFirst(s); } }