package org.jabref.logic.importer.util;

import java.util.Optional;
import java.util.regex.Pattern;

import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.FieldName;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX-Handler to parse OAI2-xml files.
 * <p>
 * The handler collects the text of each element and, when the element ends, copies it into the
 * matching field of the {@link BibEntry} passed to the constructor. Author names are accumulated
 * while parsing and written to the author field when the document ends.
 *
 * @author Ulrich Stärk
 * @author Christian Kopf
 * @author Christopher Oezbek
 */
public class OAI2Handler extends DefaultHandler {

    /** A line break that is not followed (after optional whitespace) by another line break. */
    private static final Pattern SINGLE_LINE_BREAK = Pattern.compile("\\n(?!\\s*\\n)");

    /** A line break together with any whitespace surrounding it. */
    private static final Pattern LINE_BREAK_WITH_WHITESPACE = Pattern.compile("\\s*\\n\\s*");

    /** Two or more consecutive space characters. */
    private static final Pattern MULTIPLE_SPACES = Pattern.compile(" {2,}");

    /** Whitespace at the very beginning or the very end of the input. */
    private static final Pattern LEADING_OR_TRAILING_WHITESPACE = Pattern.compile("(^\\s*|\\s+$)");

    /** Entry that receives the parsed values. */
    private final BibEntry entry;

    /** All authors seen so far, joined with " and ". */
    private StringBuilder authors = new StringBuilder();

    /** Key name of the author currently being parsed, or {@code null} if none was seen yet. */
    private String keyname;

    /** Forenames of the author currently being parsed, or {@code null} if none was seen yet. */
    private String forenames;

    /** Text collected since the most recent {@code startElement} call. */
    private StringBuilder characters = new StringBuilder();

    /**
     * Creates a handler that writes everything it parses into the given entry.
     *
     * @param be the entry to fill
     */
    public OAI2Handler(BibEntry be) {
        this.entry = be;
    }

    /**
     * Resets all per-document parsing state.
     */
    @Override
    public void startDocument() throws SAXException {
        authors = new StringBuilder();
        characters = new StringBuilder();
        keyname = null;
        forenames = null;
    }

    /**
     * Appends the reported characters to the text buffer of the current element.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        characters.append(ch, start, length);
    }

    /**
     * Starts a fresh text buffer for the element that is being opened.
     */
    @Override
    public void startElement(String uri, String localName, String qualifiedName,
                             Attributes attributes) throws SAXException {
        characters = new StringBuilder();
    }

    /**
     * Stores the text collected for the closing element in the corresponding entry field.
     * Elements with an unrecognised qualified name are ignored.
     *
     * @throws RuntimeException if the closing element is {@code error}; the message is the
     *                          element's text
     */
    @Override
    public void endElement(String uri, String localName, String qualifiedName) throws SAXException {
        if (qualifiedName == null) {
            // A switch on a null string would throw; an unnamed element matches no branch anyway.
            return;
        }

        String content = characters.toString();

        switch (qualifiedName) {
            case "error":
                throw new RuntimeException(content);
            case "id":
                entry.setField(FieldName.EPRINT, content);
                break;
            case "keyname":
                keyname = content;
                break;
            case "forenames":
                forenames = content;
                break;
            case "journal-ref":
                parseJournalReference(content);
                break;
            case "datestamp":
                // Only used as a fallback: a year taken from the journal reference wins.
                Optional<String> year = entry.getField(FieldName.YEAR);
                if (!year.isPresent() || year.get().isEmpty()) {
                    entry.setField(FieldName.YEAR, content.replaceFirst("-.*", ""));
                }
                break;
            case "title":
                entry.setField(FieldName.TITLE, content);
                break;
            case "abstract":
                entry.setField(FieldName.ABSTRACT, content);
                break;
            case "comments":
                entry.setField(FieldName.COMMENT, content);
                break;
            case "report-no":
                entry.setField(FieldName.REPORTNO, content);
                break;
            case "doi":
                entry.setField(FieldName.DOI, content);
                break;
            case "author":
                appendCurrentAuthor();
                break;
            default:
                break;
        }
    }

    /**
     * Writes the accumulated author list to the entry.
     */
    @Override
    public void endDocument() throws SAXException {
        entry.setField(FieldName.AUTHOR, authors.toString());
    }

    /**
     * Normalises line breaks and whitespace in the given text.
     * <p>
     * A line break that is not followed by a blank line is replaced by a space, every remaining
     * line break is reduced to a single {@code \n} without surrounding whitespace, runs of
     * spaces are collapsed to one space, and leading and trailing whitespace is removed.
     *
     * @param s the text to normalise
     * @return the normalised text
     */
    public static String correctLineBreaks(String s) {
        String result = SINGLE_LINE_BREAK.matcher(s).replaceAll(" ");
        result = LINE_BREAK_WITH_WHITESPACE.matcher(result).replaceAll("\n");
        result = MULTIPLE_SPACES.matcher(result).replaceAll(" ");
        return LEADING_OR_TRAILING_WHITESPACE.matcher(result).replaceAll("");
    }

    /**
     * Splits a journal reference heuristically into journal, volume, year and pages and stores
     * each part in the entry.
     * <p>
     * The journal is the text before the first digit, the volume is the text that follows up to
     * the next space, the year is the text inside the first pair of parentheses, and the pages
     * are whatever remains once those parts and all spaces are removed.
     */
    private void parseJournalReference(String content) {
        String journal = content.replaceFirst("[0-9].*", "");
        entry.setField(FieldName.JOURNAL, journal);

        // The extracted parts are arbitrary text, so they must be quoted before being used as a
        // pattern; otherwise characters such as '(' or '+' would be read as regex syntax.
        String withoutJournal = content.replaceFirst(Pattern.quote(journal), "");

        String volume = withoutJournal.replaceFirst(" .*", "");
        entry.setField(FieldName.VOLUME, volume);

        String pages = withoutJournal.replaceFirst(Pattern.quote(volume), "");

        // Without an opening parenthesis there is no year to extract; leaving the field untouched
        // avoids storing the whole reference as the year.
        if (content.indexOf('(') >= 0) {
            String year = content.replaceFirst(".*?\\(", "");
            year = year.replaceFirst("\\).*", "");
            entry.setField(FieldName.YEAR, year);
            pages = pages.replaceFirst("\\(" + Pattern.quote(year) + "\\)", "");
        }

        pages = pages.replace(" ", "");
        entry.setField(FieldName.PAGES, pages);
    }

    /**
     * Adds the author described by the current forenames and key name to the author list and
     * clears both, so that the next author cannot inherit values from this one.
     */
    private void appendCurrentAuthor() {
        StringBuilder author = new StringBuilder();
        if ((forenames != null) && !forenames.isEmpty()) {
            author.append(forenames);
        }
        if ((keyname != null) && !keyname.isEmpty()) {
            if (author.length() > 0) {
                author.append(' ');
            }
            author.append(keyname);
        }

        forenames = null;
        keyname = null;

        if (author.length() == 0) {
            // Neither part was given, so there is nothing to add.
            return;
        }
        if (authors.length() > 0) {
            authors.append(" and ");
        }
        authors.append(author);
    }
}