package org.jabref.logic.importer.fileformat; import java.io.BufferedReader; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.Objects; import java.util.Scanner; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; import org.jabref.JabRefGUI; import org.jabref.logic.bibtexkeypattern.BibtexKeyPatternUtil; import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.Importer; import org.jabref.logic.importer.ParserResult; import org.jabref.logic.l10n.Localization; import org.jabref.logic.util.FileExtensions; import org.jabref.logic.util.OS; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.BibtexEntryTypes; import org.jabref.model.entry.EntryType; import org.jabref.model.entry.FieldName; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** * This importer parses text format citations using the online API of FreeCite - * Open Source Citation Parser http://freecite.library.brown.edu/ */ public class FreeCiteImporter extends Importer { private static final Log LOGGER = LogFactory.getLog(FreeCiteImporter.class); private final ImportFormatPreferences importFormatPreferences; public FreeCiteImporter(ImportFormatPreferences importFormatPreferences) { this.importFormatPreferences = importFormatPreferences; } @Override public boolean isRecognizedFormat(BufferedReader reader) throws IOException { Objects.requireNonNull(reader); // TODO: We don't know how to recognize text files, therefore we return "false" return false; } @Override public ParserResult importDatabase(BufferedReader reader) throws IOException { try (Scanner scan = new Scanner(reader)) { String text = scan.useDelimiter("\\A").next(); return importEntries(text); } } public ParserResult importEntries(String text) { // URLencode the string for transmission String urlencodedCitation = null; try { urlencodedCitation = URLEncoder.encode(text, StandardCharsets.UTF_8.name()); } catch (UnsupportedEncodingException e) { LOGGER.warn("Unsupported encoding", e); } // Send the request URL url; URLConnection conn; try { url = new URL("http://freecite.library.brown.edu/citations/create"); conn = url.openConnection(); } catch (MalformedURLException e) { LOGGER.warn("Bad URL", e); return new ParserResult(); } catch (IOException e) { LOGGER.warn("Could not download", e); return new ParserResult(); } try { conn.setRequestProperty("accept", "text/xml"); conn.setDoOutput(true); OutputStreamWriter writer = new OutputStreamWriter(conn.getOutputStream()); String data = "citation=" + urlencodedCitation; // write parameters writer.write(data); writer.flush(); } catch (IllegalStateException e) { LOGGER.warn("Already connected.", e); } catch (IOException e) { LOGGER.warn("Unable to connect to FreeCite online service.", e); return ParserResult.fromErrorMessage(Localization.lang("Unable to connect to FreeCite online service.")); } // output is in conn.getInputStream(); // new InputStreamReader(conn.getInputStream()) List<BibEntry> res = new ArrayList<>(); XMLInputFactory factory = XMLInputFactory.newInstance(); try { XMLStreamReader parser = factory.createXMLStreamReader(conn.getInputStream()); while (parser.hasNext()) { if ((parser.getEventType() == XMLStreamConstants.START_ELEMENT) && "citation".equals(parser.getLocalName())) { parser.nextTag(); StringBuilder noteSB = new StringBuilder(); BibEntry e = new BibEntry(); // fallback type EntryType type = BibtexEntryTypes.INPROCEEDINGS; while (!((parser.getEventType() == XMLStreamConstants.END_ELEMENT) && "citation".equals(parser.getLocalName()))) { if (parser.getEventType() == XMLStreamConstants.START_ELEMENT) { String ln = parser.getLocalName(); if ("authors".equals(ln)) { StringBuilder sb = new StringBuilder(); parser.nextTag(); while (parser.getEventType() == XMLStreamConstants.START_ELEMENT) { // author is directly nested below authors assert "author".equals(parser.getLocalName()); String author = parser.getElementText(); if (sb.length() == 0) { // first author sb.append(author); } else { sb.append(" and "); sb.append(author); } assert parser.getEventType() == XMLStreamConstants.END_ELEMENT; assert "author".equals(parser.getLocalName()); parser.nextTag(); // current tag is either begin:author or // end:authors } e.setField(FieldName.AUTHOR, sb.toString()); } else if (FieldName.JOURNAL.equals(ln)) { // we guess that the entry is a journal // the alternative way is to parse // ctx:context-objects / ctx:context-object / ctx:referent / ctx:metadata-by-val / ctx:metadata / journal / rft:genre // the drawback is that ctx:context-objects is NOT nested in citation, but a separate element // we would have to change the whole parser to parse that format. type = BibtexEntryTypes.ARTICLE; e.setField(ln, parser.getElementText()); } else if ("tech".equals(ln)) { type = BibtexEntryTypes.TECHREPORT; // the content of the "tech" field seems to contain the number of the technical report e.setField(FieldName.NUMBER, parser.getElementText()); } else if (FieldName.DOI.equals(ln) || FieldName.INSTITUTION.equals(ln) || FieldName.LOCATION.equals(ln) || FieldName.NUMBER.equals(ln) || FieldName.NOTE.equals(ln) || FieldName.TITLE.equals(ln) || FieldName.PAGES.equals(ln) || FieldName.PUBLISHER.equals(ln) || FieldName.VOLUME.equals(ln) || FieldName.YEAR.equals(ln)) { e.setField(ln, parser.getElementText()); } else if (FieldName.BOOKTITLE.equals(ln)) { String booktitle = parser.getElementText(); if (booktitle.startsWith("In ")) { // special treatment for parsing of // "In proceedings of..." references booktitle = booktitle.substring(3); } e.setField(FieldName.BOOKTITLE, booktitle); } else if ("raw_string".equals(ln)) { // raw input string is ignored } else { // all other tags are stored as note noteSB.append(ln); noteSB.append(':'); noteSB.append(parser.getElementText()); noteSB.append(OS.NEWLINE); } } parser.next(); } if (noteSB.length() > 0) { String note; if (e.hasField(FieldName.NOTE)) { // "note" could have been set during the parsing as FreeCite also returns "note" note = e.getField(FieldName.NOTE).get().concat(OS.NEWLINE) .concat(noteSB.toString()); } else { note = noteSB.toString(); } e.setField(FieldName.NOTE, note); } // type has been derived from "genre" // has to be done before label generation as label generation is dependent on entry type e.setType(type); // autogenerate label (BibTeX key) if (JabRefGUI.getMainFrame() != null) { // only possible in GUI mode BibtexKeyPatternUtil.makeAndSetLabel( JabRefGUI.getMainFrame().getCurrentBasePanel().getBibDatabaseContext().getMetaData() .getCiteKeyPattern(importFormatPreferences.getBibtexKeyPatternPreferences().getKeyPattern()), JabRefGUI.getMainFrame().getCurrentBasePanel().getDatabase(), e, importFormatPreferences.getBibtexKeyPatternPreferences()); } res.add(e); } parser.next(); } parser.close(); } catch (IOException | XMLStreamException ex) { LOGGER.warn("Could not parse", ex); return new ParserResult(); } return new ParserResult(res); } @Override public String getName() { return "text citations"; } @Override public FileExtensions getExtensions() { return FileExtensions.FREECITE; } @Override public String getDescription() { return "This importer parses text format citations using the online API of FreeCite."; } }