/* Copyright (C) 2012 JabRef contributors. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ package net.sf.jabref.imports; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.net.MalformedURLException; import java.net.SocketException; import java.net.URL; import java.net.URLConnection; import java.net.URLEncoder; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.List; import java.util.Scanner; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamConstants; import javax.xml.stream.XMLStreamReader; import net.sf.jabref.BibtexEntry; import net.sf.jabref.BibtexEntryType; import net.sf.jabref.Globals; import net.sf.jabref.JabRef; import net.sf.jabref.OutputPrinter; import net.sf.jabref.labelPattern.LabelPatternUtil; /** * This importer parses text format citations using the online API of FreeCite - * Open Source Citation Parser http://freecite.library.brown.edu/ */ public class FreeCiteImporter extends ImportFormat { @Override public boolean isRecognizedFormat(InputStream in) throws IOException { // TODO: We don't know how to recognize text files, therefore we return // "false" return false; } @Override public List<BibtexEntry> importEntries(InputStream in, OutputPrinter status) throws IOException { String text = new Scanner(in).useDelimiter("\\A").next(); return importEntries(text, status); } public List<BibtexEntry> importEntries(String text, OutputPrinter status) { // URLencode the string for transmission String urlencodedCitation = null; try { urlencodedCitation = URLEncoder.encode(text, "UTF-8"); } catch (UnsupportedEncodingException e) { // e.printStackTrace(); } String data = "citation=" + urlencodedCitation; // Send the request URL url; URLConnection conn; try { url = new URL("http://freecite.library.brown.edu/citations/create"); conn = url.openConnection(); } catch (MalformedURLException e) { e.printStackTrace(); return null; } catch (IOException e) { e.printStackTrace(); return null; } try { conn.setRequestProperty("accept", "text/xml"); conn.setDoOutput(true); OutputStreamWriter writer = new OutputStreamWriter(conn.getOutputStream()); // write parameters writer.write(data); writer.flush(); } catch (IOException e) { status.showMessage(Globals.lang("Unable to connect to freecite online service.")); return null; } catch (Exception ex) { ex.printStackTrace(); return null; } // output is in conn.getInputStream(); // new InputStreamReader(conn.getInputStream()) List<BibtexEntry> res = new ArrayList<BibtexEntry>(); XMLInputFactory factory = XMLInputFactory.newInstance(); try { XMLStreamReader parser = factory.createXMLStreamReader(conn.getInputStream()); while (parser.hasNext()) { if ((parser.getEventType() == XMLStreamConstants.START_ELEMENT) && (parser.getLocalName().equals("citation"))) { parser.nextTag(); StringBuilder noteSB = new StringBuilder(); BibtexEntry e = new BibtexEntry(); // fallback type BibtexEntryType type = BibtexEntryType.INPROCEEDINGS; while (! ( parser.getEventType() == XMLStreamConstants.END_ELEMENT && parser.getLocalName().equals("citation"))) { if (parser.getEventType() == XMLStreamConstants.START_ELEMENT) { String ln = parser.getLocalName(); if (ln.equals("authors")) { StringBuilder sb = new StringBuilder(); parser.nextTag(); while (parser.getEventType() == XMLStreamConstants.START_ELEMENT) { // author is directly nested below authors assert (parser.getLocalName() .equals("author")); String author = parser.getElementText(); if (sb.length() == 0) { // first author sb.append(author); } else { sb.append(" and "); sb.append(author); } assert(parser.getEventType() == XMLStreamConstants.END_ELEMENT); assert(parser.getLocalName().equals("author")); parser.nextTag(); // current tag is either begin:author or // end:authors } e.setField("author", sb.toString()); } else if (ln.equals("journal")) { // we guess that the entry is a journal // the alternative way is to parse // ctx:context-objects / ctx:context-object / ctx:referent / ctx:metadata-by-val / ctx:metadata / journal / rft:genre // the drawback is that ctx:context-objects is NOT nested in citation, but a separate element // we would have to change the whole parser to parse that format. type = BibtexEntryType.ARTICLE; e.setField(ln, parser.getElementText()); } else if (ln.equals("tech")) { type = BibtexEntryType.TECHREPORT; // the content of the "tech" field seems to contain the number of the technical report e.setField("number", parser.getElementText()); } else if ( ln.equals("doi") || ln.equals("institution") || ln.equals("location") || ln.equals("number") || ln.equals("note") || ln.equals("title") || ln.equals("pages") || ln.equals("publisher") || ln.equals("volume") || ln.equals("year")) { e.setField(ln, parser.getElementText()); } else if (ln.equals("booktitle")) { String booktitle = parser.getElementText(); if (booktitle.startsWith("In ")) { // special treatment for parsing of // "In proceedings of..." references booktitle = booktitle.substring(3); } e.setField("booktitle", booktitle); } else if (ln.equals("raw_string")) { // raw input string is ignored } else { // all other tags are stored as note noteSB.append(ln); noteSB.append(":"); noteSB.append(parser.getElementText()); noteSB.append(Globals.NEWLINE); } } parser.next(); } if (noteSB.length() > 0) { String note = e.getField("note"); if (note != null) { // "note" could have been set during the parsing as FreeCite also returns "note" note = note.concat(Globals.NEWLINE).concat(noteSB.toString()); } else { note = noteSB.toString(); } e.setField("note", note); } // type has been derived from "genre" // has to be done before label generation as label generation is dependent on entry type e.setType(type); // autogenerate label (BibTeX key) e = LabelPatternUtil.makeLabel(JabRef.jrf.basePanel().metaData(), JabRef.jrf.basePanel().database(), e); res.add(e); } parser.next(); } parser.close(); } catch (Exception ex) { ex.printStackTrace(); return null; } return res; } @Override public String getFormatName() { return "text citations"; } }