package org.jabref.logic.importer.fileformat;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.stream.Collectors;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.jabref.logic.importer.Importer;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.util.FileExtensions;
import org.jabref.model.database.BibDatabase;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.FieldName;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Importer for the XML recommendation responses of the MrDLib API.
 * <p>
 * Every {@code related_article} element of the response becomes one {@link BibEntry}. The entries are
 * inserted into the resulting database in ascending order of their {@code suggested_rank}.
 */
public class MrDLibImporter extends Importer {

    private static final Log LOGGER = LogFactory.getLog(MrDLibImporter.class);

    /** Result of the most recent {@link #importDatabase(BufferedReader)} call; {@code null} before the first call. */
    public ParserResult parserResult;

    /**
     * Checks whether the input is a well-formed XML document.
     * No processing of the content happens here; a check against an XML schema may follow later.
     *
     * @param input reader positioned at the start of the document; it is consumed completely
     * @return {@code true} if the document could be parsed as XML, {@code false} otherwise
     * @throws IOException if reading from {@code input} fails
     */
    @Override
    public boolean isRecognizedFormat(BufferedReader input) throws IOException {
        String recommendationsAsString = convertToString(input);
        try {
            parseXml(recommendationsAsString, new DefaultHandler());
            return true;
        } catch (ParserConfigurationException | SAXException | IOException e) {
            LOGGER.error(e.getMessage(), e);
            return false;
        }
    }

    /**
     * Parses the input and returns the recommendations as a {@link ParserResult}.
     * The result is also stored in {@link #parserResult}.
     *
     * @param input reader positioned at the start of the XML response
     * @return the parsed recommendations
     * @throws IOException if reading from {@code input} fails
     */
    @Override
    public ParserResult importDatabase(BufferedReader input) throws IOException {
        parse(input);
        return parserResult;
    }

    @Override
    public String getName() {
        return "MrDLibImporter";
    }

    @Override
    public FileExtensions getExtensions() {
        return FileExtensions.XML;
    }

    @Override
    public String getDescription() {
        return "Takes valid xml documents. Parses from MrDLib API a BibEntry";
    }

    /**
     * Reads the complete content of the reader into a string, because the document is parsed from memory.
     * Line breaks are kept (normalized to {@code \n}) so that tokens on adjacent lines are not fused together.
     *
     * @param input reader with the XML document delivered by the server
     * @return the content of the reader
     * @throws IOException if reading from {@code input} fails
     */
    private String convertToString(BufferedReader input) throws IOException {
        StringBuilder content = new StringBuilder();
        String line;
        while ((line = input.readLine()) != null) {
            content.append(line).append('\n');
        }
        return content.toString();
    }

    /**
     * Runs a SAX parse of the given document with the given handler.
     * <p>
     * The document is handed to the parser as a character stream: the text has already been decoded by the
     * reader it came from, so converting it back to bytes with the platform charset could corrupt
     * non-ASCII characters. Resolution of external entities is switched off because the document
     * originates from a remote server.
     *
     * @param xml     the document to parse
     * @param handler receives the SAX events
     * @throws ParserConfigurationException if no parser with the requested configuration can be created
     * @throws SAXException                 if the document is not well-formed or a feature is not supported
     * @throws IOException                  if the parser fails to read the document
     */
    private static void parseXml(String xml, DefaultHandler handler)
            throws ParserConfigurationException, SAXException, IOException {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        SAXParser saxParser = factory.newSAXParser();
        saxParser.parse(new InputSource(new StringReader(xml)), handler);
    }

    /**
     * Small pair-class to ensure the right order of the recommendations.
     */
    private static final class RankedBibEntry {

        private final BibEntry entry;
        private final int rank;

        RankedBibEntry(BibEntry entry, int rank) {
            this.entry = entry;
            this.rank = rank;
        }

        BibEntry getEntry() {
            return entry;
        }

        int getRank() {
            return rank;
        }
    }

    /**
     * Parses the input from the server and stores the outcome in {@link #parserResult}.
     * If the document turns out to be malformed, the error is logged and the entries that were
     * completed before the error are kept.
     *
     * @param input a reader with the server's response
     * @throws IOException if reading from {@code input} fails
     */
    private void parse(BufferedReader input) throws IOException {
        String recommendations = convertToString(input);
        MrDlibImporterHandler handler = new MrDlibImporterHandler();
        try {
            parseXml(recommendations, handler);
        } catch (ParserConfigurationException | SAXException e) {
            LOGGER.error(e.getMessage(), e);
        }

        // The sort is stable, so entries sharing a rank stay in document order.
        List<BibEntry> bibEntries = handler.getRankedBibEntries().stream()
                .sorted(Comparator.comparingInt(RankedBibEntry::getRank))
                .map(RankedBibEntry::getEntry)
                .collect(Collectors.toList());

        BibDatabase bibDatabase = new BibDatabase();
        for (BibEntry bibEntry : bibEntries) {
            bibDatabase.insertEntry(bibEntry);
        }
        parserResult = new ParserResult(bibDatabase);
    }

    public ParserResult getParserResult() {
        return parserResult;
    }

    /**
     * Handler that converts the response from Mr. DLib into ranked BibEntries.
     * <p>
     * The text of an element is accumulated over all {@code characters} callbacks and only stored when
     * the element ends, because a SAX parser may deliver the text of one element in several chunks.
     */
    private static final class MrDlibImporterHandler extends DefaultHandler {

        /** Rank of an entry for which no (valid) {@code suggested_rank} was found. */
        private static final int UNRANKED = -1;
        private static final String SNIPPET_FORMAT = "html_fully_formatted";
        private static final String SNIPPET_FIELD = "html_representation";

        // The list of BibEntries with their associated rank
        private final List<RankedBibEntry> rankedBibEntries = new ArrayList<>();
        // Text of the element that is currently being captured
        private final StringBuilder text = new StringBuilder();
        // True while inside an element whose text is of interest
        private boolean capturing;
        // Entry of the enclosing related_article element; null outside of such an element
        private BibEntry currentEntry;
        private int currentRank = UNRANKED;

        public List<RankedBibEntry> getRankedBibEntries() {
            return rankedBibEntries;
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            switch (qName.toLowerCase(Locale.ROOT)) {
                case "related_article":
                    currentEntry = new BibEntry();
                    currentRank = UNRANKED;
                    break;
                case "authors":
                case "published_in":
                case "title":
                case "year":
                case "suggested_rank":
                    beginCapture();
                    break;
                case "snippet":
                    // getValue returns null if the element has no attributes, hence the constant-first comparison
                    if (SNIPPET_FORMAT.equalsIgnoreCase(attributes.getValue(0))) {
                        beginCapture();
                    }
                    break;
                default:
                    break;
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            switch (qName.toLowerCase(Locale.ROOT)) {
                case "related_article":
                    if (currentEntry != null) {
                        rankedBibEntries.add(new RankedBibEntry(currentEntry, currentRank));
                    }
                    currentEntry = null;
                    break;
                case "authors":
                    storeField(FieldName.AUTHOR);
                    break;
                case "published_in":
                    storeField(FieldName.JOURNAL);
                    break;
                case "title":
                    storeField(FieldName.TITLE);
                    break;
                case "year":
                    storeField(FieldName.YEAR);
                    break;
                case "suggested_rank":
                    storeRank();
                    break;
                case "snippet":
                    // Snippets in another format never started a capture, so nothing is stored for them
                    storeField(SNIPPET_FIELD);
                    break;
                default:
                    break;
            }
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (capturing) {
                text.append(ch, start, length);
            }
        }

        /** Starts collecting the text of the element that has just been opened. */
        private void beginCapture() {
            text.setLength(0);
            capturing = true;
        }

        /** Writes the captured text to the given field of the current entry; empty elements are skipped. */
        private void storeField(String fieldName) {
            if (capturing && (currentEntry != null) && (text.length() > 0)) {
                currentEntry.setField(fieldName, text.toString());
            }
            capturing = false;
        }

        /** Interprets the captured text as the rank of the current entry; an invalid rank leaves it unchanged. */
        private void storeRank() {
            if (capturing) {
                String value = text.toString().trim();
                try {
                    currentRank = Integer.parseInt(value);
                } catch (NumberFormatException e) {
                    LOGGER.warn("Ignoring invalid suggested_rank '" + value + "'", e);
                }
            }
            capturing = false;
        }
    }
}