/*  Copyright (C) 2011 Sascha Hunold.
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package net.sf.jabref.imports;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;

import net.sf.jabref.BibtexEntry;

/**
 * Helper for the DBLP import: normalizes user queries so that they can be
 * sent to dblp.org and extracts BibTeX entries from the HTML pages that
 * dblp.org returns.
 */
public class DBLPHelper {

    /** Marker that opens a BibTeX block inside the HTML page. */
    private static final String PRE_START = "<pre>";

    /** Marker that closes a BibTeX block inside the HTML page. */
    private static final String PRE_END = "</pre>";

    /**
     * At most two blocks are read from a page: the entry itself and an
     * optional second one (the crossref entry).
     */
    private static final int MAX_ENTRIES_PER_PAGE = 2;

    /**
     * Matches the HTML anchor that wraps the word "DBLP" inside an entry.
     * Compiled once instead of on every call.
     */
    private static final Pattern DBLP_LINK = Pattern
            .compile("<a href=\".*\">DBLP</a>");

    private final DBLPQueryCleaner cleaner = new DBLPQueryCleaner();

    /**
     * This is a small helper class that cleans the user submitted query.
     * Right now, we cannot search for ":" on dblp.org. So, we remove colons
     * from the user submitted search string. Also, the search is case
     * sensitive if we use capitals. So, we better change the text to lower
     * case.
     */
    class DBLPQueryCleaner {

        /**
         * Rewrites the query step by step: hyphens become spaces, spaces
         * become "%20", colons are dropped and the result is lower-cased.
         *
         * @param query
         *            the raw user query; must not be null
         * @return the rewritten query
         */
        public String cleanQuery(final String query) {
            // Plain literal replacements; no regular expressions needed.
            final String cleaned = query.replace("-", " ")
                    .replace(" ", "%20")
                    .replace(":", "");
            // Use a fixed locale so the result does not depend on the
            // default locale of the JVM (e.g. "I" must not turn into a
            // dotless "i" under a Turkish locale).
            return cleaned.toLowerCase(Locale.ENGLISH);
        }
    }

    /**
     * Makes a user query compatible with dblp.org.
     *
     * @param query
     *            string with the user query; must not be null
     * @return a string with the user query, but compatible with dblp.org
     */
    public String cleanDBLPQuery(String query) {
        return cleaner.cleanQuery(query);
    }

    /**
     * Takes an HTML file (as String) as input and extracts the bibtex
     * information. After that, it will convert it into a BibtexEntry and
     * return it (them).
     *
     * The text between the first "&lt;pre&gt;" and the following
     * "&lt;/pre&gt;" is the entry itself; a second such block, if present,
     * is read as well (the crossref entry). Extraction stops at the first
     * missing or unterminated block, so a page without any complete block
     * yields an empty list instead of failing with an index error.
     *
     * @param page
     *            HTML page as String; null is treated like a page without
     *            entries
     * @return list of BibtexEntry, possibly empty, never null
     * @throws DBLPParseException
     *             declared by the existing signature; this method does not
     *             create one itself
     */
    public List<BibtexEntry> getBibTexFromPage(final String page)
            throws DBLPParseException {
        final List<BibtexEntry> bibtexList = new ArrayList<BibtexEntry>();
        if (page == null) {
            return bibtexList;
        }

        int searchFrom = 0;
        for (int i = 0; i < MAX_ENTRIES_PER_PAGE; i++) {
            final int startIdx = page.indexOf(PRE_START, searchFrom);
            if (startIdx == -1) {
                break;
            }
            final int contentStart = startIdx + PRE_START.length();
            // Look for the closing marker only behind the opening one.
            final int endIdx = page.indexOf(PRE_END, contentStart);
            if (endIdx == -1) {
                break;
            }

            final String rawEntry = page.substring(contentStart, endIdx);
            final BibtexEntry parsed = BibtexParser
                    .singleFromString(cleanEntry(rawEntry));
            // NOTE(review): guards against a null result, assuming the
            // parser may return null for text it cannot parse -- confirm
            // against BibtexParser.
            if (parsed != null) {
                bibtexList.add(parsed);
            }

            searchFrom = endIdx + PRE_END.length();
        }
        return bibtexList;
    }

    /**
     * Replaces the first HTML link around the word "DBLP" with the plain
     * text "DBLP".
     *
     * @param bibEntry
     *            entry text as cut out of the HTML page
     * @return the entry text with the first DBLP link replaced
     */
    private String cleanEntry(final String bibEntry) {
        return DBLP_LINK.matcher(bibEntry).replaceFirst("DBLP");
    }
}