package de.geeksfactory.opacclient.apis;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.utils.URLEncodedUtils;
import org.apache.http.message.BasicNameValuePair;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import de.geeksfactory.opacclient.networking.HttpClientFactory;
import de.geeksfactory.opacclient.objects.Account;
import de.geeksfactory.opacclient.objects.AccountData;
import de.geeksfactory.opacclient.objects.Copy;
import de.geeksfactory.opacclient.objects.Detail;
import de.geeksfactory.opacclient.objects.DetailedItem;
import de.geeksfactory.opacclient.objects.Filter;
import de.geeksfactory.opacclient.objects.Library;
import de.geeksfactory.opacclient.objects.SearchRequestResult;
import de.geeksfactory.opacclient.objects.SearchResult;
import de.geeksfactory.opacclient.objects.Volume;
import de.geeksfactory.opacclient.searchfields.DropdownSearchField;
import de.geeksfactory.opacclient.searchfields.SearchField;
import de.geeksfactory.opacclient.searchfields.SearchQuery;
import de.geeksfactory.opacclient.searchfields.TextSearchField;
public class VuFind extends BaseApi {
/** Extracts the record id from a URL path or href containing {@code /Record/<id>}. */
protected static final Pattern idPattern = Pattern.compile("\\/Record\\/([^/]+)");
/**
 * Language code translation table: keys are the codes passed to {@link #setLanguage(String)},
 * values are the codes sent to the catalogue as the {@code mylang} parameter.
 */
protected static HashMap<String, String> languageCodes = new HashMap<>();
/**
 * Maps a jsoup CSS selector (a comma-separated list of class selectors) to the media type
 * assigned to a search result row in which the selector matches at least one element.
 */
protected static HashMap<String, SearchResult.MediaType> mediaTypeSelectors = new HashMap<>();

static {
    languageCodes.put("de", "de");
    languageCodes.put("en", "en");
    languageCodes.put("el", "el");
    languageCodes.put("es", "es");
    languageCodes.put("it", "it");
    languageCodes.put("fr", "fr");
    languageCodes.put("da", "da");

    mediaTypeSelectors
            .put(".cd, .audio, .musicrecording, .record, .soundrecordingmedium, " +
                            ".soundrecording",
                    SearchResult.MediaType.CD_MUSIC);
    mediaTypeSelectors.put(".audiotape, .cassette, .soundcassette",
            SearchResult.MediaType.AUDIO_CASSETTE);
    mediaTypeSelectors.put(".dvdaudio, .sounddisc", SearchResult.MediaType.CD_MUSIC);
    mediaTypeSelectors.put(".dvd, .dvdvideo", SearchResult.MediaType.DVD);
    mediaTypeSelectors.put(".blueraydisc, .bluraydisc", SearchResult.MediaType.BLURAY);
    mediaTypeSelectors.put(".ebook", SearchResult.MediaType.EBOOK);
    mediaTypeSelectors.put(".map, .globe, .atlas", SearchResult.MediaType.MAP);
    mediaTypeSelectors
            .put(".slide, .photo, .artprint, .collage, .drawing, .flashcard, .painting, " +
                            ".photonegative, .placard, .print, .sensorimage, .transparency",
                    SearchResult.MediaType.ART);
    mediaTypeSelectors.put(
            ".microfilm, .video, .videodisc, .vhs, .video, .videotape, .videocassette, " +
                    ".videocartridge, .audiovisualmedia, .filmstrip, .motionpicture, " +
                    ".videoreel",
            SearchResult.MediaType.MOVIE);
    mediaTypeSelectors.put(".kit, .sets", SearchResult.MediaType.PACKAGE);
    mediaTypeSelectors.put(".musicalscore, .notatedmusic, .electronicmusicalscore",
            SearchResult.MediaType.SCORE_MUSIC);
    mediaTypeSelectors.put(".manuscript, .book, .articles", SearchResult.MediaType.BOOK);
    mediaTypeSelectors
            .put(".journal, .journalnewspaper, .serial", SearchResult.MediaType.MAGAZINE);
    mediaTypeSelectors.put(".newspaper, .newspaperarticle", SearchResult.MediaType.NEWSPAPER);
    mediaTypeSelectors.put(".software, .cdrom, .chipcartridge, .disccartridge, .dvdrom",
            SearchResult.MediaType.CD_SOFTWARE);
    // This key used to be put three times with the same value; once is enough.
    mediaTypeSelectors.put(".newspaper", SearchResult.MediaType.NEWSPAPER);
    // Every entry must be a class selector. "electronicresourcedatacarrier" previously lacked
    // its leading dot and was therefore interpreted as a tag name.
    mediaTypeSelectors
            .put(".electronicnewspaper, .electronic, .electronicarticle, " +
                            ".electronicresourcedatacarrier, .electronicresourceremoteaccess, " +
                            ".electronicserial, .electronicjournal, .electronicthesis",
                    SearchResult.MediaType.EDOC);
    mediaTypeSelectors.put(".unknown", SearchResult.MediaType.UNKNOWN);
}

/** Language code sent to the catalogue as {@code mylang}; see {@link #setLanguage(String)}. */
protected String languageCode = "en";
/** Base URL of the catalogue, read from the {@code baseurl} key of the library data. */
protected String opac_url = "";
/** Library configuration object as returned by {@code Library.getData()}. */
protected JSONObject data;
/** Query of the most recent {@code search} call; reused by {@code searchGetPage}. */
protected List<SearchQuery> last_query;
/**
 * Initialises the API for the given library and caches its configuration.
 *
 * @param lib               library whose data object must contain a {@code baseurl} string
 * @param httpClientFactory passed through to the superclass initialiser
 * @throws RuntimeException wrapping the {@link JSONException} if {@code baseurl} is missing
 */
@Override
public void init(Library lib, HttpClientFactory httpClientFactory) {
    super.init(lib, httpClientFactory);
    library = lib;
    data = lib.getData();
    try {
        opac_url = data.getString("baseurl");
    } catch (JSONException missingBaseUrl) {
        // Without a base URL no request can be built, so this is treated as fatal.
        throw new RuntimeException(missingBaseUrl);
    }
}
/**
 * Translates a list of search queries into the HTTP parameters of a results request.
 * <p>
 * Queries with an empty value are skipped. A query whose key contains {@code filter[]}
 * becomes a {@code filter[]} parameter; every other query becomes a
 * {@code type0[]}/{@code bool0[]}/{@code lookfor0[]} triple.
 *
 * @param query the queries to translate
 * @return a new, mutable parameter list starting with the {@code sort} and {@code join} pairs
 */
protected List<NameValuePair> buildSearchParams(List<SearchQuery> query) {
    final List<NameValuePair> pairs = new ArrayList<>();
    pairs.add(new BasicNameValuePair("sort", "relevance"));
    pairs.add(new BasicNameValuePair("join", "AND"));

    for (SearchQuery part : query) {
        final String value = part.getValue();
        if (value.isEmpty()) {
            continue;
        }
        final String key = part.getKey();
        if (!key.contains("filter[]")) {
            pairs.add(new BasicNameValuePair("type0[]", key));
            pairs.add(new BasicNameValuePair("bool0[]", "AND"));
            pairs.add(new BasicNameValuePair("lookfor0[]", value));
        } else {
            pairs.add(new BasicNameValuePair("filter[]", value));
        }
    }
    return pairs;
}
/**
 * Runs a search and returns the first page of results.
 * <p>
 * The query is remembered in {@code last_query} so that {@code searchGetPage} can repeat it.
 *
 * @param query the search queries to send
 * @return the parsed first result page
 */
@Override
public SearchRequestResult search(List<SearchQuery> query)
        throws IOException, OpacErrorException, JSONException {
    if (!initialised) {
        start();
    }
    last_query = query;

    final String url = opac_url + "/Search/Results" +
            buildHttpGetParams(buildSearchParams(query));
    final String body = httpGet(url, getDefaultEncoding());
    final Document resultPage = Jsoup.parse(body);
    return parse_search(resultPage, 1);
}
/**
 * Converts a result list page into a {@link SearchRequestResult}.
 * <p>
 * For every {@code div.result} row this extracts a short HTML description (from the COinS
 * {@code span.Z3988} metadata if present, otherwise from the title link text), an availability
 * status, a media type, a cover URL and the record id taken from the title link.
 *
 * @param doc  the parsed result page; its base URI is reset to the results URL
 * @param page the page number stored on every result and on the returned object
 * @return the parsed results; the total count is {@code -1} if it could not be determined
 * @throws OpacErrorException if the page contains an error message, or contains no results
 *                            but a paragraph inside {@code .main}
 */
protected SearchRequestResult parse_search(Document doc, int page) throws OpacErrorException {
    doc.setBaseUri(opac_url + "/Search/Results");

    Elements errors = doc.select("p.error, p.errorMsg, .alert-error");
    if (errors.size() > 0) {
        throw new OpacErrorException(errors.text());
    } else if (doc.select("div.result").size() == 0 && doc.select(".main p").size() > 0) {
        throw new OpacErrorException(doc.select(".main p").first().text());
    }

    // The third <strong> inside .resulthead is parsed as the total count. If it is missing or
    // not a plain integer, the count stays unknown instead of aborting the search.
    int rescount = -1;
    if (doc.select(".resulthead").size() == 1) {
        Elements counters = doc.select(".resulthead strong");
        if (counters.size() > 2) {
            try {
                rescount = Integer.parseInt(counters.get(2).text());
            } catch (NumberFormatException ignored) {
                // count stays -1 (unknown)
            }
        }
    }

    List<SearchResult> reslist = new ArrayList<>();
    for (Element row : doc.select("div.result")) {
        SearchResult res = new SearchResult();

        Element coins = findCoinsSpan(row);
        String description = coins == null ? null : buildCoinsDescription(coins);
        if (description == null) {
            // No COinS metadata, or it could not be parsed: fall back to the title text.
            description = row.select("a.title").text();
        }
        res.setInnerhtml(description);

        SearchResult.Status status = detectRowStatus(row);
        if (status != null) {
            res.setStatus(status);
        }

        for (Map.Entry<String, SearchResult.MediaType> entry : mediaTypeSelectors.entrySet()) {
            if (row.select(entry.getKey()).size() > 0) {
                res.setType(entry.getValue());
                break;
            }
        }

        // The first image whose URL contains "over" decides the cover; a URL containing
        // "Unavailable" means no cover is set.
        for (Element img : row.select("img")) {
            String src = img.absUrl("src");
            if (src.contains("over")) {
                if (!src.contains("Unavailable")) {
                    res.setCover(src);
                }
                break;
            }
        }

        res.setPage(page);

        Element titleLink = row.select("a.title").first();
        if (titleLink != null) {
            try {
                String path = new URL(titleLink.absUrl("href")).getPath();
                Matcher matcher = idPattern.matcher(path);
                if (matcher.find()) {
                    res.setId(matcher.group(1));
                }
            } catch (MalformedURLException e) {
                // The result is still listed, just without an id.
                e.printStackTrace();
            }
        }

        reslist.add(res);
    }
    return new SearchRequestResult(reslist, rescount, page);
}

/**
 * Finds the COinS {@code span.Z3988} of a result row, looking inside the row first and then
 * inside its parent if that parent is an {@code li}. Returns {@code null} if there is none.
 */
private static Element findCoinsSpan(Element row) {
    if (row.select("span.Z3988").size() == 1) {
        return row.select("span.Z3988").first();
    }
    Element parent = row.parent();
    if (parent != null && parent.tagName().equals("li") &&
            parent.select("span.Z3988").size() > 0) {
        return parent.select("span.Z3988").first();
    }
    return null;
}

/**
 * Builds the HTML description from the query string stored in the span's {@code title}
 * attribute: {@code rft.btitle}/{@code rft.atitle} in bold, {@code rft.au} and
 * {@code rft.date} each on a new line. Returns {@code null} if the attribute cannot be parsed.
 */
private static String buildCoinsDescription(Element coins) {
    List<NameValuePair> pairs;
    try {
        pairs = URLEncodedUtils.parse(new URI("http://dummy/?" + coins.attr("title")), "UTF-8");
    } catch (URISyntaxException e) {
        e.printStackTrace();
        return null;
    }
    StringBuilder description = new StringBuilder();
    for (NameValuePair nv : pairs) {
        String value = nv.getValue();
        if (value == null || value.trim().equals("")) {
            continue;
        }
        String name = nv.getName();
        if (name.equals("rft.btitle") || name.equals("rft.atitle")) {
            description.append("<b>").append(value).append("</b>");
        } else if (name.equals("rft.au") || name.equals("rft.date")) {
            description.append("<br />").append(value);
        }
    }
    return description.toString();
}

/**
 * Derives the availability status from the row's own classes or from nested status markers.
 * Returns {@code null} if none of the known markers is present.
 */
private static SearchResult.Status detectRowStatus(Element row) {
    if (row.hasClass("available") || row.hasClass("internet")) {
        return SearchResult.Status.GREEN;
    } else if (row.hasClass("reservable")) {
        return SearchResult.Status.YELLOW;
    } else if (row.hasClass("not-available")) {
        return SearchResult.Status.RED;
    } else if (row.select(".status.available").size() > 0) {
        return SearchResult.Status.GREEN;
    } else if (row.select(".status .label-success").size() > 0) {
        return SearchResult.Status.GREEN;
    } else if (row.select(".status .label-important").size() > 0) {
        return SearchResult.Status.RED;
    } else if (row.select(".status.checkedout").size() > 0) {
        return SearchResult.Status.RED;
    }
    return null;
}
/**
 * Result filtering hook. This implementation performs no request and always returns
 * {@code null}.
 */
@Override
public SearchRequestResult filterResults(Filter filter, Filter.Option option)
throws IOException, OpacErrorException {
return null;
}
/**
 * Fetches another page of the most recent search.
 * <p>
 * The parameters are rebuilt from {@code last_query}, which is set by {@code search}, and a
 * {@code page} parameter is appended.
 *
 * @param page the page number to request
 * @return the parsed result page
 */
@Override
public SearchRequestResult searchGetPage(int page)
        throws IOException, OpacErrorException, JSONException {
    final List<NameValuePair> pageParams = buildSearchParams(last_query);
    pageParams.add(new BasicNameValuePair("page", String.valueOf(page)));

    final String url = opac_url + "/Search/Results" + buildHttpGetParams(pageParams);
    final String body = httpGet(url, getDefaultEncoding());
    return parse_search(Jsoup.parse(body), page);
}
/**
 * Loads and parses the detail page of a single record.
 *
 * @param id         record id, appended to {@code /Record/}
 * @param homebranch not used by this implementation
 * @return the parsed record
 * @throws RuntimeException wrapping a {@link JSONException} thrown while parsing
 */
@Override
public DetailedItem getResultById(String id, String homebranch)
        throws IOException, OpacErrorException {
    if (!initialised) {
        start();
    }
    final String recordUrl = opac_url + "/Record/" + id;
    final Document recordPage = Jsoup.parse(httpGet(recordUrl, getDefaultEncoding()));
    // The base URI is needed so that relative image URLs can be resolved via absUrl().
    recordPage.setBaseUri(recordUrl);
    try {
        return parseDetail(id, recordPage, data);
    } catch (JSONException configError) {
        throw new RuntimeException(configError);
    }
}
/**
 * Parses a record detail page into a {@link DetailedItem}.
 * <p>
 * Title, cover and the rows of the first table inside {@code .record} are read first. Then
 * either the volumes (if an element with id {@code Volumes} exists) or the copies are parsed.
 * A {@link JSONException} raised while parsing volumes or copies is only logged, so the
 * remaining details are still returned.
 *
 * @param id   id to store on the result
 * @param doc  the parsed detail page
 * @param data library configuration; {@code baseurl} is read when a detail cell contains a
 *             link, and the copy layout keys are read by {@code parseCopies}
 * @return the parsed record
 * @throws OpacErrorException if the page contains an error message
 * @throws JSONException      if {@code baseurl} is needed but missing from {@code data}
 */
static DetailedItem parseDetail(String id, Document doc, JSONObject data)
        throws OpacErrorException, JSONException {
    Elements errors = doc.select("p.error, p.errorMsg, .alert-error");
    if (errors.size() > 0) {
        throw new OpacErrorException(errors.text());
    }

    DetailedItem res = new DetailedItem();
    res.setId(id);

    Elements title = doc.select(".record h1, .record [itemprop=name], .record [property=name]");
    if (title.size() > 0) {
        res.setTitle(title.first().text());
    }

    // The first image whose URL contains "over" decides the cover; a URL containing
    // "Unavailable" means no cover is set.
    for (Element img : doc.select(".record img, #cover img")) {
        String src = img.absUrl("src");
        if (src.contains("over")) {
            if (!src.contains("Unavailable")) {
                res.setCover(src);
            }
            break;
        }
    }

    // A page without a detail table yields no details instead of a NullPointerException.
    Element detailTable = doc.select(".record table").first();
    if (detailTable != null) {
        for (Element tr : detailTable.select("tr")) {
            // Rows without a label cell and a value cell cannot be turned into a detail.
            if (tr.children().size() < 2) {
                continue;
            }
            Element valueCell = tr.child(1);
            String text = valueCell.text();
            Elements links = valueCell.select("a");
            if (links.size() > 0) {
                String href = links.attr("href");
                // Append the link target unless it starts with "/" or the cell text already
                // contains the catalogue's base URL.
                if (!href.startsWith("/") && !text.contains(data.getString("baseurl"))) {
                    text += " " + href;
                }
            }
            res.addDetail(new Detail(tr.child(0).text(), text));
        }
    }

    try {
        if (doc.select("#Volumes").size() > 0) {
            parseVolumes(res, doc, data);
        } else {
            parseCopies(res, doc, data);
        }
    } catch (JSONException e) {
        e.printStackTrace();
    }
    return res;
}
/**
 * Adds one {@link Volume} per link found in the rows of the first table inside
 * {@code .recordsubcontent} or {@code .tab-container}.
 * <p>
 * The volume id is taken from the link's {@code href} via {@code idPattern}; a link whose
 * href does not match is still added, with its title only. Nothing is added if the container
 * or the table is missing.
 *
 * @param res  item that receives the volumes
 * @param doc  the parsed detail page
 * @param data library configuration (not used here)
 */
private static void parseVolumes(DetailedItem res, Document doc, JSONObject data) {
    // only tested in Münster
    // e.g. https://www.stadt-muenster.de/opac2/Record/0900944
    Element container = doc.select(".recordsubcontent, .tab-container").first();
    if (container == null) {
        return;
    }
    Element table = container.select("table").first();
    if (table == null) {
        return;
    }
    for (Element link : table.select("tr a")) {
        Volume volume = new Volume();
        Matcher matcher = idPattern.matcher(link.attr("href"));
        if (matcher.find()) {
            volume.setId(matcher.group(1));
        }
        volume.setTitle(link.text());
        res.addVolume(volume);
    }
}
/**
 * Parses the holdings of a record into {@link Copy} objects, using the layout named by the
 * {@code copystyle} key of {@code data}.
 * <ul>
 * <li>{@code doublestacked}: inside {@code .tab-container}, each {@code h5} sets the branch
 * and each table holds the call number in the second cell of its first row, followed by one
 * row per copy (barcode, status).</li>
 * <li>{@code stackedtable}: inside {@code .recordsubcontent} or {@code .tab-container}, the
 * branch comes from an {@code h3} or a table caption. The {@code copytable} object maps copy
 * field names to either a column index or a {@code "column/line"} string, where the line
 * counts {@code br}-separated lines within the cell. Keys starting with {@code _} are
 * options: {@code _offset} is the number of leading rows to skip.</li>
 * </ul>
 * Any other {@code copystyle} adds nothing. A missing container, an empty wrapper
 * {@code div}, or a row with fewer cells than a configured column is skipped rather than
 * raising an exception.
 *
 * @param res  item that receives the copies
 * @param doc  the parsed detail page
 * @param data library configuration
 * @throws JSONException if {@code copystyle} is {@code stackedtable} and {@code copytable}
 *                       is missing
 */
static void parseCopies(DetailedItem res, Document doc, JSONObject data) throws JSONException {
    String copystyle = data.optString("copystyle");
    if ("doublestacked".equals(copystyle)) {
        // e.g. http://vopac.nlg.gr/Record/393668/Holdings#tabnav
        // for Athens_GreekNationalLibrary
        Element container = doc.select(".tab-container").first();
        if (container == null) {
            return;
        }
        String branch = "";
        for (Element child : container.children()) {
            if (child.tagName().equals("h5")) {
                branch = child.text();
            } else if (child.tagName().equals("table")) {
                int i = 0;
                String callNumber = "";
                for (Element row : child.select("tr")) {
                    boolean hasTwoCells = row.children().size() > 1;
                    if (i == 0) {
                        // The first row carries the call number shared by all copies below.
                        if (hasTwoCells) {
                            callNumber = row.child(1).text();
                        }
                    } else if (hasTwoCells) {
                        Copy copy = new Copy();
                        copy.setBranch(branch);
                        copy.setShelfmark(callNumber);
                        copy.setBarcode(row.child(0).text());
                        copy.setStatus(row.child(1).text());
                        res.addCopy(copy);
                    }
                    i++;
                }
            }
        }
    } else if ("stackedtable".equals(copystyle)) {
        // e.g. http://search.lib.auth.gr/Record/376356
        // or https://katalog.ub.uni-leipzig.de/Record/0000196115
        // or https://www.stadt-muenster.de/opac2/Record/0367968
        // Read the configuration first so that a missing "copytable" is still reported.
        JSONObject copytable = data.getJSONObject("copytable");
        Element container = doc.select(".recordsubcontent, .tab-container").first();
        // .tab-container is used in Muenster.
        if (container == null) {
            return;
        }
        int offset = copytable.optInt("_offset", 0);
        String branch = "";
        for (Element outer : container.children()) {
            Element child = outer;
            if (child.tagName().equals("div")) {
                // A wrapper div is replaced by its first child; an empty one is skipped.
                if (child.children().isEmpty()) {
                    continue;
                }
                child = child.child(0);
            }
            if (child.tagName().equals("h3")) {
                branch = child.text();
            } else if (child.tagName().equals("table")) {
                if (child.select("caption").size() > 0) {
                    // Leipzig_Uni
                    branch = child.select("caption").first().ownText();
                }

                String callNumber = null;
                if ("headrow".equals(copytable.optString("signature"))) {
                    Element headRow = child.select("tr").first();
                    if (headRow != null && headRow.children().size() > 1) {
                        callNumber = headRow.child(1).text();
                    }
                }

                int i = 0;
                for (Element row : child.select("tr")) {
                    if (i < offset) {
                        i++;
                        continue;
                    }
                    int cellCount = row.children().size();
                    Copy copy = new Copy();
                    if (callNumber != null) {
                        copy.setShelfmark(callNumber);
                    }
                    copy.setBranch(branch);

                    Iterator<?> keys = copytable.keys();
                    while (keys.hasNext()) {
                        String key = (String) keys.next();
                        if (key.startsWith("_")) {
                            continue;
                        }
                        if (copytable.optString(key, "").contains("/")) {
                            // Leipzig_Uni
                            String[] splitted = copytable.getString(key).split("/");
                            int col = Integer.parseInt(splitted[0]);
                            int line = Integer.parseInt(splitted[1]);
                            if (col < 0 || col >= cellCount) {
                                continue;
                            }
                            // j counts the <br>-separated lines of the cell seen so far.
                            int j = 0;
                            for (Node node : row.child(col).childNodes()) {
                                if (node instanceof Element) {
                                    if (((Element) node).tagName().equals("br")) {
                                        j++;
                                    } else if (j == line) {
                                        copy.set(key, ((Element) node).text());
                                    }
                                } else if (node instanceof TextNode && j == line &&
                                        !((TextNode) node).text().trim().equals("")) {
                                    copy.set(key, ((TextNode) node).text());
                                }
                            }
                        } else {
                            // Thessaloniki_University
                            // Non-numeric values (such as "headrow") yield -1 and are skipped.
                            int col = copytable.optInt(key, -1);
                            if (col < 0 || col >= cellCount) {
                                continue;
                            }
                            copy.set(key, row.child(col).text());
                        }
                    }
                    res.addCopy(copy);
                    i++;
                }
            }
        }
    }
}
/**
 * Position-based detail lookup. This implementation performs no request and always returns
 * {@code null}.
 */
@Override
public DetailedItem getResult(int position) throws IOException, OpacErrorException {
return null;
}
/**
 * Starts a session: runs the superclass start-up and then posts the current language code as
 * {@code mylang} to the advanced search page.
 *
 * @throws IOException if the request fails
 */
public void start() throws IOException {
    super.start();

    final List<NameValuePair> form = new ArrayList<>();
    form.add(new BasicNameValuePair("mylang", languageCode));

    final String target = opac_url + "/Search/Advanced";
    httpPost(target, new UrlEncodedFormEntity(form), getDefaultEncoding());
}
/**
 * Reads the available search fields from the advanced search page.
 * <p>
 * Text fields come from the options of {@code select#search_type0_0}. If there are none,
 * they are taken from {@code searchFields["key"] = "label";} assignments in inline scripts.
 * Every {@code select} named {@code filter[]} becomes a dropdown field with an extra empty
 * first entry.
 *
 * @return the list of search fields, each carrying a {@code meaning} entry in its data
 * @throws IOException   if a request fails
 * @throws JSONException if the field data cannot be written
 */
@Override
public List<SearchField> parseSearchFields()
        throws IOException, OpacErrorException, JSONException {
    start();
    // The query string must not contain spaces around "=", otherwise the parameter name
    // is sent as "mylang " and the language selection is lost.
    String html = httpGet(opac_url + "/Search/Advanced?mylang=" + languageCode,
            getDefaultEncoding());
    Document doc = Jsoup.parse(html);

    List<SearchField> fields = new ArrayList<>();

    Elements options = doc.select("select#search_type0_0 option");
    for (Element option : options) {
        TextSearchField field = new TextSearchField();
        field.setDisplayName(option.text());
        field.setId(option.val());
        field.setHint("");
        field.setData(new JSONObject());
        field.getData().put("meaning", option.val());
        fields.add(field);
    }

    if (fields.size() == 0) {
        // Weird JavaScript, e.g. view-source:http://vopac.nlg.gr/Search/Advanced
        Pattern pattern_key = Pattern
                .compile("searchFields\\[\"([^\"]+)\"\\] = \"([^\"]+)\";");
        for (Element script : doc.select("script")) {
            if (!script.html().contains("searchFields")) {
                continue;
            }
            for (String line : script.html().split("\n")) {
                Matcher matcher = pattern_key.matcher(line);
                if (matcher.find()) {
                    TextSearchField field = new TextSearchField();
                    field.setDisplayName(matcher.group(2));
                    field.setId(matcher.group(1));
                    field.setHint("");
                    field.setData(new JSONObject());
                    field.getData().put("meaning", field.getId());
                    fields.add(field);
                }
            }
        }
    }

    Elements selects = doc.select("select");
    for (Element select : selects) {
        if (!select.attr("name").equals("filter[]")) {
            continue;
        }
        DropdownSearchField field = new DropdownSearchField();
        if (select.parent().select("label").size() > 0) {
            field.setDisplayName(select.parent().select("label").first()
                                       .text());
        }
        field.setId(select.attr("name") + select.attr("id"));

        // The meaning defaults to the select's id; an option value of the form
        // "prefix:value" overrides it with the prefix (the last such option wins).
        String meaning = select.attr("id");
        field.addDropdownValue("", "");
        for (Element option : select.select("option")) {
            if (option.val().contains(":")) {
                meaning = option.val().split(":")[0];
            }
            field.addDropdownValue(option.val(), option.text());
        }
        field.setData(new JSONObject());
        field.getData().put("meaning", meaning);
        fields.add(field);
    }
    return fields;
}
/**
 * Builds the public URL of a record.
 *
 * @param id    record id, appended to {@code /Record/}
 * @param title not used by this implementation
 * @return the catalogue base URL followed by {@code /Record/} and the id
 */
@Override
public String getShareUrl(String id, String title) {
    final String recordPath = "/Record/" + id;
    return opac_url + recordPath;
}
/**
 * Returns the feature flags of this API: endless scrolling and account change, combined
 * bitwise.
 */
@Override
public int getSupportFlags() {
    int flags = SUPPORT_FLAG_ENDLESS_SCROLLING;
    flags |= SUPPORT_FLAG_CHANGE_ACCOUNT;
    return flags;
}
/**
 * Lists the languages offered by the {@code mylang} select on the advanced search page.
 * <p>
 * An option value that appears as a value in {@code languageCodes} is reported under the
 * corresponding key; any other option value is reported unchanged.
 *
 * @return the set of language codes; empty if the page has no {@code mylang} select
 * @throws IOException if the request fails
 */
@Override
public Set<String> getSupportedLanguages() throws IOException {
    final Set<String> supported = new HashSet<>();
    final String body = httpGet(opac_url + "/Search/Advanced", getDefaultEncoding());
    final Document page = Jsoup.parse(body);

    // The selector matches nothing when the page has no mylang select at all.
    for (Element option : page.select("select[name=mylang] option")) {
        final String code = option.val();
        if (!languageCodes.containsValue(code)) {
            supported.add(code);
            continue;
        }
        for (Map.Entry<String, String> mapping : languageCodes.entrySet()) {
            if (mapping.getValue().equals(code)) {
                supported.add(mapping.getKey());
                break;
            }
        }
    }
    return supported;
}
/**
 * Returns the character encoding passed to the HTTP helper calls in this class, which is
 * always {@code "UTF-8"}.
 */
protected String getDefaultEncoding() {
return "UTF-8";
}
/**
 * Selects the language used for subsequent requests.
 *
 * @param language a language code; if it is a key of {@code languageCodes} the mapped value
 *                 is stored, otherwise the code itself is stored unchanged
 */
@Override
public void setLanguage(String language) {
    if (languageCodes.containsKey(language)) {
        languageCode = languageCodes.get(language);
    } else {
        languageCode = language;
    }
}
/**
 * Reservation hook. This implementation performs no request and always returns
 * {@code null}.
 */
@Override
public ReservationResult reservation(DetailedItem item, Account account,
int useraction, String selection) throws IOException {
return null;
}
/**
 * Prolongation hook for a single item. This implementation performs no request and always
 * returns {@code null}.
 */
@Override
public ProlongResult prolong(String media, Account account, int useraction,
String selection) throws IOException {
return null;
}
/**
 * Prolongation hook for all items. This implementation performs no request and always
 * returns {@code null}.
 */
@Override
public ProlongAllResult prolongAll(Account account, int useraction, String selection)
throws IOException {
return null;
}
/**
 * Cancellation hook. This implementation performs no request and always returns
 * {@code null}.
 */
@Override
public CancelResult cancel(String media, Account account, int useraction,
String selection) throws IOException, OpacErrorException {
return null;
}
/**
 * Account data hook. This implementation performs no request and always returns
 * {@code null}.
 */
@Override
public AccountData account(Account account)
throws IOException, JSONException, OpacErrorException {
return null;
}
/**
 * Account validation hook. This implementation has an empty body: it performs no check and
 * never throws.
 */
@Override
public void checkAccountData(Account account)
throws IOException, JSONException, OpacErrorException {
}
}