/* See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * Esri Inc. licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.esri.gpt.control.webharvest.client.oai; import com.esri.gpt.control.webharvest.IterationContext; import com.esri.gpt.framework.http.HttpClientRequest; import com.esri.gpt.framework.http.XmlHandler; import com.esri.gpt.framework.resource.api.Publishable; import com.esri.gpt.framework.resource.api.Resource; import com.esri.gpt.framework.resource.api.SourceUri; import com.esri.gpt.framework.resource.common.CommonPublishable; import com.esri.gpt.framework.resource.common.StringUri; import com.esri.gpt.framework.resource.query.Criteria; import com.esri.gpt.framework.util.ReadOnlyIterator; import com.esri.gpt.framework.util.Val; import com.esri.gpt.framework.xml.NodeListAdapter; import java.io.IOException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Iterator; import java.util.NoSuchElementException; import java.util.logging.Logger; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; /** * OAI folders. */ class OaiFolders implements Iterable<Resource> { /** logger */ private static final Logger LOGGER = Logger.getLogger(OaiFolders.class.getCanonicalName()); /** Date format. */ private static final SimpleDateFormat DF = new SimpleDateFormat("yyyy-MM-dd"); /** iteration context */ private IterationContext context; /** service info */ private OaiInfo info; /** service proxy */ private OaiProxy proxy; /** query criteria */ private Criteria criteria; /** * Creates new instance of folders. * @param context iteration context * @param info service info * @param proxy service proxy * @param criteria query criteria */ public OaiFolders(IterationContext context, OaiInfo info, OaiProxy proxy, Criteria criteria) { if (context == null) throw new IllegalArgumentException("No context provided."); if (info == null) throw new IllegalArgumentException("No info provided."); if (proxy == null) throw new IllegalArgumentException("No proxy provided."); this.context = context; this.info = info; this.proxy = proxy; this.criteria = criteria; } @Override public Iterator<Resource> iterator() { return new OaiFolderIterator(); } /** * OAI folders iterator. */ private class OaiFolderIterator extends ReadOnlyIterator<Resource> { /** resumption token */ private String resumptionToken = null; /** next records */ private Iterable<Resource> nextOaiRecords = null; /** records counter */ private int recs; /** no more records*/ private boolean noMore; @Override public boolean hasNext() { if (!noMore && nextOaiRecords == null) { if (resumptionToken == null || resumptionToken.length() > 0) { try { advanceToNextRecords(); } catch (IOException ex) { noMore = true; context.onIterationException(ex); } } else { noMore = true; } } return !noMore; } @Override public Resource next() { if (!hasNext()) { throw new NoSuchElementException(); } final Iterable<Resource> records = nextOaiRecords; nextOaiRecords = null; return new Resource() { @Override public Iterable<Resource> getNodes() { return records; } }; } /** * Advances to the next set of records. * @throws IOException if advancing fails */ private void advanceToNextRecords() throws IOException { LOGGER.finer("Advancing to the next group of records."); try { HttpClientRequest cr = new HttpClientRequest(); cr.setUrl(info.newListIdsUrl(resumptionToken, criteria.getFromDate(), criteria.getToDate())); XmlHandler sh = new XmlHandler(false); cr.setContentHandler(sh); cr.setCredentialProvider(info.newCredentialProvider()); cr.execute(); Document doc = sh.getDocument(); XPath xPath = XPathFactory.newInstance().newXPath(); NodeList nodeList = (NodeList) xPath.evaluate("/OAI-PMH/ListIdentifiers/header", doc, XPathConstants.NODESET); boolean maxReached = false; ArrayList<Resource> resources = new ArrayList<Resource>(); for (Node nd : new NodeListAdapter(nodeList)) { recs++; maxReached = criteria!=null && criteria.getMaxRecords()!=null && recs>criteria.getMaxRecords(); if (maxReached) break; final String id = (String) xPath.evaluate("identifier/text()", nd, XPathConstants.STRING); Publishable publishable = new CommonPublishable() { private StringUri uri = new StringUri(id); @Override public SourceUri getSourceUri() { return uri; } @Override public String getContent() throws IOException { return proxy.read(id); } }; resources.add(publishable); } nextOaiRecords = resources; if (!maxReached) { resumptionToken = Val.chkStr((String) xPath.evaluate( "/OAI-PMH/ListIdentifiers/resumptionToken/text()", doc, XPathConstants.STRING)); } else { resumptionToken = ""; // this is to stop advancing } } catch (XPathExpressionException ex) { throw new IOException("Error accessing metadata. Cause: " + ex.getMessage()); } } } }