/*
* Copyright (C) 2012 Jan Pokorsky
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package cz.cas.lib.proarc.common.export;
import com.yourmediashelf.fedora.client.FedoraClient;
import com.yourmediashelf.fedora.client.FedoraClientException;
import com.yourmediashelf.fedora.client.response.FedoraResponse;
import com.yourmediashelf.fedora.client.response.ListDatastreamsResponse;
import com.yourmediashelf.fedora.generated.access.DatastreamType;
import cz.cas.lib.proarc.common.fedora.DigitalObjectException;
import cz.cas.lib.proarc.common.fedora.FoxmlUtils;
import cz.cas.lib.proarc.common.fedora.RemoteStorage;
import cz.cas.lib.proarc.common.fedora.RemoteStorage.RemoteObject;
import cz.cas.lib.proarc.common.fedora.relation.RelationEditor;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Exports particular data streams of queried digital objects.
 * It can traverse hierarchy of digital objects.
 *
 * <p>An instance keeps mutable traversal state and a shared copy buffer
 * without any synchronization, so it must not be used by several threads
 * at the same time. The traversal state is reset at the start of every
 * {@link #export export} call.
 *
 * @author Jan Pokorsky
 */
public final class DataStreamExport {

    private static final Logger LOG = Logger.getLogger(DataStreamExport.class.getName());

    private final RemoteStorage rstorage;
    /** already exported PIDs to prevent loops */
    private final HashSet<String> exportedPids = new HashSet<String>();
    /** PIDs scheduled for export */
    private final Queue<String> toExport = new LinkedList<String>();
    /** copy buffer reused for every exported stream */
    private final byte[] buffer = new byte[10 * 1024];

    public DataStreamExport(RemoteStorage rstorage) {
        this.rstorage = rstorage;
    }

    /**
     * Exports the requested data streams of the given digital objects to files.
     *
     * @param output existing directory where the target folder is created
     * @param hierarchy {@code true} to export also members of each object
     * @param pids PIDs of digital objects to export; must not be empty
     * @param dsIds IDs of data streams to export; must not be empty
     * @return the folder returned by {@code ExportUtils.createFolder}; its name
     *         is derived from the first PID and the first data stream ID
     * @throws ExportException if listing, reading or writing of a stream fails
     * @throws IllegalStateException if {@code output} is not an existing directory
     * @throws IllegalArgumentException if {@code pids} or {@code dsIds} is
     *         {@code null} or empty
     */
    public File export(File output, boolean hierarchy, List<String> pids, List<String> dsIds) throws ExportException {
        if (!output.exists() || !output.isDirectory()) {
            throw new IllegalStateException(String.valueOf(output));
        }
        if (pids == null || pids.isEmpty() || dsIds == null || dsIds.isEmpty()) {
            throw new IllegalArgumentException();
        }
        // Start each export with a clean traversal state. Otherwise PIDs
        // remembered from a previous call would be silently skipped and PIDs
        // left in the queue by a failed call would leak into this one.
        exportedPids.clear();
        toExport.clear();

        File target = ExportUtils.createFolder(output, filename(pids.get(0), dsIds.get(0)));
        toExport.addAll(pids);
        // exportPid may schedule further PIDs (members) while the queue is drained
        for (String pid = toExport.poll(); pid != null; pid = toExport.poll()) {
            exportPid(target, hierarchy, pid, dsIds);
        }
        return target;
    }

    /**
     * Exports the requested streams of a single object and, when
     * {@code hierarchy} is set, schedules its members for export.
     * A PID that has already been processed is ignored.
     */
    private void exportPid(File target, boolean hierarchy, String pid, List<String> dsIds) throws ExportException {
        if (!exportedPids.add(pid)) {
            // already processed; prevents endless loops in cyclic hierarchies
            return;
        }
        RemoteObject remote = rstorage.find(pid);
        List<String> availableDsIds;
        try {
            availableDsIds = filterDataStreams(dsIds, getDataStreams(remote));
        } catch (FedoraClientException ex) {
            throw new ExportException(pid, ex);
        }
        if (hierarchy) {
            try {
                RelationEditor relationEditor = new RelationEditor(remote);
                toExport.addAll(relationEditor.getMembers());
            } catch (DigitalObjectException ex) {
                // report the failing PID like the other failures of this method
                throw new ExportException(pid, ex);
            }
        }
        for (String dsId : availableDsIds) {
            try {
                exportPid(target, remote, dsId);
            } catch (FedoraClientException ex) {
                throw new ExportException(filename(pid, dsId), ex);
            } catch (IOException ex) {
                throw new ExportException(filename(pid, dsId), ex);
            }
        }
    }

    /**
     * Writes the content of a single data stream to a file in {@code target}.
     * The file is named by {@link #filename(String, String)}.
     */
    private void exportPid(File target, RemoteObject remote, String dsId) throws FedoraClientException, IOException {
        InputStream input = getDataStreamDissemination(remote, dsId);
        File f = new File(target, filename(remote.getPid(), dsId));
        // true once the content was copied; decides whether a close failure
        // is reported to the caller or only logged
        boolean done = false;
        try {
            FileOutputStream output = new FileOutputStream(f);
            try {
                copy(input, output);
                done = true;
            } finally {
                close(output, f, done);
            }
        } finally {
            close(input, f, done);
        }
    }

    /**
     * Closes the stream. A failure is rethrown when {@code rethrow} is set;
     * otherwise it is only logged so that it does not hide the exception
     * that is already being propagated.
     */
    private static void close(java.io.Closeable stream, File f, boolean rethrow) throws IOException {
        try {
            stream.close();
        } catch (IOException ex) {
            if (rethrow) {
                throw ex;
            }
            LOG.log(Level.SEVERE, f.toString(), ex);
        }
    }

    /** Opens the content of the given data stream of the remote object. */
    private InputStream getDataStreamDissemination(RemoteObject remote, String dsId) throws FedoraClientException {
        FedoraResponse response = FedoraClient.getDatastreamDissemination(remote.getPid(), dsId)
                .execute(remote.getClient());
        return response.getEntityInputStream();
    }

    /** Lists data streams of the remote object. */
    private List<DatastreamType> getDataStreams(RemoteObject remote) throws FedoraClientException {
        ListDatastreamsResponse response = FedoraClient.listDatastreams(remote.getPid())
                .execute(remote.getClient());
        return response.getDatastreams();
    }

    /**
     * Keeps only requested stream IDs that the object really declares.
     * FedoraClient.getDatastreamDissemination throws HTTP 404 in case of
     * undefined streams.
     *
     * @param dsId requested data stream IDs
     * @param streams data streams declared by the object
     * @return requested IDs found in {@code streams}, in the order of {@code streams}
     */
    private List<String> filterDataStreams(List<String> dsId, List<DatastreamType> streams) {
        ArrayList<String> result = new ArrayList<String>(dsId.size());
        for (DatastreamType stream : streams) {
            if (dsId.contains(stream.getDsid())) {
                result.add(stream.getDsid());
                if (dsId.size() == result.size()) {
                    // every requested stream was found
                    break;
                }
            }
        }
        return result;
    }

    /**
     * Copies all bytes from {@code is} to {@code os} using the shared buffer.
     * Neither stream is closed here; closing is up to the caller that owns them.
     */
    private void copy(InputStream is, OutputStream os) throws IOException {
        for (int length = 0; (length = is.read(buffer)) > 0; ) {
            os.write(buffer, 0, length);
        }
    }

    /**
     * Builds the name of an exported file from the PID (converted with
     * {@code FoxmlUtils.pidAsUuid}) and the data stream ID, joined by a dot.
     */
    static String filename(String pid, String dsId) {
        return FoxmlUtils.pidAsUuid(pid) + '.' + dsId;
    }
}