package fr.acxio.tools.agia.alfresco;
/*
* Copyright 2014 Acxio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.namespace.NamespaceContext;
import org.alfresco.webservice.repository.RepositoryServiceSoapBindingStub;
import org.alfresco.webservice.types.NamedValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.StepExecutionListener;
import org.springframework.batch.item.ExecutionContext;
import org.springframework.batch.item.ItemReader;
import org.springframework.batch.item.ItemStream;
import org.springframework.batch.item.ItemStreamException;
import org.springframework.batch.item.NonTransientResourceException;
import org.springframework.batch.item.ParseException;
import org.springframework.batch.item.UnexpectedInputException;
import org.springframework.beans.factory.DisposableBean;
import org.springframework.beans.factory.InitializingBean;
import com.googlecode.sardine.DavResource;
import com.googlecode.sardine.Sardine;
import fr.acxio.tools.agia.alfresco.domain.Aspect;
import fr.acxio.tools.agia.alfresco.domain.Document;
import fr.acxio.tools.agia.alfresco.domain.Folder;
import fr.acxio.tools.agia.alfresco.domain.Node;
import fr.acxio.tools.agia.alfresco.domain.Property;
import fr.acxio.tools.agia.alfresco.domain.QName;
/**
 * Spring Batch reader that walks a directory tree through WebDAV and emits one
 * {@link Node} ({@link Folder} or {@link Document}) per call to {@link #read()}.
 *
 * <p>
 * The traversal is depth-first: a folder is returned first, then its content.
 * The traversal position is the pair (current directory path, stack of indexes
 * into the listing of each directory level); both are written to the step
 * {@link ExecutionContext} in {@link #update(ExecutionContext)} and restored in
 * {@link #open(ExecutionContext)}.
 * </p>
 *
 * <p>
 * Design notes:
 * </p>
 * <ol>
 * <li>Query nodes via WebDAV: no limit on the result, but the result is
 * file-like, without any property (a query may be used if the size of a
 * directory is &lt; 1000).</li>
 * <li>A process indicator would use the NodeRef so that a processor can mark
 * the node (is it really necessary?).</li>
 * <li>Store the file index in the job to allow restart and continue (see
 * sbia/ch08/FilesInDirectoryItemReader).</li>
 * <li>A node processor will aggregate nodes into a NodeList (Hibernate / Alf /
 * Drive) or transform nodes into a FieldSet (CSV).</li>
 * </ol>
 */
public class AlfrescoNodeReader extends AlfrescoServicesConsumer implements ItemReader<Node>, ItemStream, StepExecutionListener, InitializingBean,
        DisposableBean {

    private static final Logger LOGGER = LoggerFactory.getLogger(AlfrescoNodeReader.class);

    /**
     * Captures the parent directory (with its trailing slash) of a path: group 1
     * when the path ends with a slash, group 2 otherwise.
     */
    private static final Pattern PATH_EXTRACT_PATTERN = Pattern.compile("^(?:(.*/)[^/]*/|(?:(.*/))?[^/]*)$");

    /** Matches one {@code key=value} pair of a {@code |}-separated list. */
    private static final Pattern CMCONTENT_PATTERN = Pattern.compile("([^|=]+)=([^|=]+)");

    // NOTE: the key below contains a typo ("Indexdes"); it is kept as is because
    // changing it would make previously persisted execution contexts unreadable.
    private static final String CONTEXT_KEY_CURRENTINDEXES = "alfresco.reader.currentIndexdes";
    private static final String CONTEXT_KEY_CURRENTPATH = "alfresco.reader.currentPath";

    private static final String SUBPROP_ENCODING = "encoding";
    private static final String SUBPROP_CONTENT_URL = "contentUrl";
    private static final String SUBPROP_MIMETYPE = "mimetype";
    private static final String PROP_CM_CONTENT = "cm:content";
    private static final String WEBDAV_PATH = "webdav";

    private NamespaceContext namespaceContext;
    private DavResourcesResolver davResourcesResolver;

    /** Path of the directory to read, appended to the path of the WebDAV base URI. */
    private String path;

    /** Path of the directory currently being listed. */
    private String currentDirPath;

    /**
     * Stack of listing indexes: the head is the next index to read in
     * {@link #currentDirPath}, the following elements are the indexes at which
     * each parent level was left.
     */
    private Deque<Integer> currentIndexes;

    private Sardine sardine;
    private URI baseURI;

    public void setNamespaceContext(NamespaceContext sNamespaceContext) {
        namespaceContext = sNamespaceContext;
    }

    public void setDavResourcesResolver(DavResourcesResolver sDavResourcesResolver) {
        davResourcesResolver = sDavResourcesResolver;
    }

    public void setPath(String sPath) {
        path = sPath;
    }

    /** Drops the references to the traversal state and to the WebDAV session. */
    @Override
    public void destroy() throws Exception {
        currentDirPath = null;
        currentIndexes = null;
        sardine = null;
        baseURI = null;
    }

    @Override
    public void afterPropertiesSet() throws Exception {
        // TODO Add properties checks
    }

    @Override
    public void beforeStep(StepExecution sStepExecution) {
        // Nothing to do
    }

    @Override
    public ExitStatus afterStep(StepExecution sStepExecution) {
        return ExitStatus.COMPLETED;
    }

    /**
     * Computes the WebDAV base URI, restores the traversal position from the
     * execution context (or starts at index 0 of the configured path when the
     * context holds no position) and starts the WebDAV session.
     *
     * @param sExecutionContext the context the position is restored from
     * @throws ItemStreamException if the WebDAV URI cannot be built
     */
    @Override
    public void open(ExecutionContext sExecutionContext) throws ItemStreamException {
        String aFullPath;
        try {
            baseURI = new URI(getAlfrescoService().getWebappAddress()).resolve(WEBDAV_PATH);
            aFullPath = getWebDavDirectoryURI(baseURI.getPath() + path).getPath();
        } catch (URISyntaxException e) {
            throw new ItemStreamException(e);
        }

        currentDirPath = sExecutionContext.getString(CONTEXT_KEY_CURRENTPATH, aFullPath);

        Object aCurrentIndexes = sExecutionContext.get(CONTEXT_KEY_CURRENTINDEXES);
        if (aCurrentIndexes == null) {
            currentIndexes = new ArrayDeque<Integer>();
            currentIndexes.addFirst(0);
        } else {
            // update() stores the stack as an Integer[], head first
            currentIndexes = new ArrayDeque<Integer>(Arrays.asList((Integer[]) aCurrentIndexes));
        }

        sardine = getAlfrescoService().startWebDavSession();
    }

    /**
     * Stores the current directory path and the index stack into the execution
     * context.
     *
     * @param sExecutionContext the context to update
     */
    @Override
    public void update(ExecutionContext sExecutionContext) throws ItemStreamException {
        sExecutionContext.putString(CONTEXT_KEY_CURRENTPATH, currentDirPath);
        sExecutionContext.put(CONTEXT_KEY_CURRENTINDEXES, currentIndexes.toArray(new Integer[0]));
    }

    @Override
    public void close() throws ItemStreamException {
        // Nothing to do
    }

    /**
     * Builds a URI with the scheme, user info, host and port of the base URI and
     * the given path.
     *
     * @param sAbsolutePath the path component of the URI to build
     * @return the URI pointing to the given path on the WebDAV server
     * @throws URISyntaxException if the components do not form a valid URI
     */
    protected URI getWebDavDirectoryURI(String sAbsolutePath) throws URISyntaxException {
        return new URI(baseURI.getScheme(), baseURI.getUserInfo(), baseURI.getHost(), baseURI.getPort(), sAbsolutePath, null, null);
    }

    /**
     * Returns the next node of the traversal.
     *
     * @return the next folder or document, or {@code null} when the traversal
     *         is over (including on any call made after the end was reached)
     */
    @Override
    public Node read() throws Exception, UnexpectedInputException, ParseException, NonTransientResourceException {
        Node aResult = null;
        init();

        // The index stack is left empty once the root directory has been fully
        // read: keep answering "no more item" instead of failing on removeFirst().
        if (currentIndexes.isEmpty()) {
            return null;
        }

        RepositoryServiceSoapBindingStub repositoryService = getAlfrescoService().getRepositoryService();
        List<DavResource> aResources = davResourcesResolver.getDirectoryList(sardine, getWebDavDirectoryURI(currentDirPath).toASCIIString());

        // True when the current iteration produced no node but the position
        // moved (self entry skipped, or moved back to the parent directory).
        boolean aScanAgain;
        do {
            aScanAgain = false;
            boolean hasMore = !currentIndexes.isEmpty();
            int aLength = aResources.size();
            int aCurrentIndex = currentIndexes.removeFirst();

            if ((aLength == 0) || (aCurrentIndex >= aLength)) {
                // Current directory is exhausted: go 1 step upper, if any
                hasMore = !currentIndexes.isEmpty();
                if (hasMore) {
                    aCurrentIndex = currentIndexes.removeFirst() + 1;
                    Matcher aPathMatcher = PATH_EXTRACT_PATTERN.matcher(currentDirPath);
                    if (aPathMatcher.matches()) {
                        currentDirPath = (aPathMatcher.group(1) != null) ? aPathMatcher.group(1) : aPathMatcher.group(2);
                    }
                    aResources = davResourcesResolver.getDirectoryList(sardine, getWebDavDirectoryURI(currentDirPath).toASCIIString());
                    aScanAgain = true;
                }
            } else {
                DavResource aResource = aResources.get(aCurrentIndex);
                if (!aResource.isDirectory()) {
                    // Handle content node
                    aResult = readNode(repositoryService, aResource, aCurrentIndex);
                    aCurrentIndex++;
                } else if (currentDirPath.equals(aResource.getPath())) {
                    // Skip current dir (webdav node lists itself)
                    aCurrentIndex++;
                    aScanAgain = true;
                } else {
                    aResult = readNode(repositoryService, aResource, aCurrentIndex);
                    // Go 1 step deeper: remember where the parent was left and
                    // restart at the beginning of the sub-directory
                    currentDirPath = aResource.getPath();
                    currentIndexes.addFirst(aCurrentIndex);
                    aCurrentIndex = 0;
                    aResources = davResourcesResolver.getDirectoryList(sardine, getWebDavDirectoryURI(currentDirPath).toASCIIString());
                }
            }

            if (hasMore) {
                currentIndexes.addFirst(aCurrentIndex);
            }
        } while (aScanAgain);

        return aResult;
    }

    /**
     * Logs the resource and builds its node, using the resource path relative
     * to the WebDAV base path.
     */
    private Node readNode(RepositoryServiceSoapBindingStub repositoryService, DavResource aResource, int aIndex) throws NodePathException {
        LOGGER.debug("Row {}: {}", aIndex, aResource.getPath());
        String aPath = aResource.getPath().substring(baseURI.getPath().length());
        return buildNode(repositoryService, aResource, aPath);
    }

    /**
     * Creates a {@link Folder} or a {@link Document} for the given WebDAV
     * resource and fills it with the type, properties and aspects of the first
     * repository node matching the given path, if any.
     *
     * @param repositoryService the repository service used to look the node up
     * @param aResource the WebDAV resource to convert
     * @param aPath the path of the resource, relative to the WebDAV base path
     * @return the node; it carries no type, property or aspect when no
     *         repository node matches the path
     * @throws NodePathException if the content URI of a document cannot be built
     */
    protected Node buildNode(RepositoryServiceSoapBindingStub repositoryService, DavResource aResource, String aPath) throws NodePathException {
        org.alfresco.webservice.types.Node[] aNodes = getRepositoryMatchingNodes(repositoryService, aPath);

        Node aResult;
        if (aResource.isDirectory()) {
            aResult = new Folder();
        } else {
            aResult = new Document();
        }

        if ((aNodes == null) || (aNodes.length == 0)) {
            return aResult;
        }

        org.alfresco.webservice.types.Node aRepositoryNode = aNodes[0];
        aResult.setType(new QName(aRepositoryNode.getType(), namespaceContext));

        // Guard against a missing array so that a node without properties or
        // aspects does not break the whole read.
        NamedValue[] aProperties = aRepositoryNode.getProperties();
        if (aProperties != null) {
            for (NamedValue aProperty : aProperties) {
                Property aNodeProperty = new Property();
                aNodeProperty.setName(new QName(aProperty.getName(), namespaceContext));
                aNodeProperty.addValue(aProperty.getValue());
                aResult.addProperty(aNodeProperty);

                if ((PROP_CM_CONTENT.equals(aNodeProperty.getName().getShortName())) && (!aResource.isDirectory())) {
                    // The content property carries the mimetype and the encoding
                    // of the document as key=value pairs
                    Document aDocument = (Document) aResult;
                    Map<String, String> aValues = readCMContent(aProperty.getValue());
                    aDocument.setMimeType(aValues.get(SUBPROP_MIMETYPE));
                    try {
                        aDocument.setContentPath(getWebDavDirectoryURI(aResource.getPath()).toASCIIString());
                    } catch (URISyntaxException e) {
                        throw new NodePathException(e);
                    }
                    aDocument.setEncoding(aValues.get(SUBPROP_ENCODING));
                }
            }
        }

        String[] aAspects = aRepositoryNode.getAspects();
        if (aAspects != null) {
            for (String aAspect : aAspects) {
                Aspect aNodeAspect = new Aspect();
                aNodeAspect.setName(new QName(aAspect, namespaceContext));
                aResult.addAspect(aNodeAspect);
            }
        }

        return aResult;
    }

    /**
     * Splits a {@code key=value|key=value|...} string into a map.
     *
     * @param sValue the string to parse, may be {@code null}
     * @return the key/value pairs found; empty when the value is {@code null}
     *         or contains no pair
     */
    protected Map<String, String> readCMContent(String sValue) {
        Map<String, String> aResult = new HashMap<String, String>(5);
        if (sValue == null) {
            return aResult;
        }
        Matcher aMatcher = CMCONTENT_PATTERN.matcher(sValue);
        while (aMatcher.find()) {
            aResult.put(aMatcher.group(1), aMatcher.group(2));
        }
        return aResult;
    }
}