/* $Id: RepositoryDocument.java 988245 2010-08-23 18:39:35Z kwright $ */
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.agents.interfaces;
import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.core.common.DateParser;
import java.util.*;
import java.io.*;
/** This class contains the complete information for a document, as read
* from a repository.  The generator of this document is one of the
* repository connectors; the user of the class is the incremental ingester.
*
* Data contained within is described in part by a binary stream (which is expected to be processed),
* and partly by already-extracted textual data (named fields whose values are Strings, Readers, or Dates).
* The streams and Readers MUST BE CLOSED BY THE CALLER when the repository document instance has been ingested.
* The streams also WILL NOT ever be reset; they are read to the end once only.
*
* Each named field is held in exactly one typed form at a time (String[], Reader[] or Date[]);
* adding a field under an existing name replaces whatever was there before.
*/
public class RepositoryDocument
{
  public static final String _rcsid = "@(#)$Id: RepositoryDocument.java 988245 2010-08-23 18:39:35Z kwright $";

  // Security types
  public final static String SECURITY_TYPE_DOCUMENT = "document";
  public final static String SECURITY_TYPE_SHARE = "share";
  public final static String SECURITY_TYPE_PARENT = "parent";
  // Enumerated security type; add an integer to the end (deprecated)
  public final static String SECURITY_TYPE_DIRECTORY_LEVEL = "directory_";

  // Member variables.

  /** The binary content stream, or null if none has been set. */
  protected InputStream binaryFieldData = null;
  /** The length of the binary content stream, in bytes. */
  protected long binaryLength = 0;
  /** The names of all fields currently present.
  * MUST be independent of fields map because we iterate over this and may change fields. */
  protected final Set<String> fieldSet = new HashSet<String>();
  /** Field name to current value array (a String[], Reader[] or Date[]). */
  protected final Map<String,Object> fields = new HashMap<String,Object>();
  /** Field name to String form of the field, when available. */
  protected final Map<String,String[]> stringFields = new HashMap<String,String[]>();
  /** Field name to Reader form of the field, when available. */
  protected final Map<String,Reader[]> readerFields = new HashMap<String,Reader[]>();
  /** Field name to Date form of the field, when available. */
  protected final Map<String,Date[]> dateFields = new HashMap<String,Date[]>();
  /** Security type to the allow/deny tokens for that type. */
  protected final Map<String,Security> securityLevels = new HashMap<String,Security>();
  protected String fileName = "docname";
  protected String contentMimeType = "application/octet-stream";
  protected Date createdDate = null;
  protected Date modifiedDate = null;
  protected Date indexingDate = null;
  protected Long originalSize = null;

  /** Constructor.
  */
  public RepositoryDocument()
  {
  }

  /** Create a duplicate of this Repository Document.  This is how you are expected to write
  * transformation connectors: you create a duplicate, and override the fields you want to change.
  * For streams etc, only the overridden fields need to be explicitly managed by the transformation
  * connector, since the original fields will be handled by the connector's caller.
  *
  * The duplicate shares the binary stream, the Readers, and the value arrays with this document
  * (only references are copied), but it has its own field maps and its own security holders, so
  * adding/removing fields or setting ACLs on the duplicate does not alter this document.
  *@return the duplicate.
  */
  public RepositoryDocument duplicate()
  {
    RepositoryDocument rval = new RepositoryDocument();
    rval.binaryFieldData = binaryFieldData;
    rval.binaryLength = binaryLength;
    rval.fileName = fileName;
    rval.contentMimeType = contentMimeType;
    rval.createdDate = createdDate;
    rval.modifiedDate = modifiedDate;
    rval.indexingDate = indexingDate;
    rval.originalSize = originalSize;
    rval.fieldSet.addAll(fieldSet);
    rval.fields.putAll(fields);
    rval.stringFields.putAll(stringFields);
    rval.readerFields.putAll(readerFields);
    rval.dateFields.putAll(dateFields);
    // Security holders are mutable and are updated in place by the setSecurity*() methods,
    // so each one must be copied; sharing the instance would let an ACL override on the
    // duplicate leak back into this document.
    for (Map.Entry<String,Security> entry : securityLevels.entrySet())
    {
      rval.securityLevels.put(entry.getKey(),copySecurity(entry.getValue()));
    }
    return rval;
  }

  /** Make an independent holder carrying the same allow/deny token arrays.
  *@param source is the holder to copy; may be null.
  *@return the copy, or null if the source was null.
  */
  private static Security copySecurity(Security source)
  {
    if (source == null)
      return null;
    Security copy = new Security();
    copy.setACL(source.getACL());
    copy.setDenyACL(source.getDenyACL());
    return copy;
  }

  /** Clear all fields.  Security information, the binary stream, and the other
  * document attributes are left untouched.
  */
  public void clearFields()
  {
    fieldSet.clear();
    fields.clear();
    stringFields.clear();
    dateFields.clear();
    readerFields.clear();
  }

  /** Set the document's original (repository) size.  Use null to indicate that the size is
  * unknown.
  *@param size is the size.
  */
  public void setOriginalSize(Long size)
  {
    originalSize = size;
  }

  /** Get the document's original size.
  *@return the original repository document size, or null if unknown.
  */
  public Long getOriginalSize()
  {
    return originalSize;
  }

  /** Set the document's created date.  Use null to indicate that the date is unknown.
  *@param date is the date.
  */
  public void setCreatedDate(Date date)
  {
    createdDate = date;
  }

  /** Get the document's created date.  Returns null if the date is unknown.
  *@return the date.
  */
  public Date getCreatedDate()
  {
    return createdDate;
  }

  /** Set the document's last-modified date.  Use null to indicate that the date is unknown.
  *@param date is the date.
  */
  public void setModifiedDate(Date date)
  {
    modifiedDate = date;
  }

  /** Get the document's modified date.  Returns null if the date is unknown.
  *@return the date.
  */
  public Date getModifiedDate()
  {
    return modifiedDate;
  }

  /** Set the document's indexing date.  Use null to indicate that the date is unknown.
  *@param date is the date.
  */
  public void setIndexingDate(Date date)
  {
    indexingDate = date;
  }

  /** Get the document's indexing date.  Returns null if the date is unknown.
  *@return the date.
  */
  public Date getIndexingDate()
  {
    return indexingDate;
  }

  /** Set the document's mime type.
  *@param mimeType is the mime type.
  */
  public void setMimeType(String mimeType)
  {
    contentMimeType = mimeType;
  }

  /** Get the document's mime type.
  *@return the mime type.
  */
  public String getMimeType()
  {
    return contentMimeType;
  }

  /** Locate or create a specified security level.
  *@param securityType is the security type.
  *@return the existing holder for the type, or a newly registered empty one.
  */
  protected Security getSecurityLevel(String securityType)
  {
    Security s = securityLevels.get(securityType);
    if (s == null)
    {
      s = new Security();
      securityLevels.put(securityType, s);
    }
    return s;
  }

  /** Enumerate the active security types for this document.
  *@return an iterator over the security types.
  */
  public Iterator<String> securityTypesIterator()
  {
    return securityLevels.keySet().iterator();
  }

  /** Set security values for a given security type.
  * The values are recorded only when BOTH arrays are non-null; if either is null the
  * call does nothing.  Use setSecurityACL() or setSecurityDenyACL() to set just one.
  *@param securityType is the security type.
  *@param acl is the acl.
  *@param denyAcl is the deny acl.
  */
  public void setSecurity(String securityType, String[] acl, String[] denyAcl)
  {
    if (acl != null && denyAcl != null)
    {
      Security s = getSecurityLevel(securityType);
      s.setACL(acl);
      s.setDenyACL(denyAcl);
    }
  }

  /** Set security acl for a given security type.  A null acl is ignored.
  *@param securityType is the security type.
  *@param acl is the acl.
  */
  public void setSecurityACL(String securityType, String[] acl)
  {
    if (acl != null)
    {
      Security s = getSecurityLevel(securityType);
      s.setACL(acl);
    }
  }

  /** Set security deny acl for a given security type.  A null deny acl is ignored.
  *@param securityType is the security type.
  *@param denyAcl is the deny acl.
  */
  public void setSecurityDenyACL(String securityType, String[] denyAcl)
  {
    if (denyAcl != null)
    {
      Security s = getSecurityLevel(securityType);
      s.setDenyACL(denyAcl);
    }
  }

  /** Get security acl for a given security type.
  *@param securityType is the security type.
  *@return the acl, which may be null.
  */
  public String[] getSecurityACL(String securityType)
  {
    Security s = securityLevels.get(securityType);
    if (s != null)
      return s.getACL();
    return null;
  }

  /** Get security deny acl for a given security type.
  *@param securityType is the security type.
  *@return the acl, which may be null.
  */
  public String[] getSecurityDenyACL(String securityType)
  {
    Security s = securityLevels.get(securityType);
    if (s != null)
      return s.getDenyACL();
    return null;
  }

  /** Set the binary field.
  * Data is described by a binary stream (which is expected to be processed),
  * This stream MUST BE CLOSED BY THE CALLER when the repository document instance has been ingested.
  * The stream also WILL NOT ever be reset; it is read to the end once only.
  *@param binaryFieldData is the input stream containing binary data.
  *@param binaryLength is the length of the stream, in bytes.  This is a REQUIRED parameter.
  */
  public void setBinary(InputStream binaryFieldData, long binaryLength)
  {
    this.binaryFieldData = binaryFieldData;
    this.binaryLength = binaryLength;
  }

  /** Get the binary fields (if any).
  *@return the binary stream.
  */
  public InputStream getBinaryStream()
  {
    return binaryFieldData;
  }

  /** Set the file name.
  *@param fileName is the file name.
  */
  public void setFileName(String fileName)
  {
    this.fileName = fileName;
  }

  /** Get the file Name.
  *@return the string of file name.
  */
  public String getFileName()
  {
    return fileName;
  }

  /** Get the binary length.
  *@return the length in bytes.
  */
  public long getBinaryLength()
  {
    return binaryLength;
  }

  /** Remove a field, in whatever form it is currently held.
  *@param fieldName is the field name.
  */
  public void removeField(String fieldName)
  {
    fieldSet.remove(fieldName);
    fields.remove(fieldName);
    stringFields.remove(fieldName);
    readerFields.remove(fieldName);
    dateFields.remove(fieldName);
  }

  /** Add/remove a multivalue date field.
  *@param fieldName is the field name.
  *@param fieldData is the multi-valued data (an array of Dates).  Null means
  * to remove the entry.
  */
  public void addField(String fieldName, Date[] fieldData)
    throws ManifoldCFException
  {
    if (fieldData == null)
    {
      removeField(fieldName);
      return;
    }
    fieldSet.add(fieldName);
    fields.put(fieldName,fieldData);
    // The field is now held as Dates only; drop any other form stored under this name.
    stringFields.remove(fieldName);
    readerFields.remove(fieldName);
    dateFields.put(fieldName,fieldData);
  }

  /** Add/remove a date field.
  *@param fieldName is the field name.
  *@param fieldData is the single-valued data (a Date).  Null means "no value".
  */
  public void addField(String fieldName, Date fieldData)
    throws ManifoldCFException
  {
    if (fieldData == null)
      addField(fieldName, (Date[])null);
    else
      addField(fieldName,new Date[]{fieldData});
  }

  /** Add/remove a multivalue character field.
  * Data is described here by an array of Readers (which are expected to be processed),
  * These Readers MUST BE CLOSED BY THE CALLER when the repository document instance has been ingested.
  * The Readers also WILL NOT ever be reset; they are read to the end once only.
  *@param fieldName is the field name.
  *@param fieldData is the multi-valued data (as an array of Readers).  Null means
  * to remove the entry from the document.
  */
  public void addField(String fieldName, Reader[] fieldData)
    throws ManifoldCFException
  {
    if (fieldData == null)
    {
      removeField(fieldName);
      return;
    }
    fieldSet.add(fieldName);
    fields.put(fieldName,fieldData);
    // The field is now held as Readers only; drop any other form stored under this name.
    stringFields.remove(fieldName);
    readerFields.put(fieldName,fieldData);
    dateFields.remove(fieldName);
  }

  /** Add/remove a character field.
  * Data is described here by a Reader (which is expected to be processed),
  * This Reader MUST BE CLOSED BY THE CALLER when the repository document instance has been ingested.
  * The Reader also WILL NOT ever be reset; it is read to the end once only.
  *@param fieldName is the field name.
  *@param fieldData is the single-valued data (as a Reader).  Null means "no value".
  */
  public void addField(String fieldName, Reader fieldData)
    throws ManifoldCFException
  {
    if (fieldData == null)
      addField(fieldName, (Reader[])null);
    else
      addField(fieldName,new Reader[]{fieldData});
  }

  /** Add/Remove a multivalue character field.
  *@param fieldName is the field name.
  *@param fieldData is the multi-valued data (as an array of Strings).  Null means
  * to remove the entry from the document.
  */
  public void addField(String fieldName, String[] fieldData)
    throws ManifoldCFException
  {
    if (fieldData == null)
    {
      removeField(fieldName);
      return;
    }
    fieldSet.add(fieldName);
    fields.put(fieldName,fieldData);
    // The field is now held as Strings only; drop any other form stored under this name.
    readerFields.remove(fieldName);
    stringFields.put(fieldName,fieldData);
    dateFields.remove(fieldName);
  }

  /** Add a character field.
  *@param fieldName is the field name.
  *@param fieldData is the single-valued data (as a String).  Null means "no value".
  */
  public void addField(String fieldName, String fieldData)
    throws ManifoldCFException
  {
    if (fieldData == null)
      addField(fieldName,(String[])null);
    else
      addField(fieldName,new String[]{fieldData});
  }

  /** Get a field.
  *@param fieldName is the field name.
  *@return the field data (a Reader array, a String array, or a Date array), or null
  * if there is no such field.
  */
  public Object[] getField(String fieldName)
  {
    return (Object[])fields.get(fieldName);
  }

  /** Get a field as an array of strings.  If the data was originally in the form
  * of Readers, a one-time conversion is made to the String form, so that the same
  * field can be fetched multiple times.  If the data was originally in the form
  * of Dates, then the dates are converted to standard ISO8601 format.
  *@param fieldName is the field name.
  *@return the field data, or null if there is no such field.
  */
  public String[] getFieldAsStrings(String fieldName)
    throws IOException
  {
    String[] stringFieldData = stringFields.get(fieldName);
    if (stringFieldData != null)
      return stringFieldData;

    Date[] dateFieldData = dateFields.get(fieldName);
    if (dateFieldData != null)
    {
      // Date fields are formatted on every call; the result is not stored.
      String[] formatted = new String[dateFieldData.length];
      for (int i = 0; i < dateFieldData.length; i++)
      {
        formatted[i] = DateParser.formatISO8601Date(dateFieldData[i]);
      }
      return formatted;
    }

    Reader[] readers = readerFields.get(fieldName);
    if (readers == null)
      return null;

    String[] newValues = new String[readers.length];
    char[] buffer = new char[65536];
    for (int i = 0; i < newValues.length; i++)
    {
      newValues[i] = readFully(readers[i],buffer);
    }
    stringFields.put(fieldName,newValues);
    // Reader is no longer useful, since we've read it to the end.
    // Remove it from the record accordingly.
    // NOTE WELL: This could cause side effects if the same
    // field is accessed simultaneously two different ways!
    readerFields.remove(fieldName);
    fields.put(fieldName,newValues);
    return newValues;
  }

  /** Read everything remaining in a Reader into a String.  The Reader is not closed.
  *@param reader is the Reader to drain.
  *@param buffer is the scratch buffer to read through.
  *@return the characters read, as a String.
  */
  private static String readFully(Reader reader, char[] buffer)
    throws IOException
  {
    StringBuilder text = new StringBuilder();
    int amt;
    while ((amt = reader.read(buffer)) != -1)
    {
      text.append(buffer,0,amt);
    }
    return text.toString();
  }

  /** Get a field as an array of Readers.  If the field was originally
  * strings or dates, a one-time creation of a Readers array is made; that array is
  * remembered, so later calls return the same Reader instances (which will already
  * be at end-of-stream if a previous caller consumed them).
  *@param fieldName is the field name.
  *@return the field data, or null if there is no such field.
  */
  public Reader[] getFieldAsReaders(String fieldName)
  {
    Reader[] readerFieldData = readerFields.get(fieldName);
    if (readerFieldData != null)
      return readerFieldData;

    Date[] dateFieldData = dateFields.get(fieldName);
    if (dateFieldData != null)
    {
      Reader[] newValues = new Reader[dateFieldData.length];
      for (int i = 0; i < newValues.length; i++)
      {
        newValues[i] = new StringReader(DateParser.formatISO8601Date(dateFieldData[i]));
      }
      readerFields.put(fieldName,newValues);
      return newValues;
    }

    String[] oldValues = stringFields.get(fieldName);
    if (oldValues == null)
      return null;

    Reader[] newValues = new Reader[oldValues.length];
    for (int i = 0; i < newValues.length; i++)
    {
      newValues[i] = new StringReader(oldValues[i]);
    }
    readerFields.put(fieldName,newValues);
    return newValues;
  }

  /** Get field as an array of Date objects.
  * If the field was originally not a Date field, null is returned.
  *@param fieldName is the field name.
  *@return the field data.
  */
  public Date[] getFieldAsDates(String fieldName)
  {
    return dateFields.get(fieldName);
  }

  /** Get the number of fields.
  *@return the number of distinct field names present.
  */
  public int fieldCount()
  {
    return fieldSet.size();
  }

  /** Iterate through the field name Strings.
  *@return an iterator over the names of the fields present.
  */
  public Iterator<String> getFields()
  {
    return fieldSet.iterator();
  }

  /** This class describes allow and deny tokens for a specific security class. */
  protected static class Security
  {
    /** Allow tokens */
    protected String[] tokens = null;
    /** Deny tokens */
    protected String[] denyTokens = null;

    /** Constructor. */
    public Security()
    {
    }

    /** Set allow tokens. */
    public void setACL(String[] tokens)
    {
      this.tokens = tokens;
    }

    /** Get allow tokens */
    public String[] getACL()
    {
      return tokens;
    }

    /** Set deny tokens */
    public void setDenyACL(String[] tokens)
    {
      denyTokens = tokens;
    }

    /** Get deny tokens */
    public String[] getDenyACL()
    {
      return denyTokens;
    }
  }
}