/*
* ModeShape (http://www.modeshape.org)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.modeshape.sequencer.text;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import javax.jcr.Binary;
import javax.jcr.NamespaceRegistry;
import javax.jcr.Node;
import javax.jcr.Property;
import javax.jcr.RepositoryException;
import org.modeshape.common.annotation.ThreadSafe;
import org.modeshape.common.util.CheckArg;
import org.modeshape.jcr.api.nodetype.NodeTypeManager;
import org.modeshape.jcr.api.sequencer.Sequencer;
/**
 * The base class for the text sequencers. This class treats the text to be sequenced as a series of rows, with each row delimited
 * by a line terminator. Concrete subclasses provide their own mechanisms for splitting a row of data into a series of columns.
 * <p>
 * This class provides some fundamental capabilities, including the ability to set a {@link #setCommentMarker(String) comment
 * marker}, {@link #setMaximumLinesToRead(int) limit the number of lines} to be read from a file, and
 * {@link #setRowFactoryClassName(String) provide custom transformations} from the sets of columns to the graph structure.
 * </p>
 */
@ThreadSafe
public abstract class AbstractTextSequencer extends Sequencer {

    private String rowFactoryClassName = null;
    private String commentMarker = null;
    private int maximumLinesToRead = -1;

    /**
     * Registers the node types declared in the {@code sequencer-text.cnd} resource with the supplied node type manager.
     *
     * @param registry the namespace registry; not used by this implementation
     * @param nodeTypeManager the manager with which the node types are registered
     * @throws RepositoryException if the node types cannot be registered
     * @throws IOException if the CND resource cannot be read
     */
    @Override
    public void initialize( NamespaceRegistry registry,
                            NodeTypeManager nodeTypeManager ) throws RepositoryException, IOException {
        registerNodeTypes("sequencer-text.cnd", nodeTypeManager, true);
    }

    /**
     * Reads the binary value of the input property line by line, skips {@link #setCommentMarker(String) comment lines}, splits
     * every remaining line into columns with {@link #parseLine(String)} and hands the columns to the configured
     * {@link RowFactory}. Reading stops as soon as the {@link #setMaximumLinesToRead(int) maximum number of lines} has been
     * recorded.
     *
     * @param inputProperty the property whose binary value is to be sequenced
     * @param outputNode the node under which the row factory records the rows
     * @param context the sequencing context; not used by this implementation
     * @return always {@code true}
     * @throws Exception if the row factory cannot be created, or if reading, parsing or recording fails
     */
    @Override
    public boolean execute( Property inputProperty,
                            Node outputNode,
                            Context context ) throws Exception {
        Binary binaryValue = inputProperty.getBinary();
        CheckArg.isNotNull(binaryValue, "binary");

        RowFactory rowFactory = createRowFactory();
        int rowCount = 0;
        BufferedReader reader = null;
        try {
            // NOTE(review): no charset is given here, so the JVM's default charset is used to decode the stream -
            // confirm this is intended before changing it, since doing so would alter how existing content is decoded.
            reader = new BufferedReader(new InputStreamReader(binaryValue.getStream()));
            String line;
            while ((line = reader.readLine()) != null) {
                if (isComment(line)) {
                    // Comment lines are ignored and do not count towards the line limit
                    continue;
                }
                rowFactory.recordRow(outputNode, parseLine(line));
                if (isLineLimitReached(++rowCount)) {
                    // Nothing after this point would be recorded, so do not bother reading the rest of the stream
                    break;
                }
            }
        } finally {
            try {
                if (reader != null) {
                    reader.close();
                }
            } catch (Exception e) {
                // Best effort only: a failure to close must not mask the outcome of the sequencing itself
                getLogger().warn(e, "Cannot close reader ");
            }
        }
        return true;
    }

    /**
     * Determines whether the supplied line starts with the configured comment marker.
     *
     * @param line the line to check; may not be null
     * @return {@code true} if a comment marker is set and the line begins with it, {@code false} otherwise
     */
    private boolean isComment( String line ) {
        return this.commentMarker != null && line.startsWith(this.commentMarker);
    }

    /**
     * Determines whether the configured line limit has been reached.
     *
     * @param rowCount the number of (non-comment) rows recorded so far
     * @return {@code true} if a positive limit is configured and at least that many rows have been recorded; {@code false} if
     *         more rows may be recorded or if the limit is non-positive (i.e., unlimited)
     */
    private boolean isLineLimitReached( int rowCount ) {
        return maximumLinesToRead > 0 && rowCount >= maximumLinesToRead;
    }

    /**
     * Sets the comment marker to use. Any line that begins with the comment marker will be ignored and will not be counted as a
     * read line for the purposes of the {@link #getMaximumLinesToRead() maximum line limitation}.
     *
     * @param commentMarker the string that indicates that the line is a comment and should be ignored; null indicates that there
     *        is no comment marker
     */
    public void setCommentMarker( String commentMarker ) {
        this.commentMarker = commentMarker;
    }

    /**
     * @return the current comment marker; may be null
     */
    public String getCommentMarker() {
        return commentMarker;
    }

    /**
     * @return the maximum number of lines to read when sequencing; non-positive numbers indicate that all lines should be read
     *         and sequenced
     */
    public int getMaximumLinesToRead() {
        return maximumLinesToRead;
    }

    /**
     * Sets the maximum number of lines to read. When this number is reached during the sequencing of any particular stream, no
     * further lines are read and the stream is closed, so remaining lines (if any) are ignored.
     * {@link #setCommentMarker(String) Comment lines} do not count towards the number of lines read.
     *
     * @param maximumLinesToRead the maximum number of lines to read; a non-positive number indicates that all lines should be
     *        read and sequenced.
     */
    public void setMaximumLinesToRead( int maximumLinesToRead ) {
        this.maximumLinesToRead = maximumLinesToRead;
    }

    /**
     * @return the current row factory class name; may be null, which indicates that {@link DefaultRowFactory the default row
     *         factory} is used
     */
    public String getRowFactoryClassName() {
        return rowFactoryClassName;
    }

    /**
     * Sets the custom row factory class name.
     *
     * @param rowFactoryClassName the fully-qualified class name of the new custom row factory implementation; null indicates that
     *        {@link DefaultRowFactory the default row factory} should be used.
     */
    public void setRowFactoryClassName( String rowFactoryClassName ) {
        this.rowFactoryClassName = rowFactoryClassName;
    }

    /**
     * Parse the given row into its constituent columns.
     *
     * @param row the row to be parsed
     * @return an array of columns; never null
     */
    protected abstract String[] parseLine( String row );

    /**
     * Creates an instance of the {@link #getRowFactoryClassName() row factory} configured for this sequencer. When no class name
     * has been configured a {@link DefaultRowFactory} is returned.
     *
     * @return an implementation of the named class; never null
     * @throws ClassNotFoundException if the named row factory class cannot be located
     * @throws IllegalAccessException if the row factory class or its null constructor is not accessible.
     * @throws InstantiationException if the row factory represents an abstract class, an interface, an array class, a primitive
     *         type, or void; or if the class has no null constructor; or if the instantiation fails for some other reason.
     * @throws ClassCastException if the named class does not implement {@link RowFactory}
     */
    private RowFactory createRowFactory() throws ClassNotFoundException, IllegalAccessException, InstantiationException {
        if (this.rowFactoryClassName == null) {
            return new DefaultRowFactory();
        }

        Class<?> rowFactoryClass = Class.forName(this.rowFactoryClassName);
        return (RowFactory)rowFactoryClass.newInstance();
    }
}