/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.bop.rdf.aggregate;
import java.util.Map;
import com.bigdata.bop.BOp;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IValueExpression;
import com.bigdata.bop.NV;
import com.bigdata.bop.aggregate.AggregateBase;
import com.bigdata.bop.solutions.PipelinedAggregationOp;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.constraints.INeedsMaterialization;
import com.bigdata.rdf.internal.constraints.IVValueExpression;
import com.bigdata.rdf.model.BigdataValueFactory;
import com.bigdata.rdf.model.BigdataValueFactoryImpl;
import com.bigdata.rdf.sparql.ast.DummyConstantNode;
/**
* Operator combines the string values over the presented binding sets for the
* given variable. Missing values are ignored. The initial value is an empty
* plain literal.
*
* @author thompsonbry
*/
public class GROUP_CONCAT extends AggregateBase<IV> implements INeedsMaterialization {
/**
*
*/
private static final long serialVersionUID = 1L;
public interface Annotations extends AggregateBase.Annotations {
/**
* The namespace of the lexicon relation.
*/
public String NAMESPACE = GROUP_CONCAT.class.getName() + ".namespace";
/**
* Required string property provides the separator used when combining
* the {@link IValueExpression} computed for each solution within the
* group.
*
* Use basic string to match sparql scalarValues param
*/
String SEPARATOR = "separator";
/**
* The maximum #of values to concatenate (positive integer and
* <code>-1</code> to indicate no bound) (default
* {@value #DEFAULT_VALUE_LIMIT})
*/
String VALUE_LIMIT = GROUP_CONCAT.class.getName() + ".valueLimit";
/**
* The default indicates no limit.
*/
final int DEFAULT_VALUE_LIMIT = -1;
/**
* The maximum #of characters permitted in the generated value (positive
* integer and <code>-1</code> to indicate no bound) (default
* {@value #DEFAULT_CHARACTER_LIMIT}).
*/
String CHARACTER_LIMIT = GROUP_CONCAT.class.getName()
+ ".characterLimit";
/**
* The default indicates no limit.
*/
final int DEFAULT_CHARACTER_LIMIT = -1;
}
public GROUP_CONCAT(GROUP_CONCAT op) {
super(op);
}
public GROUP_CONCAT(BOp[] args, Map<String, Object> annotations) {
super(args, annotations);
if (!(getRequiredProperty(Annotations.NAMESPACE) instanceof String))
throw new IllegalArgumentException();
// if (getProperty(Annotations.NAMESPACE) == null)
// throw new IllegalArgumentException();
}
/**
*
* @param var
* The variable whose values will be combined.
* @param sep
* The separator string (note that a space (0x20) is the default
* in the SPARQL recommendation).
*/
public GROUP_CONCAT(final boolean distinct,
final IValueExpression<IV> expr, final String namespace,
final String sep) {
this(new BOp[] { expr }, NV.asMap(//
// new NV(Annotations.FUNCTION_CODE, FunctionCode.GROUP_CONCAT),//
new NV(Annotations.DISTINCT, distinct),//
new NV(Annotations.NAMESPACE, namespace),//
new NV(Annotations.SEPARATOR, sep)//
));
}
private String sep() {
if (sep == null) {
sep = (String) getRequiredProperty(Annotations.SEPARATOR);
}
return sep;
}
private transient String sep;
private int valueLimit() {
if (valueLimit == 0) {
valueLimit = getProperty(Annotations.VALUE_LIMIT,
Annotations.DEFAULT_VALUE_LIMIT);
}
return valueLimit;
}
private transient int valueLimit;
private int characterLimit() {
if (characterLimit == 0) {
characterLimit = getProperty(Annotations.CHARACTER_LIMIT,
Annotations.DEFAULT_CHARACTER_LIMIT);
}
return characterLimit;
}
private transient int characterLimit;
private BigdataValueFactory getValueFactory(){
if (vf == null) {
final String namespace = (String) getRequiredProperty(Annotations.NAMESPACE);
vf = BigdataValueFactoryImpl.getInstance(namespace);
}
return vf;
}
protected transient BigdataValueFactory vf;
/**
* The running concatenation of observed bound values.
* <p>
* Note: This field is guarded by the monitor on the {@link GROUP_CONCAT}
* instance.
*/
private transient StringBuilder aggregated = null;
/**
* The #of values in {@link #aggregated}.
* <p>
* Note: This field is guarded by the monitor on the {@link GROUP_CONCAT}
* instance.
*/
private transient long nvalues = 0;
/**
* <code>false</code> unless either the value limit and/or the character
* length limit has been exceeded.
*/
private transient boolean done = false;
private Throwable firstCause = null;
synchronized public void reset() {
aggregated = null;
nvalues = 0;
done = false;
firstCause = null;
cache();
}
/**
* Cache stuff.
* <p>
* Note: The {@link PipelinedAggregationOp} does NOT invoke {@link #reset()}
* so we have to conditionally cache stuff from {@link #get(IBindingSet)}
* and {@link #done()} as well.
*/
private void cache() {
sep();
valueLimit();
characterLimit();
getValueFactory();
}
synchronized public IV done() {
if(sep == null)
cache();
if (firstCause != null) {
throw new RuntimeException(firstCause);
}
final BigdataValueFactory vf = getValueFactory();
IV ret;
if (aggregated == null) {
ret = DummyConstantNode.toDummyIV(vf.createLiteral(""));
} else {
ret = DummyConstantNode.toDummyIV(vf.createLiteral(aggregated
.toString()));
}
// System.err.println("aggregated:=" + aggregated+" : "+ret);
return ret;
}
synchronized public IV get(final IBindingSet bindingSet) {
try {
return doGet(bindingSet);
} catch (Throwable t) {
if (firstCause == null) {
firstCause = t;
}
throw new RuntimeException(t);
}
}
private IV doGet(final IBindingSet bindingSet) {
if(sep == null)
cache();
final IValueExpression<IV<?, ?>> expr = (IValueExpression<IV<?, ?>>) get(0);
final IV<?, ?> iv = expr.get(bindingSet);
if (iv != null && !done) {
final String str = IVValueExpression.asValue(iv).stringValue();
/*
* This kind of logic needs to be hidden. It causes too many
* problems. Right now IVValueExpression.asValue(IV) knows how
* to handle this correctly.
*/
// if (iv.isInline() && !iv.isExtension()) {
// str = iv.getInlineValue().toString();
// } else {
// str = iv.getValue().stringValue();
// }
if (aggregated == null)
aggregated = new StringBuilder(str);
else {
aggregated.append(sep());
aggregated.append(str);
// System.err.println("aggregated:=" + aggregated);
}
nvalues++;
if (characterLimit() != -1 && aggregated.length() >= characterLimit) {
// Exceeded the character length limit.
aggregated.setLength(characterLimit); // truncate.
done = true;
} else if (valueLimit() != -1 && nvalues >= valueLimit) {
// Exceeded the value limit.
done = true;
}
}
// Note: Nothing returned until done().
return null;
}
/**
* We always need to have the materialized values.
*/
public Requirement getRequirement() {
return INeedsMaterialization.Requirement.ALWAYS;
}
}