package eu.fbk.knowledgestore.data; import java.io.Serializable; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.UUID; import javax.annotation.Nullable; import com.google.common.base.Charsets; import com.google.common.base.Function; import com.google.common.base.Objects; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Ordering; import com.google.common.collect.Sets; import com.google.common.hash.Hasher; import com.google.common.hash.Hashing; import org.openrdf.model.BNode; import org.openrdf.model.Literal; import org.openrdf.model.Resource; import org.openrdf.model.Statement; import org.openrdf.model.URI; import org.openrdf.model.Value; import org.openrdf.model.vocabulary.RDF; import org.openrdf.rio.RDFHandlerException; import eu.fbk.knowledgestore.vocabulary.KS; /** * A record structure characterized by an ID and a generic set of properties. * <p> * A record is a structured identified by an {@link URI} ID and having a number of key-value * properties, where the key is a {@code URI} and the value is a non-empty list of objects, which * can be other {@code Record}s, {@link URI}s, {@link BNode}s, {@link Literal}s or * {@link Statement}s. Records are used to carry the data of resources, representations, mentions, * entities, axioms, contexts and of any structured property value. 
 * </p>
 * <p>
 * Records are created via factory methods {@code create()}:
 * </p>
 * <ul>
 * <li>method {@link #create()} creates an empty record, without ID and properties;</li>
 * <li>method {@link #create(URI, URI...)} creates a new record with the ID and the types (values
 * of property {@code rdf:type}) supplied;</li>
 * <li>method {@link #create(Record, boolean)} creates a copy of a supplied record, possibly
 * performing deep-cloning of its properties.</li>
 * </ul>
 * <p>
 * Record equality ({@link #equals(Object)}, {@link #hashCode()}) is defined in terms of the
 * record ID only, while {@link #toString()} emits only the record type and ID. Beware that the ID
 * can change during a {@code Record} lifetime (via method {@link #setID(URI)}): this provides for
 * increased flexibility, but pay attention not to change the ID when storing records in indexed
 * data structures such as {@code Set}s and {@code Map}s, which rely on {@code hashCode()} and
 * {@code equals()} to produce constant outcomes. Additional method
 * {@link #toString(Map, boolean)} allows for emitting a complete record representation including
 * its properties, while equality of (selected) properties in different records can be checked by
 * comparing the respective hashes, computed via {@link #hash(URI...)}; the same {@code hash()}
 * method can help in creating synthetic IDs based on the values of some properties (e.g.,
 * following a pattern {@code PREFIX + record.hash(p1, p2, ...)}).
 * </p>
 * <p>
 * Access to and manipulation of properties is performed as follows:
 * <ul>
 * <li><b>Listing available properties</b>. Method {@link #getProperties()} returns the list of
 * properties having some value for a record instance.</li>
 * <li><b>Reading properties</b>. The main method is {@link #get(URI)}, which is complemented by a
 * number of auxiliary methods for ease of use. They are described below:
 * <ul>
 * <li>{@link #get(URI)}, {@link #get(URI, Class)} and {@link #get(URI, Class, List)} allow
 * retrieving all the values of a specific property, either as a list of objects or converted to a
 * specific target class; the last method also supports the specification of a default value,
 * which is returned if the property has no values or if conversion fails.</li>
 * <li>{@link #getUnique(URI)}, {@link #getUnique(URI, Class)},
 * {@link #getUnique(URI, Class, Object)} allow retrieving the unique value of a property, either
 * as an object or converted to a specific class; unless a default value is specified, the methods
 * fail in case multiple values are associated to the property, thus helping to enforce the
 * uniqueness expectation.</li>
 * <li>{@link #isTrue(URI)}, {@link #isFalse(URI)} are convenience methods that can be used for
 * boolean properties; they fail if used on properties having multiple or non-boolean values.</li>
 * <li>{@link #isNull(URI)}, {@link #isUnique(URI)} are convenience methods that can be used to
 * test whether a property has, respectively, no value or at most one value.</li>
 * <li>{@link #count(URI)} is a convenience method for counting the values of a property; it may
 * be faster than using {@code get()}.</li>
 * </ul>
 * </li>
 * <li><b>Modifying properties</b>. Two types of methods are offered:
 * <ul>
 * <li>modification of individual properties is done via {@link #set(URI, Object, Object...)},
 * {@link #add(URI, Object, Object...)} and {@link #remove(URI, Object, Object...)}, that allow,
 * respectively, to set all the values of a property, to add some new values to a property or to
 * remove existing values from the values of a property. For ease of use, these methods accept (at
 * least) an argument object which can be a {@code Record}, {@code URI}, {@code BNode},
 * {@code Statement}, {@code Literal}, an object convertible to {@code Literal} or any array or
 * iterable of the former types.
A list of values is extracted from the supplied objects, and used * for modifying the values of the property.</li> * <li>modification of multiple properties at once is done via {@link #clear(URI...)} and * {@link #retain(URI...)}, which remove all the properties respectively matching or not matching * a supplied list, allowing as a special case (no properties specified) to remove all the * properties of a record instance.</li> * </ul> * </ul> * </p> * <p> * Instances of this interface are thread safe. Cloning of record instances (via * {@link #create(Record, boolean)}) is supported and is a relatively inexpensive operation; a * copy-on-write approach is adopted to reduce the memory usage of cloned objects, which share * their state with the source object as long as one of the two is changed. * </p> */ public final class Record implements Serializable, Comparable<Record> { private static final long serialVersionUID = 1L; private static final int LENGTH_INCREMENT = 8; private static final int OFFSET_OF_ID = 0; private static final int OFFSET_OF_SHARED = 1; private static final int OFFSET_OF_PROPERTIES = 2; private static final ThreadLocal<Integer> INDENT_LEVEL = new ThreadLocal<Integer>(); private static final String INDENT_STRING = " "; private Object[] state; private Record(final URI id) { this.state = new Object[OFFSET_OF_PROPERTIES + LENGTH_INCREMENT]; this.state[OFFSET_OF_ID] = id; this.state[OFFSET_OF_SHARED] = Boolean.FALSE; } private Record(final Record record, final boolean deepClone) { synchronized (record) { Object[] state = record.state; if (deepClone) { state = cloneRecursively(state); } if (state != record.state) { state[OFFSET_OF_SHARED] = Boolean.FALSE; } else if (state[OFFSET_OF_SHARED] == Boolean.FALSE) { state[OFFSET_OF_SHARED] = Boolean.TRUE; } this.state = state; } } private static Object[] cloneRecursively(final Object[] array) { Object[] result = array; for (int i = 0; i < array.length; ++i) { final Object element = array[i]; Object newElement 
= element; if (element instanceof Record) { newElement = new Record((Record) element, true); } else if (element instanceof Object[]) { newElement = cloneRecursively((Object[]) element); } if (newElement != element) { if (result == array) { result = array.clone(); } result[i] = newElement; } } return result; } private static Object encode(final Object object) { // the node unchanged is stored; this may change in order to save some memory return object; } private static <T> T decode(final Object object, final Class<T> clazz) { return Data.convert(object, clazz); } @Nullable private URI doGetID() { return (URI) this.state[OFFSET_OF_ID]; } private void doSetID(@Nullable final URI id) { if (!Objects.equal(id, this.state[OFFSET_OF_ID])) { if ((Boolean) this.state[OFFSET_OF_SHARED]) { this.state = this.state.clone(); } this.state[OFFSET_OF_ID] = id; } } private List<URI> doGetProperties() { final int capacity = this.state.length / 2; final List<URI> properties = Lists.newArrayListWithCapacity(capacity); for (int i = OFFSET_OF_PROPERTIES; i < this.state.length; i += 2) { final URI property = (URI) this.state[i]; if (property != null) { properties.add(property); } } return properties; } private int doCount(final URI property) { final int length = this.state.length; for (int i = OFFSET_OF_PROPERTIES; i < length; i += 2) { if (property.equals(this.state[i])) { final Object object = this.state[i + 1]; if (object instanceof Object[]) { return ((Object[]) object).length; } else { return 1; } } } return 0; } @Nullable private <T> Object doGet(final URI property, final Class<T> clazz) { final int length = this.state.length; for (int i = OFFSET_OF_PROPERTIES; i < length; i += 2) { if (property.equals(this.state[i])) { final Object object = this.state[i + 1]; if (object instanceof Object[]) { final Object[] array = (Object[]) object; final List<T> list = Lists.newArrayListWithCapacity(array.length); for (final Object element : array) { list.add(decode(element, clazz)); } return 
list; } else { return decode(object, clazz); } } } return null; } private void doSet(final URI property, final Collection<Object> nodes) { if ((Boolean) this.state[OFFSET_OF_SHARED]) { this.state = this.state.clone(); this.state[OFFSET_OF_SHARED] = Boolean.FALSE; } final int length = this.state.length; if (nodes.isEmpty()) { for (int i = OFFSET_OF_PROPERTIES; i < length; i += 2) { if (property.equals(this.state[i])) { this.state[i] = null; this.state[i + 1] = null; return; } } return; } final Object value; final int size = nodes.size(); if (size == 1) { value = encode(Iterables.get(nodes, 0)); } else { final Object[] array = new Object[size]; int index = 0; for (final Object node : nodes) { array[index++] = encode(node); } value = array; } int nullIndex = -1; for (int i = OFFSET_OF_PROPERTIES; i < length; i += 2) { if (this.state[i] == null) { if (nullIndex < 0) { nullIndex = i; } } else if (property.equals(this.state[i])) { this.state[i + 1] = value; return; } } if (nullIndex >= 0) { this.state[nullIndex] = property; this.state[nullIndex + 1] = value; } else { final Object[] oldState = this.state; this.state = new Object[length + LENGTH_INCREMENT]; System.arraycopy(oldState, 0, this.state, 0, length); this.state[length] = property; this.state[length + 1] = value; } } /** * Creates a new record with no properties and ID assigned. * * @return the created record */ public static Record create() { return new Record(null); } /** * Creates a new record with the ID and the types specified (property {@code rdf:type}), and * no additional properties. * * @param id * the ID of the new record, possibly null in order not to assign it * @param types * the types of the record, assigned to property {@code rdf:type} * @return the created record */ public static Record create(final URI id, final URI... 
types) { final Record record = new Record(id); if (types.length > 0) { record.set(RDF.TYPE, types); } return record; } /** * Creates a new record having the same ID and properties of the supplied record, possibly * performing a deep-cloning (copy constructor). The difference between shallow- and * deep-cloning lies in the handling of property values of {@code Record} type, which are * shared by the source and cloned object in case of shallow-cloning, and cloned themselves in * case of deep-cloning. * * @param record * the reference record to clone * @param deepClone * true to perform a deep-cloning, false to perform a shallow-cloning * @return the created record clone */ public static Record create(final Record record, final boolean deepClone) { return new Record(record, deepClone); } /** * Returns the ID of this record. * * @return the ID of this record, possibly null if not previously assigned */ @Nullable public synchronized URI getID() { return doGetID(); } /** * Sets the ID of this record. * * @param id * the new ID of this record or null to clear it * @return this record object, for call chaining */ public synchronized Record setID(@Nullable final URI id) { doSetID(id); return this; } /** * Returns the system type for this record, i.e., the {@code rdf:type} URI under the * {@code ks:} namespace, if any. * * @return the system type or null if not set * @throws IllegalArgumentException * in case multiple system types are bound to the record */ @Nullable public synchronized URI getSystemType() throws IllegalArgumentException { URI result = null; for (final URI type : get(RDF.TYPE, URI.class)) { if (type.getNamespace().equals(KS.NAMESPACE)) { Preconditions.checkArgument(result == null, "Multiple system types: " + result + ", " + type); result = type; } } return result; } /** * Returns all the properties currently defined for this record. 
* * @return an immutable list with the properties currently defined for this record, without * repetitions and in no particular order */ public synchronized List<URI> getProperties() { return doGetProperties(); } /** * Determines whether the property specified is null, i.e., it has no value. * * @param property * the property to read * @return true if the property has no value */ public synchronized boolean isNull(final URI property) { return doCount(property) == 0; } /** * Determines whether the property specified has at most one value. * * @param property * the property to read * @return true if the property has at most value; false if it has multiple values */ public synchronized boolean isUnique(final URI property) { return doCount(property) <= 1; } /** * Determines whether the property specified has been set to true. The method fails if the * property has multiple values or has a non-boolean type; if this behaviour is not desired, * use {@link #getUnique(URI, Class, Object)} specifying {@code Boolean.class} as the class * and an appropriate default value to be returned in case of failure. * * @param property * the property to read * @return true if the property is set to true; false if the property has no value or has been * set to false * @throws IllegalStateException * in case the property has multiple values * @throws IllegalArgumentException * in case the property value is not of boolean type */ public boolean isTrue(final URI property) throws IllegalStateException, IllegalArgumentException { final Boolean value = getUnique(property, Boolean.class); return value != null && value.booleanValue(); } /** * Determines whether the property specified has been set to false. The method fails if the * property has multiple values or has a non-boolean type; if this behaviour is not desired, * use {@link #getUnique(URI, Class, Object)} specifying {@code Boolean.class} as the class * and an appropriate default value to be returned in case of failure. 
* * @param property * the property to read * @return true if the property is set to false; false if the property has no value or has * been set to true * @throws IllegalStateException * in case the property has multiple values * @throws IllegalArgumentException * in case the property value is not of boolean type */ public boolean isFalse(final URI property) throws IllegalStateException, IllegalArgumentException { final Boolean value = getUnique(property, Boolean.class); return value != null && !value.booleanValue(); } /** * Returns the number of values assigned to the property specified. Calling this method may be * faster that using {@link #get(URI)}. * * @param property * the property * @return the number of values */ public synchronized int count(final URI property) { return doCount(property); } /** * Returns the unique {@code Object} value of a property, or null if it has no value. Note * that this method fails if the property has multiple values; if this is not the desired * behaviour, use {@link #getUnique(URI, Class, Object)} supplying an appropriate type (could * be {@code Object.class}) and default value to be returned in case of failure. * * @param property * the property to read * @return the unique {@code Object} value of the property; null if it has no value * @throws IllegalStateException * in case the property has multiple values */ @Nullable public Object getUnique(final URI property) throws IllegalStateException { return getUnique(property, Object.class); } /** * Returns the unique value of the property converted to an instance of a certain class, or * null if the property has no value. Note that this method fails if the property has multiple * values or its unique value cannot be converted to the requested class; if this is not the * desired behavior, use {@link #getUnique(URI, Class, Object)} supplying an appropriate * default value to be returned in case of failure. 
* * @param property * the property to read * @param valueClass * the class to convert the value to * @param <T> * the type of result * @return the unique value of the property, converted to the class specified; null if the * property has no value * @throws IllegalStateException * in case the property has multiple values * @throws IllegalArgumentException * in case the unique property value cannot be converted to the class specified */ @SuppressWarnings("unchecked") @Nullable public <T> T getUnique(final URI property, final Class<T> valueClass) throws IllegalStateException, IllegalArgumentException { final Object result; synchronized (this) { result = doGet(property, valueClass); } if (result == null) { return null; } else if (result instanceof List<?>) { final List<T> list = (List<T>) result; final StringBuilder builder = new StringBuilder("Expected one value for property ") .append(property).append(", found ").append(list.size()).append(" values: "); for (int i = 0; i < Math.min(3, list.size()); ++i) { builder.append(i > 0 ? ", " : "").append(list.get(i)); } builder.append(list.size() > 3 ? ", ..." : ""); throw new IllegalStateException(builder.toString()); } else { return (T) result; } } /** * Returns the unique value of the property converted to an instance of a certain class, or * the default value supplied in case of failure. 
* * @param property * the property to read * @param valueClass * the class to convert the value to * @param defaultValue * the default value to return in case the property has no value * @param <T> * the type of result * @return the unique value of the property converted to the class specified, on success; the * default value supplied in case the property has no value, has multiple values or * its unique value cannot be converted to the class specified */ @Nullable public <T> T getUnique(final URI property, final Class<T> valueClass, @Nullable final T defaultValue) { try { final T value = getUnique(property, valueClass); return value == null ? defaultValue : value; } catch (final IllegalStateException ex) { return defaultValue; } catch (final IllegalArgumentException ex) { return defaultValue; } } /** * Returns the {@code Object} values of the property specified. * * @param property * the property to read * @return an immutable list with the {@code Object} values of the property, without * repetitions, in no particular order and possibly empty */ public List<Object> get(final URI property) { return get(property, Object.class); } /** * Returns the values of the property converted to instances of a certain class. Note that * this method fails if conversion is not possible for one or more of the property values; if * this is not the desired behavior, use {@link #get(URI, Class, List)} specifying an * appropriate default value to be returned in case of conversion failure. 
* * @param property * the property to read * @param valueClass * the class values have to be converted to * @param <T> * the type of property values * @return an immutable list with the values of the property, converted to the class * specified, possibly empty * @throws IllegalArgumentException * in case one of the property values cannot be converted to the class specified */ @SuppressWarnings("unchecked") public <T> List<T> get(final URI property, final Class<T> valueClass) throws IllegalArgumentException { final Object result; synchronized (this) { result = doGet(property, valueClass); } if (result == null) { return ImmutableList.of(); } else if (result instanceof List<?>) { return (List<T>) result; } else { return ImmutableList.of((T) result); } } /** * Returns the values of the property converted to instances of a certain class, or the * default value supplied in case of failure or if the property has no values. * * @param property * the property to read * @param valueClass * the class values have to be converted to * @param defaultValue * the default value to return in case conversion fails * @param <T> * the type of property values * @return an immutable list with the values of the property, converted to the class specified * and possibly empty, on success; the default value supplied in case the property has * no value or conversion fails for some value */ public <T> List<T> get(final URI property, final Class<T> valueClass, final List<T> defaultValue) { try { final List<T> values = get(property, valueClass); return values.isEmpty() ? defaultValue : values; } catch (final IllegalArgumentException ex) { return defaultValue; } } /** * Sets the values of the property specified. The method accepts one or more objects as the * values; these objects can be {@code Record}s, {@code URI}s, {@code BNode}s, * {@code Statement}s, {@code Literal}s, objects convertible to {@code Literal} or any array * or iterable of the former types. 
Setting a property to null has the effect of clearing it. * * @param property * the property to set * @param first * the first value, array or iterable of values to set, possibly null * @param other * additional values, arrays or iterables of values to set (if specified, will be * merged with {@code first}). * @return this record object, for call chaining * @throws IllegalArgumentException * if one of the supplied values has an unsupported type */ public Record set(final URI property, @Nullable final Object first, final Object... other) throws IllegalArgumentException { Preconditions.checkNotNull(property); final Set<Object> values = Sets.<Object>newHashSet(); Data.normalize(first, values); Data.normalize(other, values); synchronized (this) { doSet(property, values); } return this; } /** * Adds one or more values to the property specified. The method accepts one or more objects * as the values; these objects can be {@code Record}s, {@code URI}s, {@code BNode}s, * {@code Statement}s, {@code Literal}s, objects convertible to {@code Literal} or any array * or iterable of the former types. * * @param property * the property to modify * @param first * the first value, array or iterable of values to add, possibly null * @param other * additional values, arrays or iterables of values to set (if specified, will be * merged with {@code first}). * @return this record object, for call chaining * @throws IllegalArgumentException * if one of the supplied values has an unsupported type */ public Record add(final URI property, @Nullable final Object first, final Object... 
other) throws IllegalArgumentException { Preconditions.checkNotNull(property); final List<Object> added = Lists.newArrayList(); Data.normalize(first, added); Data.normalize(other, added); if (!Iterables.isEmpty(added)) { synchronized (this) { final Set<Object> values = Sets.newHashSet(get(property)); final boolean changed = values.addAll(added); if (changed) { doSet(property, values); } } } return this; } /** * Removes one or more values from the property specified. The method accepts one or more * objects as the values; these objects can be {@code Record}s, {@code URI}s, {@code BNode}s, * {@code Statement}s, {@code Literal}s, objects convertible to {@code Literal} or any array * or iterable of the former types. * * @param property * the property to modify * @param first * the first value, array or iterable of values to remove, possibly null * @param other * additional values, arrays or iterables of values to remove (if specified, will * be merged with {@code first}). * @return this record object, for call chaining * @throws IllegalArgumentException * if one of the supplied values has an unsupported type */ public Record remove(final URI property, @Nullable final Object first, final Object... other) throws IllegalArgumentException { Preconditions.checkNotNull(property); final List<Object> removed = Lists.newArrayList(); Data.normalize(first, removed); Data.normalize(other, removed); if (!removed.isEmpty()) { synchronized (this) { final Set<Object> values = Sets.newHashSet(get(property)); final boolean changed = values.removeAll(removed); if (changed) { doSet(property, values); } } } return this; } /** * Retains only the properties specified, clearing the remaining ones. Note that the ID is not * affected. * * @param properties * an array with the properties to retain, possibly empty (in which case all the * stored properties will be cleared) * @return this record object, for call chaining */ public synchronized Record retain(final URI... 
properties) { for (final URI property : doGetProperties()) { boolean retain = false; for (int i = 0; i < properties.length; ++i) { if (property.equals(properties[i])) { retain = true; break; } } if (!retain) { doSet(property, ImmutableSet.<Object>of()); } } return this; } /** * Clears the properties specified, or all the stored properties if no property is specified. * Note that the ID is not affected. * * @param properties * an array with the properties to retain, possibly empty (in which case all the * stored properties will be cleared) * @return this record object, for call chaining */ public synchronized Record clear(final URI... properties) { final List<URI> propertiesToClear; if (properties == null || properties.length == 0) { propertiesToClear = doGetProperties(); } else { propertiesToClear = Arrays.asList(properties); } for (final URI property : propertiesToClear) { doSet(property, ImmutableSet.<Object>of()); } return this; } /** * {@inheritDoc} Comparison is based on the record IDs only. */ @Override public int compareTo(final Record other) { final URI thisID = getID(); final URI otherID = other.getID(); if (thisID == null) { return otherID == null ? 0 : -1; } else { return otherID == null ? 1 : thisID.stringValue().compareTo(otherID.stringValue()); } } /** * {@inheritDoc} Two records are equal if they have the same IDs. */ @Override public boolean equals(final Object object) { if (object == this) { return true; } if (!(object instanceof Record)) { return false; } final Record other = (Record) object; return Objects.equal(getID(), other.getID()); } /** * {@inheritDoc} The returned hash code depends only on the record ID. */ @Override public int hashCode() { return Objects.hashCode(getID()); } /** * Computes a string-valued hash code of the properties specified, or of all the available * properties, if no URI is specified. Order of selected properties and order of values of * each property do not matter. A cryptographic hash function is used. 
Collision probability * is negligible. This method can be used to check whether two records have the same (subsets * of) properties, by computing and comparing the respective hashes. * * @param properties * the properties to hash. * @return the computed hash code */ public synchronized String hash(final URI... properties) { final List<URI> propertiesToHash; if (properties == null || properties.length == 0) { propertiesToHash = doGetProperties(); } else { propertiesToHash = Arrays.asList(properties); } final Hasher hasher = Hashing.md5().newHasher(); for (final URI property : propertiesToHash) { final Object object = doGet(property, Object.class); @SuppressWarnings("unchecked") final Iterable<Object> nodes = object instanceof List<?> ? (List<Object>) object : ImmutableList.of(object); for (final Object node : ((Ordering<Object>) Data.getTotalComparator()) .sortedCopy(nodes)) { // TODO: this is not efficient! add Node.toBytes hasher.putString(Data.toString(node, null, true), Charsets.UTF_16LE); } hasher.putByte((byte) 0); } final StringBuilder builder = new StringBuilder(16); final byte[] bytes = hasher.hash().asBytes(); int max = 52; for (int i = 0; i < bytes.length; ++i) { final int n = (bytes[i] & 0x7F) % max; if (n < 26) { builder.append((char) (65 + n)); } else if (n < 52) { builder.append((char) (71 + n)); } else { builder.append((char) (n - 4)); } max = 62; } return builder.toString(); } /** * Returns a string representation of the record, optionally using the namespaces supplied and * emitting record properties. This method extends {@code #toString()}, optionally allowing to * emit also record properties and, recursively, properties of records nested in this record. 
* * @param namespaces * the prefix-to-namespace mappings to be used when emitting property and value * URIs; if null, only non-abbreviated, full URIs will be emitted * @param includeProperties * true if record properties should be emitted too * @return a string representation of the record, computed based on the * {@code includeProperties} setting */ public synchronized String toString(@Nullable final Map<String, String> namespaces, final boolean includeProperties) { final URI id = getID(); final String base = "Record " + (id == null ? "<no id>" : Data.toString(id, namespaces)); if (!includeProperties) { return base; } final Integer oldIndent = INDENT_LEVEL.get(); try { final int indent = oldIndent == null ? 1 : oldIndent + 1; INDENT_LEVEL.set(indent + 1); final StringBuilder builder = new StringBuilder(base).append(" {"); String propertySeparator = "\n"; final Ordering<Object> ordering = Ordering.from(Data.getTotalComparator()); for (final URI property : ordering.sortedCopy(doGetProperties())) { builder.append(propertySeparator).append(Strings.repeat(INDENT_STRING, indent)); builder.append(Data.toString(property, namespaces)); builder.append(" = "); final List<Object> values = ordering.sortedCopy(get(property)); String valueSeparator = values.size() == 1 ? "" : "\n" + Strings.repeat(INDENT_STRING, indent + 1); for (final Object value : values) { builder.append(valueSeparator).append(Data.toString(value, namespaces, true)); valueSeparator = ",\n" + Strings.repeat(INDENT_STRING, indent + 1); } propertySeparator = ";\n"; } builder.append(" }"); return builder.toString(); } finally { INDENT_LEVEL.set(oldIndent); } } /** * {@inheritDoc} The returned string contains only the ID of the record. */ @Override public String toString() { return toString(null, false); } /** * Performs record-to-RDF encoding by converting a stream of records in a stream of RDF * statements. Parameter {@code types} specify additional types to be added to encoded * records. 
Type information may be set to null (e.g., because unknown at the time the method * is called): in this case, it will be read from metadata attribute {@code "types"} attached * to the stream; reading will happen just before decoding will take place, i.e., when a * terminal stream operation will be called. * * @param stream * the stream of records to encode. * @param types * the types to be added to each record of the stream, null if to be read from * stream metadata * @return the resulting stream of statements */ @SuppressWarnings("unchecked") public static Stream<Statement> encode(final Stream<? extends Record> stream, @Nullable final Iterable<? extends URI> types) { Preconditions.checkNotNull(stream); if (types != null) { stream.setProperty("types", types); } final Stream<Record> records = (Stream<Record>) stream; return records.transform(null, new Function<Handler<Statement>, Handler<Record>>() { @Override public Handler<Record> apply(final Handler<Statement> handler) { final Iterable<? extends URI> types = stream.getProperty("types", Iterable.class); return new Encoder(handler, types); } }); } /** * Performs RDF-to-record decoding by converting a stream of RDF statements in a stream of * records. Parameter {@code types} specify the types of records that have to be extracted * from the statement stream, while parameter {@code chunked} specifies whether the input * statement stream is chunked, i.e., organized as a sequence of statement chunks with each * chunk containing the statements for a record (and its nested records). Chunked RDF streams * noticeably speed up decoding, and are always produced by the KnowledgeStore API. 
Type and * chunking information may be set to null (e.g., because unknown at the time the method is * called): in this case, they will be read from metadata attributes attached to the stream, * named {@code "types"} and {@code "chunked"}; reading will happen just before decoding will * take place, i.e., when a terminal stream operation will be called. * * @param stream * the stream of statements to decode * @param types * the types of records to extract from the statement stream, null if to be read * from stream metadata * @param chunked * true if the input statement stream is chunked, null if to be read from stream * metadata * @return the resulting stream of records */ public static Stream<Record> decode(final Stream<Statement> stream, @Nullable final Iterable<? extends URI> types, @Nullable final Boolean chunked) { Preconditions.checkNotNull(stream); if (types != null) { stream.setProperty("types", types); } if (chunked != null) { stream.setProperty("chunked", chunked); } return stream.transform(null, new Function<Handler<Record>, Handler<Statement>>() { @SuppressWarnings("unchecked") @Override public Handler<Statement> apply(final Handler<Record> handler) { final Iterable<? extends URI> types = stream.getProperty("types", Iterable.class); final Boolean chunked = stream.getProperty("chunked", Boolean.class); return new Decoder(handler, types, chunked); } }); } private static class Encoder implements Handler<Record> { private final Handler<? super Statement> handler; private final Set<URI> types; Encoder(final Handler<? super Statement> handler, final Iterable<? 
extends URI> types) { this.handler = Preconditions.checkNotNull(handler); this.types = ImmutableSet.copyOf(types); } @Override public void handle(final Record record) throws Throwable { if (record != null) { emit(record, getID(record), true); } else { this.handler.handle(null); } } private void emit(final Record record, final URI subject, final boolean addType) throws Throwable { if (addType) { for (final URI type : this.types) { emit(subject, RDF.TYPE, type); } } final List<URI> properties = record.getProperties(); final List<Record> subRecords = Lists.newArrayList(); for (final URI property : properties) { final List<Object> values = record.get(property); for (final Object value : values) { if (value instanceof Value) { final Value v = (Value) value; if (!addType || !property.equals(RDF.TYPE) || !this.types.contains(v)) { emit(subject, property, v); } } else if (value instanceof Record) { final Record rv = (Record) value; emit(subject, property, getID(rv)); subRecords.add(rv); } else if (value instanceof Statement) { final Statement s = (Statement) value; final URI id = hash(s); emit(subject, property, id); emit(id, RDF.SUBJECT, s.getSubject()); emit(id, RDF.PREDICATE, s.getPredicate()); emit(id, RDF.OBJECT, s.getObject()); } else { throw new Error("Unexpected type for value: " + value); } } } for (final Record subRecord : subRecords) { emit(subRecord, getID(subRecord), false); } } private void emit(final Resource s, final URI p, final Value o) throws Throwable { this.handler.handle(Data.getValueFactory().createStatement(s, p, o)); } private URI hash(final Statement statement) { return Data.getValueFactory().createURI("triples:" + Data.hash(statement.toString())); } private URI getID(final Record record) { final URI id = record.getID(); if (id == null) { return Data.getValueFactory().createURI("bnode:" + record.hash()); } return id; } } private static class Decoder implements Handler<Statement> { private final Handler<? 
super Record> handler; private final Set<URI> types; private final boolean chunked; private final UUID uuid; private final Map<URI, Node> nodes; private final List<Node> roots; private Node current; Decoder(final Handler<? super Record> handler, final Iterable<? extends URI> types, final boolean chunked) { this.handler = Preconditions.checkNotNull(handler); this.types = ImmutableSet.copyOf(types); this.chunked = chunked; this.uuid = UUID.randomUUID(); // for skolemization this.nodes = this.chunked ? Maps.<URI, Node>newLinkedHashMap() : Maps .<URI, Node>newHashMap(); this.roots = Lists.newArrayList(); this.current = null; } @Override public void handle(final Statement statement) throws RDFHandlerException { if (statement == null) { flush(true); return; } final Statement s = skolemize(statement); final URI subj = (URI) s.getSubject(); final URI pred = s.getPredicate(); final Value obj = s.getObject(); if (this.current == null || !this.current.id().equals(subj)) { this.current = this.nodes.get(subj); if (this.current == null) { this.current = new Node(subj); this.nodes.put(subj, this.current); } } this.current.add(s); if (pred.equals(RDF.TYPE) && this.types.contains(obj)) { this.current.mark(); if (this.chunked && !this.roots.isEmpty()) { flush(false); final URI threshold = this.roots.get(this.roots.size() - 1).id(); final Iterator<URI> iterator = this.nodes.keySet().iterator(); while (true) { final URI id = iterator.next(); iterator.remove(); if (id.equals(threshold)) { break; } } this.roots.clear(); } this.roots.add(this.current); } } private Statement skolemize(final Statement statement) { boolean skolemized = false; Resource subj = statement.getSubject(); if (subj instanceof BNode) { subj = skolemize((BNode) subj); skolemized = true; } Value obj = statement.getObject(); if (obj instanceof BNode) { obj = skolemize((BNode) obj); skolemized = true; } if (skolemized) { final URI pred = statement.getPredicate(); return Data.getValueFactory().createStatement(subj, pred, 
obj); } return statement; } private URI skolemize(final BNode bnode) { final String hash = Data.hash(this.uuid.getLeastSignificantBits(), this.uuid.getMostSignificantBits(), bnode.getID()); return Data.getValueFactory().createURI("bnode:" + hash); } private void flush(final boolean complete) throws RDFHandlerException { try { final List<Node> queue = Lists.newLinkedList(); for (final Node root : this.roots) { final Record record = (Record) root.visit(root, queue); while (!queue.isEmpty()) { final Node node = queue.remove(0); node.complete(root, this.nodes, queue); } this.handler.handle(record); if (Thread.interrupted()) { throw new RDFHandlerException("Interrupted"); } } if (complete) { this.handler.handle(null); } } catch (final Throwable ex) { Throwables.propagateIfPossible(ex, RDFHandlerException.class); throw new RDFHandlerException(ex); } } private static class Node { private final URI id; private final List<Statement> statements; private Object value; private Node root; private boolean reified; private boolean result; Node(final URI id) { this.id = id; this.statements = Lists.newArrayList(); this.result = false; } URI id() { return this.id; } void mark() { this.result = true; } void add(final Statement statement) { this.statements.add(statement); final URI pred = statement.getPredicate(); this.reified = this.reified || pred.equals(RDF.SUBJECT) || pred.equals(RDF.PREDICATE) || pred.equals(RDF.OBJECT); } Object visit(final Node root, final List<Node> queue) { if (this.root != root) { this.root = root; if (this.reified) { this.value = unreify(); } else { this.value = Record.create((URI) this.statements.get(0).getSubject()); queue.add(this); // register in the queue so to fill the record next } return !this.result || this == root ? 
this.value : this.statements.get(0) .getSubject(); } else if (this.value instanceof Statement) { return this.value; } return this.statements.get(0).getSubject(); } void complete(final Node root, final Map<URI, Node> nodes, final List<Node> queue) { final Record record = (Record) this.value; URI property = null; final List<Object> values = Lists.newArrayList(); Collections.sort(this.statements, Data.getTotalComparator()); for (final Statement statement : this.statements) { if (!statement.getPredicate().equals(property)) { if (property != null) { record.set(property, values); } property = statement.getPredicate(); values.clear(); } Object value = statement.getObject(); if (value instanceof URI) { final Node n = nodes.get(value); if (n != null) { value = n.visit(root, queue); } } values.add(value); } record.set(property, values); } private Statement unreify() { Resource subj = null; URI pred = null; Value obj = null; for (final Statement statement : this.statements) { final URI property = statement.getPredicate(); if (property.equals(RDF.SUBJECT)) { subj = (Resource) statement.getObject(); } else if (property.equals(RDF.PREDICATE)) { pred = (URI) statement.getObject(); } else if (property.equals(RDF.OBJECT)) { obj = statement.getObject(); } } return Data.getValueFactory().createStatement(subj, pred, obj); } } } }