/*
 * Copyright 2013 Cloudera.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.kitesdk.data.spi;

import com.google.common.base.Objects;
import com.google.common.base.Predicate;
import java.net.URI;
import java.util.Map;
import javax.annotation.concurrent.Immutable;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetReader;
import org.kitesdk.data.Datasets;
import org.kitesdk.data.IncompatibleSchemaException;
import org.kitesdk.data.PartitionView;
import org.kitesdk.data.RefinableView;
import org.kitesdk.data.URIBuilder;
import org.kitesdk.data.View;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A common View base class to simplify implementations of Views created from
 * ranges.
 * <p>
 * A view is the pairing of a {@link Dataset} with a set of
 * {@link Constraints}. Every refinement method ({@code with}, {@code from},
 * {@code to}, ...) derives a new {@code Constraints} object and hands it to
 * {@link #filter(Constraints)}, so instances themselves are never modified.
 *
 * @param <E>
 *      The type of entities stored in the {@code Dataset} underlying this
 *      {@code View}.
 * @since 0.9.0
 */
@Immutable
public abstract class AbstractRefinableView<E> implements RefinableView<E> {

  private static final Logger LOG = LoggerFactory.getLogger(AbstractRefinableView.class);

  /** The dataset this view is defined over. */
  protected final Dataset<E> dataset;

  /** Marker comparator for the partition strategy; {@code null} when unpartitioned. */
  protected final MarkerComparator comparator;

  /** The constraints that define which entities belong to this view. */
  protected final Constraints constraints;

  /** Accessor resolved for the entity type and schema of this view. */
  protected final EntityAccessor<E> accessor;

  /** Entity-level predicate derived from {@link #constraints} and {@link #accessor}. */
  protected final Predicate<E> entityTest;

  /** Whether the dataset's schema can be read using the accessor's read schema. */
  protected final boolean canRead;

  /** Whether data in the accessor's write schema can be read with the dataset's schema. */
  protected final boolean canWrite;

  // This class is Immutable and must be thread-safe
  /** Per-thread reusable storage key; {@code null} when the dataset is unpartitioned. */
  protected final ThreadLocal<StorageKey> keys;

  /**
   * Creates the initial view of a dataset, using constraints built directly
   * from the dataset's descriptor.
   * <p>
   * For a partitioned dataset the constraints, comparator and per-thread
   * storage keys are built from the descriptor's partition strategy. For an
   * unpartitioned dataset the constraints are built from the schema alone and
   * both {@link #comparator} and {@link #keys} are {@code null}.
   *
   * @param dataset the dataset this view covers
   * @param type the entity class used to resolve the accessor
   * @throws IncompatibleSchemaException (raised through
   *      {@code IncompatibleSchemaException.check}) if the type can be used
   *      neither to read from nor to write to the dataset
   */
  protected AbstractRefinableView(Dataset<E> dataset, Class<E> type) {
    this.dataset = dataset;
    final DatasetDescriptor descriptor = dataset.getDescriptor();
    if (descriptor.isPartitioned()) {
      this.constraints = new Constraints(
          descriptor.getSchema(), descriptor.getPartitionStrategy());
      // TODO: is comparator used anywhere?
      this.comparator = new MarkerComparator(descriptor.getPartitionStrategy());
      this.keys = new ThreadLocal<StorageKey>() {
        @Override
        protected StorageKey initialValue() {
          return new StorageKey(descriptor.getPartitionStrategy());
        }
      };
    } else {
      this.constraints = new Constraints(descriptor.getSchema());
      this.comparator = null;
      this.keys = null;
    }
    this.accessor = DataModelUtil.accessor(type, descriptor.getSchema());
    this.entityTest = constraints.toEntityPredicate(accessor);

    Schema datasetSchema = descriptor.getSchema();
    this.canRead = SchemaValidationUtil.canRead(
        datasetSchema, accessor.getReadSchema());
    this.canWrite = SchemaValidationUtil.canRead(
        accessor.getWriteSchema(), datasetSchema);

    IncompatibleSchemaException.check(canRead || canWrite,
        "The type cannot be used to read from or write to the dataset:\n" +
        "Type schema: %s\nDataset schema: %s",
        getSchema(), descriptor.getSchema());
  }

  /**
   * Creates a projection of an existing view onto a different entity type
   * and schema, keeping the source view's constraints.
   * <p>
   * The underlying dataset is re-typed with {@code asType} when it is an
   * {@link AbstractDataset}; otherwise it is reloaded from its URI with the
   * new type.
   *
   * @param view the view to project
   * @param schema the schema used to resolve the accessor for {@code type}
   * @param type the entity class of the projected view
   * @throws IncompatibleSchemaException (raised through
   *      {@code IncompatibleSchemaException.check}) if the type can be used
   *      neither to read from nor to write to the dataset
   */
  protected AbstractRefinableView(AbstractRefinableView<?> view, Schema schema, Class<E> type) {
    if (view.dataset instanceof AbstractDataset) {
      this.dataset = ((AbstractDataset<?>) view.dataset).asType(type);
    } else {
      this.dataset = Datasets.load(view.dataset.getUri(), type);
    }
    this.comparator = view.comparator;
    this.constraints = view.constraints;
    // thread-safe, so okay to reuse when views share a partition strategy
    this.keys = view.keys;
    // Resolve our type according to the given schema
    this.accessor = DataModelUtil.accessor(type, schema);
    this.entityTest = constraints.toEntityPredicate(accessor);

    Schema datasetSchema = dataset.getDescriptor().getSchema();
    this.canRead = SchemaValidationUtil.canRead(
        datasetSchema, accessor.getReadSchema());
    this.canWrite = SchemaValidationUtil.canRead(
        accessor.getWriteSchema(), datasetSchema);

    IncompatibleSchemaException.check(canRead || canWrite,
        "The type cannot be used to read from or write to the dataset:\n" +
        "Type schema: %s\nDataset schema: %s",
        getSchema(), datasetSchema);
  }

  /**
   * Creates a refinement of an existing view that differs only in its
   * constraints. The dataset, comparator, storage keys, accessor and the
   * read/write compatibility flags are all taken from {@code view}.
   *
   * @param view the parent view
   * @param constraints the constraints of the new view
   */
  protected AbstractRefinableView(AbstractRefinableView<E> view, Constraints constraints) {
    this.dataset = view.dataset;
    this.comparator = view.comparator;
    this.constraints = constraints;
    // thread-safe, so okay to reuse when views share a partition strategy
    this.keys = view.keys;
    // No need to resolve type here as it would have been resolved by our parent
    // view
    this.accessor = view.accessor;
    this.entityTest = constraints.toEntityPredicate(accessor);
    this.canRead = view.canRead;
    this.canWrite = view.canWrite;
  }

  /**
   * @return the constraints that define this view
   */
  public Constraints getConstraints() {
    return constraints;
  }

  /**
   * Builds a view over the same dataset with the given constraints. All of
   * the refinement methods of this class delegate here.
   *
   * @param c the constraints of the view to build
   * @return a view restricted by {@code c}
   */
  protected abstract AbstractRefinableView<E> filter(Constraints c);

  /**
   * Builds a view with the given schema and entity type. Used by
   * {@link #asSchema(Schema)} and {@link #asType(Class)}.
   *
   * @param schema the schema for the projected view
   * @param type the entity class for the projected view
   * @param <T> the entity type of the projected view
   * @return the projected view
   */
  protected abstract <T> AbstractRefinableView<T> project(Schema schema, Class<T> type);

  @Override
  public Dataset<E> getDataset() {
    return dataset;
  }

  /**
   * Always throws in this base class.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public boolean deleteAll() {
    throw new UnsupportedOperationException(
        "This Dataset does not support bulk deletion");
  }

  /**
   * Always throws in this base class.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public boolean moveToTrash() {
    throw new UnsupportedOperationException(
        "This Dataset does not support bulk data removal to trash");
  }

  @Override
  public Class<E> getType() {
    return accessor.getType();
  }

  /**
   * @return the read schema of this view's accessor
   */
  @Override
  public Schema getSchema() {
    return accessor.getReadSchema();
  }

  /**
   * @return the accessor resolved for this view's entity type
   */
  public EntityAccessor<E> getAccessor() {
    return accessor;
  }

  /**
   * @return the provided values reported by this view's constraints
   */
  public Map<String, Object> getProvidedValues() {
    return constraints.getProvidedValues();
  }

  /**
   * Always throws in this base class.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public Iterable<PartitionView<E>> getCoveringPartitions() {
    throw new UnsupportedOperationException("This Dataset does not support " +
        "getCoveringPartitions.");
  }

  /**
   * Tests an entity against the predicate derived from this view's
   * constraints.
   */
  @Override
  public boolean includes(E entity) {
    return entityTest.apply(entity);
  }

  @Override
  public AbstractRefinableView<E> with(String name, Object... values) {
    return filter(constraints.with(name, values));
  }

  @Override
  public AbstractRefinableView<E> from(String name, Comparable value) {
    return filter(constraints.from(name, value));
  }

  @Override
  public AbstractRefinableView<E> fromAfter(String name, Comparable value) {
    return filter(constraints.fromAfter(name, value));
  }

  @Override
  public AbstractRefinableView<E> to(String name, Comparable value) {
    return filter(constraints.to(name, value));
  }

  @Override
  public AbstractRefinableView<E> toBefore(String name, Comparable value) {
    return filter(constraints.toBefore(name, value));
  }

  /**
   * Projects this view to {@link GenericRecord} entities with the given
   * schema.
   */
  @Override
  @SuppressWarnings("unchecked")
  public AbstractRefinableView<GenericRecord> asSchema(Schema schema) {
    return project(schema, GenericRecord.class);
  }

  /**
   * Projects this view to the given entity type. A generic type keeps this
   * view's current schema; any other type is resolved against the dataset's
   * schema.
   */
  @Override
  public <T> View<T> asType(Class<T> type) {
    if (DataModelUtil.isGeneric(type)) {
      // if the type is generic, don't reset the schema
      return project(getSchema(), type);
    }
    // otherwise, the type determines the schema
    return project(getDataset().getDescriptor().getSchema(), type);
  }

  /**
   * Reports whether a freshly opened reader has no entities to return. The
   * reader is always closed before this method returns.
   */
  @Override
  public boolean isEmpty() {
    DatasetReader<E> reader = null;
    try {
      // use a reader because files may be present but empty
      reader = newReader();
      return !reader.hasNext();
    } finally {
      if (reader != null) {
        reader.close();
      }
    }
  }

  /**
   * Two views are equal when they have the same runtime class, equal
   * datasets and equal constraints.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }

    if ((o == null) || !Objects.equal(this.getClass(), o.getClass())) {
      return false;
    }

    // wildcard rather than raw type: only non-generic state is compared
    AbstractRefinableView<?> that = (AbstractRefinableView<?>) o;
    return (Objects.equal(this.dataset, that.dataset) &&
        Objects.equal(this.constraints, that.constraints));
  }

  @Override
  public int hashCode() {
    return Objects.hashCode(getClass(), dataset, constraints);
  }

  @Override
  public String toString() {
    return Objects.toStringHelper(this)
        .add("dataset", dataset)
        .add("constraints", constraints)
        .toString();
  }

  /**
   * Builds this view's URI: the dataset's URI with one entry added for each
   * key/value pair of the constraints' query map.
   */
  @Override
  public URI getUri() {
    URIBuilder builder = new URIBuilder(dataset.getUri());
    for (Map.Entry<String, String> entry : constraints.toQueryMap().entrySet()) {
      builder.with(entry.getKey(), entry.getValue());
    }
    return builder.build();
  }

  /**
   * @return the storage-key predicate derived from this view's constraints
   */
  protected Predicate<StorageKey> getKeyPredicate() {
    return constraints.toKeyPredicate();
  }

  /**
   * Verifies that this view's schema can be used for writing, i.e. that
   * {@link #canWrite} is set.
   *
   * @throws IncompatibleSchemaException (raised through
   *      {@code IncompatibleSchemaException.check}) if writing is not possible
   */
  protected void checkSchemaForWrite() {
    IncompatibleSchemaException.check(canWrite,
        "Cannot write data with this view's schema, " +
        "it cannot be read with the dataset's schema:\n" +
        "Current schema: %s\nDataset schema: %s",
        getSchema(), dataset.getDescriptor().getSchema());
  }

  /**
   * Verifies that this view's schema can be used for reading, i.e. that
   * {@link #canRead} is set.
   *
   * @throws IncompatibleSchemaException (raised through
   *      {@code IncompatibleSchemaException.check}) if reading is not possible
   */
  protected void checkSchemaForRead() {
    // Argument order must follow the labels in the message: the view's
    // ("current") schema first, then the dataset's schema.
    IncompatibleSchemaException.check(canRead,
        "Cannot read data with this view's schema:\n" +
        "Current schema: %s\nDataset schema: %s",
        getSchema(), dataset.getDescriptor().getSchema());
  }
}