/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.dstream.support;
import java.io.Serializable;
import io.dstream.SerializableStreamAssets.SerFunction;
import io.dstream.utils.Assert;
/**
* Base implementation of the classification functionality.<br>
* Classification can be viewed as the process of distributed grouping and,
* in the "distributable" reality, it often coincides with data <i>partitioning</i>.
* Since {@link Classifier} is compliant with the general semantics of partitioners
* by returning an {@link Integer} from the {@link #getClassificationId(Object)} method,
* this id can be treated by a target partitioner as the partition id.
*/
public abstract class Classifier implements Serializable {

    private static final long serialVersionUID = -250807397502312547L;

    /** Total number of classifications; assigned once in the constructor. */
    private final int classificationSize;

    /**
     * Optional function that maps an input to the value used for classification.
     * Stays {@code null} until {@link #setClassificationValueMapper(SerFunction)} is called.
     */
    private SerFunction<Object, ?> classificationValueMapper;

    /**
     * Creates a classifier with the given number of classifications.
     * The value is checked with {@code Assert.isTrue(classificationSize > 0, ...)}
     * before it is stored.
     *
     * @param classificationSize total number of classifications; must be &gt; 0
     */
    public Classifier(int classificationSize) {
        Assert.isTrue(classificationSize > 0, "'classificationSize' must be > 0");
        this.classificationSize = classificationSize;
    }

    /**
     * Returns the classification id computed for <i>input</i>.
     * The computation is delegated entirely to {@link #doGetClassificationId(Object)};
     * this method only boxes the result.
     *
     * @param input the value to classify
     * @return the classification id produced by the sub-class implementation
     */
    public Integer getClassificationId(Object input) {
        return Integer.valueOf(this.doGetClassificationId(input));
    }

    /**
     * Returns the total number of classifications supplied at construction time.
     *
     * @return the classification size
     */
    public int getSize() {
        return this.classificationSize;
    }

    /**
     * Sets (or replaces) the {@link SerFunction} that maps an input to the value
     * which should be used to compute its classification.
     * <pre>
     * dstream.classify(str -> str.substring(0, 5))
     * </pre>
     * With the function above and the input "Hello Washington", the intent is for
     * classification to be computed from "Hello" only.
     * <p>
     * Note that this base class merely stores the mapper; it never invokes it.
     * Applying the mapper is left to the code that obtains it through
     * {@link #getClassificationValueMapper()}.
     *
     * @param classificationValueMapper the mapper to store
     */
    public void setClassificationValueMapper(SerFunction<Object, ?> classificationValueMapper) {
        this.classificationValueMapper = classificationValueMapper;
    }

    /**
     * Returns the {@link SerFunction} that maps an input to the value used
     * to compute classification.
     *
     * @return the currently stored mapper, or {@code null} if none has been set
     */
    public SerFunction<Object, ?> getClassificationValueMapper() {
        return this.classificationValueMapper;
    }

    /**
     * Returns the simple name of the concrete class followed by ':' and the
     * classification size.
     */
    @Override
    public String toString() {
        StringBuilder description = new StringBuilder(this.getClass().getSimpleName());
        description.append(":").append(this.classificationSize);
        return description.toString();
    }

    /**
     * Delegate to be implemented by sub-classes with the actual classification logic.
     * Invoked by {@link #getClassificationId(Object)}.
     *
     * @param input the value to classify
     * @return the classification id for <i>input</i>
     */
    protected abstract int doGetClassificationId(Object input);
}