/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.common.commandline;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
/**
 * Static factory for the command line options used by MinHash clustering.
 *
 * <p>Every factory method returns a pre-configured {@link DefaultOptionBuilder}
 * rather than a finished option, so the caller decides when to build it.
 */
public final class MinhashOptionCreator {

  /** Long name of the option selecting how many hash functions are used. */
  public static final String NUM_HASH_FUNCTIONS = "numHashFunctions";

  /** Long name of the option selecting how many key groups are used. */
  public static final String KEY_GROUPS = "keyGroups";

  /** Long name of the option selecting the hash function type. */
  public static final String HASH_TYPE = "hashType";

  /** Long name of the option selecting the minimum cluster size. */
  public static final String MIN_CLUSTER_SIZE = "minClusterSize";

  /** Long name of the option selecting the minimum vector size. */
  public static final String MIN_VECTOR_SIZE = "minVectorSize";

  /** Long name of the option selecting the number of reduce tasks. */
  public static final String NUM_REDUCERS = "numReducers";

  /** Long name of the option that turns on debug output. */
  public static final String DEBUG_OUTPUT = "debugOutput";

  private MinhashOptionCreator() {
    // utility class: only static members, never instantiated
  }

  /**
   * Shared recipe for the options that are explicitly marked as not required
   * and carry an argument with a default value.
   *
   * <p>The long name doubles as the name of the argument. The long name is
   * registered before the short name, which is the order every public factory
   * method of this class relies on.
   *
   * @param longName     long name of the option, also used as the argument name
   * @param shortName    short name of the option
   * @param defaultValue default value registered on the argument
   * @param description  help text of the option
   * @return the configured builder
   */
  private static DefaultOptionBuilder optionalSingleValueOption(String longName,
                                                                String shortName,
                                                                String defaultValue,
                                                                String description) {
    return new DefaultOptionBuilder()
        .withLongName(longName)
        .withShortName(shortName)
        .withRequired(false)
        .withDescription(description)
        .withArgument(
            new ArgumentBuilder()
                .withName(longName)
                .withDefault(defaultValue)
                // minimum == maximum == 1: the argument takes exactly one value
                .withMinimum(1)
                .withMaximum(1)
                .create());
  }

  /**
   * Returns a default command line option for requesting that whole vectors
   * are written out for debugging. No argument is attached to this option.
   */
  public static DefaultOptionBuilder debugOutputOption() {
    DefaultOptionBuilder debugFlag = new DefaultOptionBuilder()
        .withLongName(DEBUG_OUTPUT)
        .withShortName("debug")
        .withDescription("Output the whole vectors for debugging");
    return debugFlag;
  }

  /**
   * Returns a default command line option for specifying the number of reduce
   * tasks. The argument defaults to 2.
   */
  public static DefaultOptionBuilder numReducersOption() {
    return optionalSingleValueOption(
        NUM_REDUCERS, "r", "2", "The number of reduce tasks. Defaults to 2");
  }

  /**
   * Returns a default command line option for specifying the minimum cluster
   * size in MinHash clustering. The argument defaults to 10.
   */
  public static DefaultOptionBuilder minClusterSizeOption() {
    return optionalSingleValueOption(
        MIN_CLUSTER_SIZE, "mcs", "10", "Minimum points inside a cluster");
  }

  /**
   * Returns a default command line option for specifying the type of hash to
   * use in MinHash clustering. The value should be one of
   * ("linear","polynomial","murmur"); the argument defaults to "murmur".
   */
  public static DefaultOptionBuilder hashTypeOption() {
    return optionalSingleValueOption(
        HASH_TYPE,
        "ht",
        "murmur",
        "Type of hash function to use. Available types: (linear, polynomial, murmur) ");
  }

  /**
   * Returns a default command line option for specifying the minimum size of
   * the vector to hash. The argument defaults to 5.
   */
  public static DefaultOptionBuilder minVectorSizeOption() {
    return optionalSingleValueOption(
        MIN_VECTOR_SIZE, "mvs", "5", "Minimum size of vector to be hashed");
  }

  /**
   * Returns a default command line option for specifying the number of hash
   * functions to be used in MinHash clustering. The argument defaults to 10.
   */
  public static DefaultOptionBuilder numHashFunctionsOption() {
    return optionalSingleValueOption(
        NUM_HASH_FUNCTIONS, "nh", "10", "Number of hash functions to be used");
  }

  /**
   * Returns a default command line option for specifying the number of key
   * groups to be used in MinHash clustering. The argument defaults to 2.
   */
  public static DefaultOptionBuilder keyGroupsOption() {
    return optionalSingleValueOption(
        KEY_GROUPS, "kg", "2", "Number of key groups to be used");
  }
}