package org.apache.blur.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.blur.analysis.type.MultiValuedNotAllowedException;
import org.apache.blur.log.Log;
import org.apache.blur.log.LogFactory;
import org.apache.blur.trace.Trace;
import org.apache.blur.trace.Tracer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.lucene.analysis.Analyzer;
public class HdfsFieldManager extends BaseFieldManager {
private static final List<String> EMPTY_LIST = Arrays.asList(new String[] {});
private static final Log LOG = LogFactory.getLog(HdfsFieldManager.class);
private static final String FIELD_TYPE = "_fieldType_";
private static final String FIELD_LESS_INDEXING = "_fieldLessIndexing_";
private static final String SORTENABLED = "_sortEnabled_";
private static final String MULTI_VALUE_FIELD = "_multiValueField_";
private static final String FAMILY = "_family_";
private static final String COLUMN_NAME = "_columnName_";
private static final String SUB_COLUMN_NAME = "_subColumnName_";
private static final String TYPE_FILE_EXT = ".type";
private static final Lock _lock = new ReentrantReadWriteLock().writeLock();
private final Configuration _configuration;
private final Path _storagePath;
private final FileSystem _fileSystem;
public HdfsFieldManager(String fieldLessField, Analyzer defaultAnalyzerForQuerying, Path storagePath,
Configuration configuration) throws IOException {
this(fieldLessField, defaultAnalyzerForQuerying, storagePath, configuration, true, null, false, null);
}
public HdfsFieldManager(String fieldLessField, Analyzer defaultAnalyzerForQuerying, Path storagePath,
Configuration configuration, boolean strict, String defaultMissingFieldType,
boolean defaultMissingFieldLessIndexing, Map<String, String> defaultMissingFieldProps) throws IOException {
super(fieldLessField, defaultAnalyzerForQuerying, strict, defaultMissingFieldType, defaultMissingFieldLessIndexing,
defaultMissingFieldProps, configuration);
_storagePath = storagePath;
_configuration = configuration;
_fileSystem = _storagePath.getFileSystem(_configuration);
}
@Override
protected List<String> getFieldNamesToLoad() throws IOException {
Tracer trace = Trace.trace("filesystem - getFieldNamesToLoad", Trace.param("storagePath", _storagePath));
try {
if (!_fileSystem.exists(_storagePath)) {
return EMPTY_LIST;
}
FileStatus[] listStatus = _fileSystem.listStatus(_storagePath, new PathFilter() {
@Override
public boolean accept(Path path) {
if (path.getName().endsWith(TYPE_FILE_EXT)) {
return true;
}
return false;
}
});
if (listStatus == null) {
return EMPTY_LIST;
}
List<String> fieldNames = new ArrayList<String>();
for (FileStatus fileStatus : listStatus) {
if (!fileStatus.isDir()) {
String fileName = fileStatus.getPath().getName();
fieldNames.add(fileName.substring(0, fileName.lastIndexOf(TYPE_FILE_EXT)));
}
}
return fieldNames;
} finally {
trace.done();
}
}
@Override
protected boolean tryToStore(FieldTypeDefinition fieldTypeDefinition, String fieldName) throws IOException {
Tracer trace = Trace.trace("filesystem - tryToStore fieldName", Trace.param("fieldName", fieldName),
Trace.param("storagePath", _storagePath));
try {
// Might want to make this a ZK lock
_lock.lock();
try {
String fieldType = fieldTypeDefinition.getFieldType();
boolean fieldLessIndexed = fieldTypeDefinition.isFieldLessIndexed();
boolean sortEnable = fieldTypeDefinition.isSortEnable();
boolean multiValueField = fieldTypeDefinition.isMultiValueField();
LOG.info(
"Attempting to store new field [{0}] with fieldLessIndexing [{1}] with type [{2}] and properties [{3}]",
fieldName, fieldLessIndexed, fieldType, fieldTypeDefinition.getProperties());
Properties properties = new Properties();
setProperty(properties, FAMILY, fieldTypeDefinition.getFamily());
setProperty(properties, FAMILY, fieldTypeDefinition.getFamily());
setProperty(properties, COLUMN_NAME, fieldTypeDefinition.getColumnName());
setProperty(properties, SUB_COLUMN_NAME, fieldTypeDefinition.getSubColumnName());
setProperty(properties, FIELD_LESS_INDEXING, Boolean.toString(fieldLessIndexed));
setProperty(properties, SORTENABLED, Boolean.toString(sortEnable));
setProperty(properties, MULTI_VALUE_FIELD, Boolean.toString(multiValueField));
setProperty(properties, FIELD_TYPE, fieldType);
Map<String, String> props = fieldTypeDefinition.getProperties();
if (props != null) {
for (Entry<String, String> e : props.entrySet()) {
properties.setProperty(e.getKey(), e.getValue());
}
}
Path path = getFieldPath(fieldName);
if (_fileSystem.exists(path)) {
LOG.info("Field [{0}] already exists.", fieldName);
return false;
}
Path tmpPath = new Path(path.getParent(), UUID.randomUUID().toString() + ".tmp");
FSDataOutputStream outputStream = _fileSystem.create(tmpPath, false);
properties.store(outputStream, getComments());
outputStream.close();
if (_fileSystem.rename(tmpPath, path)) {
// @TODO make this configurable
_fileSystem.setReplication(path, (short) 10);
return true;
} else {
_fileSystem.delete(tmpPath, false);
LOG.info("Field [{0}] already exists.", fieldName, fieldLessIndexed, fieldType, props);
return false;
}
} finally {
_lock.unlock();
}
} finally {
trace.done();
}
}
private void setProperty(Properties properties, String key, String value) {
if (value == null) {
return;
}
properties.setProperty(key, value);
}
private Path getFieldPath(String fieldName) {
return new Path(_storagePath, fieldName + TYPE_FILE_EXT);
}
private String getComments() {
return "This file is generated from Apache Blur to store meta data about field types. DO NOT MODIFY!";
}
@Override
protected void tryToLoad(String fieldName) throws IOException {
_lock.lock();
try {
Path path = getFieldPath(fieldName);
if (!_fileSystem.exists(path)) {
return;
}
FSDataInputStream inputStream = _fileSystem.open(path);
Properties properties = new Properties();
properties.load(inputStream);
inputStream.close();
boolean fieldLessIndexing = Boolean.parseBoolean(properties.getProperty(FIELD_LESS_INDEXING));
boolean sortenabled = Boolean.parseBoolean(properties.getProperty(SORTENABLED));
String mvfProp = properties.getProperty(MULTI_VALUE_FIELD);
boolean multiValueField;
if (mvfProp == null || mvfProp.trim().isEmpty()) {
multiValueField = true;
} else {
multiValueField = Boolean.parseBoolean(mvfProp);
}
String fieldType = properties.getProperty(FIELD_TYPE);
Map<String, String> props = toMap(properties);
if (mvfProp == null) {
if (multiValueField && sortenabled) {
// @TODO hack because we use to not have multivalue in the schema
LOG.warn("Changing field [{0}] to be NOT multiValueField.", fieldName);
multiValueField = false;
}
}
FieldTypeDefinition fieldTypeDefinition;
try {
fieldTypeDefinition = newFieldTypeDefinition(fieldName, fieldLessIndexing, fieldType, sortenabled,
multiValueField, props);
} catch (MultiValuedNotAllowedException e) {
if (mvfProp == null) {
multiValueField = false;
fieldTypeDefinition = newFieldTypeDefinition(fieldName, fieldLessIndexing, fieldType, sortenabled,
multiValueField, props);
} else {
throw e;
}
}
fieldTypeDefinition.setFamily(properties.getProperty(FAMILY));
fieldTypeDefinition.setColumnName(properties.getProperty(COLUMN_NAME));
fieldTypeDefinition.setSubColumnName(properties.getProperty(SUB_COLUMN_NAME));
fieldTypeDefinition.setFieldLessIndexed(fieldLessIndexing);
fieldTypeDefinition.setFieldType(properties.getProperty(FIELD_TYPE));
fieldTypeDefinition.setSortEnable(sortenabled);
fieldTypeDefinition.setMultiValueField(multiValueField);
fieldTypeDefinition.setProperties(props);
registerFieldTypeDefinition(fieldName, fieldTypeDefinition);
} finally {
_lock.unlock();
}
}
private Map<String, String> toMap(Properties props) {
Map<String, String> result = new HashMap<String, String>();
for (Entry<Object, Object> e : props.entrySet()) {
result.put(e.getKey().toString(), e.getValue().toString());
}
result.remove(FAMILY);
result.remove(COLUMN_NAME);
result.remove(SUB_COLUMN_NAME);
result.remove(FIELD_TYPE);
result.remove(FIELD_LESS_INDEXING);
result.remove(SORTENABLED);
result.remove(MULTI_VALUE_FIELD);
return result;
}
}