/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.metadata.types;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.StringWriter;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.codehaus.jackson.JsonEncoding;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.annotate.JsonProperty;
import org.codehaus.jackson.map.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.xml.bind.DatatypeConverter;
/**
* Represents metadata for a pipeline. There are two 'levels' of metadata - one that is global to an entire
* dataset, and one that is applicable to each file present in a dataset.
*/
public class GlobalMetadata {
  private static final Logger log = LoggerFactory.getLogger(GlobalMetadata.class);

  // Jackson objects are thread-safe after configuration and expensive to create, so share one
  // instance across all GlobalMetadata objects.
  private static final ObjectMapper objectMapper = new ObjectMapper();
  private static final JsonFactory jsonFactory = new JsonFactory();

  // Sentinel id returned by getId() when there is no dataset- or file-level metadata at all.
  private static final String EMPTY_ID = "0";

  @JsonProperty("dataset")
  private final Map<String, Object> datasetLevel;

  @JsonProperty("file")
  private final Map<String, Map<String, Object>> fileLevel;

  // Lazily computed content-based id (MD5 of the serialized body). Every mutator resets this to
  // null so the next getId() call recomputes it. Also populated by Jackson on deserialization.
  @JsonProperty("id")
  private String cachedId;

  // transient: the immutability flag is runtime-only state and is deliberately excluded from the
  // serialized representation.
  private transient boolean markedImmutable;

  private final static String DATASET_URN_KEY = "Dataset-URN";
  private final static String TRANSFER_ENCODING_KEY = "Transfer-Encoding";
  private final static String CONTENT_TYPE_KEY = "Content-Type";
  private final static String INNER_CONTENT_TYPE_KEY = "Inner-Content-Type";

  /**
   * Create a new, empty, metadata descriptor.
   */
  public GlobalMetadata() {
    this.datasetLevel = new ConcurrentHashMap<>();
    this.fileLevel = new ConcurrentHashMap<>();
    this.markedImmutable = false;
  }

  /**
   * Mark the metadata as immutable. Once this flag is set all attempts to modify the object
   * will fail with {@link UnsupportedOperationException}. This is a one-way transition; there is
   * no way to make the object mutable again.
   */
  public void markImmutable() {
    this.markedImmutable = true;
  }

  /**
   * Return whether {@link #markImmutable()} has been called on this object.
   */
  public boolean isImmutable() {
    return this.markedImmutable;
  }

  /**
   * Create a new GlobalMetadata object from its serialized representation.
   * @param json JSON string as produced by {@link #toJson()}
   * @throws IOException If the JSON string cannot be parsed.
   */
  public static GlobalMetadata fromJson(String json)
      throws IOException {
    return objectMapper.readValue(json, GlobalMetadata.class);
  }

  /**
   * Merge another GlobalMetadata object into this one. All keys from 'other' will be placed into
   * this object, replacing any already existing keys. File-level maps are deep-copied so later
   * changes to 'other' do not leak into this object.
   * @param other Metadata object to add
   * @throws UnsupportedOperationException if this object has been marked immutable
   */
  public void addAll(GlobalMetadata other) {
    throwIfImmutable();
    datasetLevel.putAll(other.datasetLevel);
    for (Map.Entry<String, Map<String, Object>> e : other.fileLevel.entrySet()) {
      // Copy into a fresh map rather than sharing other's inner map instance
      Map<String, Object> val = new ConcurrentHashMap<>();
      val.putAll(e.getValue());
      fileLevel.put(e.getKey(), val);
    }
    // Content changed, so the cached id is stale
    cachedId = null;
  }

  /**
   * Merge default settings into this object. Logic is very similar to addAll(), but Transfer-Encoding gets
   * special treatment; the 'default' transfer-encoding settings are appended to any transfer-encoding
   * already set (vs simply overwriting them).
   *
   * <p>NOTE(review): only dataset-level defaults are merged; file-level entries in 'defaults' are
   * ignored — presumably intentional since defaults are dataset-wide, but confirm with callers.
   * @param defaults Metadata object whose dataset-level entries act as fallback values
   * @throws UnsupportedOperationException if this object is immutable and any default applies
   */
  public void mergeWithDefaults(GlobalMetadata defaults) {
    List<String> defaultTransferEncoding = defaults.getTransferEncoding();
    List<String> myEncoding = getTransferEncoding();
    if (defaultTransferEncoding != null) {
      if (myEncoding == null) {
        setDatasetMetadata(TRANSFER_ENCODING_KEY, defaultTransferEncoding);
      } else {
        // Our encodings come first, then the defaults are appended
        List<String> combinedEncoding = new ArrayList<>();
        combinedEncoding.addAll(myEncoding);
        combinedEncoding.addAll(defaultTransferEncoding);
        setDatasetMetadata(TRANSFER_ENCODING_KEY, combinedEncoding);
      }
    }
    // For every other key, defaults only fill gaps -- existing values win
    for (Map.Entry<String, Object> entry : defaults.datasetLevel.entrySet()) {
      if (!datasetLevel.containsKey(entry.getKey())) {
        setDatasetMetadata(entry.getKey(), entry.getValue());
      }
    }
  }

  /**
   * Serialize as a UTF8 encoded JSON string.
   * @return serialized bytes, including the computed "id" field
   */
  public byte[] toJsonUtf8() {
    try {
      ByteArrayOutputStream bOs = new ByteArrayOutputStream(512);
      try (JsonGenerator generator = jsonFactory.createJsonGenerator(bOs, JsonEncoding.UTF8)
          .setCodec(objectMapper)) {
        toJsonUtf8(generator);
      }
      return bOs.toByteArray();
    } catch (IOException e) {
      // A ByteArrayOutputStream cannot actually fail with IOException
      throw new RuntimeException("Unexpected IOException serializing to ByteArray", e);
    }
  }

  /**
   * Serialize as a String.
   * @throws IOException if JSON generation fails
   */
  public String toJson()
      throws IOException {
    StringWriter writer = new StringWriter();
    try (JsonGenerator generator = jsonFactory.createJsonGenerator(writer)
        .setCodec(objectMapper)) {
      toJsonUtf8(generator);
    }
    return writer.toString();
  }

  /**
   * Write the full object (id + body) to an existing JSON stream.
   */
  protected void toJsonUtf8(JsonGenerator generator) throws IOException {
    generator.writeStartObject();
    generator.writeStringField("id", getId());
    bodyToJsonUtf8(generator);
    generator.writeEndObject();
    generator.flush();
  }

  /**
   * Write the body (dataset- and file-level maps, without the id) out to an existing JSON stream.
   * Also used by {@link #getId()} so that the id is a pure function of the body.
   */
  protected void bodyToJsonUtf8(JsonGenerator generator)
      throws IOException {
    generator.writeObjectField("dataset", datasetLevel);
    generator.writeObjectFieldStart("file");
    for (Map.Entry<String, Map<String, Object>> entry : fileLevel.entrySet()) {
      generator.writeObjectField(entry.getKey(), entry.getValue());
    }
    generator.writeEndObject();
  }

  // Dataset-level metadata

  /**
   * Convenience method to retrieve the Dataset-URN dataset-level property.
   * @return the URN, or null if not set
   */
  public String getDatasetUrn() {
    return (String) datasetLevel.get(DATASET_URN_KEY);
  }

  /**
   * Convenience method to set the Dataset-URN property.
   */
  public void setDatasetUrn(String urn) {
    setDatasetMetadata(DATASET_URN_KEY, urn);
  }

  /**
   * Convenience method to set the Content-Type property.
   */
  public void setContentType(String contentType) {
    setDatasetMetadata(CONTENT_TYPE_KEY, contentType);
  }

  /**
   * Convenience method to retrieve the Content-Type property.
   * @return the content type, or null if not set
   */
  public String getContentType() {
    return (String) getDatasetMetadata(CONTENT_TYPE_KEY);
  }

  /**
   * Convenience method to set the Inner-Content-Type property.
   */
  public void setInnerContentType(String innerContentType) {
    setDatasetMetadata(INNER_CONTENT_TYPE_KEY, innerContentType);
  }

  /**
   * Convenience method to retrieve the Inner-Content-Type property.
   * @return the inner content type, or null if not set
   */
  public String getInnerContentType() {
    return (String) getDatasetMetadata(INNER_CONTENT_TYPE_KEY);
  }

  /**
   * Get an arbitrary dataset-level metadata key.
   * @return the stored value, or null if the key is absent
   */
  public Object getDatasetMetadata(String key) {
    return datasetLevel.get(key);
  }

  /**
   * Set an arbitrary dataset-level metadata key.
   * @throws UnsupportedOperationException if this object has been marked immutable
   */
  public void setDatasetMetadata(String key, Object val) {
    throwIfImmutable();
    datasetLevel.put(key, val);
    cachedId = null;
  }

  /**
   * Convenience method to retrieve the transfer-encodings that have been applied to the dataset.
   *
   * <p>NOTE(review): this returns the internal list instance, not a copy; callers mutating the
   * returned list will change this object's state without resetting the cached id.
   * @return the list of encodings, or null if none have been set
   */
  @SuppressWarnings("unchecked")
  public List<String> getTransferEncoding() {
    return (List<String>) getDatasetMetadata(TRANSFER_ENCODING_KEY);
  }

  /**
   * Convenience method to add a new transfer-encoding to a dataset.
   *
   * <p>Synchronized so two concurrent callers cannot both observe a null list and lose one
   * encoding.
   * @throws UnsupportedOperationException if this object has been marked immutable
   */
  public synchronized void addTransferEncoding(String encoding) {
    throwIfImmutable();
    List<String> encodings = getTransferEncoding();
    if (encodings == null) {
      encodings = new ArrayList<>();
    }
    encodings.add(encoding);
    setDatasetMetadata(TRANSFER_ENCODING_KEY, encodings);
  }

  // File-level metadata

  /**
   * Get an arbitrary file-level metadata key.
   * @return the stored value, or null if either the file or the key is unknown
   */
  public Object getFileMetadata(String file, String key) {
    Map<String, Object> fileKeys = fileLevel.get(file);
    if (fileKeys == null) {
      return null;
    }
    return fileKeys.get(key);
  }

  /**
   * Set an arbitrary file-level metadata key.
   *
   * <p>Synchronized to close a check-then-act race: without the lock two threads could each see
   * no map for 'file', install separate maps, and silently drop one of the writes. This mirrors
   * the locking already used by {@link #addTransferEncoding(String)}.
   * @throws UnsupportedOperationException if this object has been marked immutable
   */
  public synchronized void setFileMetadata(String file, String key, Object val) {
    throwIfImmutable();
    Map<String, Object> fileKeys = fileLevel.get(file);
    if (fileKeys == null) {
      fileKeys = new ConcurrentHashMap<>();
      fileLevel.put(file, fileKeys);
    }
    fileKeys.put(key, val);
    cachedId = null;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    GlobalMetadata that = (GlobalMetadata) o;
    // Two metadata objects are equal iff their content hashes match
    return this.getId().equals(that.getId());
  }

  @Override
  public int hashCode() {
    return getId().hashCode();
  }

  /**
   * Return a content-based id for this object: {@link #EMPTY_ID} when there is no metadata at
   * all, otherwise the hex MD5 digest of the JSON-serialized body. The value is cached until the
   * next mutation.
   */
  public String getId() {
    if (cachedId != null) {
      return cachedId;
    }
    if (datasetLevel.isEmpty() && fileLevel.isEmpty()) {
      cachedId = EMPTY_ID;
      return cachedId;
    }
    try {
      // ID is calculated by serializing body to JSON and then taking that hash
      ByteArrayOutputStream bOs = new ByteArrayOutputStream(512);
      MessageDigest md5Digest = MessageDigest.getInstance("MD5");
      try (JsonGenerator generator = jsonFactory.createJsonGenerator(bOs, JsonEncoding.UTF8).setCodec(objectMapper)) {
        generator.writeStartObject();
        bodyToJsonUtf8(generator);
        generator.writeEndObject();
      }
      byte[] digestBytes = md5Digest.digest(bOs.toByteArray());
      cachedId = DatatypeConverter.printHexBinary(digestBytes);
      return cachedId;
    } catch (IOException | NoSuchAlgorithmException e) {
      // Neither can occur in practice: the stream is in-memory and MD5 is a mandatory algorithm
      throw new RuntimeException("Unexpected exception generating id", e);
    }
  }

  /**
   * Return true if this object contains no dataset- or file-level metadata.
   */
  public boolean isEmpty() {
    return getId().equals(EMPTY_ID);
  }

  /**
   * Guard used by every mutator: reject the modification once markImmutable() has been called.
   */
  private void throwIfImmutable() {
    if (this.markedImmutable) {
      throw new UnsupportedOperationException("Metadata is marked as immutable -- cannot modify");
    }
  }
}