/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.zebra.types;
import java.util.Iterator;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
/**
 * Generates CSV-format string representations of Zebra tuples for Zebra
 * streaming use.
 *
 * <p>Output is accumulated in an internal {@link StringBuilder} and retrieved
 * with {@link #toString()}. Fields are separated by commas; the
 * {@code isFirst} flag tracks whether the next field needs a leading comma.
 *
 * <p>Instances are obtained through {@link #createCsvZebraTupleOutput()}, which
 * hands out one shared object and resets it on every call. Neither the lazy
 * initialization nor the accumulated state is synchronized.
 */
class CsvZebraTupleOutput {
  /** Accumulates the CSV text; cleared by {@link #reset()}. */
  private final StringBuilder sb;
  /** True when the next field written must not be preceded by a comma. */
  private boolean isFirst = true;
  protected static final Log LOG = LogFactory.getLog(CsvZebraTupleOutput.class);
  /** Shared instance returned by {@link #createCsvZebraTupleOutput()}. */
  private static CsvZebraTupleOutput instance = null;

  /**
   * Encodes a string as a CSV field: a leading single quote followed by the
   * characters of {@code s}, with NUL, line feed, carriage return, comma,
   * {@code '}'} and {@code '%'} replaced by their {@code %XX} escapes.
   *
   * @param s the string to encode; must not be null
   * @return the encoded field text
   */
  String toCSVString(String s) {
    int len = s.length();
    StringBuilder out = new StringBuilder(len + 1);
    out.append('\'');
    for (int i = 0; i < len; i++) {
      char c = s.charAt(i);
      switch (c) {
        case '\0':
          out.append("%00");
          break;
        case '\n':
          out.append("%0A");
          break;
        case '\r':
          out.append("%0D");
          break;
        case ',':
          out.append("%2C");
          break;
        case '}':
          out.append("%7D");
          break;
        case '%':
          out.append("%25");
          break;
        default:
          out.append(c);
      }
    }
    return out.toString();
  }

  /**
   * Encodes a byte array as a CSV field: {@code '#'} followed by
   * {@code buf.toString()}.
   *
   * @param buf the byte array to encode; must not be null
   * @return the encoded field text
   */
  String toCSVBuffer(DataByteArray buf) {
    return new StringBuilder("#").append(buf.toString()).toString();
  }

  /**
   * Appends a comma unless this is the first field since the last reset,
   * tuple end, or bag/map start; afterwards the next field is no longer
   * considered first.
   */
  void printCommaUnlessFirst() {
    if (!isFirst) {
      sb.append(",");
    }
    isFirst = false;
  }

  /** Creates a new instance of CsvZebraTupleOutput */
  private CsvZebraTupleOutput() {
    sb = new StringBuilder();
  }

  /** Discards all accumulated output and marks the next field as first. */
  void reset() {
    sb.setLength(0);
    isFirst = true;
  }

  /**
   * Returns the shared output object, creating it on first use and resetting
   * it on every later call.
   *
   * <p>Because the same object is returned each time, a reference obtained
   * from an earlier call sees its content cleared by the next call. The lazy
   * creation is not synchronized.
   *
   * @return the shared, empty CsvZebraTupleOutput
   */
  static CsvZebraTupleOutput createCsvZebraTupleOutput() {
    if (instance == null) {
      instance = new CsvZebraTupleOutput();
    } else {
      instance.reset();
    }
    return instance;
  }

  /**
   * @return the CSV text accumulated so far
   */
  @Override
  public String toString() {
    return sb.toString();
  }

  /** Writes a byte field using the same decimal form as {@link #writeLong(long)}. */
  void writeByte(byte b) {
    writeLong((long) b);
  }

  /** Writes a boolean field as {@code "T"} or {@code "F"}. */
  void writeBool(boolean b) {
    printCommaUnlessFirst();
    sb.append(b ? "T" : "F");
  }

  /** Writes an int field using the same decimal form as {@link #writeLong(long)}. */
  void writeInt(int i) {
    writeLong((long) i);
  }

  /** Writes a long field in decimal. */
  void writeLong(long l) {
    printCommaUnlessFirst();
    sb.append(l);
  }

  /** Writes a float field after widening it to double. */
  void writeFloat(float f) {
    writeDouble((double) f);
  }

  /** Writes a double field. */
  void writeDouble(double d) {
    printCommaUnlessFirst();
    sb.append(d);
  }

  /** Writes a string field encoded by {@link #toCSVString(String)}. */
  void writeString(String s) {
    printCommaUnlessFirst();
    sb.append(toCSVString(s));
  }

  /** Writes a byte-array field encoded by {@link #toCSVBuffer(DataByteArray)}. */
  void writeBuffer(DataByteArray buf) {
    printCommaUnlessFirst();
    sb.append(toCSVBuffer(buf));
  }

  /** Writes a null field: only the separating comma (if any) is emitted. */
  void writeNull() {
    printCommaUnlessFirst();
  }

  /** Hook called before a bag's tuple is written; emits nothing. */
  void startTuple(Tuple r) {
  }

  /** Terminates a bag's tuple with a newline and marks the next field as first. */
  void endTuple(Tuple r) {
    sb.append("\n");
    isFirst = true;
  }

  /**
   * Appends the CSV representation of every field of a tuple to the
   * accumulated output. Nested tuples are written inline, field by field.
   *
   * <p>An {@link ExecException} raised while reading or writing a field is
   * logged as a warning and processing continues with the next field.
   * NOTE(review): a field that fails this way may contribute no separator, so
   * later fields can shift position — confirm this best-effort behavior is
   * acceptable to consumers.
   *
   * @param r the tuple to write
   */
  void writeTuple(Tuple r) {
    for (int i = 0; i < r.size(); i++) {
      try {
        writeValue(r.get(i));
      } catch (ExecException e) {
        LOG.warn("Exception when CSV format Zebra tuple", e);
      }
    }
  }

  /**
   * Writes the opening of a bag: a separating comma (if needed) followed by
   * {@code "v{"}; the next field is then treated as first.
   */
  void startBag(DataBag bag) {
    printCommaUnlessFirst();
    sb.append("v{");
    isFirst = true;
  }

  /**
   * Writes every tuple of a bag, each bracketed by
   * {@link #startTuple(Tuple)} and {@link #endTuple(Tuple)}.
   *
   * <p>NOTE(review): this method does not call {@link #startBag(DataBag)} or
   * {@link #endBag(DataBag)}, and neither do the callers in this class, so
   * bags are emitted without the {@code "v{"} ... {@code "}"} wrapper —
   * confirm whether that is intended.
   *
   * @param bag the bag whose tuples are written
   */
  void writeBag(DataBag bag) {
    Iterator<Tuple> iter = bag.iterator();
    while (iter.hasNext()) {
      Tuple t = iter.next();
      startTuple(t);
      writeTuple(t);
      endTuple(t);
    }
  }

  /** Writes the closing {@code "}"} of a bag; the next field gets a comma. */
  void endBag(DataBag bag) {
    sb.append("}");
    isFirst = false;
  }

  /**
   * Writes the opening of a map: a separating comma (if needed) followed by
   * {@code "m{"}; the next field is then treated as first.
   */
  void startMap(Map<String, Object> m) {
    printCommaUnlessFirst();
    sb.append("m{");
    isFirst = true;
  }

  /** Writes the closing {@code "}"} of a map; the next field gets a comma. */
  void endMap(Map<String, Object> m) {
    sb.append("}");
    isFirst = false;
  }

  /**
   * Writes the entries of a map as alternating key and value fields. Keys are
   * written as strings; values are written according to their runtime type.
   * The surrounding {@code "m{"} / {@code "}"} are written by
   * {@link #startMap(Map)} and {@link #endMap(Map)}, not by this method.
   *
   * @param m the map whose entries are written
   * @throws ExecException if a value has an unsupported type
   */
  void writeMap(Map<String, Object> m) throws ExecException {
    for (Map.Entry<String, Object> e : m.entrySet()) {
      writeString(e.getKey());
      writeValue(e.getValue());
    }
  }

  /**
   * Writes a single value according to its runtime type; shared by
   * {@link #writeTuple(Tuple)} and {@link #writeMap(Map)}.
   *
   * @param d the value to write; null writes nothing except the separator
   * @throws ExecException if {@code d} is of an unsupported type
   */
  @SuppressWarnings("unchecked") // Map values are cast to Map<String, Object> as the original code did
  private void writeValue(Object d) throws ExecException {
    if (d == null) {
      // if d is null, write nothing except ','
      writeNull();
    } else if (d instanceof Map) {
      Map<String, Object> map = (Map<String, Object>) d;
      startMap(map);
      writeMap(map);
      endMap(map);
    } else if (d instanceof Tuple) {
      writeTuple((Tuple) d);
    } else if (d instanceof DataBag) {
      writeBag((DataBag) d);
    } else if (d instanceof Boolean) {
      writeBool((Boolean) d);
    } else if (d instanceof Byte) {
      writeByte((Byte) d);
    } else if (d instanceof Integer) {
      writeInt((Integer) d);
    } else if (d instanceof Long) {
      writeLong((Long) d);
    } else if (d instanceof Float) {
      writeFloat((Float) d);
    } else if (d instanceof Double) {
      writeDouble((Double) d);
    } else if (d instanceof String) {
      writeString((String) d);
    } else if (d instanceof DataByteArray) {
      writeBuffer((DataByteArray) d);
    } else {
      throw new ExecException("Unknown data type");
    }
  }
}