package nl.helixsoft.recordstream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Convert a record stream in long form into wide form.
* The opposite operation is performed by Melt.
* Similar to the cast function of the R reshape package.
* It's also similar to the pivot operation in MS Excel.
*
* Transform a record stream which is in long form:
*
* <pre>
* groupvar colvar value
* group1 col1 1
* group1 col2 2
* group2 col1 3
* group2 col2 4
* </pre>
*
* into wide form
*
* <pre>
* groupvar col1 col2
* group1 1 2
* group2 3 4
* </pre>
*
*
* Note that the incoming recordstream must be sorted by groupVar for this to work properly.
*/
public class Cast extends AbstractRecordStream
{
    /** Upstream record stream in long form. */
    private final RecordStream parent;

    /** Names of the grouping columns; they become the leading columns of the output. */
    private final List<String> groupVar;

    /** Positions of the grouping columns in the parent stream, parallel to {@link #groupVar}. */
    private final int[] groupIdx;

    /** Position in the parent stream of the column whose values become output column names. */
    private final int columnIdx;

    /** Position in the parent stream of the column whose values fill the output cells. */
    private final int valueIdx;

    /** Names of the cast (non-group) output columns, in order of first appearance. */
    private final List<String> outCols = new ArrayList<String>();

    /**
     * Maps a cast column name to its position in {@link #outCols}.
     * This is an offset relative to the cast columns only: the position within an output
     * record is this value plus the number of group variables.
     */
    private final Map<String, Integer> outColIdx = new HashMap<String, Integer>();

    /**
     * Metadata of the output stream. It is built once in the constructor, after the first
     * group has been read, so it lists the group variables followed by the cast columns
     * seen in that first group only.
     */
    private final RecordMetaData rmd;

    /** The wide record that the next call to {@link #getNext()} will return, or null at end of stream. */
    private IndexedRecord nextResult;

    /** Look-ahead record from the parent: the first record of the group that is not yet emitted, or null. */
    private Record next;

    /**
     * Creates a cast over several grouping columns.
     * <p>
     * Only the group columns, the column variable and the value variable are read from the
     * input; any other input columns are quietly ignored.
     * <p>
     * The constructor already reads the first group from the parent. The output metadata is
     * derived from that first group: column values that appear for the first time in a later
     * group are still stored in the records, but are not listed in {@link #getMetaData()}.
     *
     * @param parent incoming recordStream. Note it must be sorted by groupVar for this to work properly.
     * @param _groupVar names of the columns that identify a group (one output record per group)
     * @param columnVar name of the input column whose values become output column names
     * @param valueVar name of the input column whose values fill the output cells
     * @throws StreamException if reading from the parent stream fails
     * @throws IllegalArgumentException if one of the named columns does not exist in the parent stream
     */
    public Cast (RecordStream parent, String[] _groupVar, String columnVar, String valueVar) throws StreamException
    {
        // Index the parent's columns by name so that the requested columns can be resolved.
        Map<String, Integer> idx = new HashMap<String, Integer>();
        for (int i = 0; i < parent.getMetaData().getNumCols(); ++i)
        {
            idx.put (parent.getMetaData().getColumnName(i), i);
        }

        this.parent = parent;
        groupVar = new ArrayList<String>(_groupVar.length);
        groupIdx = new int[_groupVar.length];
        int i = 0;
        for (String g : _groupVar)
        {
            groupVar.add(g);
            groupIdx[i++] = requireColumn (idx, g);
        }
        columnIdx = requireColumn (idx, columnVar);
        valueIdx = requireColumn (idx, valueVar);

        // Prime the look-ahead and assemble the first group, so that the
        // cast columns of that group are known when the metadata is built.
        next = parent.getNext();
        loadNextRecord();

        List<String> colNames = new ArrayList<String>();
        colNames.addAll (groupVar);
        colNames.addAll (outCols);
        rmd = new DefaultRecordMetaData(colNames);
    }

    /**
     * Creates a cast over a single grouping column.
     *
     * @param parent incoming recordStream. Note it must be sorted by groupVar for this to work properly.
     * @param groupVar name of the column that identifies a group
     * @param columnVar name of the input column whose values become output column names
     * @param valueVar name of the input column whose values fill the output cells
     * @throws StreamException if reading from the parent stream fails
     * @throws IllegalArgumentException if one of the named columns does not exist in the parent stream
     */
    public Cast (RecordStream parent, String groupVar, String columnVar, String valueVar) throws StreamException
    {
        this (parent, new String[] { groupVar }, columnVar, valueVar);
    }

    /**
     * Looks up a column position by name, failing with a descriptive message instead of the
     * NullPointerException that unboxing a missing map entry would cause.
     */
    private static int requireColumn (Map<String, Integer> idx, String name)
    {
        Integer pos = idx.get(name);
        if (pos == null)
        {
            throw new IllegalArgumentException(
                "Column '" + name + "' not found in input record stream; available columns: " + idx.keySet());
        }
        return pos.intValue();
    }

    /**
     * Resolves an output column name to its position within an output record.
     * Group variables occupy the leading positions; cast columns follow, shifted by the
     * number of group variables. A group variable takes precedence over a cast column
     * with the same name.
     *
     * @return the position, or -1 if the name is neither a group variable nor a known cast column
     */
    private int outputIndexOf (String name)
    {
        int groupPos = groupVar.indexOf(name);
        if (groupPos >= 0)
        {
            return groupPos;
        }
        Integer castPos = outColIdx.get(name);
        if (castPos == null)
        {
            return -1;
        }
        return castPos.intValue() + groupIdx.length;
    }

    /**
     * One output record in wide form. Values are stored sparsely by output position, so a
     * cast column that did not occur in this record's group simply yields null.
     */
    private class IndexedRecord implements Record
    {
        private final Map<Integer, Object> values = new HashMap<Integer, Object>();

        @Override
        public Object getValue(int i) { return get (i); }

        @Override
        public Object get(int i)
        {
            return values.get(i);
        }

        /** Stores a value at the given output position. */
        public void putValue (int i, Object val)
        {
            values.put (i, val);
        }

        @Override
        public Object getValue(String s) { return get(s); }

        /** Returns the value of the named output column, or null if the name is unknown or unset. */
        @Override
        public Object get(String s)
        {
            int pos = Cast.this.outputIndexOf(s);
            if (pos < 0)
            {
                return null;
            }
            return values.get(pos);
        }

        @Override
        public RecordMetaData getMetaData()
        {
            return Cast.this.rmd;
        }

        /**
         * Sets the value of the named output column.
         *
         * @throws IllegalArgumentException if the name is neither a group variable nor a known cast column
         */
        @Override
        public void set(String s, Object val)
        {
            int pos = Cast.this.outputIndexOf(s);
            if (pos < 0)
            {
                throw new IllegalArgumentException("Unknown column '" + s + "'");
            }
            values.put (pos, val);
        }

        @Override
        public void set(int i, Object val)
        {
            values.put (i, val);
        }
    }

    /**
     * Assembles the next wide record into {@link #nextResult}.
     * <p>
     * Starting from the look-ahead record {@link #next}, consumes all consecutive parent
     * records whose group values (compared as strings) match, and folds them into a single
     * record. Afterwards {@link #next} is the first record of the following group, or null
     * when the parent is exhausted. If there is no look-ahead record, {@link #nextResult}
     * is set to null.
     */
    private void loadNextRecord() throws StreamException
    {
        if (next == null)
        {
            nextResult = null;
            return;
        }

        nextResult = new IndexedRecord();

        // The group values are stringified; the string form is both the
        // comparison key and the value stored in the output record.
        String[] currentGroup = new String[groupIdx.length];
        for (int i = 0; i < groupIdx.length; ++i)
        {
            currentGroup[i] = "" + next.get(groupIdx[i]);
            nextResult.putValue (i, currentGroup[i]);
        }

        do
        {
            String col = "" + next.get(columnIdx);
            Object val = next.get(valueIdx);

            // Register a column name the first time it is seen.
            Integer colPos = outColIdx.get(col);
            if (colPos == null)
            {
                colPos = Integer.valueOf(outCols.size());
                outColIdx.put(col, colPos);
                outCols.add(col);
            }

            // Cast columns come after the group variables. If a column repeats
            // within one group, the later value overwrites the earlier one.
            nextResult.putValue(colPos.intValue() + groupIdx.length, val);

            next = parent.getNext();
        }
        while (next != null && sameGroup(currentGroup));
    }

    /**
     * Tells whether the look-ahead record {@link #next} has the same group values
     * (compared as strings) as the given group. Requires {@link #next} to be non-null.
     */
    private boolean sameGroup(String[] currentGroup)
    {
        for (int i = 0; i < groupIdx.length; ++i)
        {
            if (!currentGroup[i].equals ("" + next.get(groupIdx[i])))
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the next record in wide form, or null when the stream is exhausted.
     * The following group is read from the parent before this method returns.
     */
    @Override
    public Record getNext() throws StreamException
    {
        Record result = nextResult;
        loadNextRecord();
        return result;
    }

    /** Returns the output metadata, which is fixed at construction time. */
    @Override
    public RecordMetaData getMetaData()
    {
        return rmd;
    }

    /** Closes the parent stream. */
    @Override
    public void close() { parent.close(); }
}