package nl.helixsoft.stats;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import nl.helixsoft.recordstream.DefaultRecord;
import nl.helixsoft.recordstream.DefaultRecordMetaData;
import nl.helixsoft.recordstream.MemoryRecordStream;
import nl.helixsoft.recordstream.Record;
import nl.helixsoft.recordstream.RecordMetaData;
import nl.helixsoft.recordstream.RecordStream;
import nl.helixsoft.stats.impl.AbstractDataFrame;
/**
 * Simple, row-based implementation of {@link DataFrame}; not very optimized.
 * <p>
 * Rows are kept as a list of {@link Record}s. Column names are tracked twice: in a
 * {@link RecordMetaData} (handed to every record created here) and in a {@link Header}.
 * <p>
 * TODO: cut and merge would be much faster with column-based rather than row-based storage.
 * <br>
 * TODO: rename to something more explicit, e.g. RowBoundDataFrame.
 */
public class DefaultDataFrame extends AbstractDataFrame
{
    private List<Record> records;
    private RecordMetaData rmd;
    private Header header;

    /**
     * Single place where the three fields of a new frame are assigned, so that no
     * operation can forget one of them.
     */
    private static DefaultDataFrame newFrame(RecordMetaData rmd, List<Record> records, Header header)
    {
        DefaultDataFrame df = new DefaultDataFrame();
        df.rmd = rmd;
        df.records = records;
        df.header = header;
        return df;
    }

    /**
     * Static constructor-like method that creates a DataFrame by draining all records
     * from a RecordStream.
     *
     * @param input stream to read; its meta data supplies the column names
     * @return a new frame holding every record delivered by {@code input}
     */
    public static DataFrame createFromRecordStream (RecordStream input)
    {
        RecordMetaData metaData = input.getMetaData();
        List<String> names = new ArrayList<String>();
        for (int col = 0; col < metaData.getNumCols(); ++col)
        {
            names.add(metaData.getColumnName(col));
        }
        Header newHeader = new DefaultHeader(names);
        List<Record> rows = new ArrayList<Record>();
        input.into(rows);
        return newFrame(metaData, rows, newHeader);
    }

    /**
     * Creates an empty DataFrame with given header.
     *
     * @param header the column names, in column order
     * @return a new frame without rows
     * @deprecated use DataFrameOperation.createWithHeader instead
     */
    @Deprecated
    public static DataFrame createWithHeader (String... header)
    {
        return newFrame(
                new DefaultRecordMetaData(header),
                new ArrayList<Record>(),
                new DefaultHeader(Arrays.asList(header)));
    }

    /**
     * {@inheritDoc}
     * <p>
     * The result references the same Record objects, meta data and header as this frame;
     * nothing is copied.
     */
    @Override
    public DataFrame select(int... rowIdx)
    {
        List<Record> selected = new ArrayList<Record>(rowIdx.length);
        for (int idx : rowIdx)
        {
            selected.add(records.get(idx));
        }
        return newFrame(rmd, selected, header);
    }

    /**
     * {@inheritDoc}
     * <p>
     * A {@code null} entry in {@code rowIndexes} yields a row whose fields are all
     * {@code null}. Other rows, the meta data and the header are shared with this frame.
     */
    @Override
    public DataFrame select(List<Integer> rowIndexes)
    {
        List<Record> selected = new ArrayList<Record>();
        for (Integer idx : rowIndexes)
        {
            if (idx == null)
            {
                selected.add(new DefaultRecord(rmd, new Object[header.size()]));
            }
            else
            {
                selected.add(records.get(idx));
            }
        }
        return newFrame(rmd, selected, header);
    }

    /**
     * Name-based variant of {@link #cut(int...)}: each name is translated to its column
     * index through the meta data, then the index-based cut is applied.
     *
     * @param columnName names of the columns to keep, in the desired order
     * @return a new frame containing only the requested columns
     */
    @Override
    public DataFrame cut(String... columnName)
    {
        int[] colIdx = new int[columnName.length];
        for (int i = 0; i < columnName.length; ++i)
        {
            colIdx[i] = rmd.getColumnIndex(columnName[i]);
        }
        return cut(colIdx);
    }

    /**
     * {@inheritDoc}
     * <p>
     * Builds new records (with new meta data and header) holding the selected fields of
     * every row, in the order given by {@code columnIdx}.
     */
    @Override
    public DataFrame cut(int... columnIdx)
    {
        String[] colNames = new String[columnIdx.length];
        for (int i = 0; i < columnIdx.length; ++i)
        {
            colNames[i] = rmd.getColumnName(columnIdx[i]);
        }
        RecordMetaData newRmd = new DefaultRecordMetaData(colNames);

        List<Record> newRecords = new ArrayList<Record>(records.size());
        for (Record r : records)
        {
            Object[] fields = new Object[columnIdx.length];
            for (int i = 0; i < columnIdx.length; ++i)
            {
                fields[i] = r.get(columnIdx[i]);
            }
            newRecords.add(new DefaultRecord(newRmd, fields));
        }
        return newFrame(newRmd, newRecords, new DefaultHeader(Arrays.asList(colNames)));
    }

    /**
     * {@inheritDoc}
     * <p>
     * Implemented as a full outer join: the result has one row per distinct key found in
     * either frame. The first result column is the join key (named after this frame's
     * join column), followed by the remaining columns of this frame and then the remaining
     * columns of {@code _other}. Fields from a frame that lacks the key are {@code null}.
     * <p>
     * Keys are compared with {@code equals}/{@code hashCode} on the raw field values of
     * both frames. If a key occurs in several rows of one frame, the last such row is used.
     * The order of the result rows follows the iteration order of a {@link HashSet} and is
     * therefore unspecified.
     *
     * @throws UnsupportedOperationException if {@code _other} is not a DefaultDataFrame
     */
    @Override
    public DataFrame merge(DataFrame _other, int onColumn, int onOtherColumn)
    {
        if (!(_other instanceof DefaultDataFrame))
        {
            //TODO - this really should be implemented for all possible values, but currently not enough methods exposed in DataFrame interface.
            throw new UnsupportedOperationException("Not implemented yet. Only supported if other is a DefaultDataFrame.");
        }
        DefaultDataFrame other = (DefaultDataFrame)_other;

        int numCols = rmd.getNumCols();
        int otherNumCols = other.rmd.getNumCols();
        // the join column appears only once in the result
        int newColNum = numCols + otherNumCols - 1;

        String[] colNames = new String[newColNum];
        // first column will be join column.
        colNames[0] = rmd.getColumnName(onColumn);
        int pos = 1;
        for (int i = 0; i < numCols; ++i)
        {
            if (i != onColumn)
            {
                colNames[pos++] = rmd.getColumnName(i);
            }
        }
        for (int i = 0; i < otherNumCols; ++i)
        {
            if (i != onOtherColumn)
            {
                colNames[pos++] = other.rmd.getColumnName(i);
            }
        }
        RecordMetaData newRmd = new DefaultRecordMetaData(colNames);

        // Index both sides by the raw key value. Both sides must use the same key
        // representation, otherwise equal keys of a non-String type would never match.
        Map<Object, Record> index = new HashMap<Object, Record>();
        Map<Object, Record> otherIndex = new HashMap<Object, Record>();
        Set<Object> allKeys = new HashSet<Object>();
        for (Record r : records)
        {
            Object key = r.get(onColumn);
            allKeys.add(key);
            index.put(key, r);
        }
        for (Record r : other.records)
        {
            Object key = r.get(onOtherColumn);
            allKeys.add(key);
            otherIndex.put(key, r);
        }

        List<Record> newRecords = new ArrayList<Record>(allKeys.size());
        for (Object key : allKeys)
        {
            Object[] fields = new Object[newColNum];
            fields[0] = key;
            int fpos = 1;

            Record r = index.get(key);
            for (int i = 0; i < numCols; ++i)
            {
                if (i != onColumn)
                {
                    fields[fpos++] = (r == null) ? null : r.get(i);
                }
            }

            Record otherR = otherIndex.get(key);
            for (int i = 0; i < otherNumCols; ++i)
            {
                if (i != onOtherColumn)
                {
                    fields[fpos++] = (otherR == null) ? null : otherR.get(i);
                }
            }
            newRecords.add(new DefaultRecord(newRmd, fields));
        }
        return newFrame(newRmd, newRecords, new DefaultHeader(Arrays.asList(colNames)));
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns a freshly built list of the names held in the meta data.
     */
    @Override
    public List<String> getColumnNames()
    {
        List<String> result = new ArrayList<String>();
        for (int i = 0; i < rmd.getNumCols(); ++i)
        {
            result.add(rmd.getColumnName(i));
        }
        return result;
    }

    /** Looks up the index of the named column in the meta data. */
    @Override
    public int getColumnIndex(String columnName)
    {
        return rmd.getColumnIndex(columnName);
    }

    /** Wraps the internal record list in a {@link MemoryRecordStream}. */
    @Override
    public RecordStream asRecordStream()
    {
        return new MemoryRecordStream(records);
    }

    /** Returns the number of records currently held. */
    @Override
    public int getRowCount()
    {
        return records.size();
    }

    /** Returns the number of columns according to the meta data. */
    @Override
    public int getColumnCount()
    {
        return rmd.getNumCols();
    }

    /** Returns the name of the given column according to the meta data. */
    @Override
    public String getColumnName(int columnIndex)
    {
        return rmd.getColumnName(columnIndex);
    }

    /** Returns the name of the given column according to the header. */
    @Override
    public Object getColumnHeader(int colIx)
    {
        return header.getColumnName(colIx);
    }

    /** Returns the header object of this frame itself (not a copy). */
    @Override
    public Header getColumnHeader()
    {
        return header;
    }

    /** Returns the field at the given row and column. */
    @Override
    public Object getValueAt(int rowIndex, int columnIndex)
    {
        return records.get(rowIndex).get(columnIndex);
    }

    /** Overwrites the field at the given row and column in the stored record. */
    @Override
    public void setValueAt(Object aValue, int rowIndex, int columnIndex)
    {
        records.get(rowIndex).set(columnIndex, aValue);
    }

    /**
     * Returns a new frame consisting of all columns of this frame plus {@code column}
     * appended as the last column. This frame is not modified.
     * <p>
     * The new column is named {@code "cbind_" + n}, where n is the column count of the
     * result.
     *
     * @param column values for the new column, one per row
     * @return a new frame with one extra column
     * @throws IllegalArgumentException if {@code column} does not have exactly one
     *         element per row of this frame
     */
    @Override
    public <T> DataFrame cbind(List<T> column)
    {
        if (column.size() != records.size()) throw new IllegalArgumentException ("DataFrame has " + records.size() + " rows but trying to add column of size " + column.size());

        int oldColNum = rmd.getNumCols();
        int newColNum = oldColNum + 1;

        String[] colNames = new String[newColNum];
        for (int i = 0; i < oldColNum; ++i)
        {
            colNames[i] = rmd.getColumnName(i);
        }
        colNames[oldColNum] = "cbind_" + newColNum;
        RecordMetaData newRmd = new DefaultRecordMetaData(colNames);

        List<Record> newRecords = new ArrayList<Record>(records.size());
        for (int row = 0; row < records.size(); ++row)
        {
            Record r = records.get(row);
            Object[] fields = new Object[newColNum];
            for (int i = 0; i < oldColNum; ++i)
            {
                fields[i] = (r == null) ? null : r.get(i);
            }
            fields[oldColNum] = column.get(row);
            newRecords.add(new DefaultRecord(newRmd, fields));
        }
        // The header must be set as well, otherwise header-based accessors fail on the result.
        return newFrame(newRmd, newRecords, new DefaultHeader(Arrays.asList(colNames)));
    }

    /** Returns the internal record list itself (not a copy). */
    @Override
    public Iterable<Record> asRecordIterable()
    {
        return records;
    }

    /** Returns the stored record at the given row index. */
    @Override
    public Record getRow(int rowIdx)
    {
        return records.get(rowIdx);
    }

    /** Returns the meta data object of this frame itself (not a copy). */
    @Override
    public RecordMetaData getMetaData()
    {
        return rmd;
    }

    /**
     * Appends a row to this frame in place.
     *
     * @param row the field values, one per column
     * @return this frame
     * @throws IllegalArgumentException if the number of values differs from the number
     *         of columns
     */
    @Override
    public DataFrame rbind(Object... row)
    {
        if (row.length != rmd.getNumCols()) throw new IllegalArgumentException ("DataFrame has " + rmd.getNumCols() + " columns but trying to add row of size " + row.length);
        records.add(new DefaultRecord(rmd, row));
        return this;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    public List<String> getRowNames()
    {
        // TODO Auto-generated method stub
        return null;
    }

    /** Not implemented yet: always returns {@code null}. */
    @Override
    public String getRowName(int rowIx)
    {
        // TODO Auto-generated method stub
        return null;
    }

    /**
     * Renames a column in both the header and the meta data of this frame.
     *
     * @param colIx index of the column to rename
     * @param value the new name
     * @return this frame
     */
    @Override
    public DataFrame setColumnHeader(int colIx, String value)
    {
        header.set(colIx, value);
        rmd.setColumnName(colIx, value);
        return this;
    }
}