/**
* (C) Copyright IBM Corp. 2010, 2015
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.ibm.bi.dml.runtime.matrix.data;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import com.ibm.bi.dml.runtime.util.FastStringTokenizer;
/**
 * Converter that turns one line of text into a single matrix cell.
 * <p>
 * A data line is tokenized and read as two long indexes followed by one
 * double value. Lines starting with "%" are skipped, and after a line
 * starting with "%%" the next line that does not start with "%" is skipped
 * as well (matrix market support; presumably that line is the size line
 * of the file - confirm against the format in use).
 * <p>
 * At most one cell is available after each call to {@link #convert}; the
 * same {@code Pair} instance is handed out by every successful call to
 * {@link #next()}.
 */
public class TextToBinaryCellConverter
	implements Converter<LongWritable, Text, MatrixIndexes, MatrixCell>
{
	// Output objects are created once and reused for every converted line.
	private MatrixIndexes indexes = new MatrixIndexes();
	private MatrixCell value = new MatrixCell();
	private Pair<MatrixIndexes, MatrixCell> pair = new Pair<MatrixIndexes, MatrixCell>(indexes, value);

	// Tokenizer constructed with ' ' (presumably the token delimiter).
	private FastStringTokenizer st = new FastStringTokenizer(' ');

	// True while a converted cell is waiting to be fetched via next().
	private boolean hasValue = false;

	// True after a "%%" line until the first following non-"%" line was dropped.
	private boolean toIgnore = false;

	/**
	 * Parses the given line and, if it is a data line, stores its indexes
	 * and value in the reusable output pair.
	 *
	 * @param k1 input key; not read by this method
	 * @param v1 text of the line to convert
	 */
	@Override
	public void convert(LongWritable k1, Text v1)
	{
		final String line = v1.toString();

		// handle support for matrix market format
		final boolean comment = line.startsWith("%");
		if( comment || toIgnore ) {
			// A "%" line keeps an already armed skip flag and arms it on "%%";
			// a non-"%" line reaching this point consumes the pending skip.
			toIgnore = comment && (toIgnore || line.startsWith("%%"));
			hasValue = false;
			return;
		}

		// point the tokenizer at the current line
		st.reset( line );

		// convert text to matrix cell: first and second index, then the value
		final long first = st.nextLong();
		final long second = st.nextLong();
		indexes.setIndexes( first, second );

		final double cell = st.nextDouble();
		value.setValue( cell );

		hasValue = true;
	}

	/**
	 * @return true if the last converted line produced a cell that has not
	 *         been fetched yet
	 */
	@Override
	public boolean hasNext() {
		return hasValue;
	}

	/**
	 * Hands out the pending cell, if any, and marks it as consumed.
	 *
	 * @return the reused index/value pair, or null if no cell is pending
	 */
	@Override
	public Pair<MatrixIndexes, MatrixCell> next() {
		final Pair<MatrixIndexes, MatrixCell> result = hasValue ? pair : null;
		hasValue = false;
		return result;
	}

	/**
	 * No-op: this implementation has an empty body and ignores both arguments.
	 */
	@Override
	public void setBlockSize(int rl, int cl)
	{
		// intentionally empty
	}
}