package eu.dnetlib.iis.wf.documentssimilarity.converter; import eu.dnetlib.iis.documentssimilarity.schemas.DocumentSimilarity; import org.apache.avro.mapred.AvroKey; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapreduce.Mapper; import org.apache.log4j.Logger; import java.io.IOException; /** * @author Mateusz Fedoryszak (m.fedoryszak@icm.edu.pl) */ public class TsvToAvroMapper extends Mapper<Writable, Text, AvroKey<DocumentSimilarity>, NullWritable> { private static Logger log = Logger.getLogger(TsvToAvroMapper.class); @Override protected void map(Writable ignore, Text data, Context context) throws IOException, InterruptedException { try { String[] fields = data.toString().split("\\t"); DocumentSimilarity similarity = new DocumentSimilarity(); similarity.setDocumentId(fields[0]); similarity.setOtherDocumentId(fields[1]); similarity.setSimilarity(Float.parseFloat(fields[2])); context.write(new AvroKey<DocumentSimilarity>(similarity), NullWritable.get()); } catch (Exception e) { log.error(e); } } }