package com.formulasearchengine.mathosphere.mathpd.contracts; import com.formulasearchengine.mathosphere.mathpd.pojos.ExtractedMathPDDocument; import org.apache.flink.api.common.functions.FlatMapFunction; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.util.Collector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.Base64; /** * Created by felix on 13.01.17. */ public class PreprocessedExtractedMathPDDocumentMapper implements FlatMapFunction<String, Tuple2<String, ExtractedMathPDDocument>> { public static final Charset CHARSET = StandardCharsets.UTF_8; private static final Logger LOGGER = LoggerFactory.getLogger(PreprocessedExtractedMathPDDocumentMapper.class); public static ExtractedMathPDDocument readExtractedMathPDDocumentFromText(String text) { LOGGER.info("text = " + text); ByteArrayInputStream bis = new ByteArrayInputStream(Base64.getDecoder().decode(text)); ObjectInput in = null; try { in = new ObjectInputStream(bis); return (ExtractedMathPDDocument) in.readObject(); } catch (IOException | ClassNotFoundException e) { new RuntimeException(e); } finally { try { if (in != null) { in.close(); } } catch (IOException ex2) { // ignore close exception } } return null; } public static String getFormattedWritableText(ExtractedMathPDDocument doc) { final ByteArrayOutputStream bos = new ByteArrayOutputStream(); ObjectOutputStream oos = null; try { oos = new ObjectOutputStream(bos); oos.writeObject(doc); oos.flush(); return Base64.getEncoder().encodeToString(bos.toByteArray()); } catch (IOException ioe) { throw new RuntimeException(ioe); } finally { try { if (bos != null) bos.close(); if (oos != null) { oos.close(); ; } } catch (IOException oie2) { throw new RuntimeException(oie2); } } } @Override public void flatMap(String s, Collector<Tuple2<String, ExtractedMathPDDocument>> collector) throws Exception { ExtractedMathPDDocument doc = readExtractedMathPDDocumentFromText(s); collector.collect(new Tuple2<>(doc.getName(), doc)); } }