package eu.dnetlib.iis.wf.documentssimilarity.producer;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.google.common.collect.Lists;
import de.svenjacobs.loremipsum.LoremIpsum;
import eu.dnetlib.iis.common.java.PortBindings;
import eu.dnetlib.iis.common.java.Process;
import eu.dnetlib.iis.common.java.io.DataStore;
import eu.dnetlib.iis.common.java.io.FileSystemPath;
import eu.dnetlib.iis.common.java.porttype.AvroPortType;
import eu.dnetlib.iis.common.java.porttype.PortType;
import eu.dnetlib.iis.documentssimilarity.schemas.DocumentMetadata;
import eu.dnetlib.iis.importer.schemas.Person;
/**
* Produce data stores
*
* @author Mateusz Fedoryszak
*/
public class DocumentAvroDatastoreProducer implements Process {
private final static String documentPort = "document";
public Map<String, PortType> getInputPorts() {
return new HashMap<String, PortType>();
}
@Override
public Map<String, PortType> getOutputPorts() {
return createOutputPorts();
}
private static Map<String, PortType> createOutputPorts() {
HashMap<String, PortType> outputPorts =
new HashMap<String, PortType>();
outputPorts.put(documentPort,
new AvroPortType(DocumentMetadata.SCHEMA$));
return outputPorts;
}
public static Person createPerson(String id, String name) {
return new Person(id, null, null, name);
}
public static List<DocumentMetadata> getDocumentMetadataList() {
DocumentMetadata doc1 = new DocumentMetadata();
doc1.setId("1");
doc1.setTitle("A new method of something");
doc1.setAbstract$("We present a new method of doing something. We are not sure yet what " +
"it is actually doing, but it definitely is a worthwhile technique.");
doc1.setKeywords(Lists.<CharSequence>newArrayList("method", "something", "nothing", "anything"));
doc1.setAuthors(Lists.<Person>newArrayList(createPerson("1", "Jan Kowalski")));
DocumentMetadata doc2 = new DocumentMetadata();
doc2.setId("2");
doc2.setTitle("How to do it?");
doc2.setAbstract$("We are asking some of fundamental engineering questions here. As all " +
"kinds of fundamental questions, they probably have no answers.");
doc2.setKeywords(Lists.<CharSequence>newArrayList(
"doing things", "questioning", "falsificationism", "epistemology"));
doc2.setAuthors(Lists.<Person>newArrayList(createPerson("1", "Jan Kowalski"), createPerson("2", "Zygmunt Nowak")));
DocumentMetadata doc3 = new DocumentMetadata();
doc3.setId("3");
doc3.setTitle("Our great tool");
doc3.setAbstract$("In this paper we present Our great tool that is capable of doing " +
"anything. First theoretical studies have shown its great potential. Practical " +
"applications are to be investigated in the future.");
doc3.setKeywords(Lists.<CharSequence>newArrayList(
"Our great tool", "perpetuum mobile", "stop problem", "P==NP?"));
doc3.setAuthors(Lists.<Person>newArrayList(createPerson("2", "Zygmunt Nowak")));
DocumentMetadata doc4 = new DocumentMetadata();
doc4.setId("4");
doc4.setTitle("Big and great system");
doc4.setAbstract$("Worldwide amount of data is growing every year. That is why ever " +
"bigger and greater systems needs to be built. In this paper we present our biggest " +
"and greatest system so far.");
doc4.setKeywords(Lists.<CharSequence>newArrayList(
"big", "enormous", "great", "grand"));
doc4.setAuthors(Lists.<Person>newArrayList(createPerson("2", "Zygmunt Nowak"), createPerson("1", "Jan Kowalski")));
List<DocumentMetadata> results = Lists.newArrayList(doc1, doc2, doc3, doc4);
// adding dummy records up to 10 in total
LoremIpsum loremIpsum = new LoremIpsum();
Random rand = new Random();
for(int i = 5; i<=200; i++) {
DocumentMetadata doc = new DocumentMetadata();
doc.setId(Integer.toString(i));
doc.setTitle(loremIpsum.getWords(10, rand.nextInt(50)));
doc.setAbstract$(loremIpsum.getWords(50, rand.nextInt(50)));
results.add(doc);
}
return results;
}
@Override
public void run(PortBindings portBindings, Configuration conf,
Map<String, String> parameters) throws IOException {
Map<String, Path> output = portBindings.getOutput();
FileSystem fs = FileSystem.get(conf);
DataStore.create(getDocumentMetadataList(),
new FileSystemPath(fs, output.get(documentPort)));
}
}