package eu.dnetlib.iis.wf.importer.infospace.converter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import com.google.common.base.Preconditions;
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
import eu.dnetlib.data.proto.OafProtos.OafEntity;
import eu.dnetlib.iis.importer.schemas.Project;
/**
* {@link OafEntity} containing project details to {@link Project} converter.
*
* @author mhorst
*
*/
public class ProjectConverter implements OafEntityToAvroConverter<Project> {
protected static final Logger log = Logger.getLogger(ProjectConverter.class);
public static final String BLANK_JSONEXTRAINFO = "{}";
private static final Set<String> ACRONYM_SKIP_LOWERCASED_VALUES = new HashSet<String>(
Arrays.asList("undefined", "unknown"));
private final FundingTreeParser fundingTreeParser = new FundingTreeParser();
// ------------------------ LOGIC --------------------------
@Override
public Project convert(OafEntity oafEntity) throws IOException {
Preconditions.checkNotNull(oafEntity);
if (oafEntity.hasProject()) {
eu.dnetlib.data.proto.ProjectProtos.Project sourceProject = oafEntity.getProject();
if (sourceProject.hasMetadata()) {
Project.Builder builder = Project.newBuilder();
builder.setId(oafEntity.getId());
StringField acronym = sourceProject.getMetadata().getAcronym();
if (isAcronymValid(acronym)) {
builder.setProjectAcronym(acronym.getValue());
}
String projectGrantId = sourceProject.getMetadata().getCode().getValue();
if (StringUtils.isNotBlank(projectGrantId)) {
builder.setProjectGrantId(projectGrantId);
}
String jsonExtraInfo = sourceProject.getMetadata().getJsonextrainfo().getValue();
if (StringUtils.isNotBlank(jsonExtraInfo)) {
builder.setJsonextrainfo(jsonExtraInfo);
} else {
builder.setJsonextrainfo(BLANK_JSONEXTRAINFO);
}
String extractedFundingClass = fundingTreeParser.extractFundingClass(
extractStringValues(sourceProject.getMetadata().getFundingtreeList()));
if (StringUtils.isNotBlank(extractedFundingClass)) {
builder.setFundingClass(extractedFundingClass);
}
return isDataValid(builder)?builder.build():null;
} else {
log.error("skipping: no metadata for project " + oafEntity.getId());
return null;
}
} else {
log.error("skipping: no project for entity " + oafEntity.getId());
return null;
}
}
/**
* Verifies whether acronym should be considered as valid.
* @return true if valid, false otherwise
*/
public static boolean isAcronymValid(String acronym) {
return StringUtils.isNotBlank(acronym)
&& !ACRONYM_SKIP_LOWERCASED_VALUES.contains(acronym.trim().toLowerCase());
}
/**
* Checks whether Project builder has all required fields set.
* @return true when all required fields set
*/
public boolean isDataValid(Project.Builder builder) {
return builder.hasFundingClass() || builder.hasProjectAcronym() || builder.hasProjectGrantId();
}
// ------------------------ PRIVATE --------------------------
/**
* Extracts string values from {@link StringField} list.
*/
private static List<String> extractStringValues(List<StringField> source) {
if (CollectionUtils.isNotEmpty(source)) {
List<String> results = new ArrayList<String>(source.size());
for (StringField currentField : source) {
results.add(currentField.getValue());
}
return results;
} else {
return Collections.emptyList();
}
}
/**
* Verifies whether acronym should be considered as valid.
* @return true if valid, false otherwise
*/
private static boolean isAcronymValid(StringField acronym) {
return isAcronymValid(acronym.getValue());
}
}