package quickml.utlities;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import quickml.data.AttributesMap;
import quickml.data.instances.ClassifierInstance;
import quickml.supervised.crossValidation.utils.DateTimeExtractor;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Created by alexanderhawk on 5/16/17.
 *
 * <p>Reads a text file in a LibSVM-like layout, one instance per line:
 * {@code <label> <attribute>:<value> <attribute>:<value> ...}, and turns each line into a
 * {@link ClassifierInstance}.
 */
public class LibSVMFormatReader {

    /**
     * Pattern used to parse the value of the date attribute.
     * NOTE(review): the original author was unsure whether the literal 'T' separator matches
     * the input data -- confirm against real files.
     */
    private static final DateTimeFormatter TIMESTAMP_FORMATTER =
            DateTimeFormat.forPattern("yyyy-MM-dd'T'HH:mm:ss.SSS");

    /**
     * Reads every non-blank line of the file at {@code path} and converts it to an instance.
     *
     * <p>The first whitespace-separated token of a line is parsed as a {@code Double} label.
     * Every following token must have the form {@code attribute:value}. The token whose attribute
     * name equals {@code dateAttribute} is parsed as the instance timestamp and is not added to
     * the attribute map; all other values are stored as {@code Double} when they parse as a
     * number and as {@code String} otherwise.
     *
     * @param path          path of the file to read
     * @param dateAttribute name of the attribute holding the timestamp; if no token carries this
     *                      name the instance is created with a {@code null} timestamp
     * @return the parsed instances, in file order
     * @throws RuntimeException         if the file cannot be read (the {@link IOException} is
     *                                  attached as the cause)
     * @throws NumberFormatException    if a label is not a valid number
     * @throws IllegalArgumentException if a token has no {@code ':'} separator, or the timestamp
     *                                  value does not match the expected pattern
     */
    public List<ClassifierInstance> readLibSVMFormattedInstances(String path, String dateAttribute) {
        List<ClassifierInstance> instances = Lists.newArrayList();
        try (BufferedReader br = new BufferedReader(new FileReader(path))) {
            for (String line; (line = br.readLine()) != null; ) {
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    // Blank lines (e.g. a trailing newline at end of file) carry no instance.
                    continue;
                }
                instances.add(parseInstance(trimmed, dateAttribute));
            }
        } catch (IOException e) {
            // Keep the original exception as the cause so the stack trace is not lost.
            throw new RuntimeException(e.getMessage(), e);
        }
        return instances;
    }

    /** Parses one non-blank, trimmed line into a {@link ClassifierInstance}. */
    private static ClassifierInstance parseInstance(String line, String dateAttribute) {
        // Split on runs of whitespace so repeated spaces or tabs do not yield empty tokens.
        String[] tokens = line.split("\\s+");
        Double label = Double.valueOf(tokens[0]);
        AttributesMap map = AttributesMap.newHashMap();
        DateTime instanceTimeStamp = null;

        for (int i = 1; i < tokens.length; i++) {
            String rawAttributeAndValue = tokens[i];
            // Limit of 2: only the first ':' separates name from value. The timestamp value
            // itself contains ':' characters (HH:mm:ss) and must stay intact.
            String[] attributeAndValue = rawAttributeAndValue.split(":", 2);
            if (attributeAndValue.length < 2) {
                throw new IllegalArgumentException(
                        "Malformed attribute '" + rawAttributeAndValue
                                + "' (expected attribute:value) in line: " + line);
            }
            String attribute = attributeAndValue[0];
            String value = attributeAndValue[1];

            if (attribute.equals(dateAttribute)) {
                instanceTimeStamp = new DateTime(TIMESTAMP_FORMATTER.parseMillis(value));
            } else {
                try {
                    // add numeric variable as Double
                    map.put(attribute, Double.parseDouble(value));
                } catch (NumberFormatException e) {
                    // add categorical variable as String
                    map.put(attribute, value);
                }
            }
        }
        return new ClassifierInstance(map, label, instanceTimeStamp);
    }
}