package com.matrobot.gha.insights.filter;

import java.io.IOException;

import com.matrobot.gha.archive.repo.RepositoryRecord;
import com.matrobot.gha.insights.ml.Dataset;
import com.matrobot.gha.insights.ml.Sample;

/**
 * Builds a regression dataset from three repository archives.
 *
 * <p>Each sample uses the push event counts of a repository in archive 1 and
 * archive 0 as features, and its push event count in archive 2 as the output.
 * NOTE(review): the archives are presumably indexed in the order they are
 * added in the constructor (first = 0, second = 1, third = 2) - confirm
 * against {@code RepositoryArchiveList}.
 */
public class RegressionRepositoryFilter {

	/** A push event count must be strictly greater than this to be included. */
	private static final int MIN_ACTIVITY = 5;

	/** Number of features stored in every sample and declared for the dataset. */
	private static final int FEATURE_COUNT = 2;

	private final RepositoryArchiveList datasets = new RepositoryArchiveList();


	/**
	 * Adds the archives found in the three given files, in the given order.
	 *
	 * @param firstPath file added first
	 * @param secondPath file added second
	 * @param thirdPath file added third
	 * @throws IOException propagated from {@code RepositoryArchiveList.addFromFile}
	 */
	public RegressionRepositoryFilter(String firstPath, String secondPath, String thirdPath)
			throws IOException {

		datasets.addFromFile(firstPath);
		datasets.addFromFile(secondPath);
		datasets.addFromFile(thirdPath);
	}


	/**
	 * Creates the training data, one sample per qualifying repository of archive 1.
	 *
	 * <p>A repository qualifies when its push event count exceeds
	 * {@link #MIN_ACTIVITY} in both archive 1 and archive 2. Repositories that
	 * cannot be looked up in archive 0 or archive 2 are skipped.
	 *
	 * @return dataset of two-feature samples; empty when no repository qualifies
	 */
	public Dataset getDataset() {

		Dataset dataset = new Dataset(FEATURE_COUNT);
		for (RepositoryRecord record : datasets.getDataset(1).values()) {
			RepositoryRecord nextRecord = datasets.findRepository(2, record.repoName);
			RepositoryRecord prevRecord = datasets.findRepository(0, record.repoName);

			// A repository may be absent from a neighbouring archive. This assumes
			// findRepository signals that with null (TODO confirm); without the
			// guard the field accesses below would throw NullPointerException.
			if (nextRecord == null || prevRecord == null) {
				continue;
			}

			if (record.pushEventCount > MIN_ACTIVITY
					&& nextRecord.pushEventCount > MIN_ACTIVITY) {
				dataset.addSample(createSample(record, nextRecord, prevRecord));
			}
		}

		return dataset;
	}


	/**
	 * Builds one sample from the three records of a single repository.
	 *
	 * @param record record from archive 1; provides the sample name and feature 0
	 * @param nextRecord record from archive 2; provides the output value
	 * @param prevRecord record from archive 0; provides feature 1
	 * @return the populated sample
	 */
	private Sample createSample(RepositoryRecord record, RepositoryRecord nextRecord,
			RepositoryRecord prevRecord) {

		Sample sample = new Sample(FEATURE_COUNT);
		sample.name = record.repoName;
		sample.features[0] = record.pushEventCount;
		sample.features[1] = prevRecord.pushEventCount;
		sample.output = nextRecord.pushEventCount;

		return sample;
	}
}