package moviescraper.doctord.controller.releaserenamer; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.List; import java.util.regex.Pattern; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVRecord; import org.apache.commons.lang3.text.WordUtils; public class WebReleaseRenamer extends ReleaseRenamer { private List<CSVRecord> removeTheseWords; private List<CSVRecord> replaceFirstInstanceOfTheseWords; public WebReleaseRenamer() throws IOException { removeTheseWords = readWordsToRemoveFromCSV(); replaceFirstInstanceOfTheseWords = readSiteNamesToReplaceFromCSV(); } @Override public String getCleanName(String filename) { String cleanFileName = filename.toLowerCase(); cleanFileName = replaceSeperatorsWithSpaces(cleanFileName); /* * remove things from the filename which are usually not part of the * scene / movie name such as par2, xvid, divx, etc */ cleanFileName += " "; //add a space at the end so we our regex works in the next step for the last word for(CSVRecord wordsToRemove : removeTheseWords) { //putting spaces in front of it so we only get an actual word, not parts of a word String wordToRemove = wordsToRemove.get(0).toLowerCase(); cleanFileName = cleanFileName.replaceFirst("\\b"+wordToRemove+"\\b", ""); } cleanFileName = cleanFileName.trim(); /* * often times files are released with abbreviations in their name which * messes up doing google searches on them, so we'll do a substitution to get the full name */ boolean doneReplacingabbreviation = false; for(CSVRecord siteNameReplacement : replaceFirstInstanceOfTheseWords) { /* * Our format in this file is that the first word on each line is the full name * of the abbreviation and each subsequent comma seperated entry on the line * is an abbreviation */ String fullSiteName = siteNameReplacement.get(0); //WebReleaseRenamer.System.out.println("FullSiteName = " + fullSiteName); for(String abbreviation : siteNameReplacement) { abbreviation = abbreviation.replace("\"",""); //System.out.println("abbreviation = " + abbreviation.trim().toLowerCase()); if(cleanFileName.startsWith(abbreviation.trim().toLowerCase() + " ") && abbreviation.trim().length() > 0) { //System.out.println("Found match = " + abbreviation.trim().toLowerCase()); cleanFileName = cleanFileName.replaceFirst(Pattern.quote(abbreviation.trim().toLowerCase() + " "), fullSiteName + " "); doneReplacingabbreviation = true; break; //just assume we want to only replace one abbreviaton } //System.out.println("CFN: " + cleanFileName); } if(doneReplacingabbreviation) break; //System.out.println(siteNameReplacement); } //Fix up the case and trim it - not needed for search but it just looks better :) cleanFileName = WordUtils.capitalize(cleanFileName).trim(); return cleanFileName; } public List<CSVRecord> readWordsToRemoveFromCSV() throws IOException { return readFromCSVFile("/moviescraper/doctord/controller/releaserenamer/WordsToRemove.csv"); } public List<CSVRecord> readSiteNamesToReplaceFromCSV() throws IOException{ return readFromCSVFile("/moviescraper/doctord/controller/releaserenamer/SiteNameAbbreviations.csv"); } public List<CSVRecord> readFromCSVFile(String filePath) throws IOException { CSVFormat format = CSVFormat.RFC4180.withDelimiter(',').withCommentMarker('#'); try (InputStream inputStream = getClass().getResourceAsStream(filePath); CSVParser parser = new CSVParser(new InputStreamReader(inputStream), format);) { List<CSVRecord> csvRecords = parser.getRecords(); return csvRecords; } } }