package pl.edu.icm.saos.importer.commoncourt.judgment.process;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.springframework.stereotype.Service;
import com.google.common.base.Preconditions;
/**
 * Extracts law journal entry data (year, number, entry and title) from a textual
 * reference to a law journal ("Dz. U.") publication.
 *
 * @author Łukasz Dumiszewski
 */
@Service("lawJournalEntryExtractor")
class LawJournalEntryExtractor {

    /**
     * Matches the opening of the journal reference (e.g. {@code " (Dz. U. z "}), which
     * separates the title from the year/number/entry part. Case of the letters D, z, U
     * is ignored; surrounding whitespace is consumed.
     */
    private static final Pattern TITLE_YEAR_SEPARATOR_PATTERN =
            Pattern.compile("\\s*\\(\\s*[Dd][Zz]\\.\\s*[Uu]\\.\\s*z\\s*");

    /** Four-digit year (starting with 1 or 2) followed by "r", anchored at the start of the text. */
    private static final Pattern YEAR_PATTERN = Pattern.compile("^([12][0-9]{3})\\s*r");

    /** Journal number: "Nr" (any case) followed by digits. */
    private static final Pattern NUMBER_PATTERN = Pattern.compile("[Nn][Rr]\\s*([0-9]+)");

    /** Journal entry: "poz" (any case), optional dots, followed by digits. */
    private static final Pattern ENTRY_PATTERN = Pattern.compile("[Pp][Oo][Zz]\\s*\\.*\\s*([0-9]+)");


    /**
     * Extracts law journal entry data from string of a form like this: <br/>
     * Ustawa z dnia 29 sierpnia 1997 r. o usługach turystycznych (Dz. U. z 1997 r. Nr 133, poz. 884 - art. 11 a; art. 11 a ust. 1; art. 14; art. 14 ust. 6; art. 14 ust. 7)
     *
     * @param lawJournalEntryString text to parse, must not be null
     * @return the extracted data, or null if the string does not contain exactly one
     *         "(Dz. U. z" separator with text on both sides of it, or if the year or number
     *         or entry of the journal cannot be found (or does not fit in an int)
     * @throws NullPointerException if lawJournalEntryString is null
     */
    public LawJournalEntryData extractLawJournalEntry(String lawJournalEntryString) {
        Preconditions.checkNotNull(lawJournalEntryString);

        // Split once on the separator itself; exactly two parts (title, rest) are expected.
        String[] titleYearParts = TITLE_YEAR_SEPARATOR_PATTERN.split(lawJournalEntryString);
        if (titleYearParts.length != 2) {
            return null;
        }

        String title = StringUtils.trim(titleYearParts[0]);
        String yearNumberEntryPart = StringUtils.trim(titleYearParts[1]);
        if (StringUtils.isBlank(yearNumberEntryPart)) {
            return null;
        }

        Integer year = findNumber(YEAR_PATTERN, yearNumberEntryPart);
        Integer number = findNumber(NUMBER_PATTERN, yearNumberEntryPart);
        Integer entry = findNumber(ENTRY_PATTERN, yearNumberEntryPart);
        if (year == null || number == null || entry == null) {
            return null;
        }

        return new LawJournalEntryData(year, number, entry, title);
    }


    //------------------------ PRIVATE --------------------------

    /**
     * Finds the first match of the pattern in the text and returns the digits captured
     * by group 1 as an integer.
     *
     * @return the parsed value, or null if the pattern does not match or the digits
     *         do not fit in an int
     */
    private static Integer findNumber(Pattern pattern, String text) {
        Matcher matcher = pattern.matcher(text);
        if (!matcher.find()) {
            return null;
        }
        try {
            return Integer.valueOf(matcher.group(1));
        } catch (NumberFormatException ignored) {
            // Group 1 holds only ASCII digits, so this can only be an int overflow;
            // treat such a value as "not found" rather than reporting a bogus 0.
            return null;
        }
    }
}