package ecologylab.bigsemantics.metametadata.fieldparsers; import java.util.HashMap; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; public class FieldParserForBibTeX extends FieldParser { public static final String entryTypeTagName = "@type"; public static final String entryIdTagName = "@key"; static Pattern pEntry = Pattern.compile( "@(\\w+)\\s*\\{(.*)\\}"); static Pattern pString = Pattern.compile( "\"([^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\""); static Map<String, String> emptyMap = new HashMap<String, String>(); @Override public Map<String, String> getKeyValuePairResult(FieldParserElement parserElement, String input) { if (input == null) return emptyMap; return parseBibtexString(input.replaceAll("\\s+", " ").trim()); } private Map<String, String> parseBibtexString(String input) { Map<String, String> rst = new HashMap<String, String>(); Matcher matcher = pEntry.matcher(input); if (matcher.matches() && matcher.groupCount() == 2) { String entryType = matcher.group(1); rst.put(entryTypeTagName, entryType); String contents = matcher.group(2); String[] tags = contents.split(","); if (tags.length >= 1) { String entryId = tags[0].trim(); rst.put(entryIdTagName, entryId); for (int i = 1; i < tags.length; ++i) { String tag = tags[i]; String[] parts = tag.split("="); if (parts.length == 2) { String tagName = parts[0].trim(); String tagValue0 = parts[1].trim(); if (tagValue0.startsWith("\"")) { // detect string concatenation StringBuilder sb = new StringBuilder(); Matcher m = pString.matcher(tagValue0); while (m.find()) { sb.append(m.group(1)); } String tagValue = sb.toString(); rst.put(tagName, tagValue); } else if (tagValue0.startsWith("{")) { // escape from curly braces int len = tagValue0.length(); String tagValue = tagValue0.substring(1, len - 1); rst.put(tagName, tagValue); } else { // treat it as it is rst.put(tagName, tagValue0); } } } } } return rst; } }