package edu.stanford.nlp.trees.ud; import java.util.*; /** * Utility functions for reading and writing CoNLL-U files. * * @author Sebastian Schuster */ public class CoNLLUUtils { /** * Parses the value of the feature column in a CoNLL-U file * and returns them in a HashMap with the feature names as keys * and the feature values as values. * * @param featureString * @return A HashMap<String,String> with the feature values. */ public static HashMap<String,String> parseFeatures(String featureString) { HashMap<String, String> features = new HashMap<>(); if (! featureString.equals("_")) { String[] featValPairs = featureString.split("\\|"); for (String p : featValPairs) { String[] featValPair = p.split("="); features.put(featValPair[0], featValPair[1]); } } return features; } /** * Converts a feature HashMap to a feature string to be used * in a CoNLL-U file. * * @return The feature string. */ public static String toFeatureString(HashMap<String,String> features) { StringBuffer sb = new StringBuffer(); boolean first = true; if (features != null) { List<String> sortedKeys = new ArrayList<>(features.keySet()); Collections.sort(sortedKeys, new FeatureNameComparator()); for (String key : sortedKeys) { if (!first) { sb.append("|"); } else { first = false; } sb.append(key) .append("=") .append(features.get(key)); } } /* Empty feature list. */ if (first) { sb.append("_"); } return sb.toString(); } /** * Parses the value of the extra dependencies column in a CoNLL-U file * and returns them in a HashMap with the governor indices as keys * and the relation names as values. * * @param extraDepsString * @return A HashMap<Integer,String> with the additional dependencies. */ public static HashMap<Integer,String> parseExtraDeps(String extraDepsString) { HashMap<Integer,String> extraDeps = new HashMap<>(); if ( ! extraDepsString.equals("_")) { String[] extraDepParts = extraDepsString.split("\\|"); for (String extraDepString : extraDepParts) { int sepPos = extraDepString.indexOf(":"); String reln = extraDepString.substring(sepPos + 1); Integer gov = Integer.parseInt(extraDepString.substring(0, sepPos)); extraDeps.put(gov, reln); } } return extraDeps; } /** * Converts an extra dependencies hash map to a string to be used * in a CoNLL-U file. * * @param extraDeps * @return The extra dependencies string. */ public static String toExtraDepsString(HashMap<Integer,String> extraDeps) { StringBuffer sb = new StringBuffer(); boolean first = true; if (extraDeps != null) { List<Integer> sortedKeys = new ArrayList<>(extraDeps.keySet()); Collections.sort(sortedKeys); for (Integer key : sortedKeys) { if (!first) { sb.append("|"); } else { first = false; } sb.append(key) .append(":") .append(extraDeps.get(key)); } } /* Empty feature list. */ if (first) { sb.append("_"); } return sb.toString(); } public static class FeatureNameComparator implements Comparator<String> { @Override public int compare(String featureName1, String featureName2) { return featureName1.toLowerCase().compareTo(featureName2.toLowerCase()); } } }