/** * Copyright 2014 National University of Ireland, Galway. * * This file is part of the SIREn project. Project and contact information: * * https://github.com/rdelbru/SIREn * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.sindice.siren.util; import java.util.HashMap; import java.util.Map; import java.util.TreeMap; public class IOUtils { /** * Flatten a list of triples to n-tuples containing many objects for the same * subject/predicate pair. Generate one n-tuple per subject/predicate pair. * <br> * This is useful for the document-centric indexing approach. The flatten * representation is more efficient in term of index size than the plain * n-triples approach. * * @param values The list of n-triples. * @return The n-tuples concatenated. */ public static String flattenNTriples(final String[] values) { final Map<String,StringBuilder> map = new HashMap<String, StringBuilder>(); return flattenNTriples(values, map); } /** * Sort and flatten a list of triples to n-tuples containing many objects for * the same subject/predicate pair. Generate one n-tuple per subject/predicate * pair. The tuples are ordered by subject/predicate. * <br> * This is useful for the document-centric indexing approach. The sorted and * flatten representation is generally more efficient in term of index size * than the normal flatten approach. * * @param values The list of n-triples. * @return The n-tuples concatenated. */ public static String sortAndFlattenNTriples(final String[] values) { final Map<String,StringBuilder> map = new TreeMap<String, StringBuilder>(); return flattenNTriples(values, map); } /** * Flatten a list of triples to n-tuples containing many objects for the same * subject/predicate pair. Generate one n-tuple per subject/predicate pair. * <br> * This is useful for the document-centric indexing approach. The flatten * representation is more efficient in term of index size than the plain * n-triples approach. * * @param values The list of n-triples. * @return The n-tuples concatenated. */ private static String flattenNTriples(final String[] values, final Map<String,StringBuilder> map) { for (final String value : values) { if (value != null) { final int firstWhitespace = value.indexOf(' '); final int secondWhitespace = value.indexOf(' ', firstWhitespace + 1); final int lastDot = value.lastIndexOf('.'); if (firstWhitespace == -1 || secondWhitespace == -1 || lastDot == -1) { continue; // probably invalid triple, just skip it } final String key = value.substring(0, secondWhitespace); final String object = value.substring(secondWhitespace, lastDot - 1); StringBuilder tb = map.get(key); if (tb == null) { tb = new StringBuilder(); tb.append(key); map.put(key, tb); } tb.append(object); } } final StringBuilder result = new StringBuilder(); for (final StringBuilder tb : map.values()) { result.append(tb).append('.').append("\n"); } return result.toString(); } }