IOUtils.java example

Explorer
siren-master
/**
 * Copyright 2014 National University of Ireland, Galway.
 *
 * This file is part of the SIREn project. Project and contact information:
 *
 *  https://github.com/rdelbru/SIREn
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.sindice.siren.util;

import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;

public class IOUtils {

  /**
   * Flattens a list of triples into n-tuples containing many objects for the
   * same subject/predicate pair. Generates one n-tuple per subject/predicate
   * pair.
   * <br>
   * This is useful for the document-centric indexing approach. The flattened
   * representation is more efficient in terms of index size than the plain
   * n-triples approach.
   * <br>
   * The tuples are collected in a {@link HashMap}, so the order of the tuples
   * in the returned string is unspecified. Use
   * {@link #sortAndFlattenNTriples(String[])} for a deterministic order.
   *
   * @param values The list of n-triples. {@code null} entries and entries
   *               that do not look like a triple are skipped.
   * @return The n-tuples concatenated, one per line, each terminated by
   *         {@code ".\n"}.
   * @throws NullPointerException if {@code values} itself is {@code null}
   */
  public static String flattenNTriples(final String[] values) {
    final Map<String,StringBuilder> map = new HashMap<String, StringBuilder>();
    return flattenNTriples(values, map);
  }

  /**
   * Sorts and flattens a list of triples into n-tuples containing many objects
   * for the same subject/predicate pair. Generates one n-tuple per
   * subject/predicate pair. The tuples are ordered by the natural
   * {@link String} ordering of their subject/predicate prefix.
   * <br>
   * This is useful for the document-centric indexing approach. The sorted and
   * flattened representation is generally more efficient in terms of index
   * size than the normal flattened approach.
   *
   * @param values The list of n-triples. {@code null} entries and entries
   *               that do not look like a triple are skipped.
   * @return The n-tuples concatenated, one per line, each terminated by
   *         {@code ".\n"}.
   * @throws NullPointerException if {@code values} itself is {@code null}
   */
  public static String sortAndFlattenNTriples(final String[] values) {
    final Map<String,StringBuilder> map = new TreeMap<String, StringBuilder>();
    return flattenNTriples(values, map);
  }

  /**
   * Flattens a list of triples into n-tuples containing many objects for the
   * same subject/predicate pair, using the given map to group the objects.
   * <br>
   * Each triple is split on its first two space characters: everything before
   * the second space is the subject/predicate key, everything between the
   * second space and the last dot (minus trailing whitespace) is the object.
   * Entries that are {@code null}, that contain fewer than two spaces, or
   * whose last dot does not come after the predicate are considered invalid
   * and are skipped.
   *
   * @param values The list of n-triples.
   * @param map The map used to accumulate one tuple per subject/predicate
   *            key. Its iteration order determines the order of the tuples in
   *            the result.
   * @return The n-tuples concatenated, one per line, each terminated by
   *         {@code ".\n"}.
   */
  private static String flattenNTriples(final String[] values, final Map<String,StringBuilder> map) {
    for (final String value : values) {
      if (value == null) {
        continue;
      }
      final int firstWhitespace = value.indexOf(' ');
      final int secondWhitespace = value.indexOf(' ', firstWhitespace + 1);
      final int lastDot = value.lastIndexOf('.');
      if (firstWhitespace == -1 || secondWhitespace == -1 || lastDot == -1) {
        continue; // probably invalid triple, just skip it
      }
      if (lastDot < secondWhitespace) {
        // The only dots are inside the subject or predicate (e.g. in a URI):
        // the terminating dot is missing, so there is no object to extract.
        continue;
      }
      // Drop the whitespace (if any) between the object and the terminating
      // dot. Do not assume exactly one character is there: "<o>." is legal.
      int objectEnd = lastDot;
      while (objectEnd > secondWhitespace
          && Character.isWhitespace(value.charAt(objectEnd - 1))) {
        objectEnd--;
      }
      final String key = value.substring(0, secondWhitespace);
      // The object keeps its leading space so it can be appended as is.
      final String object = value.substring(secondWhitespace, objectEnd);
      StringBuilder tb = map.get(key);
      if (tb == null) {
        tb = new StringBuilder();
        tb.append(key);
        map.put(key, tb);
      }
      tb.append(object);
    }
    final StringBuilder result = new StringBuilder();
    for (final StringBuilder tb : map.values()) {
      result.append(tb).append('.').append("\n");
    }
    return result.toString();
  }

}