CSVHelper.java example

Explorer
ANNIS-master
/*
 * Copyright 2009-2011 Collaborative Research Centre SFB 632 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package annis;

import annis.dao.objects.AnnotatedMatch;
import annis.dao.objects.AnnotatedSpan;
import annis.model.Annotation;
import au.com.bytecode.opencsv.CSVWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 *
 * @author thomas
 */
public class CSVHelper
{
  
  private static final Logger log = LoggerFactory.getLogger(CSVHelper.class);
  
  public static SortedMap<Integer, SortedSet<String>> exportCSVHeader(
    Iterator<AnnotatedMatch> matches, PrintWriter w)
  {
    // figure out what annotations are used at each match position
    SortedMap<Integer, SortedSet<String>> columnsByNodePos = 
      new TreeMap<>();
    while(matches.hasNext())
    {
      AnnotatedMatch match = matches.next();
      for(int j = 0; j < match.size(); ++j)
      {
        AnnotatedSpan span = match.get(j);
        if(columnsByNodePos.get(j) == null)
        {
          columnsByNodePos.put(j, new TreeSet<String>());
        }
        for(Annotation annotation : span.getAnnotations())
        {
          columnsByNodePos.get(j).add("anno_" + annotation.getQualifiedName());
        }

        for(Annotation meta : span.getMetadata())
        {
          columnsByNodePos.get(j).add("meta_" + meta.getQualifiedName());
        }

      }
    }
    
    CSVWriter csvWriter = new CSVWriter(w, '\t', CSVWriter.NO_QUOTE_CHARACTER, '\\');
    // print column names and data types
    int count = columnsByNodePos.keySet().size();
    ArrayList<String> headerLine = new ArrayList<>();
    
    for(int j = 0; j < count; ++j)
    {
      headerLine.add(fullColumnName(j + 1, "id"));
      headerLine.add(fullColumnName(j + 1, "span"));
      
      SortedSet<String> annotationNames = columnsByNodePos.get(j);
      for(String name : annotationNames)
      {
        headerLine.add(fullColumnName(j + 1, name));
      }
    }
    csvWriter.writeNext(headerLine.toArray(new String[headerLine.size()]));
        
    return columnsByNodePos;
  }
  
  public static void exportCSVData(Iterator<AnnotatedMatch> matches,
    SortedMap<Integer, SortedSet<String>> columnsByNodePos, PrintWriter w)
  {
    int count = columnsByNodePos.keySet().size();
      
    // print values
    while(matches.hasNext())
    {
      AnnotatedMatch match = matches.next();
      
      List<String> line = new ArrayList<>();
      int k = 0;
      for(; k < match.size(); ++k)
      {
        AnnotatedSpan span = match.get(k);
        Map<String, String> valueByName = new HashMap<>();

        if(span != null)
        {
          if(span.getAnnotations() != null)
          {
            for(Annotation annotation : span.getAnnotations())
            {
              valueByName.put("anno_" + annotation.getQualifiedName(), annotation.getValue());
            }
          }
          if(span.getMetadata() != null)
          {
            for(Annotation meta : span.getMetadata())
            {
              valueByName.put("meta_" + meta.getQualifiedName(), meta.getValue());
            }
          }

          line.add("" + span.getId());
          line.add(span.getCoveredText().replace("\t", "\\t"));
        }

        for(String name : columnsByNodePos.get(k))
        {
          if(valueByName.containsKey(name))
          {
            line.add(valueByName.get(name).replace("\t", "\\t"));
          }
          else
          {
            line.add("'NULL'");
          }
        }
      }
      for(int l = k; l < count; ++l)
      {
        line.add("'NULL'");
        for(int m = 0; m <= columnsByNodePos.get(l).size(); ++m)
        {
          line.add("'NULL'");
        }
      }
      w.append(StringUtils.join(line, "\t"));
      w.append("\n");
    }
  }
  
 

  private static String fullColumnName(int i, String name)
  {
    return i + "_" + name;
  }
}