// FieldContentParser.java example
//
// Explorer
// Docear-master
package net.sf.jabref.imports;

import net.sf.jabref.Globals;
import net.sf.jabref.GUIGlobals;


/**
 * Provides the reformatting needed when reading BibTeX fields that were written
 * in JabRef style. The reformatting done by {@link #format(StringBuffer, String)}
 * must undo all formatting applied by {@link #wrap(String, int)} when the same
 * fields are written, so that stored and reloaded fields are not mangled.
 */
public class FieldContentParser {

    /**
     * Performs the reformatting: normalizes line breaks, removes the line wraps
     * inserted by JabRef, joins lines broken by other editors, collapses runs of
     * spaces and strips stray tab characters.
     * (Field-name handling was added by Kuehn/Havalevich.)
     *
     * @param content StringBuffer containing the field content to format. The
     *                buffer passed in is not modified; a copy is worked on.
     * @param key     the name of the field the content belongs to, or
     *                {@code null}. When it equals {@code GUIGlobals.FILE_FIELD},
     *                consecutive spaces are preserved instead of collapsed.
     * @return The formatted field content. NOTE: callers should not rely on the
     *         returned StringBuffer being (or not being) the same object as the
     *         argument given.
     */
    public StringBuffer format(StringBuffer content, String key) {

        int i = 0;

        // Remove windows newlines and insert unix ones:
        // TODO: 2005.12.3: Added replace from \r to \n, to work around a reported
        // problem of words stitched together. But: we need to find out why these
        // lone \r characters appear in the reporter's file.
        content = new StringBuffer(content.toString().replaceAll("\r\n", "\n").replaceAll("\r", "\n"));

        // The buffer is edited in place; i is only advanced when the character at
        // position i has been accepted, so that text shifted into position i by a
        // deletion is examined on the next iteration.
        while (i < content.length()) {

            int c = content.charAt(i);
            if (c == '\n') {
                if ((content.length() > i + 1) && (content.charAt(i + 1) == '\t')
                        && ((content.length() == i + 2) || !Character.isWhitespace(content.charAt(i + 2)))) {
                    // We have either \n\t followed by non-whitespace, or \n\t at the
                    // end. Both cases indicate a wrap made by JabRef. Remove it and
                    // insert a space if necessary.
                    content.deleteCharAt(i); // \n
                    content.deleteCharAt(i); // \t
                    // Add space only if necessary:
                    // Note 2007-05-26, mortenalver: the following line was modified. It
                    // previously didn't add a space if the line break was at i==0. This
                    // caused some occurrences of "string1 # { and } # string2" constructs
                    // to lose the space in front of the "and", because the line wrap
                    // caused a JabRef line break at the start of a value containing the
                    // " and ". The bug was caused by a protective check for i>0 to avoid
                    // indexing char -1 in content.
                    if ((i == 0) || !Character.isWhitespace(content.charAt(i - 1))) {
                        content.insert(i, ' ');
                        // Increment i because of the inserted character:
                        i++;
                    }
                } else if ((content.length() > i + 3) && (content.charAt(i + 1) == '\t')
                        && (content.charAt(i + 2) == ' ')
                        && !Character.isWhitespace(content.charAt(i + 3))) {
                    // We have \n\t followed by ' ' followed by non-whitespace, which
                    // indicates a wrap made by JabRef <= 1.7.1. Remove:
                    content.deleteCharAt(i); // \n
                    content.deleteCharAt(i); // \t
                    // Remove the space (now at position i) only if whitespace precedes it:
                    if ((i > 0) && Character.isWhitespace(content.charAt(i - 1))) {
                        content.deleteCharAt(i);
                    }
                } else if ((content.length() > i + 3) && (content.charAt(i + 1) == '\t')
                        && (content.charAt(i + 2) == '\n') && (content.charAt(i + 3) == '\t')) {
                    // We have \n\t\n\t, which looks like a JabRef-formatted empty line.
                    // Remove the tabs and keep one of the line breaks:
                    content.deleteCharAt(i + 1); // \t
                    content.deleteCharAt(i + 1); // \n
                    content.deleteCharAt(i + 1); // \t
                    // Skip past the line break that was kept:
                    i++;

                    // Now, if more \n\t pairs are following, keep each line break. This
                    // preserves several line breaks properly. Repeat until done:
                    while ((content.length() > i + 1) && (content.charAt(i) == '\n')
                            && (content.charAt(i + 1) == '\t')) {
                        content.deleteCharAt(i + 1); // \t
                        i++;
                    }
                } else if ((content.length() > i + 1) && (content.charAt(i + 1) != '\n')) {
                    // We have a line break not followed by another line break. This is
                    // probably a normal line break made by whatever other editor, so we
                    // will remove the line break.
                    content.deleteCharAt(i);
                    // If the line break is not accompanied by other whitespace we must
                    // add a space. i is deliberately not advanced here; the inserted
                    // space is examined by the next iteration.
                    if (!Character.isWhitespace(content.charAt(i))   // No whitespace after?
                            && (i > 0) && !Character.isWhitespace(content.charAt(i - 1))) { // No whitespace before?
                        content.insert(i, ' ');
                    }
                } else {
                    // Line break at the very end, or followed by another line break: keep it.
                    i++;
                }
            } else if (c == ' ') {
                if ((i > 0) && (content.charAt(i - 1) == ' ')) {
                    // We have two spaces in a row. Normally the second one is dropped,
                    // but in file names it is necessary to keep all spaces, so content
                    // of the file field is left untouched.
                    if (key != null && key.equals(GUIGlobals.FILE_FIELD)) {
                        i++;
                    } else {
                        content.deleteCharAt(i);
                    }
                } else {
                    i++;
                }
            } else if (c == '\t') {
                // Remove all tab characters that aren't associated with a line break.
                content.deleteCharAt(i);
            } else {
                i++;
            }
        }

        return content;
    }

    /**
     * Performs the reformatting without field-specific handling. Equivalent to
     * calling {@link #format(StringBuffer, String)} with a {@code null} key.
     *
     * @param content StringBuffer containing the field to format.
     * @return The formatted field content. NOTE: callers should not rely on the
     *         returned StringBuffer being (or not being) the same object as the
     *         argument given.
     */
    public StringBuffer format(StringBuffer content) {
        return format(content, null);
    }

    /**
     * Formats field contents for output. Must be "symmetric" with
     * {@link #format(StringBuffer, String)}, so stored and reloaded fields are not
     * mangled.
     *
     * The input is split at '\n'. The first line is wrapped as-is. Every later
     * line that contains non-whitespace text is preceded by NEWLINE, tab, NEWLINE,
     * tab and then wrapped; every later line that is blank contributes only
     * NEWLINE and a tab.
     *
     * @param in         the field content to wrap; must not be {@code null}.
     * @param wrapAmount the offset from the start of a line from which the next
     *                   space is searched for and turned into a line wrap.
     * @return the wrapped String; the empty String if {@code in} consists solely
     *         of line breaks.
     */
    public static String wrap(String in, int wrapAmount) {

        String[] lines = in.split("\n");
        // String.split discards trailing empty strings, so input made up only of
        // line breaks (e.g. "\n") produces a zero-length array. There is nothing to
        // wrap in that case, and reading lines[0] would throw.
        if (lines.length == 0) {
            return "";
        }

        StringBuffer res = new StringBuffer();
        addWrappedLine(res, lines[0], wrapAmount);
        for (int i = 1; i < lines.length; i++) {

            if (!lines[i].trim().equals("")) {
                // A line with content: emit the empty-line marker, then the text.
                res.append(Globals.NEWLINE);
                res.append('\t');
                res.append(Globals.NEWLINE);
                res.append('\t');
                addWrappedLine(res, lines[i], wrapAmount);
            } else {
                // A blank line: emit only a line break and a tab.
                res.append(Globals.NEWLINE);
                res.append('\t');
            }
        }
        return res.toString();
    }

    /**
     * Appends a line to the buffer and wraps it by repeatedly replacing the first
     * space found at or after {@code wrapAmount} characters past the current
     * pointer with NEWLINE followed by a tab.
     *
     * @param res        the buffer to append to.
     * @param line       the text to append and wrap.
     * @param wrapAmount the offset added to the pointer before searching for the
     *                   next space.
     */
    private static void addWrappedLine(StringBuffer res, String line, int wrapAmount) {
        // Set our pointer to the beginning of the new line in the StringBuffer:
        int p = res.length();
        // Add the line, unmodified:
        res.append(line);

        while (p < res.length()) {
            int q = res.indexOf(" ", p + wrapAmount);
            if ((q < 0) || (q >= res.length())) {
                break;
            }

            // Replace the space with a line break and a tab:
            res.deleteCharAt(q);
            res.insert(q, Globals.NEWLINE + "\t");
            // Continue from just after the inserted line break:
            p = q + Globals.NEWLINE_LENGTH;
        }
    }

    // NOTE(review): empty nested class with no members; it is not used within this class.
    static class Indents {
    }
}