/*
* Copyright 2008
* Richard Eckart de Castilho
* Institut für Sprach- und Literaturwissenschaft
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.api.transform.alignment;
import static org.junit.Assert.assertEquals;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import de.tudarmstadt.ukp.dkpro.core.api.transform.alignment.AlignedString;
import de.tudarmstadt.ukp.dkpro.core.api.transform.alignment.ImmutableInterval;
import de.tudarmstadt.ukp.dkpro.core.api.transform.alignment.Interval;
/**
 * Tests for {@link AlignedString}.
 *
 * <p>Most tests operate on a two-level stack: {@code bottom} wraps the plain {@code baseString}
 * and {@code top} is created on top of {@code bottom}. Edits are mirrored on plain
 * {@link StringBuilder} references and the resulting texts are compared. The resolve tests
 * additionally check how intervals are mapped between the two levels.
 *
 * <p>The tests print the state of both levels to standard output as a debugging aid.
 */
public class AlignedStringTest
{
    /** Plain text the bottom level is created from. */
    private String baseString;

    /** Level created directly from {@link #baseString}. */
    private AlignedString bottom;

    /** Level created on top of {@link #bottom}. */
    private AlignedString top;

    /**
     * Creates a fresh base string and a fresh bottom/top pair before every test and prints a
     * separator line.
     *
     * @throws Exception declared for compatibility; nothing in the body throws a checked
     *         exception.
     */
    @Before
    public void setUp()
        throws Exception
    {
        //            0         1
        //            012345678901
        baseString = "I am a test.";
        bottom = new AlignedString(baseString);
        top = new AlignedString(bottom);
        System.out.println("-------------------------------------------");
    }

    /**
     * Prints a position ruler followed by the base string and the current content and data
     * segments of both levels. Runs after every test; {@link #testResolve3()} also calls it
     * directly to dump intermediate states.
     */
    @After
    public void after()
    {
        System.out.println(" 1 | 2 | 3 | 4 | 5");
        System.out.println(" 012345678901234567890123456789012345678901234567890");
        System.out.println("Base : " + baseString);
        System.out.println("Bottom : " + bottom.get() + " - " + bottom.dataSegmentsToString());
        System.out.println("Top : " + top.get() + " - " + top.dataSegmentsToString());
    }

    /**
     * Prints the content and data segments of the top level followed by those of the bottom
     * level.
     */
    private void printTopAndBottom()
    {
        System.out.println("Top : " + top.get() + " - " + top.dataSegmentsToString());
        System.out.println("Bottom : " + bottom.get() + " - " + bottom.dataSegmentsToString());
    }

    /**
     * An unmodified top level is expected to return the base string.
     */
    @Test
    public void testGet()
    {
        assertEquals(baseString, top.get());
    }

    /**
     * Text inserted into the bottom level is expected to show up in both the bottom and the top
     * level.
     */
    @Test
    public void testInsert()
    {
        final String insertString = "such ";
        final int insertPos = 2;

        bottom.insert(insertPos, insertString);

        final StringBuilder sb = new StringBuilder(baseString);
        sb.insert(insertPos, insertString);

        assertEquals(sb.toString(), bottom.get());
        assertEquals(sb.toString(), top.get());
    }

    /**
     * Removes a hyphenation in three steps on the top level (delete the trailing word fragment,
     * re-insert it before the hyphen, delete hyphen and blank) and then checks that intervals of
     * the bottom level are inverse-resolved to the expected text of the top level.
     */
    @Test
    public void testInsert2()
    {
        //            0         1         2         3
        //            0123456789012345678901234567890
        baseString = "This is a hyphen- ated sentence";
        bottom = new AlignedString(baseString);
        top = new AlignedString(bottom);

        System.out.println("Delete word fragment");
        final String fragment = top.get(18, 22);
        top.delete(18, 22);
        printTopAndBottom();

        System.out.println("Insert word fragment to complete word");
        top.insert(16, fragment);
        printTopAndBottom();

        // The hyphen and the blank after it have been shifted right by the inserted fragment.
        System.out.println("Delete hyphen");
        top.delete(16 + fragment.length(), 18 + fragment.length());
        printTopAndBottom();

        ImmutableInterval uli = new ImmutableInterval(0, 18);
        ImmutableInterval adi = top.inverseResolve(uli);
        System.out.println("ADI : " + top.get(adi.getStart(), adi.getEnd()));
        System.out.println("ULI : " + bottom.get(uli.getStart(), uli.getEnd()));
        assertEquals("This is a hyphenated", top.get(adi.getStart(), adi.getEnd()));

        uli = new ImmutableInterval(18, 31);
        adi = top.inverseResolve(uli);
        System.out.println("ADI : " + top.get(adi.getStart(), adi.getEnd()));
        System.out.println("ULI : " + bottom.get(uli.getStart(), uli.getEnd()));
        assertEquals(" sentence", top.get(adi.getStart(), adi.getEnd()));
    }

    /**
     * This is how you would expect to do hyphenation removal, but it is wrong - use the method
     * used in {@link #testInsert2()}. This here will not work, because AlignedString will try to
     * interpolate the start position of the uli interval (18) within the replaced interval
     * (16-22).
     */
    @Test @Ignore("Wrong method to do hypenation removal")
    public void testInsert3()
    {
        //            0         1         2         3
        //            0123456789012345678901234567890
        baseString = "This is a hyphen- ated sentence";
        bottom = new AlignedString(baseString);
        top = new AlignedString(bottom);

        top.replace(16, 22, "ated");

        ImmutableInterval uli = new ImmutableInterval(18, 31);
        Interval adi = top.inverseResolve(uli);
        System.out.println("ADI : " + top.get(adi.getStart(), adi.getEnd()));
        System.out.println("ULI : " + bottom.get(uli.getStart(), uli.getEnd()));
        assertEquals(" sentence", top.get(adi.getStart(), adi.getEnd()));
    }

    /**
     * Deletes on the bottom level and then on the top level; the top level is expected to reflect
     * both deletions, the bottom level only its own.
     */
    @Test
    public void testDelete_1()
    {
        bottom.delete(2, 5);
        top.delete(2, 4);

        final StringBuilder bottomRef = new StringBuilder(baseString);
        bottomRef.delete(2, 5);
        final StringBuilder topRef = new StringBuilder(bottomRef);
        topRef.delete(2, 4);

        assertEquals(bottomRef.toString(), bottom.get());
        assertEquals(topRef.toString(), top.get());
    }

    /**
     * Deletes on the bottom level and then inserts on the top level; the insertion is expected to
     * be visible on the top level only.
     */
    @Test
    public void testDelete_2()
    {
        bottom.delete(2, 5);
        top.insert(4, "new ");

        final StringBuilder bottomRef = new StringBuilder(baseString);
        bottomRef.delete(2, 5);
        final StringBuilder topRef = new StringBuilder(bottomRef);
        topRef.insert(4, "new ");

        assertEquals(bottomRef.toString(), bottom.get());
        assertEquals(topRef.toString(), top.get());
    }

    /**
     * Two consecutive deletions on the bottom level, the second one directly in front of the
     * first one.
     */
    @Test
    public void testDelete_3()
    {
        bottom.delete(7, 11);
        bottom.delete(6, 7);

        final StringBuilder bottomRef = new StringBuilder(baseString);
        bottomRef.delete(7, 11);
        bottomRef.delete(6, 7);

        assertEquals(bottomRef.toString(), bottom.get());
    }

    /**
     * Like {@link #testDelete_3()}, but the first deletion reaches to the end of the string.
     */
    @Test
    public void testDelete_4()
    {
        final StringBuilder bottomRef = new StringBuilder(baseString);
        bottomRef.delete(7, 12);
        // Mirror the operation under test exactly instead of relying on StringBuilder clamping
        // an out-of-range end index.
        bottomRef.delete(6, 7);

        bottom.delete(7, 12);
        bottom.delete(6, 7);

        assertEquals(bottomRef.toString(), bottom.get());
    }

    /**
     * If we delete and then try to resolve a segment that ends at the start boundary of the
     * deleted segment, we do not want the deleted segment to be included in the resolved
     * interval.
     */
    @Test
    public void testResolve()
    {
        top.delete(4, 7);

        final ImmutableInterval ri = new ImmutableInterval(3, 4);
        final Interval i = top.resolve(ri);

        assertEquals(1, i.getLength());
    }

    /**
     * Deletes the beginning of the top level and replaces the new first character by a longer
     * text; the replacement text is expected to resolve to the interval 5-6 of the bottom level.
     */
    @Test
    public void testResolve2()
    {
        top.delete(0, 5);
        top.replace(0, 1, "I want a");

        final ImmutableInterval ri = new ImmutableInterval(0, 8);
        final Interval i = top.resolve(ri);

        assertEquals(5, i.getStart());
        assertEquals(6, i.getEnd());
    }

    /**
     * Applies two adjacent replacements on the top level, once left-to-right and once
     * right-to-left. In both cases the text of the second replacement ("John") is expected to
     * resolve to the interval 47-65 of the bottom level.
     */
    @Test
    public void testResolve3()
    {
        final String post = "<Post class=\"System\" user=\"11-08-adultsUser12\">11-08-adultsUser13";

        // Left replacement first, then the right one (already shifted to 1-19).
        bottom = new AlignedString(post);
        top = new AlignedString(bottom);
        top.replace(0, 47, " ");
        after();
        top.replace(1, 19, "John");
        after();

        ImmutableInterval ri = new ImmutableInterval(1, 5);
        Interval i = top.resolve(ri);
        assertEquals(47, i.getStart());
        assertEquals(65, i.getEnd());

        // Right replacement first, then the left one.
        bottom = new AlignedString(post);
        top = new AlignedString(bottom);
        top.replace(47, 65, "John");
        after();
        top.replace(0, 47, " ");

        ri = new ImmutableInterval(1, 5);
        i = top.resolve(ri);
        assertEquals(47, i.getStart());
        assertEquals(65, i.getEnd());
    }

    /**
     * Interleaves edits on both levels: delete on bottom, insert on top, insert on bottom. The
     * top level is expected to show all three edits, the bottom level only its own two.
     */
    @Test
    public void testDeleteInsert()
    {
        bottom.delete(2, 5);
        top.insert(4, "new ");
        bottom.insert(8, ", man");

        final StringBuilder bottomRef = new StringBuilder(baseString);
        bottomRef.delete(2, 5);
        bottomRef.insert(8, ", man");
        final StringBuilder topRef = new StringBuilder(bottomRef);
        topRef.insert(4, "new ");

        assertEquals(bottomRef.toString(), bottom.get());
        assertEquals(topRef.toString(), top.get());
    }

    /**
     * A single replacement on the top level is expected to behave like
     * {@link StringBuilder#replace(int, int, String)}.
     */
    @Test
    public void testReplace()
    {
        top.replace(2, 4, "want");

        final StringBuilder topRef = new StringBuilder(baseString);
        topRef.replace(2, 4, "want");

        assertEquals(topRef.toString(), top.get());
    }

    /**
     * Applies two overlapping replacements on the top level, resolves the replaced text to the
     * bottom level and inverse-resolves the result again. The round trip is expected to yield the
     * interval it started with.
     */
    @Test
    public void testReplace2()
    {
        top.replace(2, 4, "want");
        top.replace(4, 8, "nnahave");

        final StringBuilder topRef = new StringBuilder(baseString);
        topRef.replace(2, 4, "want");
        topRef.replace(4, 8, "nnahave");
        assertEquals(topRef.toString(), top.get());

        // Positions 2 to 11 of the top level hold the text "wannahave".
        final ImmutableInterval topInterval = new ImmutableInterval(2, 11);
        final Interval i1 = top.resolve(topInterval);
        assertEquals(2, i1.getStart());
        assertEquals(6, i1.getEnd());

        final Interval i2 = top.inverseResolve(new ImmutableInterval(i1.getStart(), i1.getEnd()));
        final String replaced = top.get(i2.getStart(), i2.getEnd());
        System.out.println("Inverse resolved: " + i2);
        assertEquals("wannahave", replaced);
        assertEquals(i1.getStart(), i2.getStart());
        // The end must match the end of the interval the round trip started with. The original
        // assertion compared i2.getEnd() with itself and could never fail.
        assertEquals(topInterval.getEnd(), i2.getEnd());
    }

    /**
     * Replaces the very first character of the top level.
     *
     * <p>NOTE(review): a commented-out {@code @Ignore} with a FIXME pointing to
     * http://code.google.com/p/dkpro-core-asl/issues/detail?id=50 used to precede this test;
     * the test is active.
     */
    @Test
    public void testReplace3()
    {
        top.replace(0, 1, "i");

        final StringBuilder topRef = new StringBuilder(baseString);
        topRef.replace(0, 1, "i");

        assertEquals(topRef.toString(), top.get());
    }

    /**
     * Replaces the very last character of the top level.
     */
    @Test
    public void testReplace4()
    {
        top.replace(11, 12, "!");

        final StringBuilder topRef = new StringBuilder(baseString);
        topRef.replace(11, 12, "!");

        assertEquals(topRef.toString(), top.get());
    }

    /**
     * Replaces the empty interval of a top level that was built on an empty base string.
     */
    @Test
    public void testReplace5()
    {
        baseString = "";
        bottom = new AlignedString(baseString);
        top = new AlignedString(bottom);

        top.replace(0, 0, "Hello!");

        final StringBuilder topRef = new StringBuilder(baseString);
        topRef.replace(0, 0, "Hello!");

        assertEquals(topRef.toString(), top.get());
    }

    /**
     * Emulates a replacement on the top level by a deletion followed by an insertion at the same
     * position. The bottom level is expected to stay untouched. The resolved and inverse-resolved
     * intervals are only printed, not asserted.
     */
    @Test
    public void testReplace6()
    {
        StringBuilder bottomRef = new StringBuilder(baseString);
        StringBuilder topRef = new StringBuilder(bottomRef);

        top.delete(2, 5);
        topRef.delete(2, 5);
        assertEquals(bottomRef.toString(), bottom.get());
        assertEquals(topRef.toString(), top.get());

        top.insert(2, "was ");
        topRef.insert(2, "was ");
        assertEquals(bottomRef.toString(), bottom.get());
        assertEquals(topRef.toString(), top.get());

        System.out.println("Resolved: " + top.resolve(new ImmutableInterval(2, 5)));
        System.out.println("Inv resolved: " + top.inverseResolve(new ImmutableInterval(2, 5)));
    }

    /**
     * Edits only the bottom level and reads the top level after each edit; the top level is
     * expected to follow every change of the bottom level.
     */
    @Test
    public void testDirty()
    {
        final StringBuilder bottomRef = new StringBuilder(baseString);
        final StringBuilder topRef = new StringBuilder(bottomRef);

        bottom.delete(2, 5);
        bottomRef.delete(2, 5);
        topRef.delete(2, 5);
        assertEquals(bottomRef.toString(), bottom.get());
        assertEquals(topRef.toString(), top.get());

        bottom.insert(8, ", man");
        bottomRef.insert(8, ", man");
        topRef.insert(8, ", man");
        assertEquals(bottomRef.toString(), bottom.get());
        assertEquals(topRef.toString(), top.get());
    }

    /**
     * For a given interval on the underlying data (UL, here {@code bottom}), get the
     * corresponding interval on the level above (AD, here {@code top}).
     *
     * <p>Example:
     * <pre>
     *                 11 11 11 111 12
     *     012 34567 8901 23 45 678 90
     * AD |111|22ZZ2|3333|44|55|YYY|55|
     *
     * UL |111|XX|22|ZZ|2|XXXXX|3333|XX|44|XXXX|5555|XXXX|
     *     012 34 56 78 9 11111 1111 12 22 2222 2223 3333
     *                    01234 5678 90 12 3456 7890 1234
     * </pre>
     *
     * <p>As you can see, a YYY has been inserted in the AD. Apart from that, some parts of the UL
     * (marked "X") have been removed in the AD. Also, a ZZ part has been added to the UL.
     *
     * <p>Inverse-resolving the UL interval getStart()=22 getEnd()=30 ("4XXXX555") should return
     * [13, 20] ("455YYY5").
     *
     * <p>Generally:
     * <ul>
     * <li>if the getStart() is within a deleted region, then find the next oblique segment in AD
     * to the right and return its getStart() position.</li>
     * <li>if the getEnd() is within a deleted region, then find the next oblique segment in AD to
     * the left and return its getEnd() position.</li>
     * </ul>
     *
     * <p>Anchors are always in UL. They are referenced from the ObliqueSegments in AD.
     */
    @Test
    public void testInverseResolve()
    {
        bottom = new AlignedString("111XX222XXXXX3333XX44XXXX5555XXXX");
        bottom.insert(7, "ZZ");
        assertEquals("111XX22ZZ2XXXXX3333XX44XXXX5555XXXX", bottom.get());

        // Remove the "X" runs from right to left so that earlier positions stay valid.
        top = new AlignedString(bottom);
        top.delete(31, 35);
        assertEquals("111XX22ZZ2XXXXX3333XX44XXXX5555", top.get());
        top.delete(23, 27);
        assertEquals("111XX22ZZ2XXXXX3333XX445555", top.get());
        top.delete(19, 21);
        assertEquals("111XX22ZZ2XXXXX3333445555", top.get());
        top.delete(10, 15);
        assertEquals("111XX22ZZ23333445555", top.get());
        top.delete(3, 5);
        assertEquals("11122ZZ23333445555", top.get());
        top.insert(16, "YYY");
        assertEquals("11122ZZ233334455YYY55", top.get());

        final ImmutableInterval uli = new ImmutableInterval(22, 30);
        System.out.println("ULI : " + bottom.get(uli.getStart(), uli.getEnd()));
        final ImmutableInterval adi = top.inverseResolve(uli);
        System.out.println("ADI : " + top.get(adi.getStart(), adi.getEnd()));

        assertEquals(new ImmutableInterval(13, 20), adi);
        assertEquals("455YYY5", top.get(adi.getStart(), adi.getEnd()));
        assertEquals("4XXXX555", bottom.get(uli.getStart(), uli.getEnd()));
    }
}