/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.commons.codec.language; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import org.apache.commons.codec.StringEncoderAbstractTest; import org.junit.Test; /** * Series of tests for the Match Rating Approach algorithm. * * General naming nomenclature for the test is of the form: * GeneralMetadataOnTheTestArea_ActualTestValues_ExpectedResult * * An unusual value is indicated by the term "corner case" */ public class MatchRatingApproachEncoderTest extends StringEncoderAbstractTest<MatchRatingApproachEncoder> { // ********** BEGIN REGION - TEST SUPPORT METHODS @Test public final void testAccentRemoval_AllLower_SuccessfullyRemoved() { assertEquals("aeiou", this.getStringEncoder().removeAccents("áéíóú")); } @Test public final void testAccentRemoval_WithSpaces_SuccessfullyRemovedAndSpacesInvariant() { assertEquals("ae io u", this.getStringEncoder().removeAccents("áé íó ú")); } @Test public final void testAccentRemoval_UpperandLower_SuccessfullyRemovedAndCaseInvariant() { assertEquals("AeiOuu", this.getStringEncoder().removeAccents("ÁeíÓuu")); } @Test public final void testAccentRemoval_MixedWithUnusualChars_SuccessfullyRemovedAndUnusualcharactersInvariant() { assertEquals("A-e'i.,o&u", this.getStringEncoder().removeAccents("Á-e'í.,ó&ú")); } @Test public final void testAccentRemoval_GerSpanFrenMix_SuccessfullyRemoved() { assertEquals("aeoußAEOUnNa", this.getStringEncoder().removeAccents("äëöüßÄËÖÜñÑà")); } @Test public final void testAccentRemoval_ComprehensiveAccentMix_AllSuccessfullyRemoved() { assertEquals("E,E,E,E,U,U,I,I,A,A,O,e,e,e,e,u,u,i,i,a,a,o,c", this.getStringEncoder().removeAccents("È,É,Ê,Ë,Û,Ù,Ï,Î,À,Â,Ô,è,é,ê,ë,û,ù,ï,î,à,â,ô,ç")); } @Test public final void testAccentRemovalNormalString_NoChange() { assertEquals("Colorless green ideas sleep furiously", this.getStringEncoder().removeAccents("Colorless green ideas sleep furiously")); } @Test public final void testAccentRemoval_NINO_NoChange() { assertEquals("", this.getStringEncoder().removeAccents("")); } @Test public final void testAccentRemoval_NullValue_ReturnNullSuccessfully() { assertEquals(null, this.getStringEncoder().removeAccents(null)); } @Test public final void testRemoveSingleDoubleConsonants_BUBLE_RemovedSuccessfully() { assertEquals("BUBLE", this.getStringEncoder().removeDoubleConsonants("BUBBLE")); } @Test public final void testRemoveDoubleConsonants_MISSISSIPPI_RemovedSuccessfully() { assertEquals("MISISIPI", this.getStringEncoder().removeDoubleConsonants("MISSISSIPPI")); } @Test public final void testRemoveDoubleDoubleVowel_BEETLE_NotRemoved() { assertEquals("BEETLE", this.getStringEncoder().removeDoubleConsonants("BEETLE")); } @Test public final void testIsVowel_CapitalA_ReturnsTrue() { assertTrue(this.getStringEncoder().isVowel("A")); } @Test public final void testIsVowel_SmallD_ReturnsFalse() { assertFalse(this.getStringEncoder().isVowel("d")); } @Test public final void testRemoveVowel_ALESSANDRA_Returns_ALSSNDR() { assertEquals("ALSSNDR", this.getStringEncoder().removeVowels("ALESSANDRA")); } @Test public final void testRemoveVowel__AIDAN_Returns_ADN() { assertEquals("ADN", this.getStringEncoder().removeVowels("AIDAN")); } @Test public final void testRemoveVowel__DECLAN_Returns_DCLN() { assertEquals("DCLN", this.getStringEncoder().removeVowels("DECLAN")); } @Test public final void testGetFirstLast3__ALEXANDER_Returns_Aleder() { assertEquals("Aleder", this.getStringEncoder().getFirst3Last3("Alexzander")); } @Test public final void testGetFirstLast3_PETE_Returns_PETE() { assertEquals("PETE", this.getStringEncoder().getFirst3Last3("PETE")); } @Test public final void testleftTorightThenRightToLeft_ALEXANDER_ALEXANDRA_Returns4() { assertEquals(4, this.getStringEncoder().leftToRightThenRightToLeftProcessing("ALEXANDER", "ALEXANDRA")); } @Test public final void testleftTorightThenRightToLeft_EINSTEIN_MICHAELA_Returns0() { assertEquals(0, this.getStringEncoder().leftToRightThenRightToLeftProcessing("EINSTEIN", "MICHAELA")); } @Test public final void testGetMinRating_7_Return4_Successfully() { assertEquals(4, this.getStringEncoder().getMinRating(7)); } @Test public final void testGetMinRating_1_Returns5_Successfully() { assertEquals(5, this.getStringEncoder().getMinRating(1)); } @Test public final void testGetMinRating_2_Returns5_Successfully() { assertEquals(5, this.getStringEncoder().getMinRating(2)); } @Test public final void testgetMinRating_5_Returns4_Successfully(){ assertEquals(4, this.getStringEncoder().getMinRating(5)); } @Test public final void testgetMinRating_5_Returns4_Successfully2(){ assertEquals(4, this.getStringEncoder().getMinRating(5)); } @Test public final void testgetMinRating_6_Returns4_Successfully(){ assertEquals(4, this.getStringEncoder().getMinRating(6)); } @Test public final void testgetMinRating_7_Returns4_Successfully(){ assertEquals(4, this.getStringEncoder().getMinRating(7)); } @Test public final void testgetMinRating_8_Returns3_Successfully(){ assertEquals(3, this.getStringEncoder().getMinRating(8)); } @Test public final void testgetMinRating_10_Returns3_Successfully(){ assertEquals(3, this.getStringEncoder().getMinRating(10)); } @Test public final void testgetMinRating_11_Returns_3_Successfully(){ assertEquals(3, this.getStringEncoder().getMinRating(11)); } @Test public final void testGetMinRating_13_Returns_1_Successfully() { assertEquals(1, this.getStringEncoder().getMinRating(13)); } @Test public final void testcleanName_SuccessfullyClean() { assertEquals("THISISATEST", this.getStringEncoder().cleanName("This-ís a t.,es &t")); } @Test public final void testisVowel_SingleVowel_ReturnsTrue() { assertTrue(this.getStringEncoder().isVowel(("I"))); } @Test public final void testisEncodeEquals_CornerCase_SecondNameNothing_ReturnsFalse() { assertFalse(this.getStringEncoder().isEncodeEquals("test", "")); } @Test public final void testisEncodeEquals_CornerCase_FirstNameNothing_ReturnsFalse() { assertFalse(this.getStringEncoder().isEncodeEquals("", "test")); } @Test public final void testisEncodeEquals_CornerCase_SecondNameJustSpace_ReturnsFalse() { assertFalse(this.getStringEncoder().isEncodeEquals("test", " ")); } @Test public final void testisEncodeEquals_CornerCase_FirstNameJustSpace_ReturnsFalse() { assertFalse(this.getStringEncoder().isEncodeEquals(" ", "test")); } @Test public final void testisEncodeEquals_CornerCase_SecondNameNull_ReturnsFalse() { assertFalse(this.getStringEncoder().isEncodeEquals("test", null)); } @Test public final void testisEncodeEquals_CornerCase_FirstNameNull_ReturnsFalse() { assertFalse(this.getStringEncoder().isEncodeEquals(null, "test")); } @Test public final void testisEncodeEquals_CornerCase_FirstNameJust1Letter_ReturnsFalse() { assertFalse(this.getStringEncoder().isEncodeEquals("t", "test")); } @Test public final void testisEncodeEqualsSecondNameJust1Letter_ReturnsFalse() { assertFalse(this.getStringEncoder().isEncodeEquals("test", "t")); } // ***** END REGION - TEST SUPPORT METHODS // ***** BEGIN REGION - TEST GET MRA ENCODING @Test public final void testGetEncoding_HARPER_HRPR() { assertEquals("HRPR", this.getStringEncoder().encode("HARPER")); } @Test public final void testGetEncoding_SMITH_to_SMTH() { assertEquals("SMTH", this.getStringEncoder().encode("Smith")); } @Test public final void testGetEncoding_SMYTH_to_SMYTH() { assertEquals("SMYTH", this.getStringEncoder().encode("Smyth")); } @Test public final void testGetEncoding_Space_to_Nothing() { assertEquals("", this.getStringEncoder().encode(" ")); } @Test public final void testGetEncoding_NoSpace_to_Nothing() { assertEquals("", this.getStringEncoder().encode("")); } @Test public final void testGetEncoding_Null_to_Nothing() { assertEquals("", this.getStringEncoder().encode(null)); } @Test public final void testGetEncoding_One_Letter_to_Nothing() { assertEquals("", this.getStringEncoder().encode("E")); } @Test public final void testCompareNameNullSpace_ReturnsFalseSuccessfully() { assertFalse(getStringEncoder().isEncodeEquals(null, " ")); } @Test public final void testCompareNameSameNames_ReturnsFalseSuccessfully() { assertTrue(getStringEncoder().isEncodeEquals("John", "John")); } // ***** END REGION - TEST GET MRA ENCODING // ***** BEGIN REGION - TEST GET MRA COMPARISONS @Test public final void testCompare_SMITH_SMYTH_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("smith", "smyth")); } @Test public final void testCompare_BURNS_BOURNE_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Burns", "Bourne")); } @Test public final void testCompare_ShortNames_AL_ED_WorksButNoMatch() { assertFalse(this.getStringEncoder().isEncodeEquals("Al", "Ed")); } @Test public final void testCompare_CATHERINE_KATHRYN_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Catherine", "Kathryn")); } @Test public final void testCompare_BRIAN_BRYAN_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Brian", "Bryan")); } @Test public final void testCompare_SEAN_SHAUN_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Séan", "Shaun")); } @Test public final void testCompare_COLM_COLIN_WithAccentsAndSymbolsAndSpaces_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Cólm. ", "C-olín")); } @Test public final void testCompare_STEPHEN_STEVEN_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Stephen", "Steven")); } @Test public final void testCompare_STEVEN_STEFAN_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Steven", "Stefan")); } @Test public final void testCompare_STEPHEN_STEFAN_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Stephen", "Stefan")); } @Test public final void testCompare_SAM_SAMUEL_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Sam", "Samuel")); } @Test public final void testCompare_MICKY_MICHAEL_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Micky", "Michael")); } @Test public final void testCompare_OONA_OONAGH_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Oona", "Oonagh")); } @Test public final void testCompare_SOPHIE_SOFIA_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Sophie", "Sofia")); } @Test public final void testCompare_FRANCISZEK_FRANCES_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Franciszek", "Frances")); } @Test public final void testCompare_TOMASZ_TOM_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Tomasz", "tom")); } @Test public final void testCompare_SmallInput_CARK_Kl_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Kl", "Karl")); } @Test public final void testCompareNameToSingleLetter_KARL_C_DoesNotMatch() { assertFalse(this.getStringEncoder().isEncodeEquals("Karl", "C")); } @Test public final void testCompare_ZACH_ZAKARIA_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Zach", "Zacharia")); } @Test public final void testCompare_KARL_ALESSANDRO_DoesNotMatch() { assertFalse(this.getStringEncoder().isEncodeEquals("Karl", "Alessandro")); } @Test public final void testCompare_Forenames_UNA_OONAGH_ShouldSuccessfullyMatchButDoesNot() { assertFalse(this.getStringEncoder().isEncodeEquals("Úna", "Oonagh")); // Disappointing } // ***** Begin Region - Test Get Encoding - Surnames @Test public final void testCompare_Surname_OSULLIVAN_OSUILLEABHAIN_SuccessfulMatch() { assertTrue(this.getStringEncoder().isEncodeEquals("O'Sullivan", "Ó ' Súilleabháin")); } @Test public final void testCompare_LongSurnames_MORIARTY_OMUIRCHEARTAIGH_DoesNotSuccessfulMatch() { assertFalse(this.getStringEncoder().isEncodeEquals("Moriarty", "OMuircheartaigh")); } @Test public final void testCompare_LongSurnames_OMUIRCHEARTAIGH_OMIREADHAIGH_SuccessfulMatch() { assertTrue(this.getStringEncoder().isEncodeEquals("o'muireadhaigh", "Ó 'Muircheartaigh ")); } @Test public final void testCompare_Surname_COOPERFLYNN_SUPERLYN_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Cooper-Flynn", "Super-Lyn")); } @Test public final void testCompare_Surname_HAILEY_HALLEY_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Hailey", "Halley")); } // **** BEGIN YIDDISH/SLAVIC SECTION **** @Test public final void testCompare_Surname_AUERBACH_UHRBACH_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Auerbach", "Uhrbach")); } @Test public final void testCompare_Surname_MOSKOWITZ_MOSKOVITZ_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Moskowitz", "Moskovitz")); } @Test public final void testCompare_Surname_LIPSHITZ_LIPPSZYC_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("LIPSHITZ", "LIPPSZYC")); } @Test public final void testCompare_Surname_LEWINSKY_LEVINSKI_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("LEWINSKY", "LEVINSKI")); } @Test public final void testCompare_Surname_SZLAMAWICZ_SHLAMOVITZ_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("SZLAMAWICZ", "SHLAMOVITZ")); } @Test public final void testCompare_Surname_ROSOCHOWACIEC_ROSOKHOVATSETS_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("R o s o ch o w a c ie c", " R o s o k ho v a ts e ts")); } @Test public final void testCompare_Surname_PRZEMYSL_PSHEMESHIL_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals(" P rz e m y s l", " P sh e m e sh i l")); } // **** END YIDDISH/SLAVIC SECTION **** @Test public final void testCompare_PETERSON_PETERS_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("Peterson", "Peters")); } @Test public final void testCompare_MCGOWAN_MCGEOGHEGAN_SuccessfullyMatched() { assertTrue(this.getStringEncoder().isEncodeEquals("McGowan", "Mc Geoghegan")); } @Test public final void testCompare_SurnamesCornerCase_MURPHY_Space_NoMatch() { assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", " ")); } @Test public final void testCompare_SurnamesCornerCase_MURPHY_NoSpace_NoMatch() { assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", "")); } @Test public final void testCompare_SurnameCornerCase_Nulls_NoMatch() { assertFalse(this.getStringEncoder().isEncodeEquals(null, null)); } @Test public final void testCompare_Surnames_MURPHY_LYNCH_NoMatchExpected() { assertFalse(this.getStringEncoder().isEncodeEquals("Murphy", "Lynch")); } @Test public final void testCompare_Forenames_SEAN_JOHN_MatchExpected() { assertTrue(this.getStringEncoder().isEncodeEquals("Sean", "John")); } @Test public final void testCompare_Forenames_SEAN_PETE_NoMatchExpected() { assertFalse(this.getStringEncoder().isEncodeEquals("Sean", "Pete")); } @Override protected MatchRatingApproachEncoder createStringEncoder() { return new MatchRatingApproachEncoder(); } // ***** END REGION - TEST GET MRA COMPARISONS }