package org.basex.util;

import static org.basex.util.Token.*;
import static org.junit.Assert.*;

import org.basex.util.ft.*;
import org.junit.*;

/**
 * Tests for {@link WesternTokenizer}.
 *
 * Every test tokenizes the same input ({@link #TEXT}) with a different
 * combination of full-text options and compares the produced tokens with the
 * expected ones.
 *
 * @author BaseX Team 2005-17, BSD License
 * @author Dimitar Popov
 */
public final class WesternTokenizerTest {
  /** Case sensitive. */
  private static final int FTCS = 1;
  /** Diacritics. */
  private static final int FTDC = 2;
  /** Lower case. */
  private static final int FTLC = 4;
  /** Upper case. */
  private static final int FTUC = 8;
  /** Wild cards. */
  private static final int FTWC = 16;

  /** Full-text options to use. */
  private final FTOpt opt = new FTOpt();

  /** Test text to tokenize. */
  private static final byte[] TEXT =
      token("\\T\u00e9st.*\\t\u00c4Ste\\\\Toast\\.");

  /** Test case insensitive. */
  @Test
  public void cI() {
    run(TEXT, "test", "taste", "toast");
  }

  /** Test case sensitive. */
  @Test
  public void cS() {
    setFTFlags(FTCS);
    run(TEXT, "Test", "tASte", "Toast");
  }

  /** Test lower case. */
  @Test
  public void lC() {
    setFTFlags(FTCS | FTLC);
    run(TEXT, "test", "taste", "toast");
  }

  /** Test upper case. */
  @Test
  public void uC() {
    setFTFlags(FTCS | FTUC);
    run(TEXT, "TEST", "TASTE", "TOAST");
  }

  /** Test diacritics + case insensitive. */
  @Test
  public void diaCI() {
    setFTFlags(FTDC);
    // escaped like all other expectations, so the test does not depend on the source encoding
    run(TEXT, "t\u00e9st", "t\u00e4ste", "toast");
  }

  /** Test diacritics + case sensitive. */
  @Test
  public void diaCS() {
    setFTFlags(FTDC | FTCS);
    run(TEXT, "T\u00e9st", "t\u00c4Ste", "Toast");
  }

  /** Test diacritics + lower case. */
  @Test
  public void diaLC() {
    setFTFlags(FTDC | FTCS | FTLC);
    run(TEXT, "t\u00e9st", "t\u00e4ste", "toast");
  }

  /** Test diacritics + upper case. */
  @Test
  public void diaUC() {
    setFTFlags(FTDC | FTCS | FTUC);
    run(TEXT, "T\u00c9ST", "T\u00c4STE", "TOAST");
  }

  /** Test wild cards + case insensitive. */
  @Test
  public void wildCardsCI() {
    setFTFlags(FTWC);
    run(TEXT, "\\test.*\\taste", "toast");
  }

  /** Test wild cards + case sensitive. */
  @Test
  public void wildCardsCS() {
    setFTFlags(FTWC | FTCS);
    run(TEXT, "\\Test.*\\tASte", "Toast");
  }

  /** Test wild cards + lower case. */
  @Test
  public void wildCardsLC() {
    setFTFlags(FTWC | FTCS | FTLC);
    run(TEXT, "\\test.*\\taste", "toast");
  }

  /** Test wild cards + upper case. */
  @Test
  public void wildCardsUC() {
    setFTFlags(FTWC | FTCS | FTUC);
    run(TEXT, "\\TEST.*\\TASTE", "TOAST");
  }

  /** Test wild cards + diacritics + case insensitive. */
  @Test
  public void wildCardsDiaCI() {
    setFTFlags(FTWC | FTDC);
    run(TEXT, "\\t\u00e9st.*\\t\u00e4ste", "toast");
  }

  /** Test wild cards + diacritics + case sensitive. */
  @Test
  public void wildCardsDiaCS() {
    setFTFlags(FTWC | FTDC | FTCS);
    run(TEXT, "\\T\u00e9st.*\\t\u00c4Ste", "Toast");
  }

  /** Test wild cards + diacritics + lower case. */
  @Test
  public void wildCardsDiaLC() {
    setFTFlags(FTWC | FTDC | FTCS | FTLC);
    run(TEXT, "\\t\u00e9st.*\\t\u00e4ste", "toast");
  }

  /** Test wild cards + diacritics + upper case. */
  @Test
  public void wildCardsDiaUC() {
    setFTFlags(FTWC | FTDC | FTCS | FTUC);
    run(TEXT, "\\T\u00c9ST.*\\T\u00c4STE", "TOAST");
  }

  /**
   * Perform tokenization test. The tokenizer is created with the current
   * options of this test instance. The test fails if a token differs from the
   * expected one, or if the number of returned tokens differs from the number
   * of expected tokens.
   * @param input input text to tokenize
   * @param tokens expected tokens
   */
  private void run(final byte[] input, final String... tokens) {
    final WesternTokenizer tok = new WesternTokenizer(opt);
    tok.init(input);

    int count = 0;
    while(tok.hasNext()) {
      final byte[] actual = tok.nextToken();
      // report surplus tokens as a test failure instead of an index error
      assertTrue("Unexpected extra token at position " + count + ": '" +
          string(actual) + '\'', count < tokens.length);
      assertTrue("Token " + count + ": expected '" + tokens[count] +
          "', found '" + string(actual) + '\'', eq(actual, token(tokens[count])));
      ++count;
    }
    // without this check, a tokenizer that returns too few tokens would pass unnoticed
    assertEquals("Number of tokens", tokens.length, count);
  }

  /**
   * Set the full-text option flags. The case flags are evaluated in the order
   * sensitive, lower, upper; if several of them are set, the last one wins.
   * @param flags bit mask with full-text flags
   */
  private void setFTFlags(final int flags) {
    if((flags & FTDC) != 0) opt.set(FTFlag.DC, true);
    if((flags & FTWC) != 0) opt.set(FTFlag.WC, true);
    if((flags & FTCS) != 0) opt.cs = FTCase.SENSITIVE;
    if((flags & FTLC) != 0) opt.cs = FTCase.LOWER;
    if((flags & FTUC) != 0) opt.cs = FTCase.UPPER;
  }
}