package lia.indexing; /** * Copyright Manning Publications Co. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific lan */ import java.util.Date; import java.util.Calendar; import java.io.IOException; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.document.Document; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.util.Version; // From chapter 2 /** Just to test the code compiles. */ class Fragments { public static void indexNumbersMethod() { // START new Field("size", "4096", Field.Store.YES, Field.Index.NOT_ANALYZED); new Field("price", "10.99", Field.Store.YES, Field.Index.NOT_ANALYZED); new Field("author", "Arthur C. Clark", Field.Store.YES, Field.Index.NOT_ANALYZED); // END } public static final String COMPANY_DOMAIN = "example.com"; public static final String BAD_DOMAIN = "yucky-domain.com"; private String getSenderEmail() { return "bob@smith.com"; } private String getSenderName() { return "Bob Smith"; } private String getSenderDomain() { return COMPANY_DOMAIN; } private String getSubject() { return "Hi there Lisa"; } private String getBody() { return "I don't have much to say"; } private boolean isImportant(String lowerDomain) { return lowerDomain.endsWith(COMPANY_DOMAIN); } private boolean isUnimportant(String lowerDomain) { return lowerDomain.endsWith(BAD_DOMAIN); } public void ramDirExample() throws Exception { Analyzer analyzer = new WhitespaceAnalyzer(); // START Directory ramDir = new RAMDirectory(); IndexWriter writer = new IndexWriter(ramDir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED); // END } public void dirCopy() throws Exception { Directory otherDir = null; // START Directory ramDir = new RAMDirectory(otherDir); // END } public void addIndexes() throws Exception { Directory otherDir = null; Directory ramDir = null; Analyzer analyzer = null; // START IndexWriter writer = new IndexWriter(otherDir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED); writer.addIndexesNoOptimize(new Directory[] {ramDir}); // END } public void docBoostMethod() throws IOException { Directory dir = new RAMDirectory(); IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30), IndexWriter.MaxFieldLength.UNLIMITED); // START Document doc = new Document(); String senderEmail = getSenderEmail(); String senderName = getSenderName(); String subject = getSubject(); String body = getBody(); doc.add(new Field("senderEmail", senderEmail, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("senderName", senderName, Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("subject", subject, Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("body", body, Field.Store.NO, Field.Index.ANALYZED)); String lowerDomain = getSenderDomain().toLowerCase(); if (isImportant(lowerDomain)) { doc.setBoost(1.5F); //1 } else if (isUnimportant(lowerDomain)) { doc.setBoost(0.1F); //2 } writer.addDocument(doc); // END writer.close(); /* #1 Good domain boost factor: 1.5 #2 Bad domain boost factor: 0.1 */ } public void fieldBoostMethod() throws IOException { String senderName = getSenderName(); String subject = getSubject(); // START Field subjectField = new Field("subject", subject, Field.Store.YES, Field.Index.ANALYZED); subjectField.setBoost(1.2F); // END } public void numberField() { Document doc = new Document(); // START doc.add(new NumericField("price").setDoubleValue(19.99)); // END } public void numberTimestamp() { Document doc = new Document(); // START doc.add(new NumericField("timestamp") .setLongValue(new Date().getTime())); // END // START doc.add(new NumericField("day") .setIntValue((int) (new Date().getTime()/24/3600))); // END Date date = new Date(); // START Calendar cal = Calendar.getInstance(); cal.setTime(date); doc.add(new NumericField("dayOfMonth") .setIntValue(cal.get(Calendar.DAY_OF_MONTH))); // END } public void setInfoStream() throws Exception { Directory dir = null; Analyzer analyzer = null; // START IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); writer.setInfoStream(System.out); // END } public void dateMethod() { Document doc = new Document(); doc.add(new Field("indexDate", DateTools.dateToString(new Date(), DateTools.Resolution.DAY), Field.Store.YES, Field.Index.NOT_ANALYZED)); } public void numericField() throws Exception { Document doc = new Document(); NumericField price = new NumericField("price"); price.setDoubleValue(19.99); doc.add(price); NumericField timestamp = new NumericField("timestamp"); timestamp.setLongValue(new Date().getTime()); doc.add(timestamp); Date b = new Date(); NumericField birthday = new NumericField("birthday"); String v = DateTools.dateToString(b, DateTools.Resolution.DAY); birthday.setIntValue(Integer.parseInt(v)); doc.add(birthday); } public void indexAuthors() throws Exception { String[] authors = new String[] {"lisa", "tom"}; // START Document doc = new Document(); for (String author: authors) { doc.add(new Field("author", author, Field.Store.YES, Field.Index.ANALYZED)); } // END } }