/** * KeyList * Copyright 2011 by Michael Peter Christen, mc@yacy.net, Frankfurt a. M., Germany * First released 18.4.2011 at http://yacy.net * * $LastChangedDate: 2011-03-22 10:34:10 +0100 (Di, 22 Mrz 2011) $ * $LastChangedRevision: 7619 $ * $LastChangedBy: orbiter $ * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program in the file lgpl21.txt * If not, see <http://www.gnu.org/licenses/>. */ package net.yacy.cora.storage; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.RandomAccessFile; import java.nio.charset.StandardCharsets; import java.util.Iterator; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.zip.GZIPInputStream; import net.yacy.cora.document.encoding.UTF8; /** * a key list is a file which contains a list of key words; each line one word * The key list is stored into a java set object and the list can be extended on the fly * which is done by extending the file with just another line. * When is key list file is initialized, all lines are read and pushed into a java set */ public class KeyList implements Iterable<String> { private static final Object _obj = new Object(); private final Map<String, Object> keys; private final RandomAccessFile raf; public KeyList(final File file) throws IOException { this.keys = new ConcurrentHashMap<String, Object>(); if (file.exists()) { InputStream is = new FileInputStream(file); if (file.getName().endsWith(".gz")) { is = new GZIPInputStream(is); } final BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8)); String l; try { while ((l = reader.readLine()) != null) { if (l.isEmpty() || l.charAt(0) == '#') continue; l = l.trim().toLowerCase(); this.keys.put(l, _obj); } } catch (final IOException e) { // finish } finally { reader.close(); } } this.raf = new RandomAccessFile(file, "rw"); } public void clear() throws IOException { this.raf.setLength(0); this.keys.clear(); } public int size() { return this.keys.size(); } public boolean contains(final String key) { return this.keys.containsKey(key.trim().toLowerCase()); } public void add(final String key) throws IOException { if (this.keys.containsKey(key)) return; synchronized (this.raf) { if (this.keys.containsKey(key)) return; // check again for those threads who come late (after another has written this) this.keys.put(key, _obj); this.raf.seek(this.raf.length()); this.raf.write(UTF8.getBytes(key)); this.raf.writeByte('\n'); } } public synchronized void close() throws IOException { synchronized (this.raf) { this.raf.close(); } } public static void main(final String[] args) { try { final KeyList kl = new KeyList(new File("/tmp/test")); kl.add("eins"); kl.add("zwei"); kl.add("drei"); System.out.println(kl.contains("eins") ? "drin" : "nicht"); } catch (final IOException e) { e.printStackTrace(); } } @Override public Iterator<String> iterator() { return this.keys.keySet().iterator(); } }