/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.collections4.bloomfilter;

import java.util.Objects;
import java.util.TreeMap;
import java.util.function.IntPredicate;

/**
 * Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to
 * refer to these counts and their associated index. This class is the equivalent of the index extractor except
 * that it produces cells.
 *
 * <p>Note that a CellExtractor must not return duplicate indices and must be ordered.</p>
 *
 * <p>Implementations must guarantee that:</p>
 *
 * <ul>
 * <li>The IndexExtractor implementation returns unique ordered indices.</li>
 * <li>The cells are produced in IndexExtractor order.</li>
 * <li>For every value produced by the IndexExtractor there will be only one matching
 * cell produced by the CellExtractor.</li>
 * <li>The CellExtractor will not generate cells with indices that are not output by the IndexExtractor.</li>
 * <li>The IndexExtractor will not generate indices that have a zero count for the cell.</li>
 * </ul>
 *
 * @since 4.5.0-M2
 */
@FunctionalInterface
public interface CellExtractor extends IndexExtractor {

    /**
     * Represents an operation that accepts an {@code <index, count>} pair.
     * Returns {@code true} if processing should continue, {@code false} otherwise.
     *
     * <p>Note: This is a functional interface as a specialization of
     * {@link java.util.function.BiPredicate} for {@code int}.</p>
     */
    @FunctionalInterface
    interface CellPredicate {
        /**
         * Performs an operation on the given {@code <index, count>} pair.
         *
         * @param index the bit index.
         * @param count the cell value at the specified bit index.
         * @return {@code true} if processing should continue, {@code false} if processing should stop.
         */
        boolean test(int index, int count);
    }

    /**
     * Creates a CellExtractor from an IndexExtractor.
     *
     * <p>Note the following properties:</p>
     * <ul>
     * <li>Each index returned from the IndexExtractor is assumed to have a cell value of 1.</li>
     * <li>The CellExtractor aggregates duplicate indices from the IndexExtractor.</li>
     * </ul>
     *
     * <p>A CellExtractor that outputs the mapping [(1,2),(2,3),(3,1)] can be created from many combinations
     * of indices including:</p>
     * <pre>
     * [1, 1, 2, 2, 2, 3]
     * [1, 3, 1, 2, 2, 2]
     * [3, 2, 1, 2, 1, 2]
     * ...
     * </pre>
     *
     * <p>The indices are read from the IndexExtractor lazily, on the first call that needs them, and the
     * aggregated cells are then reused by later calls.</p>
     *
     * @param indexExtractor the IndexExtractor that supplies the (possibly duplicated, unordered) indices.
     * @return A CellExtractor with the same indices as the IndexExtractor.
     */
    static CellExtractor from(final IndexExtractor indexExtractor) {
        return new CellExtractor() {
            /**
             * Class to track cell values in the TreeMap. Ordering is by index only, so the same
             * instance can serve as both the lookup key and the mutable counter value.
             */
            final class CounterCell implements Comparable<CounterCell> {
                final int idx;
                int count;

                CounterCell(final int idx, final int count) {
                    this.idx = idx;
                    this.count = count;
                }

                @Override
                public int compareTo(final CounterCell other) {
                    return Integer.compare(idx, other.idx);
                }
            }

            /** Aggregated cells, sorted by index; filled on demand by {@link #populate()}. */
            private final TreeMap<CounterCell, CounterCell> counterCells = new TreeMap<>();

            @Override
            public int[] asIndexArray() {
                populate();
                return counterCells.keySet().stream().mapToInt(c -> c.idx).toArray();
            }

            /**
             * Reads the indices from the wrapped IndexExtractor and aggregates duplicates into counts.
             *
             * <p>An empty map is used as the "not yet populated" marker, so a source that yields
             * no indices is queried again on every call.</p>
             */
            private void populate() {
                if (counterCells.isEmpty()) {
                    indexExtractor.processIndices(idx -> {
                        final CounterCell cell = new CounterCell(idx, 1);
                        final CounterCell counter = counterCells.get(cell);
                        if (counter == null) {
                            // First time this index is seen: the probe object becomes the stored cell.
                            counterCells.put(cell, cell);
                        } else {
                            counter.count++;
                        }
                        return true;
                    });
                }
            }

            @Override
            public boolean processCells(final CellPredicate consumer) {
                // Honour the documented NullPointerException even when there are no cells to visit.
                Objects.requireNonNull(consumer, "consumer");
                populate();
                for (final CounterCell cell : counterCells.values()) {
                    if (!consumer.test(cell.idx, cell.count)) {
                        return false;
                    }
                }
                return true;
            }
        };
    }

    /**
     * Performs the given action for each {@code cell} where the cell count is non-zero.
     *
     * <p>Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to
     * refer to these counts.</p>
     *
     * <p>Any exceptions thrown by the action are relayed to the caller. The consumer is applied to each
     * cell. If the consumer returns {@code false} the execution is stopped, {@code false}
     * is returned, and no further pairs are processed.</p>
     *
     * @param consumer the action to be performed for each non-zero cell.
     * @return {@code true} if all cells return true from consumer, {@code false} otherwise.
     * @throws NullPointerException if the specified consumer is null
     */
    boolean processCells(CellPredicate consumer);

    /**
     * The default implementation returns distinct and ordered indices for all cells with a non-zero count.
     *
     * <p>It delegates to {@link #processCells(CellPredicate)}, passing each cell index to the predicate
     * and ignoring the cell count.</p>
     */
    @Override
    default boolean processIndices(final IntPredicate predicate) {
        return processCells((i, v) -> predicate.test(i));
    }

    /**
     * Returns this instance; a CellExtractor is required not to return duplicate indices,
     * so no further de-duplication is applied.
     */
    @Override
    default IndexExtractor uniqueIndices() {
        return this;
    }
}