/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.collections4.bloomfilter;

import java.util.Objects;

/**
 * The interface that describes a Bloom filter that associates a count with each
 * bit index rather than a bit. This allows reversal of merge operations with
 * remove operations.
 *
 * <p>A counting Bloom filter is expected to function identically to a standard
 * Bloom filter that is the merge of all the Bloom filters that have been added
 * to and not later subtracted from the counting Bloom filter. The functional
 * state of a CountingBloomFilter at the start and end of a series of merge and
 * subsequent remove operations of the same Bloom filters, irrespective of
 * remove order, is expected to be the same.</p>
 *
 * <p>Removal of a filter that has not previously been merged results in an
 * invalid state where the cells no longer represent a sum of merged Bloom
 * filters. It is impossible to validate merge and remove exactly without
 * explicitly storing all filters. Consequently such an operation may go
 * undetected. The CountingBloomFilter maintains a state flag that is used as a
 * warning that an operation was performed that resulted in invalid cells and
 * thus an invalid state. For example this may occur if a cell for an index was
 * set to negative following a remove operation.</p>
 *
 * <p>Implementations should document the expected state of the filter after an
 * operation that generates invalid cells, and any potential recovery options.
 * An implementation may support a reversal of the operation to restore the
 * state to that prior to the operation. In the event that invalid cells are
 * adjusted to a valid range then it should be documented if there has been
 * irreversible information loss.</p>
 *
 * <p>Implementations may choose to throw an exception during an operation that
 * generates invalid cells. Implementations should document the expected state
 * of the filter after such an operation. For example are the cells not updated,
 * partially updated or updated entirely before the exception is raised.</p>
 *
 * @see CellExtractor
 * @since 4.5.0-M1
 */
public interface CountingBloomFilter extends BloomFilter<CountingBloomFilter>, CellExtractor {

    // Query Operations

    /**
     * Adds the specified CellExtractor to this Bloom filter.
     *
     * <p>Specifically
     * all cells for the indexes identified by the {@code other} will be incremented
     * by their corresponding values in the {@code other}.</p>
     *
     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
     *
     * @param other the CellExtractor to add.
     * @return {@code true} if the addition was successful and the state is valid
     * @see #isValid()
     * @see #subtract(CellExtractor)
     */
    boolean add(CellExtractor other);

    /**
     * Gets the maximum allowable value for a cell count in this Counting filter.
     *
     * @return the maximum allowable value for a cell count in this Counting filter.
     */
    int getMaxCell();

    /**
     * Determines the maximum number of times the BitMapExtractor could have been merged into this counting filter.
     *
     * <p>The default implementation returns 0 when this filter does not contain the
     * {@code bitMapExtractor}; otherwise it returns the smallest cell count found at any index
     * that is enabled in the bit maps.</p>
     *
     * @param bitMapExtractor the BitMapExtractor to provide the indices.
     * @return the maximum number of times the BitMapExtractor could have been inserted.
     */
    default int getMaxInsert(final BitMapExtractor bitMapExtractor) {
        if (!contains(bitMapExtractor)) {
            return 0;
        }
        final long[] bitMaps = bitMapExtractor.asBitMapArray();
        // Single element array so the running minimum can be updated from inside the lambda.
        final int[] max = { Integer.MAX_VALUE };
        processCells((index, count) -> {
            // Only cells whose index is enabled in the bit maps limit the insert count.
            if ((bitMaps[BitMaps.getLongIndex(index)] & BitMaps.getLongBit(index)) != 0) {
                max[0] = Math.min(max[0], count);
            }
            return true;
        });
        return max[0];
    }

    /**
     * Determines the maximum number of times the Bloom filter could have been merged into this counting filter.
     *
     * @param bloomFilter the Bloom filter to check for.
     * @return the maximum number of times the Bloom filter could have been inserted.
     */
    default int getMaxInsert(final BloomFilter<?> bloomFilter) {
        return getMaxInsert((BitMapExtractor) bloomFilter);
    }

    /**
     * Determines the maximum number of times the Cell Extractor could have been added.
     *
     * @param cellExtractor the extractor of cells.
     * @return the maximum number of times the CellExtractor could have been inserted.
     */
    int getMaxInsert(CellExtractor cellExtractor);

    /**
     * Determines the maximum number of times the Hasher could have been merged into this counting filter.
     *
     * @param hasher the Hasher to provide the indices.
     * @return the maximum number of times the hasher could have been inserted.
     */
    default int getMaxInsert(final Hasher hasher) {
        return getMaxInsert(hasher.indices(getShape()));
    }

    /**
     * Determines the maximum number of times the IndexExtractor could have been merged into this counting filter.
     * <p>
     * The default implementation considers each unique index once. To determine how many times an
     * indexExtractor that produces duplicate indices could have been added, create a CellExtractor from the
     * indexExtractor and check that with {@link #getMaxInsert(CellExtractor)}.
     * </p>
     *
     * @param indexExtractor the extractor to drive the count check.
     * @return the maximum number of times the IndexExtractor could have been inserted.
     * @see #getMaxInsert(CellExtractor)
     */
    default int getMaxInsert(final IndexExtractor indexExtractor) {
        return getMaxInsert(CellExtractor.from(indexExtractor.uniqueIndices()));
    }

    /**
     * Returns {@code true} if the internal state is valid.
     *
     * <p>This flag is a warning that an addition or
     * subtraction of cells from this filter resulted in an invalid cell for one or more
     * indexes. For example this may occur if a cell for an index was
     * set to negative following a subtraction operation, or overflows the value specified by {@code getMaxCell()} following an
     * addition operation.</p>
     *
     * <p>A counting Bloom filter that has an invalid state is no longer ensured to function
     * identically to a standard Bloom filter instance that is the merge of all the Bloom filters
     * that have been added to and not later subtracted from this counting Bloom filter.</p>
     *
     * <p>Note: The change to an invalid state may or may not be reversible. Implementations
     * are expected to document their policy on recovery from an addition or removal operation
     * that generated an invalid state.</p>
     *
     * @return {@code true} if the state is valid
     */
    boolean isValid();

    /**
     * Merges the specified BitMap extractor into this Bloom filter.
     *
     * <p>Specifically: all cells for the indexes identified by the {@code bitMapExtractor} will be incremented by 1.</p>
     *
     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
     *
     * @param bitMapExtractor the BitMapExtractor
     * @return {@code true} if the merge was successful and the state is valid
     * @see #isValid()
     * @see #add(CellExtractor)
     */
    @Override
    default boolean merge(final BitMapExtractor bitMapExtractor) {
        return merge(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
    }

    /**
     * Merges the specified Bloom filter into this Bloom filter.
     *
     * <p>Specifically: all cells for the indexes identified by the {@code other} filter will be incremented by 1.</p>
     *
     * <p>Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an
     * IndexExtractor.</p>
     *
     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
     *
     * @param other the other Bloom filter
     * @return {@code true} if the merge was successful and the state is valid
     * @throws NullPointerException if {@code other} is {@code null}
     * @see #isValid()
     * @see #add(CellExtractor)
     */
    @Override
    default boolean merge(final BloomFilter<?> other) {
        Objects.requireNonNull(other, "other");
        return merge((IndexExtractor) other);
    }

    /**
     * Merges the specified Hasher into this Bloom filter.
     *
     * <p>Specifically: all cells for the unique indexes identified by the {@code hasher} will be incremented by 1.</p>
     *
     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
     *
     * @param hasher the hasher
     * @return {@code true} if the merge was successful and the state is valid
     * @throws NullPointerException if {@code hasher} is {@code null}
     * @see #isValid()
     * @see #add(CellExtractor)
     */
    @Override
    default boolean merge(final Hasher hasher) {
        Objects.requireNonNull(hasher, "hasher");
        return merge(hasher.indices(getShape()));
    }

    /**
     * Merges the specified index extractor into this Bloom filter.
     *
     * <p>Specifically: all unique cells for the indices identified by the {@code indexExtractor} will be incremented by 1.</p>
     *
     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
     *
     * <p>Notes:</p>
     * <ul>
     * <li>If indices that are returned multiple times should be incremented multiple times convert the IndexExtractor
     * to a CellExtractor and add that.</li>
     * <li>Implementations should throw {@code IllegalArgumentException} and no other exception on bad input.</li>
     * </ul>
     * @param indexExtractor the IndexExtractor
     * @return {@code true} if the merge was successful and the state is valid
     * @throws NullPointerException if {@code indexExtractor} is {@code null}
     * @throws IllegalArgumentException if adding the indices raises an {@code IndexOutOfBoundsException};
     *         the original exception is retained as the cause
     * @see #isValid()
     * @see #add(CellExtractor)
     */
    @Override
    default boolean merge(final IndexExtractor indexExtractor) {
        Objects.requireNonNull(indexExtractor, "indexExtractor");
        try {
            return add(CellExtractor.from(indexExtractor.uniqueIndices()));
        } catch (final IndexOutOfBoundsException e) {
            throw new IllegalArgumentException(
                    String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e);
        }
    }

    /**
     * Removes the specified BitMapExtractor from this Bloom filter.
     *
     * <p>Specifically all cells for the indices produced by the {@code bitMapExtractor} will be
     * decremented by 1.</p>
     *
     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
     *
     * @param bitMapExtractor the BitMapExtractor to provide the indexes
     * @return {@code true} if the removal was successful and the state is valid
     * @see #isValid()
     * @see #subtract(CellExtractor)
     */
    default boolean remove(final BitMapExtractor bitMapExtractor) {
        return remove(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
    }

    /**
     * Removes the specified Bloom filter from this Bloom filter.
     *
     * <p>Specifically: all cells for the indexes identified by the {@code other} filter will be decremented by 1.</p>
     *
     * <p>Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an
     * IndexExtractor.</p>
     *
     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
     *
     * @param other the other Bloom filter
     * @return {@code true} if the removal was successful and the state is valid
     * @throws NullPointerException if {@code other} is {@code null}
     * @see #isValid()
     * @see #subtract(CellExtractor)
     */
    default boolean remove(final BloomFilter<?> other) {
        // Mirror merge(BloomFilter) so a null argument is reported against the right parameter name.
        Objects.requireNonNull(other, "other");
        return remove((IndexExtractor) other);
    }

    /**
     * Removes the unique values from the specified hasher from this Bloom filter.
     *
     * <p>Specifically all cells for the unique indices produced by the {@code hasher} will be
     * decremented by 1.</p>
     *
     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
     *
     * @param hasher the hasher to provide the indexes
     * @return {@code true} if the removal was successful and the state is valid
     * @throws NullPointerException if {@code hasher} is {@code null}
     * @see #isValid()
     * @see #subtract(CellExtractor)
     */
    default boolean remove(final Hasher hasher) {
        Objects.requireNonNull(hasher, "hasher");
        return remove(hasher.indices(getShape()));
    }

    /**
     * Removes the values from the specified IndexExtractor from this Bloom filter.
     *
     * <p>Specifically all cells for the unique indices produced by the {@code indexExtractor} will be
     * decremented by 1.</p>
     *
     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
     *
     * <p>Note: If indices that are returned multiple times should be decremented multiple times convert the IndexExtractor
     * to a CellExtractor and subtract that.</p>
     *
     * @param indexExtractor the IndexExtractor to provide the indexes
     * @return {@code true} if the removal was successful and the state is valid
     * @throws NullPointerException if {@code indexExtractor} is {@code null}
     * @throws IllegalArgumentException if subtracting the indices raises an {@code IndexOutOfBoundsException};
     *         the original exception is retained as the cause
     * @see #isValid()
     * @see #subtract(CellExtractor)
     */
    default boolean remove(final IndexExtractor indexExtractor) {
        Objects.requireNonNull(indexExtractor, "indexExtractor");
        try {
            return subtract(CellExtractor.from(indexExtractor.uniqueIndices()));
        } catch (final IndexOutOfBoundsException e) {
            // Keep the original exception as the cause, matching merge(IndexExtractor).
            throw new IllegalArgumentException(
                    String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e);
        }
    }

    /**
     * Subtracts the specified CellExtractor from this Bloom filter.
     *
     * <p>Specifically
     * all cells for the indexes identified by the {@code other} will be decremented
     * by their corresponding values in the {@code other}.</p>
     *
     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
     *
     * @param other the CellExtractor to subtract.
     * @return {@code true} if the subtraction was successful and the state is valid
     * @see #isValid()
     * @see #add(CellExtractor)
     */
    boolean subtract(CellExtractor other);

    /**
     * The default implementation is a no-op since the counting Bloom filter returns a unique IndexExtractor by default.
     *
     * @return this counting Bloom filter.
     */
    @Override
    default IndexExtractor uniqueIndices() {
        return this;
    }
}