001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.collections4.bloomfilter;
018
019import java.util.Objects;
020
021/**
022 * The interface that describes a Bloom filter that associates a count with each
023 * bit index rather than a bit.  This allows reversal of merge operations with
024 * remove operations.
025 *
026 * <p>A counting Bloom filter is expected to function identically to a standard
027 * Bloom filter that is the merge of all the Bloom filters that have been added
028 * to and not later subtracted from the counting Bloom filter. The functional
029 * state of a CountingBloomFilter at the start and end of a series of merge and
030 * subsequent remove operations of the same Bloom filters, irrespective of
031 * remove order, is expected to be the same.</p>
032 *
033 * <p>Removal of a filter that has not previously been merged results in an
034 * invalid state where the cells no longer represent a sum of merged Bloom
035 * filters. It is impossible to validate merge and remove exactly without
036 * explicitly storing all filters. Consequently such an operation may go
037 * undetected. The CountingBloomFilter maintains a state flag that is used as a
038 * warning that an operation was performed that resulted in invalid cells and
039 * thus an invalid state. For example this may occur if a cell for an index was
040 * set to negative following a remove operation.</p>
041 *
042 * <p>Implementations should document the expected state of the filter after an
043 * operation that generates invalid cells, and any potential recovery options.
044 * An implementation may support a reversal of the operation to restore the
045 * state to that prior to the operation. In the event that invalid cells are
046 * adjusted to a valid range then it should be documented if there has been
047 * irreversible information loss.</p>
048 *
049 * <p>Implementations may choose to throw an exception during an operation that
050 * generates invalid cells. Implementations should document the expected state
051 * of the filter after such an operation. For example are the cells not updated,
052 * partially updated or updated entirely before the exception is raised.</p>
053 *
054 * @see CellExtractor
055 * @since 4.5.0-M1
056 */
057public interface CountingBloomFilter extends BloomFilter<CountingBloomFilter>, CellExtractor {
058
059    // Query Operations
060
061    /**
062     * Adds the specified CellExtractor to this Bloom filter.
063     *
064     * <p>Specifically
065     * all cells for the indexes identified by the {@code other} will be incremented
066     * by their corresponding values in the {@code other}.</p>
067     *
068     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
069     *
070     * @param other the CellExtractor to add.
071     * @return {@code true} if the addition was successful and the state is valid
072     * @see #isValid()
073     * @see #subtract(CellExtractor)
074     */
075    boolean add(CellExtractor other);
076
077    /**
078     * Gets the maximum allowable value for a cell count in this Counting filter.
079     *
080     * @return the maximum allowable value for a cell count in this Counting filter.
081     */
082    int getMaxCell();
083
084    /**
085     * Determines the maximum number of times the BitMapExtractor could have been merged into this counting filter.
086     *
087     * @param bitMapExtractor the BitMapExtractor to provide the indices.
088     * @return the maximum number of times the BitMapExtractor could have been inserted.
089     */
090    default int getMaxInsert(final BitMapExtractor bitMapExtractor) {
091        if (!contains(bitMapExtractor)) {
092            return 0;
093        }
094        final long[] bitMaps = bitMapExtractor.asBitMapArray();
095        final int[] max = { Integer.MAX_VALUE };
096        processCells((x, y) -> {
097            if ((bitMaps[BitMaps.getLongIndex(x)] & BitMaps.getLongBit(x)) != 0) {
098                max[0] = max[0] <= y ? max[0] : y;
099            }
100            return true;
101        });
102        return max[0];
103    }
104
105    /**
106     * Determines the maximum number of times the Bloom filter could have been merged into this counting filter.
107     *
108     * @param bloomFilter the Bloom filter the check for.
109     * @return the maximum number of times the Bloom filter could have been inserted.
110     */
111    default int getMaxInsert(final BloomFilter<?> bloomFilter) {
112        return getMaxInsert((BitMapExtractor) bloomFilter);
113    }
114
115    /**
116     * Determines the maximum number of times the Cell Extractor could have been added.
117     *
118     * @param cellExtractor the extractor of cells.
119     * @return the maximum number of times the CellExtractor could have been inserted.
120     */
121    int getMaxInsert(CellExtractor cellExtractor);
122
123    /**
124     * Determines the maximum number of times the Hasher could have been merged into this counting filter.
125     *
126     * @param hasher the Hasher to provide the indices.
127     * @return the maximum number of times the hasher could have been inserted.
128     */
129    default int getMaxInsert(final Hasher hasher) {
130        return getMaxInsert(hasher.indices(getShape()));
131    }
132
133    /**
134     * Determines the maximum number of times the IndexExtractor could have been merged into this counting filter.
135     * <p>
136     * To determine how many times an indexExtractor could have been added create a CellExtractor from the indexExtractor and check that
137     * </p>
138     *
139     * @param indexExtractor the extractor to drive the count check.
140     * @return the maximum number of times the IndexExtractor could have been inserted.
141     * @see #getMaxInsert(CellExtractor)
142     */
143    default int getMaxInsert(final IndexExtractor indexExtractor) {
144        return getMaxInsert(CellExtractor.from(indexExtractor.uniqueIndices()));
145    }
146
147    /**
148     * Returns {@code true} if the internal state is valid.
149     *
150     * <p>This flag is a warning that an addition or
151     * subtraction of cells from this filter resulted in an invalid cell for one or more
152     * indexes. For example this may occur if a cell for an index was
153     * set to negative following a subtraction operation, or overflows the value specified by {@code getMaxCell()} following an
154     * addition operation.</p>
155     *
156     * <p>A counting Bloom filter that has an invalid state is no longer ensured to function
157     * identically to a standard Bloom filter instance that is the merge of all the Bloom filters
158     * that have been added to and not later subtracted from this counting Bloom filter.</p>
159     *
160     * <p>Note: The change to an invalid state may or may not be reversible. Implementations
161     * are expected to document their policy on recovery from an addition or removal operation
162     * that generated an invalid state.</p>
163     *
164     * @return {@code true} if the state is valid
165     */
166    boolean isValid();
167
168    /**
169     * Merges the specified BitMap extractor into this Bloom filter.
170     *
171     * <p>Specifically: all cells for the indexes identified by the {@code bitMapExtractor} will be incremented by 1.</p>
172     *
173     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
174     *
175     * @param bitMapExtractor the BitMapExtractor
176     * @return {@code true} if the removal was successful and the state is valid
177     * @see #isValid()
178     * @see #add(CellExtractor)
179     */
180    @Override
181    default boolean merge(final BitMapExtractor bitMapExtractor) {
182        return merge(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
183    }
184
185    /**
186     * Merges the specified Bloom filter into this Bloom filter.
187     *
188     * <p>Specifically: all cells for the indexes identified by the {@code other} filter will be incremented by 1.</p>
189     *
190     * <p>Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an
191     * IndexExtractor.</p>
192     *
193     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
194     *
195     * @param other the other Bloom filter
196     * @return {@code true} if the removal was successful and the state is valid
197     * @see #isValid()
198     * @see #add(CellExtractor)
199     */
200    @Override
201    default boolean merge(final BloomFilter<?> other) {
202        Objects.requireNonNull(other, "other");
203        return merge((IndexExtractor) other);
204    }
205
206    /**
207     * Merges the specified Hasher into this Bloom filter.
208     *
209     * <p>Specifically: all cells for the unique indexes identified by the {@code hasher} will be incremented by 1.</p>
210     *
211     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
212     *
213     * @param hasher the hasher
214     * @return {@code true} if the removal was successful and the state is valid
215     * @see #isValid()
216     * @see #add(CellExtractor)
217     */
218    @Override
219    default boolean merge(final Hasher hasher) {
220        Objects.requireNonNull(hasher, "hasher");
221        return merge(hasher.indices(getShape()));
222    }
223
224    /**
225     * Merges the specified index extractor into this Bloom filter.
226     *
227     * <p>Specifically: all unique cells for the indices identified by the {@code indexExtractor} will be incremented by 1.</p>
228     *
229     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
230     *
231     * <p>Notes:</p>
232     * <ul>
233     * <li>If indices that are returned multiple times should be incremented multiple times convert the IndexExtractor
234     * to a CellExtractor and add that.</li>
235     * <li>Implementations should throw {@code IllegalArgumentException} and no other exception on bad input.</li>
236     * </ul>
237     * @param indexExtractor the IndexExtractor
238     * @return {@code true} if the removal was successful and the state is valid
239     * @see #isValid()
240     * @see #add(CellExtractor)
241     */
242    @Override
243    default boolean merge(final IndexExtractor indexExtractor) {
244        Objects.requireNonNull(indexExtractor, "indexExtractor");
245        try {
246            return add(CellExtractor.from(indexExtractor.uniqueIndices()));
247        } catch (final IndexOutOfBoundsException e) {
248            throw new IllegalArgumentException(
249                    String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e);
250        }
251    }
252
253    /**
254     * Removes the specified BitMapExtractor from this Bloom filter.
255     *
256     * <p>Specifically all cells for the indices produced by the {@code bitMapExtractor} will be
257     * decremented by 1.</p>
258     *
259     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
260     *
261     * @param bitMapExtractor the BitMapExtractor to provide the indexes
262     * @return {@code true} if the removal was successful and the state is valid
263     * @see #isValid()
264     * @see #subtract(CellExtractor)
265     */
266    default boolean remove(final BitMapExtractor bitMapExtractor) {
267        return remove(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
268    }
269
270    /**
271     * Removes the specified Bloom filter from this Bloom filter.
272     *
273     * <p>Specifically: all cells for the indexes identified by the {@code other} filter will be decremented by 1.</p>
274     *
275     * <p>Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an
276     * IndexExtractor.</p>
277     *
278     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
279     *
280     * @param other the other Bloom filter
281     * @return {@code true} if the removal was successful and the state is valid
282     * @see #isValid()
283     * @see #subtract(CellExtractor)
284     */
285    default boolean remove(final BloomFilter<?> other) {
286        return remove((IndexExtractor) other);
287    }
288
289    /**
290     * Removes the unique values from the specified hasher from this Bloom filter.
291     *
292     * <p>Specifically all cells for the unique indices produced by the {@code hasher} will be
293     * decremented by 1.</p>
294     *
295     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
296     *
297     * @param hasher the hasher to provide the indexes
298     * @return {@code true} if the removal was successful and the state is valid
299     * @see #isValid()
300     * @see #subtract(CellExtractor)
301     */
302    default boolean remove(final Hasher hasher) {
303        Objects.requireNonNull(hasher, "hasher");
304        return remove(hasher.indices(getShape()));
305    }
306
307    /**
308     * Removes the values from the specified IndexExtractor from the Bloom filter from this Bloom filter.
309     *
310     * <p>Specifically all cells for the unique indices produced by the {@code hasher} will be
311     * decremented by 1.</p>
312     *
313     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
314     *
315     * <p>Note: If indices that are returned multiple times should be decremented multiple times convert the IndexExtractor
316     * to a CellExtractor and subtract that.</p>
317     *
318     * @param indexExtractor the IndexExtractor to provide the indexes
319     * @return {@code true} if the removal was successful and the state is valid
320     * @see #isValid()
321     * @see #subtract(CellExtractor)
322     */
323    default boolean remove(final IndexExtractor indexExtractor) {
324        Objects.requireNonNull(indexExtractor, "indexExtractor");
325        try {
326            return subtract(CellExtractor.from(indexExtractor.uniqueIndices()));
327        } catch (final IndexOutOfBoundsException e) {
328            throw new IllegalArgumentException(
329                    String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()));
330        }
331    }
332
333    /**
334     * Adds the specified CellExtractor to this Bloom filter.
335     *
336     * <p>Specifically
337     * all cells for the indexes identified by the {@code other} will be decremented
338     * by their corresponding values in the {@code other}.</p>
339     *
340     * <p>This method will return true if the filter is valid after the operation.</p>
341     *
342     * @param other the CellExtractor to subtract.
343     * @return {@code true} if the subtraction was successful and the state is valid
344     * @see #isValid()
345     * @see #add(CellExtractor)
346     */
347    boolean subtract(CellExtractor other);
348
349    /**
350     * The default implementation is a no-op since the counting bloom filter returns an unique IndexExtractor by default.
351     * @return this counting Bloom filter.
352     */
353    @Override
354    default IndexExtractor uniqueIndices() {
355        return this;
356    }
357}