001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.collections4.bloomfilter;
018
019import java.util.TreeMap;
020import java.util.function.IntPredicate;
021
022/**
023 * Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to
024 * refer to these counts and their associated index.  This class is the equivalent of the index extractor except
025 * that it produces cells.
026 *
027 * <p>Note that a CellExtractor must not return duplicate indices and must be ordered.</p>
028 *
029 * <p>Implementations must guarantee that:</p>
030 *
031 * <ul>
032 * <li>The IndexExtractor implementation returns unique ordered indices.</li>
033 * <li>The cells are produced in IndexExtractor order.</li>
034 * <li>For every value produced by the IndexExtractor there will be only one matching
035 * cell produced by the CellExtractor.</li>
036 * <li>The CellExtractor will not generate cells with indices that are not output by the IndexExtractor.</li>
037 * <li>The IndexExtractor will not generate indices that have a zero count for the cell.</li>
038 * </ul>
039 *
040 * @since 4.5.0-M2
041 */
042@FunctionalInterface
043public interface CellExtractor extends IndexExtractor {
044
045    /**
046     * Represents an operation that accepts an {@code <index, count>} pair.
047     * Returns {@code true} if processing should continue, {@code false} otherwise.
048     *
049     * <p>Note: This is a functional interface as a specialization of
050     * {@link java.util.function.BiPredicate} for {@code int}.</p>
051     */
052    @FunctionalInterface
053    interface CellPredicate {
054        /**
055         * Performs an operation on the given {@code <index, count>} pair.
056         *
057         * @param index the bit index.
058         * @param count the cell value at the specified bit index.
059         * @return {@code true} if processing should continue, {@code false} if processing should stop.
060         */
061        boolean test(int index, int count);
062    }
063
064    /**
065     * Creates a CellExtractor from an IndexExtractor.
066     *
067     * <p>Note the following properties:</p>
068     * <ul>
069     * <li>Each index returned from the IndexExtractor is assumed to have a cell value of 1.</li>
070     * <li>The CellExtractor aggregates duplicate indices from the IndexExtractor.</li>
071     * </ul>
072     *
073     * <p>A CellExtractor that outputs the mapping [(1,2),(2,3),(3,1)] can be created from many combinations
074     * of indices including:</p>
075     * <pre>
076     * [1, 1, 2, 2, 2, 3]
077     * [1, 3, 1, 2, 2, 2]
078     * [3, 2, 1, 2, 1, 2]
079     * ...
080     * </pre>
081     *
082     * @param indexExtractor An index indexExtractor.
083     * @return A CellExtractor with the same indices as the IndexExtractor.
084     */
085    static CellExtractor from(final IndexExtractor indexExtractor) {
086        return new CellExtractor() {
087            /**
088             * Class to track cell values in the TreeMap.
089             */
090            final class CounterCell implements Comparable<CounterCell> {
091                final int idx;
092                int count;
093
094                CounterCell(final int idx, final int count) {
095                    this.idx = idx;
096                    this.count = count;
097                }
098
099                @Override
100                public int compareTo(final CounterCell other) {
101                    return Integer.compare(idx, other.idx);
102                }
103            }
104
105            TreeMap<CounterCell, CounterCell> counterCells = new TreeMap<>();
106
107            @Override
108            public int[] asIndexArray() {
109                populate();
110                return counterCells.keySet().stream().mapToInt(c -> c.idx).toArray();
111            }
112
113            private void populate() {
114                if (counterCells.isEmpty()) {
115                    indexExtractor.processIndices(idx -> {
116                        final CounterCell cell = new CounterCell(idx, 1);
117                        final CounterCell counter = counterCells.get(cell);
118                        if (counter == null) {
119                            counterCells.put(cell, cell);
120                        } else {
121                            counter.count++;
122                        }
123                        return true;
124                    });
125                }
126            }
127
128            @Override
129            public boolean processCells(final CellPredicate consumer) {
130                populate();
131                for (final CounterCell cell : counterCells.values()) {
132                    if (!consumer.test(cell.idx, cell.count)) {
133                        return false;
134                    }
135                }
136                return true;
137            }
138        };
139    }
140
141    /**
142     * Performs the given action for each {@code cell}  where the cell count is non-zero.
143     *
144     * <p>Some Bloom filter implementations use a count rather than a bit flag.  The term {@code Cell} is used to
145     * refer to these counts.</p>
146     *
147     * <p>Any exceptions thrown by the action are relayed to the caller. The consumer is applied to each
148     * cell. If the consumer returns {@code false} the execution is stopped, {@code false}
149     * is returned, and no further pairs are processed.</p>
150     *
151     * @param consumer the action to be performed for each non-zero cell.
152     * @return {@code true} if all cells return true from consumer, {@code false} otherwise.
153     * @throws NullPointerException if the specified consumer is null
154     */
155    boolean processCells(CellPredicate consumer);
156
157    /**
158     * The default implementation returns distinct and ordered indices for all cells with a non-zero count.
159     */
160    @Override
161    default boolean processIndices(final IntPredicate predicate) {
162        return processCells((i, v) -> predicate.test(i));
163    }
164
165    @Override
166    default IndexExtractor uniqueIndices() {
167        return this;
168    }
169}
170