Bitset,也就是位圖,由於可以用非常緊湊的格式來表示給定范圍的連續數據而經常出現在各種算法設計中。上面的圖來自c++庫中bitset的一張圖。
基本原理是,用1位來表示一個數據是否出現過,0表示沒有出現過,1表示出現過。使用的時候即可根據某一位是否為0來判斷此數是否出現過。
一個1G的空間,有 8*1024*1024*1024≈8.59*10^9 bit,也就是可以表示約85.9億個不同的數。
常見的應用是那些需要對海量數據進行一些統計工作的時候,比如日志分析等。
面試題中也常出現,比如:統計40億個數據中沒有出現的數據,將40億個不同數據進行排序等。
又如:現在有1千萬個隨機數,隨機數的范圍在1到1億之間。現在要求寫出一種算法,將1到1億之間沒有在隨機數中的數求出來(百度)。
programming pearls上也有一個關於使用bitset來查找電話號碼的題目。
Bitmap的常見擴展,是用2位或者更多位來表示此數字的更多信息,比如出現了多少次等。
package java.util; import java.io.*; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.LongBuffer; public class BitSet implements Cloneable, java.io.Serializable {、
private long[] words; .... ....同時我們也看到使用long數組來作為內部存儲結構。這個決定了,Bitset至少為一個long的大小。下面的構造函數中也會有所體現。
public BitSet() { initWords(BITS_PER_WORD); sizeIsSticky = false; } public BitSet(int nbits) { // nbits can't be negative; size 0 is OK if (nbits < 0) throw new NegativeArraySizeException("nbits < 0: " + nbits); initWords(nbits); private void initWords(int nbits) { words = new long[wordIndex(nbits-1) + 1]; } private static int wordIndex(int bitIndex) { return bitIndex >> ADDRESS_BITS_PER_WORD; } private final static int ADDRESS_BITS_PER_WORD = 6; private final static int BITS_PER_WORD = 1 << ADDRESS_BITS_PER_WORD;
兩個構造函數,一個指定了初始大小,一個沒指定。如果沒指定,我們可以看到默認的初始大小為 BITS_PER_WORD = 2^6 = 64 bit(initWords 中 wordIndex(64-1)+1 = 1,即只分配1個long)。我們知道java中long的大小就是8個字節,也就是8*8=64bit。也就是說,bitset默認的是一個long整型的大小。初始化函數指定了必要的大小。
注意:如果指定了bitset的初始化大小,那麼會把它規整到一個大於或者等於這個數字的64的整數倍。比如64位,bitset的大小是1個long,而65位時,bitset大小是2個long,即128位。做這麼一個規定,主要是為了內存對齊,同時避免處理特殊情況,簡化程序。
/**
 * Sets every bit in this bit set to {@code false} by zeroing each word
 * currently in use, from the highest down, leaving {@code wordsInUse} at 0.
 */
public void clear() {
    while (wordsInUse > 0) {
        wordsInUse--;
        words[wordsInUse] = 0;
    }
}
b. 清空某一位
/**
 * Sets the bit at the given index to {@code false}.
 *
 * @param  bitIndex the index of the bit to be cleared
 * @throws IndexOutOfBoundsException if {@code bitIndex} is negative
 */
public void clear(int bitIndex) {
    if (bitIndex < 0) {
        throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
    }

    final int target = wordIndex(bitIndex);
    // Bits beyond the words in use are already false; nothing to do.
    if (target < wordsInUse) {
        // Java uses only the low six bits of a long shift count, so this
        // selects the bit's position inside its word.
        final long bit = 1L << bitIndex;
        words[target] &= ~bit;

        recalculateWordsInUse();
        checkInvariants();
    }
}
/** * Sets the bits from the specified {@code fromIndex} (inclusive) to the * specified {@code toIndex} (exclusive) to {@code false}. * * @param fromIndex index of the first bit to be cleared * @param toIndex index after the last bit to be cleared * @throws IndexOutOfBoundsException if {@code fromIndex} is negative, * or {@code toIndex} is negative, or {@code fromIndex} is * larger than {@code toIndex} * @since 1.4 */ public void clear(int fromIndex, int toIndex) { checkRange(fromIndex, toIndex); if (fromIndex == toIndex) return; int startWordIndex = wordIndex(fromIndex); if (startWordIndex >= wordsInUse) return; int endWordIndex = wordIndex(toIndex - 1); if (endWordIndex >= wordsInUse) { toIndex = length(); endWordIndex = wordsInUse - 1; } long firstWordMask = WORD_MASK << fromIndex; long lastWordMask = WORD_MASK >>> -toIndex; if (startWordIndex == endWordIndex) { // Case 1: One word words[startWordIndex] &= ~(firstWordMask & lastWordMask); } else { // Case 2: Multiple words // Handle first word words[startWordIndex] &= ~firstWordMask; // Handle intermediate words, if any for (int i = startWordIndex+1; i < endWordIndex; i++) words[i] = 0; // Handle last word words[endWordIndex] &= ~lastWordMask; } recalculateWordsInUse(); checkInvariants(); }方法是將這個范圍分成三塊,startword; interval words; stopword。
/** * Sets the field wordsInUse to the logical size in words of the bit set. * WARNING:This method assumes that the number of words actually in use is * less than or equal to the current value of wordsInUse! */ private void recalculateWordsInUse() { // Traverse the bitset until a used word is found int i; for (i = wordsInUse-1; i >= 0; i--) if (words[i] != 0) break; wordsInUse = i+1; // The new logical size }
/**
 * Asserts the structural invariants that every public method must
 * preserve. The checks only run when assertions are enabled.
 */
private void checkInvariants() {
    // The highest word in use, if there is one, is non-zero.
    assert wordsInUse == 0 || words[wordsInUse - 1] != 0;
    // wordsInUse stays within the bounds of the backing array.
    assert wordsInUse >= 0 && wordsInUse <= words.length;
    // The word just above the words in use, if present, is zero.
    assert wordsInUse == words.length || words[wordsInUse] == 0;
}
/**
 * Inverts the bit at the given index: {@code true} becomes {@code false}
 * and vice versa.
 *
 * @param  bitIndex the index of the bit to flip
 * @throws IndexOutOfBoundsException if the specified index is negative
 * @since  1.4
 */
public void flip(int bitIndex) {
    if (bitIndex < 0) {
        throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
    }

    final int target = wordIndex(bitIndex);
    expandTo(target);

    final long bit = 1L << bitIndex;
    words[target] ^= bit;

    // Flipping may have zeroed the top word, so the logical size is recomputed.
    recalculateWordsInUse();
    checkInvariants();
}
/** * Ensures that the BitSet can accommodate a given wordIndex, * temporarily violating the invariants. The caller must * restore the invariants before returning to the user, * possibly using recalculateWordsInUse(). * @param wordIndex the index to be accommodated. */ private void expandTo(int wordIndex) { int wordsRequired = wordIndex+1; if (wordsInUse < wordsRequired) { ensureCapacity(wordsRequired); wordsInUse = wordsRequired; } } /** * Ensures that the BitSet can hold enough words. * @param wordsRequired the minimum acceptable number of words. */ private void ensureCapacity(int wordsRequired) { if (words.length < wordsRequired) { // Allocate larger of doubled size or required size int request = Math.max(2 * words.length, wordsRequired); words = Arrays.copyOf(words, request); sizeIsSticky = false; } }
public void flip(int fromIndex, int toIndex) { checkRange(fromIndex, toIndex); if (fromIndex == toIndex) return; int startWordIndex = wordIndex(fromIndex); int endWordIndex = wordIndex(toIndex - 1); expandTo(endWordIndex); long firstWordMask = WORD_MASK << fromIndex; long lastWordMask = WORD_MASK >>> -toIndex; if (startWordIndex == endWordIndex) { // Case 1: One word words[startWordIndex] ^= (firstWordMask & lastWordMask); } else { // Case 2: Multiple words // Handle first word words[startWordIndex] ^= firstWordMask; // Handle intermediate words, if any for (int i = startWordIndex+1; i < endWordIndex; i++) words[i] ^= WORD_MASK; // Handle last word words[endWordIndex] ^= lastWordMask; } recalculateWordsInUse(); checkInvariants(); }
/** * Sets the bit at the specified index to {@code true}. * * @param bitIndex a bit index * @throws IndexOutOfBoundsException if the specified index is negative * @since JDK1.0 */ public void set(int bitIndex) { if (bitIndex < 0) throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex); int wordIndex = wordIndex(bitIndex); expandTo(wordIndex); words[wordIndex] |= (1L << bitIndex); // Restores invariants checkInvariants(); }
/**
 * Sets the bit at the given index to the given value by delegating to
 * {@code set(bitIndex)} or {@code clear(bitIndex)}.
 *
 * @param bitIndex a bit index
 * @param value the boolean value to store at that index
 */
public void set(int bitIndex, boolean value) {
    if (!value) {
        clear(bitIndex);
        return;
    }
    set(bitIndex);
}
/**
 * Reports whether the bit at the given index is currently set in this
 * {@code BitSet}.
 *
 * @param  bitIndex the bit index
 * @return {@code true} if the bit at {@code bitIndex} is set;
 *         {@code false} otherwise
 * @throws IndexOutOfBoundsException if the specified index is negative
 */
public boolean get(int bitIndex) {
    if (bitIndex < 0) {
        throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
    }

    checkInvariants();

    final int target = wordIndex(bitIndex);
    if (target >= wordsInUse) {
        // Anything above the words in use is unset.
        return false;
    }
    final long bit = 1L << bitIndex;
    return (words[target] & bit) != 0;
}
public BitSet get(int fromIndex, int toIndex) { checkRange(fromIndex, toIndex); checkInvariants(); int len = length(); // If no set bits in range return empty bitset if (len <= fromIndex || fromIndex == toIndex) return new BitSet(0); // An optimization if (toIndex > len) toIndex = len; BitSet result = new BitSet(toIndex - fromIndex); int targetWords = wordIndex(toIndex - fromIndex - 1) + 1; int sourceIndex = wordIndex(fromIndex); boolean wordAligned = ((fromIndex & BIT_INDEX_MASK) == 0); // Process all words but the last word for (int i = 0; i < targetWords - 1; i++, sourceIndex++) result.words[i] = wordAligned ? words[sourceIndex] : (words[sourceIndex] >>> fromIndex) | (words[sourceIndex+1] << -fromIndex); // Process the last word long lastWordMask = WORD_MASK >>> -toIndex; result.words[targetWords - 1] = ((toIndex-1) & BIT_INDEX_MASK) < (fromIndex & BIT_INDEX_MASK) ? /* straddles source words */ ((words[sourceIndex] >>> fromIndex) | (words[sourceIndex+1] & lastWordMask) << -fromIndex) : ((words[sourceIndex] & lastWordMask) >>> fromIndex); // Set wordsInUse correctly result.wordsInUse = targetWords; result.recalculateWordsInUse(); result.checkInvariants(); return result; }
/**
 * Returns the "logical size" of this {@code BitSet}: one more than the
 * index of its highest set bit, or zero when no bit is set.
 *
 * @return the logical size of this {@code BitSet}
 * @since  1.2
 */
public int length() {
    if (wordsInUse == 0) {
        return 0;
    }

    final int topWord = wordsInUse - 1;
    // Position just past the highest set bit inside the top word.
    final int bitsInTopWord = BITS_PER_WORD - Long.numberOfLeadingZeros(words[topWord]);
    return BITS_PER_WORD * topWord + bitsInTopWord;
}
/**
 * Returns the hash code value for this bit set. The value is computed
 * from the words currently in use: starting from 1234, each word is
 * multiplied by its one-based position and XORed into a 64-bit
 * accumulator, whose two halves are then folded into an {@code int}.
 * Note that the hash code changes if the set of bits is altered.
 *
 * @return the hash code value for this bit set
 */
public int hashCode() {
    long hash = 1234;
    for (int i = wordsInUse - 1; i >= 0; i--) {
        hash ^= words[i] * (i + 1);
    }
    return (int) ((hash >> 32) ^ hash);
}