$\newcommand{\O}{\mathrm{O}}$ My Algorithm : kopricky アルゴリズムライブラリ

kopricky アルゴリズムライブラリ

van Emde Boas Tree

コードについての説明(個人的メモ)

van Emde Boas Tree (vEB 木)の実装. van Emde Boas Tree は $[0, U)$ 内の要素について insert, delete, prdecessor, successor クエリに最悪計算量 $\O (\log \log U)$ で答えるデータ構造で, その空間計算量 $\O (U)$ である. ここで predecessor クエリとは vEB 木に含まれる入力の値より小さい要素の中で最大の値を返すクエリで successor クエリとは vEB 木に含まれる入力の値より大きい要素の中で最小の値を返すクエリである.
実装は こちらの資料 を参考にした.
$2$ つコードを用意して $1$ つ目は愚直の実装で特に速度やメモリを気にしていないので余り使い物にならないが, 実装の参考にはなると思う(最終層は uint64_t を用いている.).
$2$ つ目のコードは競プロなどでよくある $10^9$ 以下の非負整数(実装では正確には $2^{30}$ 未満の非負整数) について管理する割と高速な van Emde Boas Tree を実装してみた.
ただ注意として, $2$ つ目のコードは要素数に関わらず $134$ MB ほどメモリを食うので高速であるもののメモリ効率は良くない.
基本的に動かしてみた結果として $2$ つ目のコードについて std::set より悪くなることはほぼなくて, また高速化したくなるような状況, つまりクエリがたくさん来るような状況では $20$ 倍近く速くなった.(これは前述のように vEB 木の処理が $1$ 層目で終わることが多くなることや常に find 操作はビット演算のみで行っていることなどが影響している.)
実際には vEB 木よりワードサイズ(w) で再帰的に分割した方が実装は楽でまた各クエリも $\O( \log_w U)$ 時間で処理できるので十分高速である.
元論文は "Design and implementation of an efficient priority queue" [van Emde Boas, Kaas, Zijlstra 1976]

(愚直版のコンストラクタ)
vanEmdeBoasTree(const uint32_t $U$) : $[0, U)$ の数を管理する順序付き集合を構築

時間計算量: insert, erase(値が存在することを仮定), predecessor, successor $\O (\log \log U)$ ($U$ は値の上限)
predecessor / successor クエリについては返すべき要素が存在しない場合はそれぞれ -1 および $U$ を返す.

コード(愚直版)

class vanEmdeBoasTree
{
private:
    const uint32_t LENGTH;
    uint32_t _size;
    struct layer {
        static const uint32_t THRESHOULD = 64;
        uint32_t length, num_blocks, quo, rem;
        int32_t _max, _min;
        uint64_t data;
        layer *summary, **sublayers;
        layer(const uint32_t length)
            : length(length), num_blocks((int)sqrt(length)),
                quo(length / num_blocks), rem(length % num_blocks),
                    _max(-1), _min(length), data(0ULL){
            if(length <= THRESHOULD) return;
            summary = new layer(num_blocks);
            sublayers = new layer*[num_blocks];
            for(uint32_t i = 0; i < num_blocks; ++i){
                sublayers[i] = new layer(quo + (i < rem));
            }
        }
        layer(const layer& another)
            : length(another.length), num_blocks(another.num_blocks), quo(another.quo), rem(another.rem),
                _max(another._max), _min(another._min), data(another.data){
            if(length <= THRESHOULD) return;
            summary = new layer(*another.summary);
            sublayers = new layer*[num_blocks];
            for(uint32_t i = 0; i < num_blocks; ++i){
                sublayers[i] = new layer(*another.sublayers[i]);
            }
        }
        layer(layer&& another)
            : length(move(another.length)), num_blocks(move(another.num_blocks)),
                quo(move(another.quo)), rem(move(another.rem)),
                    _max(move(another._max)), _min(move(another._min)), data(move(another.data)){
            if(length <= THRESHOULD) return;
            summary = another.summary, sublayers = another.sublayers;
            another.summary = nullptr, another.sublayers = nullptr;
        }
        layer& operator=(const layer& another){
            length = another.length, num_blocks = another.num_blocks, quo = another.quo, rem = another.rem;
            _max = another._max, _min = another._min, data = another.data;
            this->~layer();
            if(length <= THRESHOULD) return *this;
            summary = new layer(*another.summary);
            sublayers = new layer*[num_blocks];
            for(uint32_t i = 0; i < num_blocks; ++i){
                sublayers[i] = new layer(*another.sublayers[i]);
            }
            return *this;
        }
        layer& operator=(layer&& another){
            this->~layer();
            length = move(another.length), num_blocks = move(another.num_blocks);
            quo = move(another.quo), rem = move(another.rem);
            _max = move(another._max), _min = move(another._min), data = move(another.data);
            if(length <= THRESHOULD) return *this;
            summary = another.summary, sublayers = another.sublayers;
            another.summary = nullptr, another.sublayers = nullptr;
            return *this;
        }
        // ~layer(){
        //     if(length <= THRESHOULD) return;
        //     delete summary;
        //     for(uint32_t i = 0; i < num_blocks; ++i){
        //         delete sublayers[i];
        //     }
        //     delete[] sublayers;
        // }
        inline static int32_t msb(uint64_t u){
            return 63 - __builtin_clzll(u);
        }
        inline static int32_t lsb(uint64_t u){
            return __builtin_ctzll(u);
        }
        inline bool empty() const noexcept { return (_max == -1); }
        inline bool isOne() const noexcept { return (_max == _min); }
        inline int32_t max() const noexcept { return _max; }
        inline int32_t min() const noexcept { return _min; }
        inline int32_t index(const uint32_t value) const noexcept {
            return (value >= rem * (quo + 1))
                ? (rem + (value - rem * (quo + 1)) / quo) : (value / (quo + 1));
        }
        inline int32_t sum(uint32_t index) const noexcept {
            return (index > rem)
                ? (rem * (quo + 1) + (index - rem) * quo) : (index * (quo + 1));
        }
        bool small_find(const int32_t value) const noexcept {
            return (data >> value) & 1ULL;
        }
        bool find(const int32_t value) const noexcept {
            if(value == _min) return true;
            if(length <= THRESHOULD) return small_find(value);
            const int32_t id = index(value);
            return sublayers[id]->find(value - sum(id));
        }
        bool small_insert(const int32_t value) noexcept {
            if(small_find(value)) return false;
            data ^= (1ULL << value);
            return true;
        }
        bool insert(int32_t value) noexcept {
            if(empty()){
                _max = _min = value;
                return true;
            }else if(value == _min){
                return false;
            }else if(value < _min){
                swap(value, _min);
            }else if(value > _max){
                _max = value;
            }
            if(length <= THRESHOULD) return small_insert(value);
            const int32_t id = index(value);
            if(sublayers[id]->insert(value - sum(id))){
                if(sublayers[id]->isOne()) summary->insert(id);
                return true;
            }else{
                return false;
            }
        }
        void small_erase(const int32_t value){
            if(value == _min){
                _min = lsb(data);
                data ^= (1ULL << _min);
            }else{
                data ^= (1ULL << value);
                if(data == 0) _max = _min;
                else if(value == _max){
                    _max = msb(data);
                }
            }
        }
        void erase(int32_t value){
            if(isOne()){
                _max = -1, _min = length;
                return;
            }
            if(length <= THRESHOULD) return small_erase(value);
            if(value == _min){
                const int32_t id = summary->min();
                 _min = value = sum(id) + sublayers[id]->min();
            }
            const int32_t id = index(value);
            sublayers[id]->erase(value - sum(id));
            if(sublayers[id]->empty()){
                summary->erase(id);
            }
            if(value == _max){
                if(summary->empty()){
                    _max = _min;
                }else{
                    const int32_t id = summary->max();
                    _max = sum(id) + sublayers[id]->max();
                }
            }
        }
        int32_t small_predecessor(const int32_t value) const noexcept {
            const uint64_t tmp = (data & ((1ULL << value) - 1ULL));
            return tmp ? msb(tmp) : _min;
        }
        int32_t predecessor(const int32_t value) const noexcept {
            if(_min >= value){
                return -1;
            }else if(value > _max){
                return _max;
            }
            if(length <= THRESHOULD) return small_predecessor(value);
            const int32_t id = index(value), sm = sum(id);
            if(value > sm + sublayers[id]->min()){
                return sm + sublayers[id]->predecessor(value - sm);
            }else{
                const int32_t id2 = summary->predecessor(id);
                return (id2 >= 0) ? (sum(id2) + sublayers[id2]->max()) : _min;
            }
        }
        int32_t small_successor(const int32_t value) const noexcept {
            return lsb(data & ~((1ULL << (value + 1)) - 1ULL));
        }
        int32_t successor(const int32_t value) const noexcept {
            if(value < _min){
                return _min;
            }else if(value >= _max){
                return length;
            }
            if(length <= THRESHOULD) return small_successor(value);
            const int32_t id = index(value), sm = sum(id);
            if(value < sm + sublayers[id]->max()){
                return sm + sublayers[id]->successor(value - sm);
            }else{
                const int32_t id2 = summary->successor(id);
                return sum(id2) + sublayers[id2]->min();
            }
        }
    };
    layer base_layer;
public:
    vanEmdeBoasTree(const uint32_t length) : LENGTH(length), _size(0), base_layer(length){}
    vanEmdeBoasTree(const vanEmdeBoasTree&) = default;
    vanEmdeBoasTree(vanEmdeBoasTree&&) = default;
    vanEmdeBoasTree& operator=(const vanEmdeBoasTree&) = default;
    vanEmdeBoasTree& operator=(vanEmdeBoasTree&&) = default;
    friend ostream& operator<< (ostream& os, vanEmdeBoasTree& veb) noexcept {
        for(uint32_t st = veb.successor(-1); st != veb.LENGTH; st = veb.successor(st))
            os << st << " ";
        return os;
    }
    bool empty() const noexcept { return (_size == 0); }
    uint32_t size() const noexcept { return _size; }
    uint32_t max_size() const noexcept { return LENGTH; }
    bool find(const uint32_t value) const noexcept {
        if(value >= LENGTH) return false;
        return base_layer.find(value);
    }
    uint32_t count(const uint32_t value) const noexcept {
        return find(value);
    }
    int32_t max() const noexcept { return base_layer.max(); }
    int32_t min() const noexcept { return base_layer.min(); }
    void insert(const uint32_t value){
        assert(value < LENGTH);
        _size += base_layer.insert(value);
    }
    void erase(const uint32_t value){
        assert(value < LENGTH);
        base_layer.erase(value), --_size;
    }
    int32_t predecessor(const int32_t value) const noexcept {
        return base_layer.predecessor(value);
    }
    int32_t successor(const int32_t value) const noexcept {
        return base_layer.successor(value);
    }
};

コード($2^{30}$ 未満の整数に特化した高速版)

class vanEmdeBoasTree
{
private:
    uint32_t  _size;
    #define msb(u) (63 - __builtin_clzll(u))
    #define lsb(u) (__builtin_ctzll(u))
    struct first_layer;
    struct middle_layer;
    struct last_layer;
    struct first_layer {
        uint64_t *data;
        middle_layer *summary;
        int32_t _max, _min;
        first_layer() : _max(-1), _min(1073741824){
            data = new uint64_t[16777216](), summary = new middle_layer();
        }
        first_layer(const first_layer& another) : _max(another._max), _min(another._min){
            data = new uint64_t[16777216];
            for(uint32_t i = 0; i < 16777216; ++i) data[i] = another.data[i];
            summary = new middle_layer(*another.summary);
        }
        first_layer(first_layer&& another)
            : _max(move(another._max)), _min(move(another._min)){
            data = another.data, summary = another.summary;
            data = nullptr, summary = nullptr;
        }
        first_layer& operator=(const first_layer& another){
            this->~first_layer();
            _max = another._max, _min = another._min;
            data = new uint64_t[16777216];
            for(uint32_t i = 0; i < 16777216; ++i) data[i] = another.data[i];
            summary = new middle_layer(*another.summary);
            return *this;
        }
        first_layer& operator=(first_layer&& another){
            this->~first_layer();
            _max = move(another._max), _min = move(another._min);
            data = another.data, summary = another.summary;
            data = nullptr, summary = nullptr;
            return *this;
        }
        // ~first_layer(){
        //     delete summary;
        //     delete[] data;
        // }
        inline bool empty() const noexcept { return (_max == -1); }
        inline bool isOne() const noexcept { return (_max == _min); }
        inline int32_t max() const noexcept { return _max; }
        inline int32_t min() const noexcept { return _min; }
        bool find(const int32_t value) const noexcept {
            return (value == _min) || ((data[value >> 6] >> (value & 63)) & 1ULL);
        }
        bool insert(int32_t value) noexcept {
            if(value == _min) return false;
            else if(_max == -1) return _max = _min = value, true;
            else if(value < _min) swap(value, _min);
            else if(value > _max) _max = value;
            const int32_t id = (value >> 6);
            if((data[id] >> (value & 63)) & 1ULL) return false;
            else{
                if(data[id] == 0) summary->insert(id);
                data[id] ^= (1ULL << (value & 63));
                return true;
            }
        }
        size_t erase(int32_t value) noexcept {
            if(value == _min){
                if(_max == _min){
                    _max = -1, _min = 1073741824;
                    return 1;
                }
                const int32_t id = summary->min();
                _min = value = (id << 6) + lsb(data[id]);
            }else if(!((data[value >> 6] >> (value & 63)) & 1ULL)){
                return 0;
            }
            const int32_t id = (value >> 6);
            data[id] ^= (1ULL << (value & 63));
            if(data[id] == 0) summary->erase(id);
            if(value == _max){
                if(summary->empty()) _max = _min;
                else{
                    const int32_t id = summary->max();
                    _max = (id << 6) + msb(data[id]);
                }
            }
            return 1;
        }
        int32_t predecessor(const int32_t value) const noexcept {
            if(_min >= value) return -1;
            else if(value > _max) return _max;
            const int32_t id = (value >> 6), sm = (id << 6);
            if(data[id] && value > sm + lsb(data[id]))
                return sm + msb(data[id] & ((1ULL << (value & 63)) - 1ULL));
            else{
                const int32_t id2 = summary->predecessor(id);
                return (id2 >= 0) ? ((id2 << 6) + msb(data[id2])) : _min;
            }
        }
        int32_t successor(const int32_t value) const noexcept {
            if(value < _min) return _min;
            else if(value >= _max) return 1073741824;
            const int32_t id = (value >> 6), sm = (id << 6);
            if(data[id] && value < sm + msb(data[id]))
                return sm + lsb(data[id] & ~((1ULL << ((value & 63) + 1)) - 1ULL));
            else{
                const int32_t id2 = summary->successor(id);
                return (id2 << 6) + lsb(data[id2]);
            }
        }
    };
    struct middle_layer{
        last_layer *sublayers, *summary;
        int32_t _max, _min;
        middle_layer() : _max(-1), _min(16777216){
            sublayers = new last_layer[4096](), summary = new last_layer();
        }
        middle_layer(const middle_layer& another) : _max(another._max), _min(another._min){
            sublayers = new last_layer[4096];
            for(uint32_t i = 0; i < 4096; ++i)
                sublayers[i] = last_layer(another.sublayers[i]);
            summary = new last_layer(*another.summary);
        }
        middle_layer(middle_layer&& another)
            : _max(move(another._max)), _min(move(another._min)){
            sublayers = another.sublayers, summary = another.summary;
            another.sublayers = another.summary = nullptr;
        }
        middle_layer& operator=(const middle_layer& another){
            this->~middle_layer();
            _max = another._max, _min = another._min;
            sublayers = new last_layer[4096];
            for(uint32_t i = 0; i < 4096; ++i)
                sublayers[i] = last_layer(another.sublayers[i]);
            summary = new last_layer(*another.summary);
            return *this;
        }
        middle_layer& operator=(middle_layer&& another){
            this->~middle_layer();
            _max = move(another._max), _min = move(another._min);
            sublayers = another.sublayers, summary = another.summary;
            another.sublayers = another.summary = nullptr;
            return *this;
        }
        // ~middle_layer(){
        //     delete summary;
        //     delete[] sublayers;
        // }
        inline bool empty() const noexcept { return (_max == -1); }
        inline bool isOne() const noexcept { return (_max == _min); }
        inline int32_t max() const noexcept { return _max; }
        inline int32_t min() const noexcept { return _min; }
        bool insert(int32_t value) noexcept {
            if(value == _min) return false;
            else if(_max == -1) return _max = _min = value, true;
            else if(value < _min) swap(value, _min);
            else if(value > _max) _max = value;
            const int32_t id = (value >> 12);
            if(sublayers[id].insert(value & 4095)){
                if(sublayers[id].isOne()) summary->insert(id);
                return true;
            }else return false;
        }
        void erase(int32_t value) noexcept {
            if(_max == _min){
                _max = -1, _min = 16777216;
                return;
            }else if(value == _min){
                const int32_t id = summary->min();
                 _min = value = (id << 12) + sublayers[id].min();
            }
            const int32_t id = (value >> 12);
            sublayers[id].erase(value & 4095);
            if(sublayers[id].empty()) summary->erase(id);
            if(value == _max){
                if(summary->empty()) _max = _min;
                else{
                    const int32_t id = summary->max();
                    _max = (id << 12) + sublayers[id].max();
                }
            }
        }
        int32_t predecessor(const int32_t value) const noexcept {
            if(_min >= value) return -1;
            else if(value > _max) return _max;
            const int32_t id = (value >> 12), sm = (id << 12);
            if(value > sm + sublayers[id].min()){
                return sm + sublayers[id].predecessor(value & 4095);
            }else{
                const int32_t id2 = summary->predecessor(id);
                return (id2 >= 0) ? ((id2 << 12) + sublayers[id2].max()) : _min;
            }
        }
        int32_t successor(const int32_t value) const noexcept {
            if(value < _min) return _min;
            else if(value >= _max) return 16777216;
            const int32_t id = (value >> 12), sm = (id << 12);
            if(value < sm + sublayers[id].max()){
                return sm + sublayers[id].successor(value & 4095);
            }else{
                const int32_t id2 = summary->successor(id);
                return (id2 << 12) + sublayers[id2].min();
            }
        }
    };
    struct last_layer{
        uint64_t data[64], summary;
        int32_t _max, _min;
        last_layer() noexcept : summary(0ULL), _max(-1), _min(4096){
            memset(data, 0, sizeof(data));
        }
        inline bool empty() const noexcept { return (_max == -1); }
        inline bool isOne() const noexcept { return (_max == _min); }
        inline int32_t max() const noexcept { return _max; }
        inline int32_t min() const noexcept { return _min; }
        bool insert(int32_t value) noexcept {
            if(value == _min) return false;
            else if(_max == -1) return _max = _min = value, true;
            else if(value < _min) swap(value, _min);
            else if(value > _max) _max = value;
            const int32_t id = (value >> 6);
            if((data[id] >> (value & 63)) & 1ULL) return false;
            else{
                data[id] ^= (1ULL << (value & 63)), summary |= (1ULL << id);
                return true;
            }
        }
        void erase(int32_t value) noexcept {
            if(_max == _min){
                _max = -1, _min = 4096;
                return;
            }else if(value == _min){
                const int32_t id = lsb(summary);
                _min = value = (id << 6) + lsb(data[id]);
            }
            const int32_t id = (value >> 6);
            data[id] ^= (1ULL << (value & 63));
            if(data[id] == 0) summary ^= (1ULL << id);
            if(value == _max){
                if(summary == 0) _max = _min;
                else{
                    const int32_t id = msb(summary);
                    _max = (id << 6) + msb(data[id]);
                }
            }
        }
        int32_t predecessor(const int32_t value) const noexcept {
            if(_min >= value) return -1;
            else if(value > _max) return _max;
            const int32_t id = (value >> 6), sm = (id << 6);
            if(data[id] && value > sm + lsb(data[id]))
                return sm + msb(data[id] & ((1ULL << (value & 63)) - 1ULL));
            else{
                const uint64_t tmp = (summary & ((1ULL << id) - 1ULL));
                if(tmp == 0ULL) return _min;
                else{
                    const int32_t id2 = msb(tmp);
                    return (id2 << 6) + msb(data[id2]);
                }
            }
        }
        int32_t successor(const int32_t value) const noexcept {
            if(value < _min) return _min;
            else if(value >= _max) return 4096;
            const int32_t id = (value >> 6), sm = (id << 6);
            if(data[id] && value < sm + msb(data[id]))
                return sm + lsb(data[id] & ~((1ULL << ((value & 63) + 1)) - 1ULL));
            else{
                const int32_t id2 = lsb(summary & ~((1ULL << (id + 1)) - 1ULL));
                return (id2 << 6) + lsb(data[id2]);
            }
        }
    };
    first_layer base_layer;
public:
    static const uint32_t  LENGTH = 1073741824;
    vanEmdeBoasTree() : _size(0), base_layer(){}
    vanEmdeBoasTree(const vanEmdeBoasTree&) = default;
    vanEmdeBoasTree(vanEmdeBoasTree&&) = default;
    vanEmdeBoasTree& operator=(const vanEmdeBoasTree&) = default;
    vanEmdeBoasTree& operator=(vanEmdeBoasTree&&) = default;
    friend ostream& operator<< (ostream& os, vanEmdeBoasTree& veb) noexcept {
        for(uint32_t st = veb.successor(-1); st != veb.LENGTH; st = veb.successor(st))
            os << st << " ";
        return os;
    }
    bool empty() const noexcept { return (_size == 0); }
    uint32_t  size() const noexcept { return _size; }
    uint32_t  max_size() const noexcept { return LENGTH; }
    bool find(const uint32_t value) const noexcept {
        if(value >= LENGTH) return false;
        return base_layer.find(value);
    }
    uint32_t  count(const uint32_t value) const noexcept {
        return find(value);
    }
    int32_t max() const noexcept { return base_layer.max(); }
    int32_t min() const noexcept { return base_layer.min(); }
    bool insert(const uint32_t value){
        assert(value < LENGTH);
        bool res = base_layer.insert(value);
        return _size += res, res;
    }
    size_t erase(const uint32_t value){
        assert(value < LENGTH);
        size_t res = base_layer.erase(value);
        return _size -= res, res;
    }
    int32_t predecessor(const int32_t value) const noexcept {
        return base_layer.predecessor(value);
    }
    int32_t successor(const int32_t value) const noexcept {
        return base_layer.successor(value);
    }
};

verify 用の問題

AOJ : Set - Set: Range Search 提出コード(愚直版) 提出コード(高速版)