FC Updates from BitShares and myself #21

Closed
nathanielhourt wants to merge 687 commits from dapp-support into latest-fc
Showing only changes of commit e5ab69acff - Show all commits

View file

@ -1,5 +1,3 @@
#pragma once
/* /*
********************************************************************* *********************************************************************
* * * *
@ -11,27 +9,29 @@
* * * *
* Copyright notice: * * Copyright notice: *
* Free use of the Open Bloom Filter Library is permitted under the * * Free use of the Open Bloom Filter Library is permitted under the *
* guidelines and in accordance with the most current version of the * * guidelines and in accordance with the MIT License. *
* Common Public License. * * http://www.opensource.org/licenses/MIT *
* http://www.opensource.org/licenses/cpl1.0.php *
* * * *
********************************************************************* *********************************************************************
*/ */
#ifndef INCLUDE_BLOOM_FILTER_HPP
#define INCLUDE_BLOOM_FILTER_HPP
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <cstddef> #include <cstddef>
#include <cstdlib>
#include <iterator> #include <iterator>
#include <limits> #include <limits>
#include <string> #include <string>
#include <vector> #include <vector>
#include <fc/reflect/reflect.hpp>
namespace fc { namespace fc {
static constexpr std::size_t bits_per_char = 0x08; // 8 bits in 1 char(unsigned) static constexpr std::size_t bits_per_char = 0x08; // 8 bits in 1 char(unsigned)
static const unsigned char bit_mask[bits_per_char] = { static const unsigned char bit_mask[bits_per_char] = {
0x01, //00000001 0x01, //00000001
0x02, //00000010 0x02, //00000010
@ -87,22 +87,22 @@ public:
(0xFFFFFFFFFFFFFFFFULL == random_seed); (0xFFFFFFFFFFFFFFFFULL == random_seed);
} }
//Allowed min/max size of the bloom filter in bits // Allowable min/max size of the bloom filter in bits
unsigned long long int minimum_size; unsigned long long int minimum_size;
unsigned long long int maximum_size; unsigned long long int maximum_size;
//Allowed min/max number of hash functions // Allowable min/max number of hash functions
unsigned int minimum_number_of_hashes; unsigned int minimum_number_of_hashes;
unsigned int maximum_number_of_hashes; unsigned int maximum_number_of_hashes;
//The approximate number of elements to be inserted // The approximate number of elements to be inserted
//into the bloom filter, should be within one order // into the bloom filter, should be within one order
//of magnitude. The default is 10000. // of magnitude. The default is 10000.
unsigned long long int projected_element_count; unsigned long long int projected_element_count;
//The approximate false positive probability expected // The approximate false positive probability expected
//from the bloom filter. The default is the reciprocal // from the bloom filter. The default is assumed to be
//of the projected_element_count. // the reciprocal of the projected_element_count.
double false_positive_probability; double false_positive_probability;
unsigned long long int random_seed; unsigned long long int random_seed;
@ -133,28 +133,32 @@ public:
if (!(*this)) if (!(*this))
return false; return false;
double min_m = std::numeric_limits<double>::infinity(); double min_m = std::numeric_limits<double>::infinity();
double min_k = 0.0; double min_k = 0.0;
double curr_m = 0.0; double k = 1.0;
double k = 1.0;
while (k < 1000.0) while (k < 1000.0)
{ {
double numerator = (- k * projected_element_count); const double numerator = (- k * projected_element_count);
double denominator = std::log(1.0 - std::pow(false_positive_probability, 1.0 / k)); const double denominator = std::log(1.0 - std::pow(false_positive_probability, 1.0 / k));
curr_m = numerator / denominator;
const double curr_m = numerator / denominator;
if (curr_m < min_m) if (curr_m < min_m)
{ {
min_m = curr_m; min_m = curr_m;
min_k = k; min_k = k;
} }
k += 1.0; k += 1.0;
} }
optimal_parameters_t& optp = optimal_parameters; optimal_parameters_t& optp = optimal_parameters;
optp.number_of_hashes = static_cast<unsigned int>(min_k); optp.number_of_hashes = static_cast<unsigned int>(min_k);
optp.table_size = static_cast<unsigned long long int>(min_m); optp.table_size = static_cast<unsigned long long int>(min_m);
optp.table_size += (((optp.table_size % bits_per_char) != 0) ? (bits_per_char - (optp.table_size % bits_per_char)) : 0); optp.table_size += (((optp.table_size % bits_per_char) != 0) ? (bits_per_char - (optp.table_size % bits_per_char)) : 0);
if (optp.number_of_hashes < minimum_number_of_hashes) if (optp.number_of_hashes < minimum_number_of_hashes)
@ -178,15 +182,15 @@ protected:
typedef unsigned int bloom_type; typedef unsigned int bloom_type;
typedef unsigned char cell_type; typedef unsigned char cell_type;
typedef std::vector<unsigned char> table_type;
public: public:
bloom_filter() bloom_filter()
: salt_count_(0), : salt_count_(0),
table_size_(0), table_size_(0),
raw_table_size_(0),
projected_element_count_(0), projected_element_count_(0),
inserted_element_count_(0), inserted_element_count_ (0),
random_seed_(0), random_seed_(0),
desired_false_positive_probability_(0.0) desired_false_positive_probability_(0.0)
{} {}
@ -199,12 +203,10 @@ public:
{ {
salt_count_ = p.optimal_parameters.number_of_hashes; salt_count_ = p.optimal_parameters.number_of_hashes;
table_size_ = p.optimal_parameters.table_size; table_size_ = p.optimal_parameters.table_size;
generate_unique_salt();
raw_table_size_ = table_size_ / bits_per_char;
bit_table_.resize( static_cast<std::size_t>(raw_table_size_) ); generate_unique_salt();
//bit_table_ = new cell_type[static_cast<std::size_t>(raw_table_size_)];
std::fill_n(bit_table_.data(),raw_table_size_,0x00); bit_table_.resize(table_size_ / bits_per_char, static_cast<unsigned char>(0x00));
} }
bloom_filter(const bloom_filter& filter) bloom_filter(const bloom_filter& filter)
@ -217,15 +219,15 @@ public:
if (this != &f) if (this != &f)
{ {
return return
(salt_count_ == f.salt_count_) && (salt_count_ == f.salt_count_ ) &&
(table_size_ == f.table_size_) && (table_size_ == f.table_size_ ) &&
(raw_table_size_ == f.raw_table_size_) && (bit_table_.size() == f.bit_table_.size() ) &&
(projected_element_count_ == f.projected_element_count_) && (projected_element_count_ == f.projected_element_count_ ) &&
(inserted_element_count_ == f.inserted_element_count_) && (inserted_element_count_ == f.inserted_element_count_ ) &&
(random_seed_ == f.random_seed_) && (random_seed_ == f.random_seed_ ) &&
(desired_false_positive_probability_ == f.desired_false_positive_probability_) && (desired_false_positive_probability_ == f.desired_false_positive_probability_) &&
(salt_ == f.salt_) && (salt_ == f.salt_ ) &&
std::equal(f.bit_table_.data(),f.bit_table_.data() + raw_table_size_,bit_table_.data()); (bit_table_ == f.bit_table_ ) ;
} }
else else
return true; return true;
@ -242,21 +244,22 @@ public:
{ {
salt_count_ = f.salt_count_; salt_count_ = f.salt_count_;
table_size_ = f.table_size_; table_size_ = f.table_size_;
raw_table_size_ = f.raw_table_size_; bit_table_ = f.bit_table_;
salt_ = f.salt_;
projected_element_count_ = f.projected_element_count_; projected_element_count_ = f.projected_element_count_;
inserted_element_count_ = f.inserted_element_count_; inserted_element_count_ = f.inserted_element_count_;
random_seed_ = f.random_seed_; random_seed_ = f.random_seed_;
desired_false_positive_probability_ = f.desired_false_positive_probability_; desired_false_positive_probability_ = f.desired_false_positive_probability_;
bit_table_.resize( raw_table_size_ );
std::copy(f.bit_table_.data(),f.bit_table_.data() + raw_table_size_,bit_table_.data());
salt_ = f.salt_;
} }
return *this; return *this;
} }
virtual ~bloom_filter() virtual ~bloom_filter()
{ {}
}
inline bool operator!() const inline bool operator!() const
{ {
@ -265,23 +268,26 @@ public:
inline void clear() inline void clear()
{ {
std::fill_n(bit_table_.data(),raw_table_size_,0x00); std::fill(bit_table_.begin(), bit_table_.end(), static_cast<unsigned char>(0x00));
inserted_element_count_ = 0; inserted_element_count_ = 0;
} }
inline void insert(const unsigned char* key_begin, const std::size_t& length) inline void insert(const unsigned char* key_begin, const std::size_t& length)
{ {
std::size_t bit_index = 0; std::size_t bit_index = 0;
std::size_t bit = 0; std::size_t bit = 0;
for (std::size_t i = 0; i < salt_.size(); ++i) for (std::size_t i = 0; i < salt_.size(); ++i)
{ {
compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit); compute_indices(hash_ap(key_begin, length, salt_[i]), bit_index, bit);
bit_table_[bit_index / bits_per_char] |= bit_mask[bit]; bit_table_[bit_index / bits_per_char] |= bit_mask[bit];
} }
++inserted_element_count_; ++inserted_element_count_;
} }
template<typename T> template <typename T>
inline void insert(const T& t) inline void insert(const T& t)
{ {
// Note: T must be a C++ POD type. // Note: T must be a C++ POD type.
@ -290,7 +296,7 @@ public:
inline void insert(const std::string& key) inline void insert(const std::string& key)
{ {
insert(reinterpret_cast<const unsigned char*>(key.c_str()),key.size()); insert(reinterpret_cast<const unsigned char*>(key.data()),key.size());
} }
inline void insert(const char* data, const std::size_t& length) inline void insert(const char* data, const std::size_t& length)
@ -298,10 +304,11 @@ public:
insert(reinterpret_cast<const unsigned char*>(data),length); insert(reinterpret_cast<const unsigned char*>(data),length);
} }
template<typename InputIterator> template <typename InputIterator>
inline void insert(const InputIterator begin, const InputIterator end) inline void insert(const InputIterator begin, const InputIterator end)
{ {
InputIterator itr = begin; InputIterator itr = begin;
while (end != itr) while (end != itr)
{ {
insert(*(itr++)); insert(*(itr++));
@ -311,19 +318,22 @@ public:
inline virtual bool contains(const unsigned char* key_begin, const std::size_t length) const inline virtual bool contains(const unsigned char* key_begin, const std::size_t length) const
{ {
std::size_t bit_index = 0; std::size_t bit_index = 0;
std::size_t bit = 0; std::size_t bit = 0;
for (std::size_t i = 0; i < salt_.size(); ++i) for (std::size_t i = 0; i < salt_.size(); ++i)
{ {
compute_indices(hash_ap(key_begin,length,salt_[i]),bit_index,bit); compute_indices(hash_ap(key_begin, length, salt_[i]), bit_index, bit);
if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit]) if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit])
{ {
return false; return false;
} }
} }
return true; return true;
} }
template<typename T> template <typename T>
inline bool contains(const T& t) const inline bool contains(const T& t) const
{ {
return contains(reinterpret_cast<const unsigned char*>(&t),static_cast<std::size_t>(sizeof(T))); return contains(reinterpret_cast<const unsigned char*>(&t),static_cast<std::size_t>(sizeof(T)));
@ -339,33 +349,39 @@ public:
return contains(reinterpret_cast<const unsigned char*>(data),length); return contains(reinterpret_cast<const unsigned char*>(data),length);
} }
template<typename InputIterator> template <typename InputIterator>
inline InputIterator contains_all(const InputIterator begin, const InputIterator end) const inline InputIterator contains_all(const InputIterator begin, const InputIterator end) const
{ {
InputIterator itr = begin; InputIterator itr = begin;
while (end != itr) while (end != itr)
{ {
if (!contains(*itr)) if (!contains(*itr))
{ {
return itr; return itr;
} }
++itr; ++itr;
} }
return end; return end;
} }
template<typename InputIterator> template <typename InputIterator>
inline InputIterator contains_none(const InputIterator begin, const InputIterator end) const inline InputIterator contains_none(const InputIterator begin, const InputIterator end) const
{ {
InputIterator itr = begin; InputIterator itr = begin;
while (end != itr) while (end != itr)
{ {
if (contains(*itr)) if (contains(*itr))
{ {
return itr; return itr;
} }
++itr; ++itr;
} }
return end; return end;
} }
@ -374,7 +390,7 @@ public:
return table_size_; return table_size_;
} }
inline std::size_t element_count() const inline unsigned long long int element_count() const
{ {
return inserted_element_count_; return inserted_element_count_;
} }
@ -395,16 +411,17 @@ public:
{ {
/* intersection */ /* intersection */
if ( if (
(salt_count_ == f.salt_count_) && (salt_count_ == f.salt_count_ ) &&
(table_size_ == f.table_size_) && (table_size_ == f.table_size_ ) &&
(random_seed_ == f.random_seed_) (random_seed_ == f.random_seed_)
) )
{ {
for (std::size_t i = 0; i < raw_table_size_; ++i) for (std::size_t i = 0; i < bit_table_.size(); ++i)
{ {
bit_table_[i] &= f.bit_table_[i]; bit_table_[i] &= f.bit_table_[i];
} }
} }
return *this; return *this;
} }
@ -412,16 +429,17 @@ public:
{ {
/* union */ /* union */
if ( if (
(salt_count_ == f.salt_count_) && (salt_count_ == f.salt_count_ ) &&
(table_size_ == f.table_size_) && (table_size_ == f.table_size_ ) &&
(random_seed_ == f.random_seed_) (random_seed_ == f.random_seed_)
) )
{ {
for (std::size_t i = 0; i < raw_table_size_; ++i) for (std::size_t i = 0; i < bit_table_.size(); ++i)
{ {
bit_table_[i] |= f.bit_table_[i]; bit_table_[i] |= f.bit_table_[i];
} }
} }
return *this; return *this;
} }
@ -429,16 +447,17 @@ public:
{ {
/* difference */ /* difference */
if ( if (
(salt_count_ == f.salt_count_) && (salt_count_ == f.salt_count_ ) &&
(table_size_ == f.table_size_) && (table_size_ == f.table_size_ ) &&
(random_seed_ == f.random_seed_) (random_seed_ == f.random_seed_)
) )
{ {
for (std::size_t i = 0; i < raw_table_size_; ++i) for (std::size_t i = 0; i < bit_table_.size(); ++i)
{ {
bit_table_[i] ^= f.bit_table_[i]; bit_table_[i] ^= f.bit_table_[i];
} }
} }
return *this; return *this;
} }
@ -457,7 +476,7 @@ protected:
inline virtual void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const inline virtual void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const
{ {
bit_index = hash % table_size_; bit_index = hash % table_size_;
bit = bit_index % bits_per_char; bit = bit_index % bits_per_char;
} }
void generate_unique_salt() void generate_unique_salt()
@ -469,6 +488,7 @@ protected:
hash function with different values seems to be adequate. hash function with different values seems to be adequate.
*/ */
const unsigned int predef_salt_count = 128; const unsigned int predef_salt_count = 128;
static const bloom_type predef_salt[predef_salt_count] = static const bloom_type predef_salt[predef_salt_count] =
{ {
0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC, 0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC,
@ -510,25 +530,31 @@ protected:
std::copy(predef_salt, std::copy(predef_salt,
predef_salt + salt_count_, predef_salt + salt_count_,
std::back_inserter(salt_)); std::back_inserter(salt_));
for (unsigned int i = 0; i < salt_.size(); ++i)
{ for (std::size_t i = 0; i < salt_.size(); ++i)
{
/* /*
Note: Note:
This is done to integrate the user defined random seed, This is done to integrate the user defined random seed,
so as to allow for the generation of unique bloom filter so as to allow for the generation of unique bloom filter
instances. instances.
*/ */
salt_[i] = salt_[i] * salt_[(i + 3) % salt_.size()] + static_cast<bloom_type>(random_seed_); salt_[i] = salt_[i] * salt_[(i + 3) % salt_.size()] + static_cast<bloom_type>(random_seed_);
} }
} }
else else
{ {
std::copy(predef_salt,predef_salt + predef_salt_count,std::back_inserter(salt_)); std::copy(predef_salt, predef_salt + predef_salt_count, std::back_inserter(salt_));
srand(static_cast<unsigned int>(random_seed_)); srand(static_cast<unsigned int>(random_seed_));
while (salt_.size() < salt_count_) while (salt_.size() < salt_count_)
{ {
bloom_type current_salt = static_cast<bloom_type>(rand()) * static_cast<bloom_type>(rand()); bloom_type current_salt = static_cast<bloom_type>(rand()) * static_cast<bloom_type>(rand());
if (0 == current_salt) continue;
if (0 == current_salt)
continue;
if (salt_.end() == std::find(salt_.begin(), salt_.end(), current_salt)) if (salt_.end() == std::find(salt_.begin(), salt_.end(), current_salt))
{ {
salt_.push_back(current_salt); salt_.push_back(current_salt);
@ -540,57 +566,71 @@ protected:
inline bloom_type hash_ap(const unsigned char* begin, std::size_t remaining_length, bloom_type hash) const inline bloom_type hash_ap(const unsigned char* begin, std::size_t remaining_length, bloom_type hash) const
{ {
const unsigned char* itr = begin; const unsigned char* itr = begin;
unsigned int loop = 0; unsigned int loop = 0;
while (remaining_length >= 8) while (remaining_length >= 8)
{ {
const unsigned int& i1 = *(reinterpret_cast<const unsigned int*>(itr)); itr += sizeof(unsigned int); const unsigned int& i1 = *(reinterpret_cast<const unsigned int*>(itr)); itr += sizeof(unsigned int);
const unsigned int& i2 = *(reinterpret_cast<const unsigned int*>(itr)); itr += sizeof(unsigned int); const unsigned int& i2 = *(reinterpret_cast<const unsigned int*>(itr)); itr += sizeof(unsigned int);
hash ^= (hash << 7) ^ i1 * (hash >> 3) ^ hash ^= (hash << 7) ^ i1 * (hash >> 3) ^
(~((hash << 11) + (i2 ^ (hash >> 5)))); (~((hash << 11) + (i2 ^ (hash >> 5))));
remaining_length -= 8; remaining_length -= 8;
} }
if (remaining_length) if (remaining_length)
{ {
if (remaining_length >= 4) if (remaining_length >= 4)
{ {
const unsigned int& i = *(reinterpret_cast<const unsigned int*>(itr)); const unsigned int& i = *(reinterpret_cast<const unsigned int*>(itr));
if (loop & 0x01) if (loop & 0x01)
hash ^= (hash << 7) ^ i * (hash >> 3); hash ^= (hash << 7) ^ i * (hash >> 3);
else else
hash ^= (~((hash << 11) + (i ^ (hash >> 5)))); hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
++loop; ++loop;
remaining_length -= 4; remaining_length -= 4;
itr += sizeof(unsigned int); itr += sizeof(unsigned int);
} }
if (remaining_length >= 2) if (remaining_length >= 2)
{ {
const unsigned short& i = *(reinterpret_cast<const unsigned short*>(itr)); const unsigned short& i = *(reinterpret_cast<const unsigned short*>(itr));
if (loop & 0x01) if (loop & 0x01)
hash ^= (hash << 7) ^ i * (hash >> 3); hash ^= (hash << 7) ^ i * (hash >> 3);
else else
hash ^= (~((hash << 11) + (i ^ (hash >> 5)))); hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
++loop; ++loop;
remaining_length -= 2; remaining_length -= 2;
itr += sizeof(unsigned short); itr += sizeof(unsigned short);
} }
if (remaining_length) if (remaining_length)
{ {
hash += ((*itr) ^ (hash * 0xA5A5A5A5)) + loop; hash += ((*itr) ^ (hash * 0xA5A5A5A5)) + loop;
} }
} }
return hash; return hash;
} }
public: public:
std::vector<bloom_type> salt_; std::vector<bloom_type> salt_;
std::vector<unsigned char> bit_table_; std::vector<unsigned char> bit_table_;
unsigned int salt_count_; unsigned int salt_count_;
unsigned long long int table_size_; unsigned long long int table_size_;
unsigned long long int raw_table_size_; unsigned long long int projected_element_count_;
unsigned long long int projected_element_count_; unsigned long long int inserted_element_count_;
unsigned int inserted_element_count_; unsigned long long int random_seed_;
unsigned long long int random_seed_; double desired_false_positive_probability_;
double desired_false_positive_probability_;
}; };
inline bloom_filter operator & (const bloom_filter& a, const bloom_filter& b) inline bloom_filter operator & (const bloom_filter& a, const bloom_filter& b)
@ -617,12 +657,15 @@ inline bloom_filter operator ^ (const bloom_filter& a, const bloom_filter& b)
} // namespace fc } // namespace fc
#endif
/* /*
Note 1: Note 1:
If it can be guaranteed that bits_per_char will be of the form 2^n then If it can be guaranteed that bits_per_char will be of the form 2^n then
the following optimization can be used: the following optimization can be used:
hash_table[bit_index >> n] |= bit_mask[bit_index & (bits_per_char - 1)]; bit_table_[bit_index >> n] |= bit_mask[bit_index & (bits_per_char - 1)];
Note 2: Note 2:
For performance reasons where possible when allocating memory it should For performance reasons where possible when allocating memory it should