FC Updates from BitShares and myself #21

Closed
nathanielhourt wants to merge 687 commits from dapp-support into latest-fc
Showing only changes of commit 1d59bb3d47 - Show all commits

View file

@ -1,5 +1,3 @@
#pragma once
/*
*********************************************************************
* *
@ -11,16 +9,20 @@
* *
* Copyright notice: *
* Free use of the Open Bloom Filter Library is permitted under the *
* guidelines and in accordance with the most current version of the *
* Common Public License. *
* http://www.opensource.org/licenses/cpl1.0.php *
* guidelines and in accordance with the MIT License. *
* http://www.opensource.org/licenses/MIT *
* *
*********************************************************************
*/
#ifndef INCLUDE_BLOOM_FILTER_HPP
#define INCLUDE_BLOOM_FILTER_HPP
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <string>
@ -32,6 +34,7 @@ namespace fc {
static constexpr std::size_t bits_per_char = 0x08; // 8 bits in 1 char(unsigned)
static const unsigned char bit_mask[bits_per_char] = {
0x01, //00000001
0x02, //00000010
@ -87,11 +90,11 @@ public:
(0xFFFFFFFFFFFFFFFFULL == random_seed);
}
//Allowed min/max size of the bloom filter in bits
// Allowable min/max size of the bloom filter in bits
unsigned long long int minimum_size;
unsigned long long int maximum_size;
//Allowed min/max number of hash functions
// Allowable min/max number of hash functions
unsigned int minimum_number_of_hashes;
unsigned int maximum_number_of_hashes;
@ -101,8 +104,8 @@ public:
unsigned long long int projected_element_count;
// The approximate false positive probability expected
//from the bloom filter. The default is the reciprocal
//of the projected_element_count.
// from the bloom filter. The default is assumed to be
// the reciprocal of the projected_element_count.
double false_positive_probability;
unsigned long long int random_seed;
@ -135,26 +138,30 @@ public:
double min_m = std::numeric_limits<double>::infinity();
double min_k = 0.0;
double curr_m = 0.0;
double k = 1.0;
while (k < 1000.0)
{
double numerator = (- k * projected_element_count);
double denominator = std::log(1.0 - std::pow(false_positive_probability, 1.0 / k));
curr_m = numerator / denominator;
const double numerator = (- k * projected_element_count);
const double denominator = std::log(1.0 - std::pow(false_positive_probability, 1.0 / k));
const double curr_m = numerator / denominator;
if (curr_m < min_m)
{
min_m = curr_m;
min_k = k;
}
k += 1.0;
}
optimal_parameters_t& optp = optimal_parameters;
optp.number_of_hashes = static_cast<unsigned int>(min_k);
optp.table_size = static_cast<unsigned long long int>(min_m);
optp.table_size += (((optp.table_size % bits_per_char) != 0) ? (bits_per_char - (optp.table_size % bits_per_char)) : 0);
if (optp.number_of_hashes < minimum_number_of_hashes)
@ -178,13 +185,13 @@ protected:
typedef unsigned int bloom_type;
typedef unsigned char cell_type;
typedef std::vector<unsigned char> table_type;
public:
bloom_filter()
: salt_count_(0),
table_size_(0),
raw_table_size_(0),
projected_element_count_(0),
inserted_element_count_ (0),
random_seed_(0),
@ -199,12 +206,10 @@ public:
{
salt_count_ = p.optimal_parameters.number_of_hashes;
table_size_ = p.optimal_parameters.table_size;
generate_unique_salt();
raw_table_size_ = table_size_ / bits_per_char;
bit_table_.resize( static_cast<std::size_t>(raw_table_size_) );
//bit_table_ = new cell_type[static_cast<std::size_t>(raw_table_size_)];
std::fill_n(bit_table_.data(),raw_table_size_,0x00);
generate_unique_salt();
bit_table_.resize(table_size_ / bits_per_char, static_cast<unsigned char>(0x00));
}
bloom_filter(const bloom_filter& filter)
@ -219,13 +224,13 @@ public:
return
(salt_count_ == f.salt_count_ ) &&
(table_size_ == f.table_size_ ) &&
(raw_table_size_ == f.raw_table_size_) &&
(bit_table_.size() == f.bit_table_.size() ) &&
(projected_element_count_ == f.projected_element_count_ ) &&
(inserted_element_count_ == f.inserted_element_count_ ) &&
(random_seed_ == f.random_seed_ ) &&
(desired_false_positive_probability_ == f.desired_false_positive_probability_) &&
(salt_ == f.salt_ ) &&
std::equal(f.bit_table_.data(),f.bit_table_.data() + raw_table_size_,bit_table_.data());
(bit_table_ == f.bit_table_ ) ;
}
else
return true;
@ -242,21 +247,22 @@ public:
{
salt_count_ = f.salt_count_;
table_size_ = f.table_size_;
raw_table_size_ = f.raw_table_size_;
bit_table_ = f.bit_table_;
salt_ = f.salt_;
projected_element_count_ = f.projected_element_count_;
inserted_element_count_ = f.inserted_element_count_;
random_seed_ = f.random_seed_;
desired_false_positive_probability_ = f.desired_false_positive_probability_;
bit_table_.resize( raw_table_size_ );
std::copy(f.bit_table_.data(),f.bit_table_.data() + raw_table_size_,bit_table_.data());
salt_ = f.salt_;
}
return *this;
}
virtual ~bloom_filter()
{
}
{}
inline bool operator!() const
{
@ -265,7 +271,7 @@ public:
inline void clear()
{
std::fill_n(bit_table_.data(),raw_table_size_,0x00);
std::fill(bit_table_.begin(), bit_table_.end(), static_cast<unsigned char>(0x00));
inserted_element_count_ = 0;
}
@ -273,11 +279,14 @@ public:
{
std::size_t bit_index = 0;
std::size_t bit = 0;
for (std::size_t i = 0; i < salt_.size(); ++i)
{
compute_indices(hash_ap(key_begin, length, salt_[i]), bit_index, bit);
bit_table_[bit_index / bits_per_char] |= bit_mask[bit];
}
++inserted_element_count_;
}
@ -290,7 +299,7 @@ public:
inline void insert(const std::string& key)
{
insert(reinterpret_cast<const unsigned char*>(key.c_str()),key.size());
insert(reinterpret_cast<const unsigned char*>(key.data()),key.size());
}
inline void insert(const char* data, const std::size_t& length)
@ -302,6 +311,7 @@ public:
inline void insert(const InputIterator begin, const InputIterator end)
{
InputIterator itr = begin;
while (end != itr)
{
insert(*(itr++));
@ -312,14 +322,17 @@ public:
{
std::size_t bit_index = 0;
std::size_t bit = 0;
for (std::size_t i = 0; i < salt_.size(); ++i)
{
compute_indices(hash_ap(key_begin, length, salt_[i]), bit_index, bit);
if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit])
{
return false;
}
}
return true;
}
@ -343,14 +356,17 @@ public:
inline InputIterator contains_all(const InputIterator begin, const InputIterator end) const
{
InputIterator itr = begin;
while (end != itr)
{
if (!contains(*itr))
{
return itr;
}
++itr;
}
return end;
}
@ -358,14 +374,17 @@ public:
inline InputIterator contains_none(const InputIterator begin, const InputIterator end) const
{
InputIterator itr = begin;
while (end != itr)
{
if (contains(*itr))
{
return itr;
}
++itr;
}
return end;
}
@ -374,7 +393,7 @@ public:
return table_size_;
}
inline std::size_t element_count() const
inline unsigned long long int element_count() const
{
return inserted_element_count_;
}
@ -400,11 +419,12 @@ public:
(random_seed_ == f.random_seed_)
)
{
for (std::size_t i = 0; i < raw_table_size_; ++i)
for (std::size_t i = 0; i < bit_table_.size(); ++i)
{
bit_table_[i] &= f.bit_table_[i];
}
}
return *this;
}
@ -417,11 +437,12 @@ public:
(random_seed_ == f.random_seed_)
)
{
for (std::size_t i = 0; i < raw_table_size_; ++i)
for (std::size_t i = 0; i < bit_table_.size(); ++i)
{
bit_table_[i] |= f.bit_table_[i];
}
}
return *this;
}
@ -434,11 +455,12 @@ public:
(random_seed_ == f.random_seed_)
)
{
for (std::size_t i = 0; i < raw_table_size_; ++i)
for (std::size_t i = 0; i < bit_table_.size(); ++i)
{
bit_table_[i] ^= f.bit_table_[i];
}
}
return *this;
}
@ -469,6 +491,7 @@ protected:
hash function with different values seems to be adequate.
*/
const unsigned int predef_salt_count = 128;
static const bloom_type predef_salt[predef_salt_count] =
{
0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC,
@ -510,7 +533,8 @@ protected:
std::copy(predef_salt,
predef_salt + salt_count_,
std::back_inserter(salt_));
for (unsigned int i = 0; i < salt_.size(); ++i)
for (std::size_t i = 0; i < salt_.size(); ++i)
{
/*
Note:
@ -524,11 +548,16 @@ protected:
else
{
std::copy(predef_salt, predef_salt + predef_salt_count, std::back_inserter(salt_));
srand(static_cast<unsigned int>(random_seed_));
while (salt_.size() < salt_count_)
{
bloom_type current_salt = static_cast<bloom_type>(rand()) * static_cast<bloom_type>(rand());
if (0 == current_salt) continue;
if (0 == current_salt)
continue;
if (salt_.end() == std::find(salt_.begin(), salt_.end(), current_salt))
{
salt_.push_back(current_salt);
@ -541,43 +570,58 @@ protected:
{
const unsigned char* itr = begin;
unsigned int loop = 0;
while (remaining_length >= 8)
{
const unsigned int& i1 = *(reinterpret_cast<const unsigned int*>(itr)); itr += sizeof(unsigned int);
const unsigned int& i2 = *(reinterpret_cast<const unsigned int*>(itr)); itr += sizeof(unsigned int);
hash ^= (hash << 7) ^ i1 * (hash >> 3) ^
(~((hash << 11) + (i2 ^ (hash >> 5))));
remaining_length -= 8;
}
if (remaining_length)
{
if (remaining_length >= 4)
{
const unsigned int& i = *(reinterpret_cast<const unsigned int*>(itr));
if (loop & 0x01)
hash ^= (hash << 7) ^ i * (hash >> 3);
else
hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
++loop;
remaining_length -= 4;
itr += sizeof(unsigned int);
}
if (remaining_length >= 2)
{
const unsigned short& i = *(reinterpret_cast<const unsigned short*>(itr));
if (loop & 0x01)
hash ^= (hash << 7) ^ i * (hash >> 3);
else
hash ^= (~((hash << 11) + (i ^ (hash >> 5))));
++loop;
remaining_length -= 2;
itr += sizeof(unsigned short);
}
if (remaining_length)
{
hash += ((*itr) ^ (hash * 0xA5A5A5A5)) + loop;
}
}
return hash;
}
@ -586,9 +630,8 @@ public:
std::vector<unsigned char> bit_table_;
unsigned int salt_count_;
unsigned long long int table_size_;
unsigned long long int raw_table_size_;
unsigned long long int projected_element_count_;
unsigned int inserted_element_count_;
unsigned long long int inserted_element_count_;
unsigned long long int random_seed_;
double desired_false_positive_probability_;
};
@ -617,12 +660,21 @@ inline bloom_filter operator ^ (const bloom_filter& a, const bloom_filter& b)
} // namespace fc
FC_REFLECT( fc::bloom_filter, (salt_)(bit_table_)(salt_count_)(table_size_)(projected_element_count_)
(inserted_element_count_)(random_seed_)(desired_false_positive_probability_) )
FC_REFLECT( fc::bloom_parameters::optimal_parameters_t, (number_of_hashes)(table_size) )
FC_REFLECT( fc::bloom_parameters, (minimum_size)(maximum_size)(minimum_number_of_hashes)
(maximum_number_of_hashes)(projected_element_count)
(false_positive_probability)(random_seed)(optimal_parameters) )
#endif
/*
Note 1:
If it can be guaranteed that bits_per_char will be of the form 2^n then
the following optimization can be used:
hash_table[bit_index >> n] |= bit_mask[bit_index & (bits_per_char - 1)];
bit_table_[bit_index >> n] |= bit_mask[bit_index & (bits_per_char - 1)];
Note 2:
For performance reasons where possible when allocating memory it should