reorder the p2p network's terminate_inactive_connections_loop() to prevent us

from being interrupted before we delete an unresponsive peer #288
This commit is contained in:
Eric Frias 2015-09-02 12:15:02 -04:00
parent dd04bab521
commit d35e2d6d4e

View file

@ -1253,6 +1253,8 @@ namespace graphene { namespace net { namespace detail {
std::list<peer_connection_ptr> peers_to_send_keep_alive; std::list<peer_connection_ptr> peers_to_send_keep_alive;
std::list<peer_connection_ptr> peers_to_terminate; std::list<peer_connection_ptr> peers_to_terminate;
uint8_t current_block_interval_in_seconds = _delegate->get_current_block_interval_in_seconds();
// Disconnect peers that haven't sent us any data recently // Disconnect peers that haven't sent us any data recently
// These numbers are just guesses and we need to think through how this works better. // These numbers are just guesses and we need to think through how this works better.
// If we and our peers get disconnected from the rest of the network, we will not // If we and our peers get disconnected from the rest of the network, we will not
@ -1261,6 +1263,11 @@ namespace graphene { namespace net { namespace detail {
// them (but they won't have sent us anything since they aren't getting blocks either). // them (but they won't have sent us anything since they aren't getting blocks either).
// This might not be so bad because it could make us initiate more connections and // This might not be so bad because it could make us initiate more connections and
// reconnect with the rest of the network, or it might just futher isolate us. // reconnect with the rest of the network, or it might just futher isolate us.
{
// As usual, the first step is to walk through all our peers and figure out which
// peers need action (disconneting, sending keepalives, etc), then we walk through
// those lists yielding at our leisure later.
ASSERT_TASK_NOT_PREEMPTED();
uint32_t handshaking_timeout = _peer_inactivity_timeout; uint32_t handshaking_timeout = _peer_inactivity_timeout;
fc::time_point handshaking_disconnect_threshold = fc::time_point::now() - fc::seconds(handshaking_timeout); fc::time_point handshaking_disconnect_threshold = fc::time_point::now() - fc::seconds(handshaking_timeout);
@ -1285,7 +1292,6 @@ namespace graphene { namespace net { namespace detail {
} }
// timeout for any active peers is two block intervals // timeout for any active peers is two block intervals
uint8_t current_block_interval_in_seconds = _delegate->get_current_block_interval_in_seconds();
uint32_t active_disconnect_timeout = 10 * current_block_interval_in_seconds; uint32_t active_disconnect_timeout = 10 * current_block_interval_in_seconds;
uint32_t active_send_keepalive_timeount = active_disconnect_timeout / 2; uint32_t active_send_keepalive_timeount = active_disconnect_timeout / 2;
uint32_t active_ignored_request_timeount = 3 * current_block_interval_in_seconds; uint32_t active_ignored_request_timeount = 3 * current_block_interval_in_seconds;
@ -1368,6 +1374,33 @@ namespace graphene { namespace net { namespace detail {
peers_to_terminate.push_back(peer); peers_to_terminate.push_back(peer);
} }
// That's the end of the sorting step; now all peers that require further processing are now in one of the
// lists peers_to_disconnect_gently, peers_to_disconnect_forcibly, peers_to_send_keep_alive, or peers_to_terminate
// if we've decided to delete any peers, do it now; in its current implementation this doesn't yield,
// and once we start yielding, we may find that we've moved that peer to another list (closed or active)
// and that triggers assertions, maybe even errors
for (const peer_connection_ptr& peer : peers_to_terminate )
{
assert(_terminating_connections.find(peer) != _terminating_connections.end());
_terminating_connections.erase(peer);
schedule_peer_for_deletion(peer);
}
peers_to_terminate.clear();
// if we're going to abruptly disconnect anyone, do it here
// (it doesn't yield). I don't think there would be any harm if this were
// moved to the yielding section
for( const peer_connection_ptr& peer : peers_to_disconnect_forcibly )
{
move_peer_to_terminating_list(peer);
peer->close_connection();
}
peers_to_disconnect_forcibly.clear();
} // end ASSERT_TASK_NOT_PREEMPTED()
// Now process the peers that we need to do yielding functions with (disconnect sends a message with the
// disconnect reason, so it may yield)
for( const peer_connection_ptr& peer : peers_to_disconnect_gently ) for( const peer_connection_ptr& peer : peers_to_disconnect_gently )
{ {
fc::exception detailed_error( FC_LOG_MESSAGE(warn, "Disconnecting due to inactivity", fc::exception detailed_error( FC_LOG_MESSAGE(warn, "Disconnecting due to inactivity",
@ -1378,25 +1411,11 @@ namespace graphene { namespace net { namespace detail {
} }
peers_to_disconnect_gently.clear(); peers_to_disconnect_gently.clear();
for( const peer_connection_ptr& peer : peers_to_disconnect_forcibly )
{
move_peer_to_terminating_list(peer);
peer->close_connection();
}
peers_to_disconnect_forcibly.clear();
for( const peer_connection_ptr& peer : peers_to_send_keep_alive ) for( const peer_connection_ptr& peer : peers_to_send_keep_alive )
peer->send_message(current_time_request_message(), peer->send_message(current_time_request_message(),
offsetof(current_time_request_message, request_sent_time)); offsetof(current_time_request_message, request_sent_time));
peers_to_send_keep_alive.clear(); peers_to_send_keep_alive.clear();
for (const peer_connection_ptr& peer : peers_to_terminate )
{
assert(_terminating_connections.find(peer) != _terminating_connections.end());
_terminating_connections.erase(peer);
schedule_peer_for_deletion(peer);
}
if (!_node_is_shutting_down && !_terminate_inactive_connections_loop_done.canceled()) if (!_node_is_shutting_down && !_terminate_inactive_connections_loop_done.canceled())
_terminate_inactive_connections_loop_done = fc::schedule( [this](){ terminate_inactive_connections_loop(); }, _terminate_inactive_connections_loop_done = fc::schedule( [this](){ terminate_inactive_connections_loop(); },
fc::time_point::now() + fc::seconds(GRAPHENE_NET_PEER_HANDSHAKE_INACTIVITY_TIMEOUT / 2), fc::time_point::now() + fc::seconds(GRAPHENE_NET_PEER_HANDSHAKE_INACTIVITY_TIMEOUT / 2),