9e7b3ebdf9
In BIRD, RX has lower priority than TX with the exception of RX from control socket. The patch replaces heuristic based on socket type with explicit mark and uses it for both control socket and BGP session waiting to be established. This should avoid an issue when during heavy load, outgoing connection could connect (TX event), send open, but then failed to receive OPEN / establish in time, not sending notifications between and therefore got hold timer expired error from the neighbor immediately after it finally established the connection.
1594 lines
44 KiB
C
1594 lines
44 KiB
C
/*
|
|
* BIRD -- The Border Gateway Protocol
|
|
*
|
|
* (c) 2000 Martin Mares <mj@ucw.cz>
|
|
*
|
|
* Can be freely distributed and used under the terms of the GNU GPL.
|
|
*/
|
|
|
|
/**
|
|
* DOC: Border Gateway Protocol
|
|
*
|
|
* The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
|
|
* connection and most of the interface with BIRD core, |packets.c| handling
|
|
* both incoming and outgoing BGP packets and |attrs.c| containing functions for
|
|
* manipulation with BGP attribute lists.
|
|
*
|
|
* As opposed to the other existing routing daemons, BIRD has a sophisticated core
|
|
* architecture which is able to keep all the information needed by BGP in the
|
|
* primary routing table, therefore no complex data structures like a central
|
|
* BGP table are needed. This increases memory footprint of a BGP router with
|
|
* many connections, but not too much and, which is more important, it makes
|
|
* BGP much easier to implement.
|
|
*
|
|
* Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
|
|
* structure to which are attached individual connections represented by &bgp_connection
|
|
* (usually, there exists only one connection, but during BGP session setup, there
|
|
* can be more of them). The connections are handled according to the BGP state machine
|
|
* defined in the RFC with all the timers and all the parameters configurable.
|
|
*
|
|
* In incoming direction, we listen on the connection's socket and each time we receive
|
|
* some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
|
|
* passes complete packets to bgp_rx_packet() which distributes the packet according
|
|
* to its type.
|
|
*
|
|
* In outgoing direction, we gather all the routing updates and sort them to buckets
|
|
* (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
|
|
* of &rta's and a &fib which helps us to find if we already have another route for
|
|
* the same destination queued for sending, so that we can replace it with the new one
|
|
* immediately instead of sending both updates). There also exists a special bucket holding
|
|
* all the route withdrawals which cannot be queued anywhere else as they don't have any
|
|
* attributes. If we have any packet to send (due to either new routes or the connection
|
|
* tracking code wanting to send a Open, Keepalive or Notification message), we call
|
|
* bgp_schedule_packet() which sets the corresponding bit in a @packet_to_send
|
|
* bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
|
|
* we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
|
|
* the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
|
|
* type if we have more data of the same type to send.
|
|
*
|
|
* The processing of attributes consists of two functions: bgp_decode_attrs() for checking
|
|
* of the attribute blocks and translating them to the language of BIRD's extended attributes
|
|
* and bgp_encode_attrs() which does the converse. Both functions are built around a
|
|
* @bgp_attr_table array describing all important characteristics of all known attributes.
|
|
* Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
|
|
*
|
|
* BGP protocol implements graceful restart in both restarting (local restart)
|
|
* and receiving (neighbor restart) roles. The first is handled mostly by the
|
|
* graceful restart code in the nest, BGP protocol just handles capabilities,
|
|
* sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
|
|
* The second is implemented by internal restart of the BGP state to %BS_IDLE
|
|
* and protocol state to %PS_START, but keeping the protocol up from the core
|
|
* point of view and therefore maintaining received routes. Routing table
|
|
* refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
|
|
* stale routes after reestablishment of BGP session during graceful restart.
|
|
*/
|
|
|
|
#undef LOCAL_DEBUG
|
|
|
|
#include "nest/bird.h"
|
|
#include "nest/iface.h"
|
|
#include "nest/protocol.h"
|
|
#include "nest/route.h"
|
|
#include "nest/cli.h"
|
|
#include "nest/locks.h"
|
|
#include "conf/conf.h"
|
|
#include "lib/socket.h"
|
|
#include "lib/resource.h"
|
|
#include "lib/string.h"
|
|
|
|
#include "bgp.h"
|
|
|
|
|
|
struct linpool *bgp_linpool; /* Global temporary pool */
|
|
static sock *bgp_listen_sk; /* Global listening socket */
|
|
static int bgp_counter; /* Number of protocol instances using the listening socket */
|
|
|
|
static void bgp_close(struct bgp_proto *p, int apply_md5);
|
|
static void bgp_connect(struct bgp_proto *p);
|
|
static void bgp_active(struct bgp_proto *p);
|
|
static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
|
|
static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
|
|
|
|
|
|
/**
|
|
* bgp_open - open a BGP instance
|
|
* @p: BGP instance
|
|
*
|
|
* This function allocates and configures shared BGP resources.
|
|
* Should be called as the last step during initialization
|
|
* (when lock is acquired and neighbor is ready).
|
|
* When error, state changed to PS_DOWN, -1 is returned and caller
|
|
* should return immediately.
|
|
*/
|
|
static int
|
|
bgp_open(struct bgp_proto *p)
|
|
{
|
|
struct config *cfg = p->cf->c.global;
|
|
int errcode;
|
|
|
|
if (!bgp_listen_sk)
|
|
bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
|
|
|
|
if (!bgp_listen_sk)
|
|
{
|
|
errcode = BEM_NO_SOCKET;
|
|
goto err;
|
|
}
|
|
|
|
if (!bgp_linpool)
|
|
bgp_linpool = lp_new(&root_pool, 4080);
|
|
|
|
bgp_counter++;
|
|
|
|
if (p->cf->password)
|
|
if (sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->iface, p->cf->password) < 0)
|
|
{
|
|
sk_log_error(bgp_listen_sk, p->p.name);
|
|
bgp_close(p, 0);
|
|
errcode = BEM_INVALID_MD5;
|
|
goto err;
|
|
}
|
|
|
|
return 0;
|
|
|
|
err:
|
|
p->p.disabled = 1;
|
|
bgp_store_error(p, NULL, BE_MISC, errcode);
|
|
proto_notify_state(&p->p, PS_DOWN);
|
|
return -1;
|
|
}
|
|
|
|
static void
|
|
bgp_startup(struct bgp_proto *p)
|
|
{
|
|
BGP_TRACE(D_EVENTS, "Started");
|
|
p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP;
|
|
|
|
if (!p->cf->passive)
|
|
bgp_active(p);
|
|
}
|
|
|
|
static void
|
|
bgp_startup_timeout(timer *t)
|
|
{
|
|
bgp_startup(t->data);
|
|
}
|
|
|
|
|
|
static void
|
|
bgp_initiate(struct bgp_proto *p)
|
|
{
|
|
int rv = bgp_open(p);
|
|
if (rv < 0)
|
|
return;
|
|
|
|
if (p->cf->bfd)
|
|
bgp_update_bfd(p, p->cf->bfd);
|
|
|
|
if (p->startup_delay)
|
|
{
|
|
p->start_state = BSS_DELAY;
|
|
BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
|
|
bgp_start_timer(p->startup_timer, p->startup_delay);
|
|
}
|
|
else
|
|
bgp_startup(p);
|
|
}
|
|
|
|
/**
|
|
* bgp_close - close a BGP instance
|
|
* @p: BGP instance
|
|
* @apply_md5: 0 to disable unsetting MD5 auth
|
|
*
|
|
* This function frees and deconfigures shared BGP resources.
|
|
* @apply_md5 is set to 0 when bgp_close is called as a cleanup
|
|
* from failed bgp_open().
|
|
*/
|
|
static void
|
|
bgp_close(struct bgp_proto *p, int apply_md5)
|
|
{
|
|
ASSERT(bgp_counter);
|
|
bgp_counter--;
|
|
|
|
if (p->cf->password && apply_md5)
|
|
if (sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->iface, NULL) < 0)
|
|
sk_log_error(bgp_listen_sk, p->p.name);
|
|
|
|
if (!bgp_counter)
|
|
{
|
|
rfree(bgp_listen_sk);
|
|
bgp_listen_sk = NULL;
|
|
rfree(bgp_linpool);
|
|
bgp_linpool = NULL;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* bgp_start_timer - start a BGP timer
|
|
* @t: timer
|
|
* @value: time to fire (0 to disable the timer)
|
|
*
|
|
* This functions calls tm_start() on @t with time @value and the
|
|
* amount of randomization suggested by the BGP standard. Please use
|
|
* it for all BGP timers.
|
|
*/
|
|
void
|
|
bgp_start_timer(timer *t, int value)
|
|
{
|
|
if (value)
|
|
{
|
|
/* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
|
|
t->randomize = value / 4;
|
|
tm_start(t, value - t->randomize);
|
|
}
|
|
else
|
|
tm_stop(t);
|
|
}
|
|
|
|
/**
|
|
* bgp_close_conn - close a BGP connection
|
|
* @conn: connection to close
|
|
*
|
|
* This function takes a connection described by the &bgp_conn structure,
|
|
* closes its socket and frees all resources associated with it.
|
|
*/
|
|
void
|
|
bgp_close_conn(struct bgp_conn *conn)
|
|
{
|
|
// struct bgp_proto *p = conn->bgp;
|
|
|
|
DBG("BGP: Closing connection\n");
|
|
conn->packets_to_send = 0;
|
|
rfree(conn->connect_retry_timer);
|
|
conn->connect_retry_timer = NULL;
|
|
rfree(conn->keepalive_timer);
|
|
conn->keepalive_timer = NULL;
|
|
rfree(conn->hold_timer);
|
|
conn->hold_timer = NULL;
|
|
rfree(conn->sk);
|
|
conn->sk = NULL;
|
|
rfree(conn->tx_ev);
|
|
conn->tx_ev = NULL;
|
|
}
|
|
|
|
|
|
/**
|
|
* bgp_update_startup_delay - update a startup delay
|
|
* @p: BGP instance
|
|
*
|
|
* This function updates a startup delay that is used to postpone next BGP connect.
|
|
* It also handles disable_after_error and might stop BGP instance when error
|
|
* happened and disable_after_error is on.
|
|
*
|
|
* It should be called when BGP protocol error happened.
|
|
*/
|
|
void
|
|
bgp_update_startup_delay(struct bgp_proto *p)
|
|
{
|
|
struct bgp_config *cf = p->cf;
|
|
|
|
DBG("BGP: Updating startup delay\n");
|
|
|
|
if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time))
|
|
p->startup_delay = 0;
|
|
|
|
p->last_proto_error = now;
|
|
|
|
if (cf->disable_after_error)
|
|
{
|
|
p->startup_delay = 0;
|
|
p->p.disabled = 1;
|
|
return;
|
|
}
|
|
|
|
if (!p->startup_delay)
|
|
p->startup_delay = cf->error_delay_time_min;
|
|
else
|
|
p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
|
|
}
|
|
|
|
static void
|
|
bgp_graceful_close_conn(struct bgp_conn *conn, unsigned subcode)
|
|
{
|
|
switch (conn->state)
|
|
{
|
|
case BS_IDLE:
|
|
case BS_CLOSE:
|
|
return;
|
|
case BS_CONNECT:
|
|
case BS_ACTIVE:
|
|
bgp_conn_enter_idle_state(conn);
|
|
return;
|
|
case BS_OPENSENT:
|
|
case BS_OPENCONFIRM:
|
|
case BS_ESTABLISHED:
|
|
bgp_error(conn, 6, subcode, NULL, 0);
|
|
return;
|
|
default:
|
|
bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
|
|
}
|
|
}
|
|
|
|
static void
|
|
bgp_down(struct bgp_proto *p)
|
|
{
|
|
if (p->start_state > BSS_PREPARE)
|
|
bgp_close(p, 1);
|
|
|
|
BGP_TRACE(D_EVENTS, "Down");
|
|
proto_notify_state(&p->p, PS_DOWN);
|
|
}
|
|
|
|
static void
|
|
bgp_decision(void *vp)
|
|
{
|
|
struct bgp_proto *p = vp;
|
|
|
|
DBG("BGP: Decision start\n");
|
|
if ((p->p.proto_state == PS_START)
|
|
&& (p->outgoing_conn.state == BS_IDLE)
|
|
&& (p->incoming_conn.state != BS_OPENCONFIRM)
|
|
&& (!p->cf->passive))
|
|
bgp_active(p);
|
|
|
|
if ((p->p.proto_state == PS_STOP)
|
|
&& (p->outgoing_conn.state == BS_IDLE)
|
|
&& (p->incoming_conn.state == BS_IDLE))
|
|
bgp_down(p);
|
|
}
|
|
|
|
void
|
|
bgp_stop(struct bgp_proto *p, unsigned subcode)
|
|
{
|
|
proto_notify_state(&p->p, PS_STOP);
|
|
bgp_graceful_close_conn(&p->outgoing_conn, subcode);
|
|
bgp_graceful_close_conn(&p->incoming_conn, subcode);
|
|
ev_schedule(p->event);
|
|
}
|
|
|
|
static inline void
|
|
bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state)
|
|
{
|
|
if (conn->bgp->p.mrtdump & MD_STATES)
|
|
mrt_dump_bgp_state_change(conn, conn->state, new_state);
|
|
|
|
conn->state = new_state;
|
|
}
|
|
|
|
void
|
|
bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
|
|
{
|
|
/* Really, most of the work is done in bgp_rx_open(). */
|
|
bgp_conn_set_state(conn, BS_OPENCONFIRM);
|
|
}
|
|
|
|
void
|
|
bgp_conn_enter_established_state(struct bgp_conn *conn)
|
|
{
|
|
struct bgp_proto *p = conn->bgp;
|
|
|
|
BGP_TRACE(D_EVENTS, "BGP session established");
|
|
DBG("BGP: UP!!!\n");
|
|
|
|
/* For multi-hop BGP sessions */
|
|
if (ipa_zero(p->source_addr))
|
|
p->source_addr = conn->sk->saddr;
|
|
|
|
conn->sk->fast_rx = 0;
|
|
|
|
p->conn = conn;
|
|
p->last_error_class = 0;
|
|
p->last_error_code = 0;
|
|
p->feed_state = BFS_NONE;
|
|
p->load_state = BFS_NONE;
|
|
bgp_init_bucket_table(p);
|
|
bgp_init_prefix_table(p, 8);
|
|
|
|
int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART);
|
|
|
|
if (p->p.gr_recovery && !peer_gr_ready)
|
|
proto_graceful_restart_unlock(&p->p);
|
|
|
|
if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
|
|
p->p.gr_wait = 1;
|
|
|
|
if (p->gr_active)
|
|
tm_stop(p->gr_timer);
|
|
|
|
if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
|
|
bgp_graceful_restart_done(p);
|
|
|
|
/* GR capability implies that neighbor will send End-of-RIB */
|
|
if (conn->peer_gr_aware)
|
|
p->load_state = BFS_LOADING;
|
|
|
|
/* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */
|
|
|
|
bgp_conn_set_state(conn, BS_ESTABLISHED);
|
|
proto_notify_state(&p->p, PS_UP);
|
|
}
|
|
|
|
static void
|
|
bgp_conn_leave_established_state(struct bgp_proto *p)
|
|
{
|
|
BGP_TRACE(D_EVENTS, "BGP session closed");
|
|
p->conn = NULL;
|
|
|
|
if (p->p.proto_state == PS_UP)
|
|
bgp_stop(p, 0);
|
|
}
|
|
|
|
void
|
|
bgp_conn_enter_close_state(struct bgp_conn *conn)
|
|
{
|
|
struct bgp_proto *p = conn->bgp;
|
|
int os = conn->state;
|
|
|
|
bgp_conn_set_state(conn, BS_CLOSE);
|
|
tm_stop(conn->keepalive_timer);
|
|
conn->sk->rx_hook = NULL;
|
|
|
|
/* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
|
|
bgp_start_timer(conn->hold_timer, 10);
|
|
|
|
if (os == BS_ESTABLISHED)
|
|
bgp_conn_leave_established_state(p);
|
|
}
|
|
|
|
void
|
|
bgp_conn_enter_idle_state(struct bgp_conn *conn)
|
|
{
|
|
struct bgp_proto *p = conn->bgp;
|
|
int os = conn->state;
|
|
|
|
bgp_close_conn(conn);
|
|
bgp_conn_set_state(conn, BS_IDLE);
|
|
ev_schedule(p->event);
|
|
|
|
if (os == BS_ESTABLISHED)
|
|
bgp_conn_leave_established_state(p);
|
|
}
|
|
|
|
/**
|
|
* bgp_handle_graceful_restart - handle detected BGP graceful restart
|
|
* @p: BGP instance
|
|
*
|
|
* This function is called when a BGP graceful restart of the neighbor is
|
|
* detected (when the TCP connection fails or when a new TCP connection
|
|
* appears). The function activates processing of the restart - starts routing
|
|
* table refresh cycle and activates BGP restart timer. The protocol state goes
|
|
* back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
|
|
* caller.
|
|
*/
|
|
void
|
|
bgp_handle_graceful_restart(struct bgp_proto *p)
|
|
{
|
|
ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
|
|
|
|
BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
|
|
p->gr_active ? " - already pending" : "");
|
|
proto_notify_state(&p->p, PS_START);
|
|
|
|
if (p->gr_active)
|
|
rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
|
|
|
|
p->gr_active = 1;
|
|
bgp_start_timer(p->gr_timer, p->conn->peer_gr_time);
|
|
rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
|
|
}
|
|
|
|
/**
|
|
* bgp_graceful_restart_done - finish active BGP graceful restart
|
|
* @p: BGP instance
|
|
*
|
|
* This function is called when the active BGP graceful restart of the neighbor
|
|
* should be finished - either successfully (the neighbor sends all paths and
|
|
* reports end-of-RIB on the new session) or unsuccessfully (the neighbor does
|
|
* not support BGP graceful restart on the new session). The function ends
|
|
* routing table refresh cycle and stops BGP restart timer.
|
|
*/
|
|
void
|
|
bgp_graceful_restart_done(struct bgp_proto *p)
|
|
{
|
|
BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
|
|
p->gr_active = 0;
|
|
tm_stop(p->gr_timer);
|
|
rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
|
|
}
|
|
|
|
/**
|
|
* bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
|
|
* @t: timer
|
|
*
|
|
* This function is a timeout hook for @gr_timer, implementing BGP restart time
|
|
* limit for reestablisment of the BGP session after the graceful restart. When
|
|
* fired, we just proceed with the usual protocol restart.
|
|
*/
|
|
|
|
static void
|
|
bgp_graceful_restart_timeout(timer *t)
|
|
{
|
|
struct bgp_proto *p = t->data;
|
|
|
|
BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
|
|
bgp_stop(p, 0);
|
|
}
|
|
|
|
|
|
/**
|
|
* bgp_refresh_begin - start incoming enhanced route refresh sequence
|
|
* @p: BGP instance
|
|
*
|
|
* This function is called when an incoming enhanced route refresh sequence is
|
|
* started by the neighbor, demarcated by the BoRR packet. The function updates
|
|
* the load state and starts the routing table refresh cycle. Note that graceful
|
|
* restart also uses routing table refresh cycle, but RFC 7313 and load states
|
|
* ensure that these two sequences do not overlap.
|
|
*/
|
|
void
|
|
bgp_refresh_begin(struct bgp_proto *p)
|
|
{
|
|
if (p->load_state == BFS_LOADING)
|
|
{ log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
|
|
|
|
p->load_state = BFS_REFRESHING;
|
|
rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
|
|
}
|
|
|
|
/**
|
|
* bgp_refresh_end - finish incoming enhanced route refresh sequence
|
|
* @p: BGP instance
|
|
*
|
|
* This function is called when an incoming enhanced route refresh sequence is
|
|
* finished by the neighbor, demarcated by the EoRR packet. The function updates
|
|
* the load state and ends the routing table refresh cycle. Routes not received
|
|
* during the sequence are removed by the nest.
|
|
*/
|
|
void
|
|
bgp_refresh_end(struct bgp_proto *p)
|
|
{
|
|
if (p->load_state != BFS_REFRESHING)
|
|
{ log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
|
|
|
|
p->load_state = BFS_NONE;
|
|
rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
|
|
}
|
|
|
|
|
|
static void
|
|
bgp_send_open(struct bgp_conn *conn)
|
|
{
|
|
conn->start_state = conn->bgp->start_state;
|
|
|
|
// Default values, possibly changed by receiving capabilities.
|
|
conn->advertised_as = 0;
|
|
conn->peer_refresh_support = 0;
|
|
conn->peer_as4_support = 0;
|
|
conn->peer_add_path = 0;
|
|
conn->peer_enhanced_refresh_support = 0;
|
|
conn->peer_gr_aware = 0;
|
|
conn->peer_gr_able = 0;
|
|
conn->peer_gr_time = 0;
|
|
conn->peer_gr_flags = 0;
|
|
conn->peer_gr_aflags = 0;
|
|
conn->peer_ext_messages_support = 0;
|
|
|
|
DBG("BGP: Sending open\n");
|
|
conn->sk->rx_hook = bgp_rx;
|
|
conn->sk->tx_hook = bgp_tx;
|
|
tm_stop(conn->connect_retry_timer);
|
|
bgp_schedule_packet(conn, PKT_OPEN);
|
|
bgp_conn_set_state(conn, BS_OPENSENT);
|
|
bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
|
|
}
|
|
|
|
static void
|
|
bgp_connected(sock *sk)
|
|
{
|
|
struct bgp_conn *conn = sk->data;
|
|
struct bgp_proto *p = conn->bgp;
|
|
|
|
BGP_TRACE(D_EVENTS, "Connected");
|
|
bgp_send_open(conn);
|
|
}
|
|
|
|
static void
|
|
bgp_connect_timeout(timer *t)
|
|
{
|
|
struct bgp_conn *conn = t->data;
|
|
struct bgp_proto *p = conn->bgp;
|
|
|
|
DBG("BGP: connect_timeout\n");
|
|
if (p->p.proto_state == PS_START)
|
|
{
|
|
bgp_close_conn(conn);
|
|
bgp_connect(p);
|
|
}
|
|
else
|
|
bgp_conn_enter_idle_state(conn);
|
|
}
|
|
|
|
static void
|
|
bgp_sock_err(sock *sk, int err)
|
|
{
|
|
struct bgp_conn *conn = sk->data;
|
|
struct bgp_proto *p = conn->bgp;
|
|
|
|
/*
|
|
* This error hook may be called either asynchronously from main
|
|
* loop, or synchronously from sk_send(). But sk_send() is called
|
|
* only from bgp_tx() and bgp_kick_tx(), which are both called
|
|
* asynchronously from main loop. Moreover, they end if err hook is
|
|
* called. Therefore, we could suppose that it is always called
|
|
* asynchronously.
|
|
*/
|
|
|
|
bgp_store_error(p, conn, BE_SOCKET, err);
|
|
|
|
if (err)
|
|
BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
|
|
else
|
|
BGP_TRACE(D_EVENTS, "Connection closed");
|
|
|
|
if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
|
|
bgp_handle_graceful_restart(p);
|
|
|
|
bgp_conn_enter_idle_state(conn);
|
|
}
|
|
|
|
static void
|
|
bgp_hold_timeout(timer *t)
|
|
{
|
|
struct bgp_conn *conn = t->data;
|
|
struct bgp_proto *p = conn->bgp;
|
|
|
|
DBG("BGP: Hold timeout\n");
|
|
|
|
/* We are already closing the connection - just do hangup */
|
|
if (conn->state == BS_CLOSE)
|
|
{
|
|
BGP_TRACE(D_EVENTS, "Connection stalled");
|
|
bgp_conn_enter_idle_state(conn);
|
|
return;
|
|
}
|
|
|
|
/* If there is something in input queue, we are probably congested
|
|
and perhaps just not processed BGP packets in time. */
|
|
|
|
if (sk_rx_ready(conn->sk) > 0)
|
|
bgp_start_timer(conn->hold_timer, 10);
|
|
else
|
|
bgp_error(conn, 4, 0, NULL, 0);
|
|
}
|
|
|
|
static void
|
|
bgp_keepalive_timeout(timer *t)
|
|
{
|
|
struct bgp_conn *conn = t->data;
|
|
|
|
DBG("BGP: Keepalive timer\n");
|
|
bgp_schedule_packet(conn, PKT_KEEPALIVE);
|
|
}
|
|
|
|
static void
|
|
bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
|
|
{
|
|
timer *t;
|
|
|
|
conn->sk = NULL;
|
|
conn->bgp = p;
|
|
conn->packets_to_send = 0;
|
|
|
|
t = conn->connect_retry_timer = tm_new(p->p.pool);
|
|
t->hook = bgp_connect_timeout;
|
|
t->data = conn;
|
|
t = conn->hold_timer = tm_new(p->p.pool);
|
|
t->hook = bgp_hold_timeout;
|
|
t->data = conn;
|
|
t = conn->keepalive_timer = tm_new(p->p.pool);
|
|
t->hook = bgp_keepalive_timeout;
|
|
t->data = conn;
|
|
conn->tx_ev = ev_new(p->p.pool);
|
|
conn->tx_ev->hook = bgp_kick_tx;
|
|
conn->tx_ev->data = conn;
|
|
}
|
|
|
|
static void
|
|
bgp_setup_sk(struct bgp_conn *conn, sock *s)
|
|
{
|
|
s->data = conn;
|
|
s->err_hook = bgp_sock_err;
|
|
s->fast_rx = 1;
|
|
conn->sk = s;
|
|
}
|
|
|
|
static void
|
|
bgp_active(struct bgp_proto *p)
|
|
{
|
|
int delay = MAX(1, p->cf->connect_delay_time);
|
|
struct bgp_conn *conn = &p->outgoing_conn;
|
|
|
|
BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
|
|
bgp_setup_conn(p, conn);
|
|
bgp_conn_set_state(conn, BS_ACTIVE);
|
|
bgp_start_timer(conn->connect_retry_timer, delay);
|
|
}
|
|
|
|
/**
|
|
* bgp_connect - initiate an outgoing connection
|
|
* @p: BGP instance
|
|
*
|
|
* The bgp_connect() function creates a new &bgp_conn and initiates
|
|
* a TCP connection to the peer. The rest of connection setup is governed
|
|
* by the BGP state machine as described in the standard.
|
|
*/
|
|
static void
|
|
bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing connection */
|
|
{
|
|
sock *s;
|
|
struct bgp_conn *conn = &p->outgoing_conn;
|
|
int hops = p->cf->multihop ? : 1;
|
|
|
|
DBG("BGP: Connecting\n");
|
|
s = sk_new(p->p.pool);
|
|
s->type = SK_TCP_ACTIVE;
|
|
s->saddr = p->source_addr;
|
|
s->daddr = p->cf->remote_ip;
|
|
s->dport = p->cf->remote_port;
|
|
s->iface = p->neigh ? p->neigh->iface : NULL;
|
|
s->ttl = p->cf->ttl_security ? 255 : hops;
|
|
s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
|
|
s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
|
|
s->tos = IP_PREC_INTERNET_CONTROL;
|
|
s->password = p->cf->password;
|
|
s->tx_hook = bgp_connected;
|
|
BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, p->cf->iface,
|
|
s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
|
|
bgp_setup_conn(p, conn);
|
|
bgp_setup_sk(conn, s);
|
|
bgp_conn_set_state(conn, BS_CONNECT);
|
|
|
|
if (sk_open(s) < 0)
|
|
goto err;
|
|
|
|
/* Set minimal receive TTL if needed */
|
|
if (p->cf->ttl_security)
|
|
if (sk_set_min_ttl(s, 256 - hops) < 0)
|
|
goto err;
|
|
|
|
DBG("BGP: Waiting for connect success\n");
|
|
bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
|
|
return;
|
|
|
|
err:
|
|
sk_log_error(s, p->p.name);
|
|
bgp_sock_err(s, 0);
|
|
return;
|
|
}
|
|
|
|
/**
|
|
* bgp_find_proto - find existing proto for incoming connection
|
|
* @sk: TCP socket
|
|
*
|
|
*/
|
|
static struct bgp_proto *
|
|
bgp_find_proto(sock *sk)
|
|
{
|
|
struct proto_config *pc;
|
|
|
|
WALK_LIST(pc, config->protos)
|
|
if ((pc->protocol == &proto_bgp) && pc->proto)
|
|
{
|
|
struct bgp_proto *p = (struct bgp_proto *) pc->proto;
|
|
if (ipa_equal(p->cf->remote_ip, sk->daddr) &&
|
|
(!ipa_is_link_local(sk->daddr) || (p->cf->iface == sk->iface)))
|
|
return p;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/**
|
|
* bgp_incoming_connection - handle an incoming connection
|
|
* @sk: TCP socket
|
|
* @dummy: unused
|
|
*
|
|
* This function serves as a socket hook for accepting of new BGP
|
|
* connections. It searches a BGP instance corresponding to the peer
|
|
* which has connected and if such an instance exists, it creates a
|
|
* &bgp_conn structure, attaches it to the instance and either sends
|
|
* an Open message or (if there already is an active connection) it
|
|
* closes the new connection by sending a Notification message.
|
|
*/
|
|
static int
|
|
bgp_incoming_connection(sock *sk, int dummy UNUSED)
|
|
{
|
|
struct bgp_proto *p;
|
|
int acc, hops;
|
|
|
|
DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
|
|
p = bgp_find_proto(sk);
|
|
if (!p)
|
|
{
|
|
log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
|
|
sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
|
|
rfree(sk);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* BIRD should keep multiple incoming connections in OpenSent state (for
|
|
* details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
|
|
* connections are rejected istead. The exception is the case where an
|
|
* incoming connection triggers a graceful restart.
|
|
*/
|
|
|
|
acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
|
|
(p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
|
|
|
|
if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
|
|
{
|
|
bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
|
|
bgp_handle_graceful_restart(p);
|
|
bgp_conn_enter_idle_state(p->conn);
|
|
acc = 1;
|
|
|
|
/* There might be separate incoming connection in OpenSent state */
|
|
if (p->incoming_conn.state > BS_ACTIVE)
|
|
bgp_close_conn(&p->incoming_conn);
|
|
}
|
|
|
|
BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
|
|
sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
|
|
sk->dport, acc ? "accepted" : "rejected");
|
|
|
|
if (!acc)
|
|
{
|
|
rfree(sk);
|
|
return 0;
|
|
}
|
|
|
|
hops = p->cf->multihop ? : 1;
|
|
|
|
if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
|
|
goto err;
|
|
|
|
if (p->cf->ttl_security)
|
|
if (sk_set_min_ttl(sk, 256 - hops) < 0)
|
|
goto err;
|
|
|
|
if (p->cf->enable_extended_messages)
|
|
{
|
|
sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
|
|
sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
|
|
sk_reallocate(sk);
|
|
}
|
|
|
|
bgp_setup_conn(p, &p->incoming_conn);
|
|
bgp_setup_sk(&p->incoming_conn, sk);
|
|
bgp_send_open(&p->incoming_conn);
|
|
return 0;
|
|
|
|
err:
|
|
sk_log_error(sk, p->p.name);
|
|
log(L_ERR "%s: Incoming connection aborted", p->p.name);
|
|
rfree(sk);
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
bgp_listen_sock_err(sock *sk UNUSED, int err)
|
|
{
|
|
if (err == ECONNABORTED)
|
|
log(L_WARN "BGP: Incoming connection aborted");
|
|
else
|
|
log(L_ERR "BGP: Error on listening socket: %M", err);
|
|
}
|
|
|
|
static sock *
|
|
bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
|
|
{
|
|
sock *s = sk_new(&root_pool);
|
|
DBG("BGP: Creating listening socket\n");
|
|
s->type = SK_TCP_PASSIVE;
|
|
s->ttl = 255;
|
|
s->saddr = addr;
|
|
s->sport = port ? port : BGP_PORT;
|
|
s->flags = flags ? 0 : SKF_V6ONLY;
|
|
s->tos = IP_PREC_INTERNET_CONTROL;
|
|
s->rbsize = BGP_RX_BUFFER_SIZE;
|
|
s->tbsize = BGP_TX_BUFFER_SIZE;
|
|
s->rx_hook = bgp_incoming_connection;
|
|
s->err_hook = bgp_listen_sock_err;
|
|
|
|
if (sk_open(s) < 0)
|
|
goto err;
|
|
|
|
return s;
|
|
|
|
err:
|
|
sk_log_error(s, "BGP");
|
|
log(L_ERR "BGP: Cannot open listening socket");
|
|
rfree(s);
|
|
return NULL;
|
|
}
|
|
|
|
static void
|
|
bgp_start_neighbor(struct bgp_proto *p)
|
|
{
|
|
/* Called only for single-hop BGP sessions */
|
|
|
|
if (ipa_zero(p->source_addr))
|
|
p->source_addr = p->neigh->ifa->ip;
|
|
|
|
#ifdef IPV6
|
|
{
|
|
struct ifa *a;
|
|
p->local_link = IPA_NONE;
|
|
WALK_LIST(a, p->neigh->iface->addrs)
|
|
if (a->scope == SCOPE_LINK)
|
|
{
|
|
p->local_link = a->ip;
|
|
break;
|
|
}
|
|
|
|
if (! ipa_nonzero(p->local_link))
|
|
log(L_WARN "%s: Missing link local address on interface %s", p->p.name, p->neigh->iface->name);
|
|
|
|
DBG("BGP: Selected link-level address %I\n", p->local_link);
|
|
}
|
|
#endif
|
|
|
|
bgp_initiate(p);
|
|
}
|
|
|
|
static void
|
|
bgp_neigh_notify(neighbor *n)
|
|
{
|
|
struct bgp_proto *p = (struct bgp_proto *) n->proto;
|
|
int ps = p->p.proto_state;
|
|
|
|
if (n != p->neigh)
|
|
return;
|
|
|
|
if ((ps == PS_DOWN) || (ps == PS_STOP))
|
|
return;
|
|
|
|
int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
|
|
|
|
if (n->scope <= 0)
|
|
{
|
|
if (!prepare)
|
|
{
|
|
BGP_TRACE(D_EVENTS, "Neighbor lost");
|
|
bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
|
|
/* Perhaps also run bgp_update_startup_delay(p)? */
|
|
bgp_stop(p, 0);
|
|
}
|
|
}
|
|
else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
|
|
{
|
|
if (!prepare)
|
|
{
|
|
BGP_TRACE(D_EVENTS, "Link down");
|
|
bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
|
|
if (ps == PS_UP)
|
|
bgp_update_startup_delay(p);
|
|
bgp_stop(p, 0);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (prepare)
|
|
{
|
|
BGP_TRACE(D_EVENTS, "Neighbor ready");
|
|
bgp_start_neighbor(p);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
bgp_bfd_notify(struct bfd_request *req)
|
|
{
|
|
struct bgp_proto *p = req->data;
|
|
int ps = p->p.proto_state;
|
|
|
|
if (req->down && ((ps == PS_START) || (ps == PS_UP)))
|
|
{
|
|
BGP_TRACE(D_EVENTS, "BFD session down");
|
|
bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
|
|
if (ps == PS_UP)
|
|
bgp_update_startup_delay(p);
|
|
bgp_stop(p, 0);
|
|
}
|
|
}
|
|
|
|
static void
|
|
bgp_update_bfd(struct bgp_proto *p, int use_bfd)
|
|
{
|
|
if (use_bfd && !p->bfd_req)
|
|
p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
|
|
p->cf->multihop ? NULL : p->neigh->iface,
|
|
bgp_bfd_notify, p);
|
|
|
|
if (!use_bfd && p->bfd_req)
|
|
{
|
|
rfree(p->bfd_req);
|
|
p->bfd_req = NULL;
|
|
}
|
|
}
|
|
|
|
static int
|
|
bgp_reload_routes(struct proto *P)
|
|
{
|
|
struct bgp_proto *p = (struct bgp_proto *) P;
|
|
if (!p->conn || !p->conn->peer_refresh_support)
|
|
return 0;
|
|
|
|
bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH);
|
|
return 1;
|
|
}
|
|
|
|
static void
|
|
bgp_feed_begin(struct proto *P, int initial)
|
|
{
|
|
struct bgp_proto *p = (struct bgp_proto *) P;
|
|
|
|
/* This should not happen */
|
|
if (!p->conn)
|
|
return;
|
|
|
|
if (initial && p->cf->gr_mode)
|
|
p->feed_state = BFS_LOADING;
|
|
|
|
/* It is refeed and both sides support enhanced route refresh */
|
|
if (!initial && p->cf->enable_refresh &&
|
|
p->conn->peer_enhanced_refresh_support)
|
|
{
|
|
/* BoRR must not be sent before End-of-RIB */
|
|
if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED)
|
|
return;
|
|
|
|
p->feed_state = BFS_REFRESHING;
|
|
bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH);
|
|
}
|
|
}
|
|
|
|
static void
|
|
bgp_feed_end(struct proto *P)
|
|
{
|
|
struct bgp_proto *p = (struct bgp_proto *) P;
|
|
|
|
/* This should not happen */
|
|
if (!p->conn)
|
|
return;
|
|
|
|
/* Non-demarcated feed ended, nothing to do */
|
|
if (p->feed_state == BFS_NONE)
|
|
return;
|
|
|
|
/* Schedule End-of-RIB packet */
|
|
if (p->feed_state == BFS_LOADING)
|
|
p->feed_state = BFS_LOADED;
|
|
|
|
/* Schedule EoRR packet */
|
|
if (p->feed_state == BFS_REFRESHING)
|
|
p->feed_state = BFS_REFRESHED;
|
|
|
|
/* Kick TX hook */
|
|
bgp_schedule_packet(p->conn, PKT_UPDATE);
|
|
}
|
|
|
|
|
|
static void
|
|
bgp_start_locked(struct object_lock *lock)
|
|
{
|
|
struct bgp_proto *p = lock->data;
|
|
struct bgp_config *cf = p->cf;
|
|
|
|
if (p->p.proto_state != PS_START)
|
|
{
|
|
DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
|
|
return;
|
|
}
|
|
|
|
DBG("BGP: Got lock\n");
|
|
|
|
if (cf->multihop)
|
|
{
|
|
/* Multi-hop sessions do not use neighbor entries */
|
|
bgp_initiate(p);
|
|
return;
|
|
}
|
|
|
|
neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
|
|
if (!n)
|
|
{
|
|
log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
|
|
/* As we do not start yet, we can just disable protocol */
|
|
p->p.disabled = 1;
|
|
bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
|
|
proto_notify_state(&p->p, PS_DOWN);
|
|
return;
|
|
}
|
|
|
|
p->neigh = n;
|
|
|
|
if (n->scope <= 0)
|
|
BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
|
|
else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
|
|
BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
|
|
else
|
|
bgp_start_neighbor(p);
|
|
}
|
|
|
|
static int
|
|
bgp_start(struct proto *P)
|
|
{
|
|
struct bgp_proto *p = (struct bgp_proto *) P;
|
|
struct object_lock *lock;
|
|
|
|
DBG("BGP: Startup.\n");
|
|
p->start_state = BSS_PREPARE;
|
|
p->outgoing_conn.state = BS_IDLE;
|
|
p->incoming_conn.state = BS_IDLE;
|
|
p->neigh = NULL;
|
|
p->bfd_req = NULL;
|
|
p->gr_ready = 0;
|
|
p->gr_active = 0;
|
|
|
|
rt_lock_table(p->igp_table);
|
|
|
|
p->event = ev_new(p->p.pool);
|
|
p->event->hook = bgp_decision;
|
|
p->event->data = p;
|
|
|
|
p->startup_timer = tm_new(p->p.pool);
|
|
p->startup_timer->hook = bgp_startup_timeout;
|
|
p->startup_timer->data = p;
|
|
|
|
p->gr_timer = tm_new(p->p.pool);
|
|
p->gr_timer->hook = bgp_graceful_restart_timeout;
|
|
p->gr_timer->data = p;
|
|
|
|
p->local_id = proto_get_router_id(P->cf);
|
|
if (p->rr_client)
|
|
p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
|
|
|
|
p->remote_id = 0;
|
|
p->source_addr = p->cf->source_addr;
|
|
|
|
if (p->p.gr_recovery && p->cf->gr_mode)
|
|
proto_graceful_restart_lock(P);
|
|
|
|
/*
|
|
* Before attempting to create the connection, we need to lock the
|
|
* port, so that are sure we're the only instance attempting to talk
|
|
* with that neighbor.
|
|
*/
|
|
|
|
lock = p->lock = olock_new(P->pool);
|
|
lock->addr = p->cf->remote_ip;
|
|
lock->port = p->cf->remote_port;
|
|
lock->iface = p->cf->iface;
|
|
lock->type = OBJLOCK_TCP;
|
|
lock->hook = bgp_start_locked;
|
|
lock->data = p;
|
|
olock_acquire(lock);
|
|
|
|
return PS_START;
|
|
}
|
|
|
|
extern int proto_restart;
|
|
|
|
static int
|
|
bgp_shutdown(struct proto *P)
|
|
{
|
|
struct bgp_proto *p = (struct bgp_proto *) P;
|
|
unsigned subcode = 0;
|
|
|
|
BGP_TRACE(D_EVENTS, "Shutdown requested");
|
|
|
|
switch (P->down_code)
|
|
{
|
|
case PDC_CF_REMOVE:
|
|
case PDC_CF_DISABLE:
|
|
subcode = 3; // Errcode 6, 3 - peer de-configured
|
|
break;
|
|
|
|
case PDC_CF_RESTART:
|
|
subcode = 6; // Errcode 6, 6 - other configuration change
|
|
break;
|
|
|
|
case PDC_CMD_DISABLE:
|
|
case PDC_CMD_SHUTDOWN:
|
|
subcode = 2; // Errcode 6, 2 - administrative shutdown
|
|
break;
|
|
|
|
case PDC_CMD_RESTART:
|
|
subcode = 4; // Errcode 6, 4 - administrative reset
|
|
break;
|
|
|
|
case PDC_RX_LIMIT_HIT:
|
|
case PDC_IN_LIMIT_HIT:
|
|
subcode = 1; // Errcode 6, 1 - max number of prefixes reached
|
|
/* log message for compatibility */
|
|
log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
|
|
goto limit;
|
|
|
|
case PDC_OUT_LIMIT_HIT:
|
|
subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
|
|
|
|
limit:
|
|
bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
|
|
if (proto_restart)
|
|
bgp_update_startup_delay(p);
|
|
else
|
|
p->startup_delay = 0;
|
|
goto done;
|
|
}
|
|
|
|
bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
|
|
p->startup_delay = 0;
|
|
|
|
done:
|
|
bgp_stop(p, subcode);
|
|
return p->p.proto_state;
|
|
}
|
|
|
|
static void
|
|
bgp_cleanup(struct proto *P)
|
|
{
|
|
struct bgp_proto *p = (struct bgp_proto *) P;
|
|
rt_unlock_table(p->igp_table);
|
|
}
|
|
|
|
static rtable *
|
|
get_igp_table(struct bgp_config *cf)
|
|
{
|
|
return cf->igp_table ? cf->igp_table->table : cf->c.table->table;
|
|
}
|
|
|
|
static struct proto *
|
|
bgp_init(struct proto_config *C)
|
|
{
|
|
struct proto *P = proto_new(C, sizeof(struct bgp_proto));
|
|
struct bgp_config *c = (struct bgp_config *) C;
|
|
struct bgp_proto *p = (struct bgp_proto *) P;
|
|
|
|
P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL;
|
|
P->rt_notify = bgp_rt_notify;
|
|
P->import_control = bgp_import_control;
|
|
P->neigh_notify = bgp_neigh_notify;
|
|
P->reload_routes = bgp_reload_routes;
|
|
P->feed_begin = bgp_feed_begin;
|
|
P->feed_end = bgp_feed_end;
|
|
P->rte_better = bgp_rte_better;
|
|
P->rte_mergable = bgp_rte_mergable;
|
|
P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
|
|
|
|
p->cf = c;
|
|
p->local_as = c->local_as;
|
|
p->remote_as = c->remote_as;
|
|
p->is_internal = (c->local_as == c->remote_as);
|
|
p->rs_client = c->rs_client;
|
|
p->rr_client = c->rr_client;
|
|
p->igp_table = get_igp_table(c);
|
|
|
|
return P;
|
|
}
|
|
|
|
|
|
void
|
|
bgp_check_config(struct bgp_config *c)
|
|
{
|
|
int internal = (c->local_as == c->remote_as);
|
|
|
|
/* Do not check templates at all */
|
|
if (c->c.class == SYM_TEMPLATE)
|
|
return;
|
|
|
|
|
|
/* EBGP direct by default, IBGP multihop by default */
|
|
if (c->multihop < 0)
|
|
c->multihop = internal ? 64 : 0;
|
|
|
|
/* Different default for gw_mode */
|
|
if (!c->gw_mode)
|
|
c->gw_mode = c->multihop ? GW_RECURSIVE : GW_DIRECT;
|
|
|
|
/* Different default based on rs_client */
|
|
if (!c->missing_lladdr)
|
|
c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF;
|
|
|
|
/* Disable after error incompatible with restart limit action */
|
|
if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error)
|
|
c->c.in_limit->action = PLA_DISABLE;
|
|
|
|
|
|
if (!c->local_as)
|
|
cf_error("Local AS number must be set");
|
|
|
|
if (ipa_zero(c->remote_ip))
|
|
cf_error("Neighbor must be configured");
|
|
|
|
if (!c->remote_as)
|
|
cf_error("Remote AS number must be set");
|
|
|
|
// if (ipa_is_link_local(c->remote_ip) && !c->iface)
|
|
// cf_error("Link-local neighbor address requires specified interface");
|
|
|
|
if (!ipa_is_link_local(c->remote_ip) != !c->iface)
|
|
cf_error("Link-local address and interface scope must be used together");
|
|
|
|
if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
|
|
cf_error("Neighbor AS number out of range (AS4 not available)");
|
|
|
|
if (!internal && c->rr_client)
|
|
cf_error("Only internal neighbor can be RR client");
|
|
|
|
if (internal && c->rs_client)
|
|
cf_error("Only external neighbor can be RS client");
|
|
|
|
if (c->multihop && (c->gw_mode == GW_DIRECT))
|
|
cf_error("Multihop BGP cannot use direct gateway mode");
|
|
|
|
if (c->multihop && (ipa_is_link_local(c->remote_ip) ||
|
|
ipa_is_link_local(c->source_addr)))
|
|
cf_error("Multihop BGP cannot be used with link-local addresses");
|
|
|
|
if (c->multihop && c->check_link)
|
|
cf_error("Multihop BGP cannot depend on link state");
|
|
|
|
if (c->multihop && c->bfd && ipa_zero(c->source_addr))
|
|
cf_error("Multihop BGP with BFD requires specified source address");
|
|
|
|
if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted)
|
|
cf_error("BGP in recursive mode prohibits sorted table");
|
|
|
|
if (c->deterministic_med && c->c.table->sorted)
|
|
cf_error("BGP with deterministic MED prohibits sorted table");
|
|
|
|
if (c->secondary && !c->c.table->sorted)
|
|
cf_error("BGP with secondary option requires sorted table");
|
|
}
|
|
|
|
static int
|
|
bgp_reconfigure(struct proto *P, struct proto_config *C)
|
|
{
|
|
struct bgp_config *new = (struct bgp_config *) C;
|
|
struct bgp_proto *p = (struct bgp_proto *) P;
|
|
struct bgp_config *old = p->cf;
|
|
|
|
if (proto_get_router_id(C) != p->local_id)
|
|
return 0;
|
|
|
|
int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
|
|
((byte *) new) + sizeof(struct proto_config),
|
|
// password item is last and must be checked separately
|
|
OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
|
|
&& ((!old->password && !new->password)
|
|
|| (old->password && new->password && !strcmp(old->password, new->password)))
|
|
&& (get_igp_table(old) == get_igp_table(new));
|
|
|
|
if (same && (p->start_state > BSS_PREPARE))
|
|
bgp_update_bfd(p, new->bfd);
|
|
|
|
/* We should update our copy of configuration ptr as old configuration will be freed */
|
|
if (same)
|
|
p->cf = new;
|
|
|
|
return same;
|
|
}
|
|
|
|
static void
|
|
bgp_copy_config(struct proto_config *dest, struct proto_config *src)
|
|
{
|
|
/* Just a shallow copy */
|
|
proto_copy_rest(dest, src, sizeof(struct bgp_config));
|
|
}
|
|
|
|
|
|
/**
|
|
* bgp_error - report a protocol error
|
|
* @c: connection
|
|
* @code: error code (according to the RFC)
|
|
* @subcode: error sub-code
|
|
* @data: data to be passed in the Notification message
|
|
* @len: length of the data
|
|
*
|
|
* bgp_error() sends a notification packet to tell the other side that a protocol
|
|
* error has occurred (including the data considered erroneous if possible) and
|
|
* closes the connection.
|
|
*/
|
|
void
|
|
bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
|
|
{
|
|
struct bgp_proto *p = c->bgp;
|
|
|
|
if (c->state == BS_CLOSE)
|
|
return;
|
|
|
|
bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
|
|
bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
|
|
bgp_conn_enter_close_state(c);
|
|
|
|
c->notify_code = code;
|
|
c->notify_subcode = subcode;
|
|
c->notify_data = data;
|
|
c->notify_size = (len > 0) ? len : 0;
|
|
bgp_schedule_packet(c, PKT_NOTIFICATION);
|
|
|
|
if (code != 6)
|
|
{
|
|
bgp_update_startup_delay(p);
|
|
bgp_stop(p, 0);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* bgp_store_error - store last error for status report
|
|
* @p: BGP instance
|
|
* @c: connection
|
|
* @class: error class (BE_xxx constants)
|
|
* @code: error code (class specific)
|
|
*
|
|
* bgp_store_error() decides whether given error is interesting enough
|
|
* and store that error to last_error variables of @p
|
|
*/
|
|
void
|
|
bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
|
|
{
|
|
/* During PS_UP, we ignore errors on secondary connection */
|
|
if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
|
|
return;
|
|
|
|
/* During PS_STOP, we ignore any errors, as we want to report
|
|
* the error that caused transition to PS_STOP
|
|
*/
|
|
if (p->p.proto_state == PS_STOP)
|
|
return;
|
|
|
|
p->last_error_class = class;
|
|
p->last_error_code = code;
|
|
}
|
|
|
|
static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };
|
|
static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
|
|
static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "Link down", "BFD session down", "Graceful restart"};
|
|
static char *bgp_auto_errors[] = { "", "Route limit exceeded"};
|
|
|
|
static const char *
|
|
bgp_last_errmsg(struct bgp_proto *p)
|
|
{
|
|
switch (p->last_error_class)
|
|
{
|
|
case BE_MISC:
|
|
return bgp_misc_errors[p->last_error_code];
|
|
case BE_SOCKET:
|
|
return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
|
|
case BE_BGP_RX:
|
|
case BE_BGP_TX:
|
|
return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
|
|
case BE_AUTO_DOWN:
|
|
return bgp_auto_errors[p->last_error_code];
|
|
default:
|
|
return "";
|
|
}
|
|
}
|
|
|
|
static const char *
|
|
bgp_state_dsc(struct bgp_proto *p)
|
|
{
|
|
if (p->p.proto_state == PS_DOWN)
|
|
return "Down";
|
|
|
|
int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
|
|
if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
|
|
return "Passive";
|
|
|
|
return bgp_state_names[state];
|
|
}
|
|
|
|
static void
|
|
bgp_get_status(struct proto *P, byte *buf)
|
|
{
|
|
struct bgp_proto *p = (struct bgp_proto *) P;
|
|
|
|
const char *err1 = bgp_err_classes[p->last_error_class];
|
|
const char *err2 = bgp_last_errmsg(p);
|
|
|
|
if (P->proto_state == PS_DOWN)
|
|
bsprintf(buf, "%s%s", err1, err2);
|
|
else
|
|
bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
|
|
}
|
|
|
|
static void
|
|
bgp_show_proto_info(struct proto *P)
|
|
{
|
|
struct bgp_proto *p = (struct bgp_proto *) P;
|
|
struct bgp_conn *c = p->conn;
|
|
|
|
proto_show_basic_info(P);
|
|
|
|
cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
|
|
cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
|
|
cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
|
|
|
|
if (p->gr_active)
|
|
cli_msg(-1006, " Neighbor graceful restart active");
|
|
|
|
if (P->proto_state == PS_START)
|
|
{
|
|
struct bgp_conn *oc = &p->outgoing_conn;
|
|
|
|
if ((p->start_state < BSS_CONNECT) &&
|
|
(p->startup_timer->expires))
|
|
cli_msg(-1006, " Error wait: %d/%d",
|
|
p->startup_timer->expires - now, p->startup_delay);
|
|
|
|
if ((oc->state == BS_ACTIVE) &&
|
|
(oc->connect_retry_timer->expires))
|
|
cli_msg(-1006, " Connect delay: %d/%d",
|
|
oc->connect_retry_timer->expires - now, p->cf->connect_delay_time);
|
|
|
|
if (p->gr_active && p->gr_timer->expires)
|
|
cli_msg(-1006, " Restart timer: %d/-", p->gr_timer->expires - now);
|
|
}
|
|
else if (P->proto_state == PS_UP)
|
|
{
|
|
cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
|
|
cli_msg(-1006, " Neighbor caps: %s%s%s%s%s%s%s",
|
|
c->peer_refresh_support ? " refresh" : "",
|
|
c->peer_enhanced_refresh_support ? " enhanced-refresh" : "",
|
|
c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
|
|
c->peer_as4_support ? " AS4" : "",
|
|
(c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
|
|
(c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "",
|
|
c->peer_ext_messages_support ? " ext-messages" : "");
|
|
cli_msg(-1006, " Session: %s%s%s%s%s%s%s%s",
|
|
p->is_internal ? "internal" : "external",
|
|
p->cf->multihop ? " multihop" : "",
|
|
p->rr_client ? " route-reflector" : "",
|
|
p->rs_client ? " route-server" : "",
|
|
p->as4_session ? " AS4" : "",
|
|
p->add_path_rx ? " add-path-rx" : "",
|
|
p->add_path_tx ? " add-path-tx" : "",
|
|
p->ext_messages ? " ext-messages" : "");
|
|
cli_msg(-1006, " Source address: %I", p->source_addr);
|
|
if (P->cf->in_limit)
|
|
cli_msg(-1006, " Route limit: %d/%d",
|
|
p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit);
|
|
cli_msg(-1006, " Hold timer: %d/%d",
|
|
tm_remains(c->hold_timer), c->hold_time);
|
|
cli_msg(-1006, " Keepalive timer: %d/%d",
|
|
tm_remains(c->keepalive_timer), c->keepalive_time);
|
|
}
|
|
|
|
if ((p->last_error_class != BE_NONE) &&
|
|
(p->last_error_class != BE_MAN_DOWN))
|
|
{
|
|
const char *err1 = bgp_err_classes[p->last_error_class];
|
|
const char *err2 = bgp_last_errmsg(p);
|
|
cli_msg(-1006, " Last error: %s%s", err1, err2);
|
|
}
|
|
}
|
|
|
|
struct protocol proto_bgp = {
|
|
.name = "BGP",
|
|
.template = "bgp%d",
|
|
.attr_class = EAP_BGP,
|
|
.preference = DEF_PREF_BGP,
|
|
.config_size = sizeof(struct bgp_config),
|
|
.init = bgp_init,
|
|
.start = bgp_start,
|
|
.shutdown = bgp_shutdown,
|
|
.cleanup = bgp_cleanup,
|
|
.reconfigure = bgp_reconfigure,
|
|
.copy_config = bgp_copy_config,
|
|
.get_status = bgp_get_status,
|
|
.get_attr = bgp_get_attr,
|
|
.get_route_info = bgp_get_route_info,
|
|
.show_proto_info = bgp_show_proto_info
|
|
};
|