Merge branch 'bgp-grace'
This commit is contained in:
commit
c980f8002e
19 changed files with 1045 additions and 238 deletions
|
@ -98,6 +98,7 @@ config_alloc(byte *name)
|
||||||
c->load_time = now;
|
c->load_time = now;
|
||||||
c->tf_route = c->tf_proto = (struct timeformat){"%T", "%F", 20*3600};
|
c->tf_route = c->tf_proto = (struct timeformat){"%T", "%F", 20*3600};
|
||||||
c->tf_base = c->tf_log = (struct timeformat){"%F %T", NULL, 0};
|
c->tf_base = c->tf_log = (struct timeformat){"%F %T", NULL, 0};
|
||||||
|
c->gr_wait = DEFAULT_GR_WAIT;
|
||||||
|
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,6 +38,7 @@ struct config {
|
||||||
struct timeformat tf_proto; /* Time format for 'show protocol' */
|
struct timeformat tf_proto; /* Time format for 'show protocol' */
|
||||||
struct timeformat tf_log; /* Time format for the logfile */
|
struct timeformat tf_log; /* Time format for the logfile */
|
||||||
struct timeformat tf_base; /* Time format for other purposes */
|
struct timeformat tf_base; /* Time format for other purposes */
|
||||||
|
u32 gr_wait; /* Graceful restart wait timeout */
|
||||||
|
|
||||||
int cli_debug; /* Tracing of CLI connections and commands */
|
int cli_debug; /* Tracing of CLI connections and commands */
|
||||||
char *err_msg; /* Parser error message */
|
char *err_msg; /* Parser error message */
|
||||||
|
|
|
@ -157,6 +157,9 @@ options. The most important ones are:
|
||||||
|
|
||||||
<tag>-f</tag>
|
<tag>-f</tag>
|
||||||
run bird in foreground.
|
run bird in foreground.
|
||||||
|
|
||||||
|
<tag>-R</tag>
|
||||||
|
apply graceful restart recovery after start.
|
||||||
</descrip>
|
</descrip>
|
||||||
|
|
||||||
<p>BIRD writes messages about its work to log files or syslog (according to config).
|
<p>BIRD writes messages about its work to log files or syslog (according to config).
|
||||||
|
@ -187,6 +190,7 @@ configuration, but it is generally easy -- BIRD needs just the
|
||||||
standard library, privileges to read the config file and create the
|
standard library, privileges to read the config file and create the
|
||||||
control socket and the CAP_NET_* capabilities.
|
control socket and the CAP_NET_* capabilities.
|
||||||
|
|
||||||
|
|
||||||
<chapt>About routing tables
|
<chapt>About routing tables
|
||||||
|
|
||||||
<p>BIRD has one or more routing tables which may or may not be
|
<p>BIRD has one or more routing tables which may or may not be
|
||||||
|
@ -242,6 +246,20 @@ using comparison and ordering). Minor advantage is that routes are
|
||||||
shown sorted in <cf/show route/, minor disadvantage is that it is
|
shown sorted in <cf/show route/, minor disadvantage is that it is
|
||||||
slightly more computationally expensive.
|
slightly more computationally expensive.
|
||||||
|
|
||||||
|
<sect>Graceful restart
|
||||||
|
|
||||||
|
<p>When BIRD is started after restart or crash, it repopulates routing tables in
|
||||||
|
an uncoordinated manner, like after clean start. This may be impractical in some
|
||||||
|
cases, because if the forwarding plane (i.e. kernel routing tables) remains
|
||||||
|
intact, then its synchronization with BIRD would temporarily disrupt packet
|
||||||
|
forwarding until protocols converge. Graceful restart is a mechanism that could
|
||||||
|
help with this issue. Generally, it works by starting protocols and letting them
|
||||||
|
repopulate routing tables while deferring route propagation until protocols
|
||||||
|
acknowledge their convergence. Note that graceful restart behavior has to be
|
||||||
|
configured for all relevant protocols and requires protocol-specific support
|
||||||
|
(currently implemented for Kernel and BGP protocols), it is activated for
|
||||||
|
particular boot by option <cf/-R/.
|
||||||
|
|
||||||
|
|
||||||
<chapt>Configuration
|
<chapt>Configuration
|
||||||
|
|
||||||
|
@ -371,6 +389,12 @@ protocol rip {
|
||||||
would accept IPv6 routes only). Such behavior was default in
|
would accept IPv6 routes only). Such behavior was default in
|
||||||
older versions of BIRD.
|
older versions of BIRD.
|
||||||
|
|
||||||
|
<tag>graceful restart wait <m/number/</tag>
|
||||||
|
During graceful restart recovery, BIRD waits for convergence of routing
|
||||||
|
protocols. This option allows to specify a timeout for the recovery to
|
||||||
|
prevent waiting indefinitely if some protocols cannot converge. Default:
|
||||||
|
240 seconds.
|
||||||
|
|
||||||
<tag>timeformat route|protocol|base|log "<m/format1/" [<m/limit/ "<m/format2/"]</tag>
|
<tag>timeformat route|protocol|base|log "<m/format1/" [<m/limit/ "<m/format2/"]</tag>
|
||||||
This option allows to specify a format of date/time used by
|
This option allows to specify a format of date/time used by
|
||||||
BIRD. The first argument specifies for which purpose such
|
BIRD. The first argument specifies for which purpose such
|
||||||
|
@ -1493,6 +1517,8 @@ extended communities
|
||||||
(RFC 4360<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4360.txt">),
|
(RFC 4360<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4360.txt">),
|
||||||
route reflectors
|
route reflectors
|
||||||
(RFC 4456<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4456.txt">),
|
(RFC 4456<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4456.txt">),
|
||||||
|
graceful restart
|
||||||
|
(RFC 4724<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4724.txt">),
|
||||||
multiprotocol extensions
|
multiprotocol extensions
|
||||||
(RFC 4760<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4760.txt">),
|
(RFC 4760<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4760.txt">),
|
||||||
4B AS numbers
|
4B AS numbers
|
||||||
|
@ -1502,9 +1528,7 @@ and 4B AS numbers in extended communities
|
||||||
|
|
||||||
|
|
||||||
For IPv6, it uses the standard multiprotocol extensions defined in
|
For IPv6, it uses the standard multiprotocol extensions defined in
|
||||||
RFC 2283<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc2283.txt">
|
RFC 4760<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc4760.txt">
|
||||||
including changes described in the
|
|
||||||
latest draft<htmlurl url="ftp://ftp.rfc-editor.org/internet-drafts/draft-ietf-idr-bgp4-multiprotocol-v2-05.txt">
|
|
||||||
and applied to IPv6 according to
|
and applied to IPv6 according to
|
||||||
RFC 2545<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc2545.txt">.
|
RFC 2545<htmlurl url="ftp://ftp.rfc-editor.org/in-notes/rfc2545.txt">.
|
||||||
|
|
||||||
|
@ -1716,6 +1740,26 @@ for each neighbor using the following configuration parameters:
|
||||||
capability and accepts such requests. Even when disabled, BIRD
|
capability and accepts such requests. Even when disabled, BIRD
|
||||||
can send route refresh requests. Default: on.
|
can send route refresh requests. Default: on.
|
||||||
|
|
||||||
|
<tag>graceful restart <m/switch/|aware</tag>
|
||||||
|
When a BGP speaker restarts or crashes, neighbors will discard all
|
||||||
|
received paths from the speaker, which disrupts packet forwarding even
|
||||||
|
when the forwarding plane of the speaker remains intact. RFC 4724
|
||||||
|
specifies an optional graceful restart mechanism to alleviate this
|
||||||
|
issue. This option controls the mechanism. It has three states:
|
||||||
|
Disabled, when no support is provided. Aware, when the graceful restart
|
||||||
|
support is announced and the support for restarting neighbors is
|
||||||
|
provided, but no local graceful restart is allowed (i.e. receiving-only
|
||||||
|
role). Enabled, when the full graceful restart support is provided
|
||||||
|
(i.e. both restarting and receiving role). Note that proper support for
|
||||||
|
local graceful restart requires also configuration of other protocols.
|
||||||
|
Default: aware.
|
||||||
|
|
||||||
|
<tag>graceful restart time <m/number/</tag>
|
||||||
|
The restart time is announced in the BGP graceful restart capability
|
||||||
|
and specifies how long the neighbor would wait for the BGP session to
|
||||||
|
re-establish after a restart before deleting stale routes. Default:
|
||||||
|
120 seconds.
|
||||||
|
|
||||||
<tag>interpret communities <m/switch/</tag> RFC 1997 demands
|
<tag>interpret communities <m/switch/</tag> RFC 1997 demands
|
||||||
that BGP speaker should process well-known communities like
|
that BGP speaker should process well-known communities like
|
||||||
no-export (65535, 65281) or no-advertise (65535, 65282). For
|
no-export (65535, 65281) or no-advertise (65535, 65282). For
|
||||||
|
@ -2063,25 +2107,36 @@ overcome using another routing table and the pipe protocol.
|
||||||
<sect1>Configuration
|
<sect1>Configuration
|
||||||
|
|
||||||
<p><descrip>
|
<p><descrip>
|
||||||
<tag>persist <m/switch/</tag> Tell BIRD to leave all its routes in the
|
<tag>persist <m/switch/</tag>
|
||||||
routing tables when it exits (instead of cleaning them up).
|
Tell BIRD to leave all its routes in the routing tables when it exits
|
||||||
<tag>scan time <m/number/</tag> Time in seconds between two consecutive scans of the
|
(instead of cleaning them up).
|
||||||
kernel routing table.
|
|
||||||
<tag>learn <m/switch/</tag> Enable learning of routes added to the kernel
|
|
||||||
routing tables by other routing daemons or by the system administrator.
|
|
||||||
This is possible only on systems which support identification of route
|
|
||||||
authorship.
|
|
||||||
|
|
||||||
<tag>device routes <m/switch/</tag> Enable export of device
|
<tag>scan time <m/number/</tag>
|
||||||
routes to the kernel routing table. By default, such routes
|
Time in seconds between two consecutive scans of the kernel routing
|
||||||
are rejected (with the exception of explicitly configured
|
table.
|
||||||
device routes from the static protocol) regardless of the
|
|
||||||
export filter to protect device routes in kernel routing table
|
|
||||||
(managed by OS itself) from accidental overwriting or erasing.
|
|
||||||
|
|
||||||
<tag>kernel table <m/number/</tag> Select which kernel table should
|
<tag>learn <m/switch/</tag>
|
||||||
this particular instance of the Kernel protocol work with. Available
|
Enable learning of routes added to the kernel routing tables by other
|
||||||
only on systems supporting multiple routing tables.
|
routing daemons or by the system administrator. This is possible only on
|
||||||
|
systems which support identification of route authorship.
|
||||||
|
|
||||||
|
<tag>device routes <m/switch/</tag>
|
||||||
|
Enable export of device routes to the kernel routing table. By default,
|
||||||
|
such routes are rejected (with the exception of explicitly configured
|
||||||
|
device routes from the static protocol) regardless of the export filter
|
||||||
|
to protect device routes in kernel routing table (managed by OS itself)
|
||||||
|
from accidental overwriting or erasing.
|
||||||
|
|
||||||
|
<tag>kernel table <m/number/</tag>
|
||||||
|
Select which kernel table should this particular instance of the Kernel
|
||||||
|
protocol work with. Available only on systems supporting multiple
|
||||||
|
routing tables.
|
||||||
|
|
||||||
|
<tag>graceful restart <m/switch/</tag>
|
||||||
|
Participate in graceful restart recovery. If this option is enabled and
|
||||||
|
a graceful restart recovery is active, the Kernel protocol will defer
|
||||||
|
synchronization of routing tables until the end of the recovery. Note
|
||||||
|
that import of kernel routes to BIRD is not affected.
|
||||||
</descrip>
|
</descrip>
|
||||||
|
|
||||||
<sect1>Attributes
|
<sect1>Attributes
|
||||||
|
|
|
@ -32,6 +32,7 @@ Reply codes of BIRD command-line interface
|
||||||
0021 Undo requested
|
0021 Undo requested
|
||||||
0022 Undo scheduled
|
0022 Undo scheduled
|
||||||
0023 Evaluation of expression
|
0023 Evaluation of expression
|
||||||
|
0024 Graceful restart status report
|
||||||
|
|
||||||
1000 BIRD version
|
1000 BIRD version
|
||||||
1001 Interface list
|
1001 Interface list
|
||||||
|
|
|
@ -36,6 +36,8 @@ typedef struct list { /* In fact two overlayed nodes */
|
||||||
#define NODE_NEXT(n) ((void *)((NODE (n))->next))
|
#define NODE_NEXT(n) ((void *)((NODE (n))->next))
|
||||||
#define NODE_VALID(n) ((NODE (n))->next)
|
#define NODE_VALID(n) ((NODE (n))->next)
|
||||||
#define WALK_LIST(n,list) for(n=HEAD(list); NODE_VALID(n); n=NODE_NEXT(n))
|
#define WALK_LIST(n,list) for(n=HEAD(list); NODE_VALID(n); n=NODE_NEXT(n))
|
||||||
|
#define WALK_LIST2(n,nn,list,pos) \
|
||||||
|
for(nn=(list).head; NODE_VALID(nn) && (n=SKIP_BACK(typeof(*n),pos,nn)); nn=nn->next)
|
||||||
#define WALK_LIST_DELSAFE(n,nxt,list) \
|
#define WALK_LIST_DELSAFE(n,nxt,list) \
|
||||||
for(n=HEAD(list); nxt=NODE_NEXT(n); n=(void *) nxt)
|
for(n=HEAD(list); nxt=NODE_NEXT(n); n=(void *) nxt)
|
||||||
/* WALK_LIST_FIRST supposes that called code removes each processed node */
|
/* WALK_LIST_FIRST supposes that called code removes each processed node */
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "nest/bird.h"
|
#include "nest/bird.h"
|
||||||
|
#include "nest/protocol.h"
|
||||||
#include "nest/route.h"
|
#include "nest/route.h"
|
||||||
#include "nest/cli.h"
|
#include "nest/cli.h"
|
||||||
#include "conf/conf.h"
|
#include "conf/conf.h"
|
||||||
|
@ -32,6 +33,8 @@ cmd_show_status(void)
|
||||||
tm_format_datetime(tim, &config->tf_base, config->load_time);
|
tm_format_datetime(tim, &config->tf_base, config->load_time);
|
||||||
cli_msg(-1011, "Last reconfiguration on %s", tim);
|
cli_msg(-1011, "Last reconfiguration on %s", tim);
|
||||||
|
|
||||||
|
graceful_restart_show_status();
|
||||||
|
|
||||||
if (shutting_down)
|
if (shutting_down)
|
||||||
cli_msg(13, "Shutdown in progress");
|
cli_msg(13, "Shutdown in progress");
|
||||||
else if (configuring)
|
else if (configuring)
|
||||||
|
|
|
@ -49,6 +49,7 @@ CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFA
|
||||||
CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE, ROA, MAX, FLUSH, AS)
|
CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE, ROA, MAX, FLUSH, AS)
|
||||||
CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED)
|
CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED)
|
||||||
CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP)
|
CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP)
|
||||||
|
CF_KEYWORDS(GRACEFUL, RESTART, WAIT)
|
||||||
|
|
||||||
CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT,
|
CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT,
|
||||||
RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE)
|
RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE)
|
||||||
|
@ -110,6 +111,11 @@ listen_opt:
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
|
CF_ADDTO(conf, gr_opts)
|
||||||
|
|
||||||
|
gr_opts: GRACEFUL RESTART WAIT expr ';' { new_config->gr_wait = $4; } ;
|
||||||
|
|
||||||
|
|
||||||
/* Creation of routing tables */
|
/* Creation of routing tables */
|
||||||
|
|
||||||
tab_sorted:
|
tab_sorted:
|
||||||
|
|
585
nest/proto.c
585
nest/proto.c
|
@ -35,26 +35,53 @@ static struct proto *initial_device_proto;
|
||||||
|
|
||||||
static event *proto_flush_event;
|
static event *proto_flush_event;
|
||||||
static timer *proto_shutdown_timer;
|
static timer *proto_shutdown_timer;
|
||||||
|
static timer *gr_wait_timer;
|
||||||
|
|
||||||
|
#define GRS_NONE 0
|
||||||
|
#define GRS_INIT 1
|
||||||
|
#define GRS_ACTIVE 2
|
||||||
|
#define GRS_DONE 3
|
||||||
|
|
||||||
|
static int graceful_restart_state;
|
||||||
|
static u32 graceful_restart_locks;
|
||||||
|
|
||||||
static char *p_states[] = { "DOWN", "START", "UP", "STOP" };
|
static char *p_states[] = { "DOWN", "START", "UP", "STOP" };
|
||||||
static char *c_states[] = { "HUNGRY", "FEEDING", "HAPPY", "FLUSHING" };
|
static char *c_states[] = { "HUNGRY", "???", "HAPPY", "FLUSHING" };
|
||||||
|
|
||||||
static void proto_flush_loop(void *);
|
static void proto_flush_loop(void *);
|
||||||
static void proto_shutdown_loop(struct timer *);
|
static void proto_shutdown_loop(struct timer *);
|
||||||
static void proto_rethink_goal(struct proto *p);
|
static void proto_rethink_goal(struct proto *p);
|
||||||
|
static void proto_want_export_up(struct proto *p);
|
||||||
|
static void proto_fell_down(struct proto *p);
|
||||||
static char *proto_state_name(struct proto *p);
|
static char *proto_state_name(struct proto *p);
|
||||||
|
|
||||||
static void
|
|
||||||
proto_enqueue(list *l, struct proto *p)
|
|
||||||
{
|
|
||||||
add_tail(l, &p->n);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
proto_relink(struct proto *p)
|
proto_relink(struct proto *p)
|
||||||
{
|
{
|
||||||
list *l = NULL;
|
list *l = NULL;
|
||||||
|
|
||||||
|
switch (p->core_state)
|
||||||
|
{
|
||||||
|
case FS_HUNGRY:
|
||||||
|
l = &inactive_proto_list;
|
||||||
|
break;
|
||||||
|
case FS_HAPPY:
|
||||||
|
l = &active_proto_list;
|
||||||
|
break;
|
||||||
|
case FS_FLUSHING:
|
||||||
|
l = &flush_proto_list;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ASSERT(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
rem_node(&p->n);
|
||||||
|
add_tail(l, &p->n);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
proto_log_state_change(struct proto *p)
|
||||||
|
{
|
||||||
if (p->debug & D_STATES)
|
if (p->debug & D_STATES)
|
||||||
{
|
{
|
||||||
char *name = proto_state_name(p);
|
char *name = proto_state_name(p);
|
||||||
|
@ -66,25 +93,9 @@ proto_relink(struct proto *p)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
p->last_state_name_announced = NULL;
|
p->last_state_name_announced = NULL;
|
||||||
rem_node(&p->n);
|
|
||||||
switch (p->core_state)
|
|
||||||
{
|
|
||||||
case FS_HUNGRY:
|
|
||||||
l = &inactive_proto_list;
|
|
||||||
break;
|
|
||||||
case FS_FEEDING:
|
|
||||||
case FS_HAPPY:
|
|
||||||
l = &active_proto_list;
|
|
||||||
break;
|
|
||||||
case FS_FLUSHING:
|
|
||||||
l = &flush_proto_list;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
ASSERT(0);
|
|
||||||
}
|
|
||||||
proto_enqueue(l, p);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* proto_new - create a new protocol instance
|
* proto_new - create a new protocol instance
|
||||||
* @c: protocol configuration
|
* @c: protocol configuration
|
||||||
|
@ -126,6 +137,9 @@ proto_init_instance(struct proto *p)
|
||||||
p->attn = ev_new(p->pool);
|
p->attn = ev_new(p->pool);
|
||||||
p->attn->data = p;
|
p->attn->data = p;
|
||||||
|
|
||||||
|
if (graceful_restart_state == GRS_INIT)
|
||||||
|
p->gr_recovery = 1;
|
||||||
|
|
||||||
if (! p->proto->multitable)
|
if (! p->proto->multitable)
|
||||||
rt_lock_table(p->table);
|
rt_lock_table(p->table);
|
||||||
}
|
}
|
||||||
|
@ -137,21 +151,20 @@ extern pool *rt_table_pool;
|
||||||
* @t: routing table to connect to
|
* @t: routing table to connect to
|
||||||
* @stats: per-table protocol statistics
|
* @stats: per-table protocol statistics
|
||||||
*
|
*
|
||||||
* This function creates a connection between the protocol instance @p
|
* This function creates a connection between the protocol instance @p and the
|
||||||
* and the routing table @t, making the protocol hear all changes in
|
* routing table @t, making the protocol hear all changes in the table.
|
||||||
* the table.
|
|
||||||
*
|
*
|
||||||
* The announce hook is linked in the protocol ahook list and, if the
|
* The announce hook is linked in the protocol ahook list. Announce hooks are
|
||||||
* protocol accepts routes, also in the table ahook list. Announce
|
* allocated from the routing table resource pool and when protocol accepts
|
||||||
* hooks are allocated from the routing table resource pool, they are
|
* routes also in the table ahook list. They are linked to the table ahook list
|
||||||
* unlinked from the table ahook list after the protocol went down,
|
* and unlinked from it depending on export_state (in proto_want_export_up() and
|
||||||
* (in proto_schedule_flush()) and they are automatically freed after the
|
* proto_want_export_down()) and they are automatically freed after the protocol
|
||||||
* protocol is flushed (in proto_fell_down()).
|
* is flushed (in proto_fell_down()).
|
||||||
*
|
*
|
||||||
* Unless you want to listen to multiple routing tables (as the Pipe
|
* Unless you want to listen to multiple routing tables (as the Pipe protocol
|
||||||
* protocol does), you needn't to worry about this function since the
|
* does), you needn't to worry about this function since the connection to the
|
||||||
* connection to the protocol's primary routing table is initialized
|
* protocol's primary routing table is initialized automatically by the core
|
||||||
* automatically by the core code.
|
* code.
|
||||||
*/
|
*/
|
||||||
struct announce_hook *
|
struct announce_hook *
|
||||||
proto_add_announce_hook(struct proto *p, struct rtable *t, struct proto_stats *stats)
|
proto_add_announce_hook(struct proto *p, struct rtable *t, struct proto_stats *stats)
|
||||||
|
@ -169,7 +182,7 @@ proto_add_announce_hook(struct proto *p, struct rtable *t, struct proto_stats *s
|
||||||
h->next = p->ahooks;
|
h->next = p->ahooks;
|
||||||
p->ahooks = h;
|
p->ahooks = h;
|
||||||
|
|
||||||
if (p->rt_notify)
|
if (p->rt_notify && (p->export_state != ES_DOWN))
|
||||||
add_tail(&t->hooks, &h->n);
|
add_tail(&t->hooks, &h->n);
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
@ -193,6 +206,16 @@ proto_find_announce_hook(struct proto *p, struct rtable *t)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
proto_link_ahooks(struct proto *p)
|
||||||
|
{
|
||||||
|
struct announce_hook *h;
|
||||||
|
|
||||||
|
if (p->rt_notify)
|
||||||
|
for(h=p->ahooks; h; h=h->next)
|
||||||
|
add_tail(&h->table->hooks, &h->n);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
proto_unlink_ahooks(struct proto *p)
|
proto_unlink_ahooks(struct proto *p)
|
||||||
{
|
{
|
||||||
|
@ -362,9 +385,11 @@ proto_init(struct proto_config *c)
|
||||||
|
|
||||||
q->proto_state = PS_DOWN;
|
q->proto_state = PS_DOWN;
|
||||||
q->core_state = FS_HUNGRY;
|
q->core_state = FS_HUNGRY;
|
||||||
|
q->export_state = ES_DOWN;
|
||||||
q->last_state_change = now;
|
q->last_state_change = now;
|
||||||
|
|
||||||
proto_enqueue(&initial_proto_list, q);
|
add_tail(&initial_proto_list, &q->n);
|
||||||
|
|
||||||
if (p == &proto_unix_iface)
|
if (p == &proto_unix_iface)
|
||||||
initial_device_proto = q;
|
initial_device_proto = q;
|
||||||
|
|
||||||
|
@ -590,6 +615,7 @@ static void
|
||||||
proto_rethink_goal(struct proto *p)
|
proto_rethink_goal(struct proto *p)
|
||||||
{
|
{
|
||||||
struct protocol *q;
|
struct protocol *q;
|
||||||
|
byte goal;
|
||||||
|
|
||||||
if (p->reconfiguring && p->core_state == FS_HUNGRY && p->proto_state == PS_DOWN)
|
if (p->reconfiguring && p->core_state == FS_HUNGRY && p->proto_state == PS_DOWN)
|
||||||
{
|
{
|
||||||
|
@ -606,22 +632,14 @@ proto_rethink_goal(struct proto *p)
|
||||||
|
|
||||||
/* Determine what state we want to reach */
|
/* Determine what state we want to reach */
|
||||||
if (p->disabled || p->reconfiguring)
|
if (p->disabled || p->reconfiguring)
|
||||||
{
|
goal = PS_DOWN;
|
||||||
p->core_goal = FS_HUNGRY;
|
|
||||||
if (p->core_state == FS_HUNGRY && p->proto_state == PS_DOWN)
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
goal = PS_UP;
|
||||||
p->core_goal = FS_HAPPY;
|
|
||||||
if (p->core_state == FS_HAPPY && p->proto_state == PS_UP)
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
q = p->proto;
|
q = p->proto;
|
||||||
if (p->core_goal == FS_HAPPY) /* Going up */
|
if (goal == PS_UP) /* Going up */
|
||||||
{
|
{
|
||||||
if (p->core_state == FS_HUNGRY && p->proto_state == PS_DOWN)
|
if (p->proto_state == PS_DOWN && p->core_state == FS_HUNGRY)
|
||||||
{
|
{
|
||||||
DBG("Kicking %s up\n", p->name);
|
DBG("Kicking %s up\n", p->name);
|
||||||
PD(p, "Starting");
|
PD(p, "Starting");
|
||||||
|
@ -640,6 +658,178 @@ proto_rethink_goal(struct proto *p)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DOC: Graceful restart recovery
|
||||||
|
*
|
||||||
|
* Graceful restart of a router is a process when the routing plane (e.g. BIRD)
|
||||||
|
* restarts but both the forwarding plane (e.g. kernel routing table) and routing
|
||||||
|
* neighbors keep proper routes, and therefore uninterrupted packet forwarding
|
||||||
|
* is maintained.
|
||||||
|
*
|
||||||
|
* BIRD implements graceful restart recovery by deferring export of routes to
|
||||||
|
* protocols until routing tables are refilled with the expected content. After
|
||||||
|
* start, protocols generate routes as usual, but routes are not propagated to
|
||||||
|
* them, until protocols report that they generated all routes. After that,
|
||||||
|
* graceful restart recovery is finished and the export (and the initial feed)
|
||||||
|
* to protocols is enabled.
|
||||||
|
*
|
||||||
|
* When graceful restart recovery need is detected during initialization, then
|
||||||
|
* enabled protocols are marked with @gr_recovery flag before start. Such
|
||||||
|
* protocols then decide how to proceed with graceful restart, participation is
|
||||||
|
* voluntary. Protocols could lock the recovery by proto_graceful_restart_lock()
|
||||||
|
* (stored in @gr_lock flag), which means that they want to postpone the end of
|
||||||
|
* the recovery until they converge and then unlock it. They also could set
|
||||||
|
* @gr_wait before advancing to %PS_UP, which means that the core should defer
|
||||||
|
* route export to that protocol until the end of the recovery. This should be
|
||||||
|
* done by protocols that expect their neighbors to keep the proper routes
|
||||||
|
* (kernel table, BGP sessions with BGP graceful restart capability).
|
||||||
|
*
|
||||||
|
* The graceful restart recovery is finished when either all graceful restart
|
||||||
|
* locks are unlocked or when graceful restart wait timer fires.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void graceful_restart_done(struct timer *t);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* graceful_restart_recovery - request initial graceful restart recovery
|
||||||
|
*
|
||||||
|
* Called by the platform initialization code if the need for recovery
|
||||||
|
* after graceful restart is detected during boot. Has to be called
|
||||||
|
* before protos_commit().
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
graceful_restart_recovery(void)
|
||||||
|
{
|
||||||
|
graceful_restart_state = GRS_INIT;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* graceful_restart_init - initialize graceful restart
|
||||||
|
*
|
||||||
|
* When graceful restart recovery was requested, the function starts an active
|
||||||
|
* phase of the recovery and initializes graceful restart wait timer. The
|
||||||
|
* function has to be called after protos_commit().
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
graceful_restart_init(void)
|
||||||
|
{
|
||||||
|
if (!graceful_restart_state)
|
||||||
|
return;
|
||||||
|
|
||||||
|
log(L_INFO "Graceful restart started");
|
||||||
|
|
||||||
|
if (!graceful_restart_locks)
|
||||||
|
{
|
||||||
|
graceful_restart_done(NULL);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
graceful_restart_state = GRS_ACTIVE;
|
||||||
|
gr_wait_timer = tm_new(proto_pool);
|
||||||
|
gr_wait_timer->hook = graceful_restart_done;
|
||||||
|
tm_start(gr_wait_timer, config->gr_wait);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* graceful_restart_done - finalize graceful restart
|
||||||
|
*
|
||||||
|
* When there are no locks on graceful restart, the function finalizes the
|
||||||
|
* graceful restart recovery. Protocols postponing route export until the end of
|
||||||
|
* the recovery are awakened and the export to them is enabled. All other
|
||||||
|
* related state is cleared. The function is also called when the graceful
|
||||||
|
* restart wait timer fires (but there are still some locks).
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
graceful_restart_done(struct timer *t UNUSED)
|
||||||
|
{
|
||||||
|
struct proto *p;
|
||||||
|
node *n;
|
||||||
|
|
||||||
|
log(L_INFO "Graceful restart done");
|
||||||
|
graceful_restart_state = GRS_DONE;
|
||||||
|
|
||||||
|
WALK_LIST2(p, n, proto_list, glob_node)
|
||||||
|
{
|
||||||
|
if (!p->gr_recovery)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Resume postponed export of routes */
|
||||||
|
if ((p->proto_state == PS_UP) && p->gr_wait)
|
||||||
|
{
|
||||||
|
proto_want_export_up(p);
|
||||||
|
proto_log_state_change(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Cleanup */
|
||||||
|
p->gr_recovery = 0;
|
||||||
|
p->gr_wait = 0;
|
||||||
|
p->gr_lock = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
graceful_restart_locks = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
graceful_restart_show_status(void)
|
||||||
|
{
|
||||||
|
if (graceful_restart_state != GRS_ACTIVE)
|
||||||
|
return;
|
||||||
|
|
||||||
|
cli_msg(-24, "Graceful restart recovery in progress");
|
||||||
|
cli_msg(-24, " Waiting for %d protocols to recover", graceful_restart_locks);
|
||||||
|
cli_msg(-24, " Wait timer is %d/%d", tm_remains(gr_wait_timer), config->gr_wait);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* proto_graceful_restart_lock - lock graceful restart by protocol
|
||||||
|
* @p: protocol instance
|
||||||
|
*
|
||||||
|
* This function allows a protocol to postpone the end of graceful restart
|
||||||
|
* recovery until it converges. The lock is removed when the protocol calls
|
||||||
|
* proto_graceful_restart_unlock() or when the protocol is stopped.
|
||||||
|
*
|
||||||
|
* The function has to be called during the initial phase of graceful restart
|
||||||
|
* recovery and only for protocols that are part of graceful restart (i.e. their
|
||||||
|
* @gr_recovery is set), which means it should be called from protocol start
|
||||||
|
* hooks.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
proto_graceful_restart_lock(struct proto *p)
|
||||||
|
{
|
||||||
|
ASSERT(graceful_restart_state == GRS_INIT);
|
||||||
|
ASSERT(p->gr_recovery);
|
||||||
|
|
||||||
|
if (p->gr_lock)
|
||||||
|
return;
|
||||||
|
|
||||||
|
p->gr_lock = 1;
|
||||||
|
graceful_restart_locks++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* proto_graceful_restart_unlock - unlock graceful restart by protocol
|
||||||
|
* @p: protocol instance
|
||||||
|
*
|
||||||
|
* This function unlocks a lock from proto_graceful_restart_lock(). It is also
|
||||||
|
* automatically called when the lock holding protocol went down.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
proto_graceful_restart_unlock(struct proto *p)
|
||||||
|
{
|
||||||
|
if (!p->gr_lock)
|
||||||
|
return;
|
||||||
|
|
||||||
|
p->gr_lock = 0;
|
||||||
|
graceful_restart_locks--;
|
||||||
|
|
||||||
|
if ((graceful_restart_state == GRS_ACTIVE) && !graceful_restart_locks)
|
||||||
|
tm_start(gr_wait_timer, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* protos_dump_all - dump status of all protocols
|
* protos_dump_all - dump status of all protocols
|
||||||
*
|
*
|
||||||
|
@ -753,41 +943,23 @@ protos_build(void)
|
||||||
proto_shutdown_timer->hook = proto_shutdown_loop;
|
proto_shutdown_timer->hook = proto_shutdown_loop;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
proto_fell_down(struct proto *p)
|
|
||||||
{
|
|
||||||
DBG("Protocol %s down\n", p->name);
|
|
||||||
|
|
||||||
u32 all_routes = p->stats.imp_routes + p->stats.filt_routes;
|
|
||||||
if (all_routes != 0)
|
|
||||||
log(L_ERR "Protocol %s is down but still has %d routes", p->name, all_routes);
|
|
||||||
|
|
||||||
bzero(&p->stats, sizeof(struct proto_stats));
|
|
||||||
proto_free_ahooks(p);
|
|
||||||
|
|
||||||
if (! p->proto->multitable)
|
|
||||||
rt_unlock_table(p->table);
|
|
||||||
|
|
||||||
if (p->proto->cleanup)
|
|
||||||
p->proto->cleanup(p);
|
|
||||||
|
|
||||||
proto_rethink_goal(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
proto_feed_more(void *P)
|
proto_feed_more(void *P)
|
||||||
{
|
{
|
||||||
struct proto *p = P;
|
struct proto *p = P;
|
||||||
|
|
||||||
if (p->core_state != FS_FEEDING)
|
if (p->export_state != ES_FEEDING)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
DBG("Feeding protocol %s continued\n", p->name);
|
DBG("Feeding protocol %s continued\n", p->name);
|
||||||
if (rt_feed_baby(p))
|
if (rt_feed_baby(p))
|
||||||
{
|
{
|
||||||
p->core_state = FS_HAPPY;
|
DBG("Feeding protocol %s finished\n", p->name);
|
||||||
proto_relink(p);
|
p->export_state = ES_READY;
|
||||||
DBG("Protocol %s up and running\n", p->name);
|
proto_log_state_change(p);
|
||||||
|
|
||||||
|
if (p->feed_done)
|
||||||
|
p->feed_done(p);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -801,7 +973,7 @@ proto_feed_initial(void *P)
|
||||||
{
|
{
|
||||||
struct proto *p = P;
|
struct proto *p = P;
|
||||||
|
|
||||||
if (p->core_state != FS_FEEDING)
|
if (p->export_state != ES_FEEDING)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
DBG("Feeding protocol %s\n", p->name);
|
DBG("Feeding protocol %s\n", p->name);
|
||||||
|
@ -814,40 +986,10 @@ static void
|
||||||
proto_schedule_feed(struct proto *p, int initial)
|
proto_schedule_feed(struct proto *p, int initial)
|
||||||
{
|
{
|
||||||
DBG("%s: Scheduling meal\n", p->name);
|
DBG("%s: Scheduling meal\n", p->name);
|
||||||
p->core_state = FS_FEEDING;
|
|
||||||
|
p->export_state = ES_FEEDING;
|
||||||
p->refeeding = !initial;
|
p->refeeding = !initial;
|
||||||
|
|
||||||
/* FIXME: This should be changed for better support of multitable protos */
|
|
||||||
if (!initial)
|
|
||||||
{
|
|
||||||
struct announce_hook *ah;
|
|
||||||
for (ah = p->ahooks; ah; ah = ah->next)
|
|
||||||
proto_reset_limit(ah->out_limit);
|
|
||||||
|
|
||||||
/* Hack: reset exp_routes during refeed, and do not decrease it later */
|
|
||||||
p->stats.exp_routes = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Connect protocol to routing table */
|
|
||||||
if (initial && !p->proto->multitable)
|
|
||||||
{
|
|
||||||
p->main_source = rt_get_source(p, 0);
|
|
||||||
rt_lock_source(p->main_source);
|
|
||||||
|
|
||||||
p->main_ahook = proto_add_announce_hook(p, p->table, &p->stats);
|
|
||||||
p->main_ahook->in_filter = p->cf->in_filter;
|
|
||||||
p->main_ahook->out_filter = p->cf->out_filter;
|
|
||||||
p->main_ahook->rx_limit = p->cf->rx_limit;
|
|
||||||
p->main_ahook->in_limit = p->cf->in_limit;
|
|
||||||
p->main_ahook->out_limit = p->cf->out_limit;
|
|
||||||
p->main_ahook->in_keep_filtered = p->cf->in_keep_filtered;
|
|
||||||
|
|
||||||
proto_reset_limit(p->main_ahook->rx_limit);
|
|
||||||
proto_reset_limit(p->main_ahook->in_limit);
|
|
||||||
proto_reset_limit(p->main_ahook->out_limit);
|
|
||||||
}
|
|
||||||
|
|
||||||
proto_relink(p);
|
|
||||||
p->attn->hook = initial ? proto_feed_initial : proto_feed_more;
|
p->attn->hook = initial ? proto_feed_initial : proto_feed_more;
|
||||||
ev_schedule(p->attn);
|
ev_schedule(p->attn);
|
||||||
}
|
}
|
||||||
|
@ -877,7 +1019,7 @@ proto_schedule_flush_loop(void)
|
||||||
{
|
{
|
||||||
p->flushing = 1;
|
p->flushing = 1;
|
||||||
for (h=p->ahooks; h; h=h->next)
|
for (h=p->ahooks; h; h=h->next)
|
||||||
h->table->prune_state = 1;
|
rt_mark_for_prune(h->table);
|
||||||
}
|
}
|
||||||
|
|
||||||
ev_schedule(proto_flush_event);
|
ev_schedule(proto_flush_event);
|
||||||
|
@ -910,6 +1052,7 @@ proto_flush_loop(void *unused UNUSED)
|
||||||
p->flushing = 0;
|
p->flushing = 0;
|
||||||
p->core_state = FS_HUNGRY;
|
p->core_state = FS_HUNGRY;
|
||||||
proto_relink(p);
|
proto_relink(p);
|
||||||
|
proto_log_state_change(p);
|
||||||
if (p->proto_state == PS_DOWN)
|
if (p->proto_state == PS_DOWN)
|
||||||
proto_fell_down(p);
|
proto_fell_down(p);
|
||||||
goto again;
|
goto again;
|
||||||
|
@ -921,19 +1064,6 @@ proto_flush_loop(void *unused UNUSED)
|
||||||
proto_schedule_flush_loop();
|
proto_schedule_flush_loop();
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
proto_schedule_flush(struct proto *p)
|
|
||||||
{
|
|
||||||
/* Need to abort feeding */
|
|
||||||
if (p->core_state == FS_FEEDING)
|
|
||||||
rt_feed_baby_abort(p);
|
|
||||||
|
|
||||||
DBG("%s: Scheduling flush\n", p->name);
|
|
||||||
p->core_state = FS_FLUSHING;
|
|
||||||
proto_relink(p);
|
|
||||||
proto_unlink_ahooks(p);
|
|
||||||
proto_schedule_flush_loop();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Temporary hack to propagate restart to BGP */
|
/* Temporary hack to propagate restart to BGP */
|
||||||
int proto_restart;
|
int proto_restart;
|
||||||
|
@ -980,9 +1110,9 @@ proto_schedule_down(struct proto *p, byte restart, byte code)
|
||||||
*
|
*
|
||||||
* Sometimes it is needed to send again all routes to the
|
* Sometimes it is needed to send again all routes to the
|
||||||
* protocol. This is called feeding and can be requested by this
|
* protocol. This is called feeding and can be requested by this
|
||||||
* function. This would cause protocol core state transition
|
* function. This would cause protocol export state transition
|
||||||
* to FS_FEEDING (during feeding) and when completed, it will
|
* to ES_FEEDING (during feeding) and when completed, it will
|
||||||
* switch back to FS_HAPPY. This function can be called even
|
* switch back to ES_READY. This function can be called even
|
||||||
* when feeding is already running, in that case it is restarted.
|
* when feeding is already running, in that case it is restarted.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
|
@ -990,8 +1120,12 @@ proto_request_feeding(struct proto *p)
|
||||||
{
|
{
|
||||||
ASSERT(p->proto_state == PS_UP);
|
ASSERT(p->proto_state == PS_UP);
|
||||||
|
|
||||||
|
/* Do nothing if we are still waiting for feeding */
|
||||||
|
if (p->export_state == ES_DOWN)
|
||||||
|
return;
|
||||||
|
|
||||||
/* If we are already feeding, we want to restart it */
|
/* If we are already feeding, we want to restart it */
|
||||||
if (p->core_state == FS_FEEDING)
|
if (p->export_state == ES_FEEDING)
|
||||||
{
|
{
|
||||||
/* Unless feeding is in initial state */
|
/* Unless feeding is in initial state */
|
||||||
if (p->attn->hook == proto_feed_initial)
|
if (p->attn->hook == proto_feed_initial)
|
||||||
|
@ -1000,7 +1134,16 @@ proto_request_feeding(struct proto *p)
|
||||||
rt_feed_baby_abort(p);
|
rt_feed_baby_abort(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* FIXME: This should be changed for better support of multitable protos */
|
||||||
|
struct announce_hook *ah;
|
||||||
|
for (ah = p->ahooks; ah; ah = ah->next)
|
||||||
|
proto_reset_limit(ah->out_limit);
|
||||||
|
|
||||||
|
/* Hack: reset exp_routes during refeed, and do not decrease it later */
|
||||||
|
p->stats.exp_routes = 0;
|
||||||
|
|
||||||
proto_schedule_feed(p, 0);
|
proto_schedule_feed(p, 0);
|
||||||
|
proto_log_state_change(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char *
|
static const char *
|
||||||
|
@ -1060,6 +1203,106 @@ proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, int dir, u32
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
proto_want_core_up(struct proto *p)
|
||||||
|
{
|
||||||
|
ASSERT(p->core_state == FS_HUNGRY);
|
||||||
|
|
||||||
|
if (!p->proto->multitable)
|
||||||
|
{
|
||||||
|
p->main_source = rt_get_source(p, 0);
|
||||||
|
rt_lock_source(p->main_source);
|
||||||
|
|
||||||
|
/* Connect protocol to routing table */
|
||||||
|
p->main_ahook = proto_add_announce_hook(p, p->table, &p->stats);
|
||||||
|
p->main_ahook->in_filter = p->cf->in_filter;
|
||||||
|
p->main_ahook->out_filter = p->cf->out_filter;
|
||||||
|
p->main_ahook->rx_limit = p->cf->rx_limit;
|
||||||
|
p->main_ahook->in_limit = p->cf->in_limit;
|
||||||
|
p->main_ahook->out_limit = p->cf->out_limit;
|
||||||
|
p->main_ahook->in_keep_filtered = p->cf->in_keep_filtered;
|
||||||
|
|
||||||
|
proto_reset_limit(p->main_ahook->rx_limit);
|
||||||
|
proto_reset_limit(p->main_ahook->in_limit);
|
||||||
|
proto_reset_limit(p->main_ahook->out_limit);
|
||||||
|
}
|
||||||
|
|
||||||
|
p->core_state = FS_HAPPY;
|
||||||
|
proto_relink(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
proto_want_export_up(struct proto *p)
|
||||||
|
{
|
||||||
|
ASSERT(p->core_state == CS_HAPPY);
|
||||||
|
ASSERT(p->export_state == ES_DOWN);
|
||||||
|
|
||||||
|
proto_link_ahooks(p);
|
||||||
|
proto_schedule_feed(p, 1); /* Sets ES_FEEDING */
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
proto_want_export_down(struct proto *p)
|
||||||
|
{
|
||||||
|
ASSERT(p->export_state != ES_DOWN);
|
||||||
|
|
||||||
|
/* Need to abort feeding */
|
||||||
|
if (p->export_state == ES_FEEDING)
|
||||||
|
rt_feed_baby_abort(p);
|
||||||
|
|
||||||
|
p->export_state = ES_DOWN;
|
||||||
|
proto_unlink_ahooks(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
proto_want_core_down(struct proto *p)
|
||||||
|
{
|
||||||
|
ASSERT(p->core_state == CS_HAPPY);
|
||||||
|
ASSERT(p->export_state == ES_DOWN);
|
||||||
|
|
||||||
|
p->core_state = FS_FLUSHING;
|
||||||
|
proto_relink(p);
|
||||||
|
proto_schedule_flush_loop();
|
||||||
|
|
||||||
|
if (!p->proto->multitable)
|
||||||
|
{
|
||||||
|
rt_unlock_source(p->main_source);
|
||||||
|
p->main_source = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
proto_falling_down(struct proto *p)
|
||||||
|
{
|
||||||
|
p->gr_recovery = 0;
|
||||||
|
p->gr_wait = 0;
|
||||||
|
if (p->gr_lock)
|
||||||
|
proto_graceful_restart_unlock(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
proto_fell_down(struct proto *p)
|
||||||
|
{
|
||||||
|
DBG("Protocol %s down\n", p->name);
|
||||||
|
|
||||||
|
u32 all_routes = p->stats.imp_routes + p->stats.filt_routes;
|
||||||
|
if (all_routes != 0)
|
||||||
|
log(L_ERR "Protocol %s is down but still has %d routes", p->name, all_routes);
|
||||||
|
|
||||||
|
bzero(&p->stats, sizeof(struct proto_stats));
|
||||||
|
proto_free_ahooks(p);
|
||||||
|
|
||||||
|
if (! p->proto->multitable)
|
||||||
|
rt_unlock_table(p->table);
|
||||||
|
|
||||||
|
if (p->proto->cleanup)
|
||||||
|
p->proto->cleanup(p);
|
||||||
|
|
||||||
|
proto_rethink_goal(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* proto_notify_state - notify core about protocol state change
|
* proto_notify_state - notify core about protocol state change
|
||||||
* @p: protocol the state of which has changed
|
* @p: protocol the state of which has changed
|
||||||
|
@ -1079,6 +1322,7 @@ proto_notify_state(struct proto *p, unsigned ps)
|
||||||
{
|
{
|
||||||
unsigned ops = p->proto_state;
|
unsigned ops = p->proto_state;
|
||||||
unsigned cs = p->core_state;
|
unsigned cs = p->core_state;
|
||||||
|
unsigned es = p->export_state;
|
||||||
|
|
||||||
DBG("%s reporting state transition %s/%s -> */%s\n", p->name, c_states[cs], p_states[ops], p_states[ps]);
|
DBG("%s reporting state transition %s/%s -> */%s\n", p->name, c_states[cs], p_states[ops], p_states[ps]);
|
||||||
if (ops == ps)
|
if (ops == ps)
|
||||||
|
@ -1089,17 +1333,47 @@ proto_notify_state(struct proto *p, unsigned ps)
|
||||||
|
|
||||||
switch (ps)
|
switch (ps)
|
||||||
{
|
{
|
||||||
|
case PS_START:
|
||||||
|
ASSERT(ops == PS_DOWN || ops == PS_UP);
|
||||||
|
ASSERT(cs == FS_HUNGRY || cs == FS_HAPPY);
|
||||||
|
|
||||||
|
if (es != ES_DOWN)
|
||||||
|
proto_want_export_down(p);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PS_UP:
|
||||||
|
ASSERT(ops == PS_DOWN || ops == PS_START);
|
||||||
|
ASSERT(cs == FS_HUNGRY || cs == FS_HAPPY);
|
||||||
|
ASSERT(es == ES_DOWN);
|
||||||
|
|
||||||
|
if (cs == FS_HUNGRY)
|
||||||
|
proto_want_core_up(p);
|
||||||
|
if (!p->gr_wait)
|
||||||
|
proto_want_export_up(p);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PS_STOP:
|
||||||
|
ASSERT(ops == PS_START || ops == PS_UP);
|
||||||
|
|
||||||
|
p->down_sched = 0;
|
||||||
|
|
||||||
|
if (es != ES_DOWN)
|
||||||
|
proto_want_export_down(p);
|
||||||
|
if (cs == FS_HAPPY)
|
||||||
|
proto_want_core_down(p);
|
||||||
|
proto_falling_down(p);
|
||||||
|
break;
|
||||||
|
|
||||||
case PS_DOWN:
|
case PS_DOWN:
|
||||||
p->down_code = 0;
|
p->down_code = 0;
|
||||||
p->down_sched = 0;
|
p->down_sched = 0;
|
||||||
if ((cs == FS_FEEDING) || (cs == FS_HAPPY))
|
|
||||||
proto_schedule_flush(p);
|
|
||||||
|
|
||||||
if (!p->proto->multitable)
|
if (es != ES_DOWN)
|
||||||
{
|
proto_want_export_down(p);
|
||||||
rt_unlock_source(p->main_source);
|
if (cs == FS_HAPPY)
|
||||||
p->main_source = NULL;
|
proto_want_core_down(p);
|
||||||
}
|
if (ops != PS_STOP)
|
||||||
|
proto_falling_down(p);
|
||||||
|
|
||||||
neigh_prune(); // FIXME convert neighbors to resource?
|
neigh_prune(); // FIXME convert neighbors to resource?
|
||||||
rfree(p->pool);
|
rfree(p->pool);
|
||||||
|
@ -1107,27 +1381,17 @@ proto_notify_state(struct proto *p, unsigned ps)
|
||||||
|
|
||||||
if (cs == FS_HUNGRY) /* Shutdown finished */
|
if (cs == FS_HUNGRY) /* Shutdown finished */
|
||||||
{
|
{
|
||||||
|
proto_log_state_change(p);
|
||||||
proto_fell_down(p);
|
proto_fell_down(p);
|
||||||
return; /* The protocol might have ceased to exist */
|
return; /* The protocol might have ceased to exist */
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case PS_START:
|
|
||||||
ASSERT(ops == PS_DOWN);
|
|
||||||
ASSERT(cs == FS_HUNGRY);
|
|
||||||
break;
|
|
||||||
case PS_UP:
|
|
||||||
ASSERT(ops == PS_DOWN || ops == PS_START);
|
|
||||||
ASSERT(cs == FS_HUNGRY);
|
|
||||||
proto_schedule_feed(p, 1);
|
|
||||||
break;
|
|
||||||
case PS_STOP:
|
|
||||||
p->down_sched = 0;
|
|
||||||
if ((cs == FS_FEEDING) || (cs == FS_HAPPY))
|
|
||||||
proto_schedule_flush(p);
|
|
||||||
break;
|
|
||||||
default:
|
default:
|
||||||
bug("Invalid state transition for %s from %s/%s to */%s", p->name, c_states[cs], p_states[ops], p_states[ps]);
|
bug("%s: Invalid state %d", p->name, ps);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
proto_log_state_change(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1141,12 +1405,18 @@ proto_state_name(struct proto *p)
|
||||||
switch (P(p->proto_state, p->core_state))
|
switch (P(p->proto_state, p->core_state))
|
||||||
{
|
{
|
||||||
case P(PS_DOWN, FS_HUNGRY): return "down";
|
case P(PS_DOWN, FS_HUNGRY): return "down";
|
||||||
case P(PS_START, FS_HUNGRY): return "start";
|
case P(PS_START, FS_HUNGRY):
|
||||||
case P(PS_UP, FS_HUNGRY):
|
case P(PS_START, FS_HAPPY): return "start";
|
||||||
case P(PS_UP, FS_FEEDING): return "feed";
|
case P(PS_UP, FS_HAPPY):
|
||||||
case P(PS_STOP, FS_HUNGRY): return "stop";
|
switch (p->export_state)
|
||||||
case P(PS_UP, FS_HAPPY): return "up";
|
{
|
||||||
case P(PS_STOP, FS_FLUSHING):
|
case ES_DOWN: return "wait";
|
||||||
|
case ES_FEEDING: return "feed";
|
||||||
|
case ES_READY: return "up";
|
||||||
|
default: return "???";
|
||||||
|
}
|
||||||
|
case P(PS_STOP, FS_HUNGRY):
|
||||||
|
case P(PS_STOP, FS_FLUSHING): return "stop";
|
||||||
case P(PS_DOWN, FS_FLUSHING): return "flush";
|
case P(PS_DOWN, FS_FLUSHING): return "flush";
|
||||||
default: return "???";
|
default: return "???";
|
||||||
}
|
}
|
||||||
|
@ -1196,6 +1466,11 @@ proto_show_basic_info(struct proto *p)
|
||||||
cli_msg(-1006, " Input filter: %s", filter_name(p->cf->in_filter));
|
cli_msg(-1006, " Input filter: %s", filter_name(p->cf->in_filter));
|
||||||
cli_msg(-1006, " Output filter: %s", filter_name(p->cf->out_filter));
|
cli_msg(-1006, " Output filter: %s", filter_name(p->cf->out_filter));
|
||||||
|
|
||||||
|
if (graceful_restart_state == GRS_ACTIVE)
|
||||||
|
cli_msg(-1006, " GR recovery: %s%s",
|
||||||
|
p->gr_lock ? " pending" : "",
|
||||||
|
p->gr_wait ? " waiting" : "");
|
||||||
|
|
||||||
proto_show_limit(p->cf->rx_limit, "Receive limit:");
|
proto_show_limit(p->cf->rx_limit, "Receive limit:");
|
||||||
proto_show_limit(p->cf->in_limit, "Import limit:");
|
proto_show_limit(p->cf->in_limit, "Import limit:");
|
||||||
proto_show_limit(p->cf->out_limit, "Export limit:");
|
proto_show_limit(p->cf->out_limit, "Export limit:");
|
||||||
|
|
|
@ -148,10 +148,13 @@ struct proto {
|
||||||
byte disabled; /* Manually disabled */
|
byte disabled; /* Manually disabled */
|
||||||
byte proto_state; /* Protocol state machine (PS_*, see below) */
|
byte proto_state; /* Protocol state machine (PS_*, see below) */
|
||||||
byte core_state; /* Core state machine (FS_*, see below) */
|
byte core_state; /* Core state machine (FS_*, see below) */
|
||||||
byte core_goal; /* State we want to reach (FS_*, see below) */
|
byte export_state; /* Route export state (ES_*, see below) */
|
||||||
byte reconfiguring; /* We're shutting down due to reconfiguration */
|
byte reconfiguring; /* We're shutting down due to reconfiguration */
|
||||||
byte refeeding; /* We are refeeding (valid only if core_state == FS_FEEDING) */
|
byte refeeding; /* We are refeeding (valid only if export_state == ES_FEEDING) */
|
||||||
byte flushing; /* Protocol is flushed in current flush loop round */
|
byte flushing; /* Protocol is flushed in current flush loop round */
|
||||||
|
byte gr_recovery; /* Protocol should participate in graceful restart recovery */
|
||||||
|
byte gr_lock; /* Graceful restart mechanism should wait for this proto */
|
||||||
|
byte gr_wait; /* Route export to protocol is postponed until graceful restart */
|
||||||
byte down_sched; /* Shutdown is scheduled for later (PDS_*) */
|
byte down_sched; /* Shutdown is scheduled for later (PDS_*) */
|
||||||
byte down_code; /* Reason for shutdown (PDC_* codes) */
|
byte down_code; /* Reason for shutdown (PDC_* codes) */
|
||||||
u32 hash_key; /* Random key used for hashing of neighbors */
|
u32 hash_key; /* Random key used for hashing of neighbors */
|
||||||
|
@ -175,6 +178,7 @@ struct proto {
|
||||||
* reload_routes Request protocol to reload all its routes to the core
|
* reload_routes Request protocol to reload all its routes to the core
|
||||||
* (using rte_update()). Returns: 0=reload cannot be done,
|
* (using rte_update()). Returns: 0=reload cannot be done,
|
||||||
* 1= reload is scheduled and will happen (asynchronously).
|
* 1= reload is scheduled and will happen (asynchronously).
|
||||||
|
* feed_done Notify protocol about finish of route feeding.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void (*if_notify)(struct proto *, unsigned flags, struct iface *i);
|
void (*if_notify)(struct proto *, unsigned flags, struct iface *i);
|
||||||
|
@ -185,6 +189,7 @@ struct proto {
|
||||||
void (*store_tmp_attrs)(struct rte *rt, struct ea_list *attrs);
|
void (*store_tmp_attrs)(struct rte *rt, struct ea_list *attrs);
|
||||||
int (*import_control)(struct proto *, struct rte **rt, struct ea_list **attrs, struct linpool *pool);
|
int (*import_control)(struct proto *, struct rte **rt, struct ea_list **attrs, struct linpool *pool);
|
||||||
int (*reload_routes)(struct proto *);
|
int (*reload_routes)(struct proto *);
|
||||||
|
void (*feed_done)(struct proto *);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Routing entry hooks (called only for routes belonging to this protocol):
|
* Routing entry hooks (called only for routes belonging to this protocol):
|
||||||
|
@ -242,6 +247,13 @@ static inline void
|
||||||
proto_copy_rest(struct proto_config *dest, struct proto_config *src, unsigned size)
|
proto_copy_rest(struct proto_config *dest, struct proto_config *src, unsigned size)
|
||||||
{ memcpy(dest + 1, src + 1, size - sizeof(struct proto_config)); }
|
{ memcpy(dest + 1, src + 1, size - sizeof(struct proto_config)); }
|
||||||
|
|
||||||
|
void graceful_restart_recovery(void);
|
||||||
|
void graceful_restart_init(void);
|
||||||
|
void graceful_restart_show_status(void);
|
||||||
|
void proto_graceful_restart_lock(struct proto *p);
|
||||||
|
void proto_graceful_restart_unlock(struct proto *p);
|
||||||
|
|
||||||
|
#define DEFAULT_GR_WAIT 240
|
||||||
|
|
||||||
void proto_show_limit(struct proto_limit *l, const char *dsc);
|
void proto_show_limit(struct proto_limit *l, const char *dsc);
|
||||||
void proto_show_basic_info(struct proto *p);
|
void proto_show_basic_info(struct proto *p);
|
||||||
|
@ -343,10 +355,17 @@ void proto_notify_state(struct proto *p, unsigned state);
|
||||||
* as a result of received ROUTE-REFRESH request).
|
* as a result of received ROUTE-REFRESH request).
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define FS_HUNGRY 0
|
#define FS_HUNGRY 0
|
||||||
#define FS_FEEDING 1
|
#define FS_FEEDING 1 /* obsolete */
|
||||||
#define FS_HAPPY 2
|
#define FS_HAPPY 2
|
||||||
#define FS_FLUSHING 3
|
#define FS_FLUSHING 3
|
||||||
|
|
||||||
|
|
||||||
|
#define ES_DOWN 0
|
||||||
|
#define ES_FEEDING 1
|
||||||
|
#define ES_READY 2
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Debugging flags
|
* Debugging flags
|
||||||
|
|
17
nest/route.h
17
nest/route.h
|
@ -148,6 +148,10 @@ typedef struct rtable {
|
||||||
struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */
|
struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */
|
||||||
} rtable;
|
} rtable;
|
||||||
|
|
||||||
|
#define RPS_NONE 0
|
||||||
|
#define RPS_SCHEDULED 1
|
||||||
|
#define RPS_RUNNING 2
|
||||||
|
|
||||||
typedef struct network {
|
typedef struct network {
|
||||||
struct fib_node n; /* FIB flags reserved for kernel syncer */
|
struct fib_node n; /* FIB flags reserved for kernel syncer */
|
||||||
struct rte *routes; /* Available routes for this network */
|
struct rte *routes; /* Available routes for this network */
|
||||||
|
@ -222,6 +226,8 @@ typedef struct rte {
|
||||||
|
|
||||||
#define REF_COW 1 /* Copy this rte on write */
|
#define REF_COW 1 /* Copy this rte on write */
|
||||||
#define REF_FILTERED 2 /* Route is rejected by import filter */
|
#define REF_FILTERED 2 /* Route is rejected by import filter */
|
||||||
|
#define REF_STALE 4 /* Route is stale in a refresh cycle */
|
||||||
|
#define REF_DISCARD 8 /* Route is scheduled for discard */
|
||||||
|
|
||||||
/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */
|
/* Route is valid for propagation (may depend on other flags in the future), accepts NULL */
|
||||||
static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); }
|
static inline int rte_is_valid(rte *r) { return r && !(r->flags & REF_FILTERED); }
|
||||||
|
@ -257,6 +263,8 @@ void rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *s
|
||||||
static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); }
|
static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); }
|
||||||
void rte_discard(rtable *tab, rte *old);
|
void rte_discard(rtable *tab, rte *old);
|
||||||
int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter);
|
int rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter *filter);
|
||||||
|
void rt_refresh_begin(rtable *t, struct announce_hook *ah);
|
||||||
|
void rt_refresh_end(rtable *t, struct announce_hook *ah);
|
||||||
void rte_dump(rte *);
|
void rte_dump(rte *);
|
||||||
void rte_free(rte *);
|
void rte_free(rte *);
|
||||||
rte *rte_do_cow(rte *);
|
rte *rte_do_cow(rte *);
|
||||||
|
@ -268,6 +276,15 @@ void rt_feed_baby_abort(struct proto *p);
|
||||||
int rt_prune_loop(void);
|
int rt_prune_loop(void);
|
||||||
struct rtable_config *rt_new_table(struct symbol *s);
|
struct rtable_config *rt_new_table(struct symbol *s);
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
rt_mark_for_prune(rtable *tab)
|
||||||
|
{
|
||||||
|
if (tab->prune_state == RPS_RUNNING)
|
||||||
|
fit_get(&tab->fib, &tab->prune_fit);
|
||||||
|
|
||||||
|
tab->prune_state = RPS_SCHEDULED;
|
||||||
|
}
|
||||||
|
|
||||||
struct rt_show_data {
|
struct rt_show_data {
|
||||||
ip_addr prefix;
|
ip_addr prefix;
|
||||||
unsigned pxlen;
|
unsigned pxlen;
|
||||||
|
|
153
nest/rt-table.c
153
nest/rt-table.c
|
@ -55,8 +55,10 @@ static void rt_free_hostcache(rtable *tab);
|
||||||
static void rt_notify_hostcache(rtable *tab, net *net);
|
static void rt_notify_hostcache(rtable *tab, net *net);
|
||||||
static void rt_update_hostcache(rtable *tab);
|
static void rt_update_hostcache(rtable *tab);
|
||||||
static void rt_next_hop_update(rtable *tab);
|
static void rt_next_hop_update(rtable *tab);
|
||||||
|
static inline int rt_prune_table(rtable *tab);
|
||||||
static inline void rt_schedule_gc(rtable *tab);
|
static inline void rt_schedule_gc(rtable *tab);
|
||||||
|
static inline void rt_schedule_prune(rtable *tab);
|
||||||
|
|
||||||
|
|
||||||
static inline struct ea_list *
|
static inline struct ea_list *
|
||||||
make_tmp_attrs(struct rte *rt, struct linpool *pool)
|
make_tmp_attrs(struct rte *rt, struct linpool *pool)
|
||||||
|
@ -570,7 +572,7 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *befo
|
||||||
struct announce_hook *a;
|
struct announce_hook *a;
|
||||||
WALK_LIST(a, tab->hooks)
|
WALK_LIST(a, tab->hooks)
|
||||||
{
|
{
|
||||||
ASSERT(a->proto->core_state == FS_HAPPY || a->proto->core_state == FS_FEEDING);
|
ASSERT(a->proto->export_state != ES_DOWN);
|
||||||
if (a->proto->accept_ra_types == type)
|
if (a->proto->accept_ra_types == type)
|
||||||
if (type == RA_ACCEPTED)
|
if (type == RA_ACCEPTED)
|
||||||
rt_notify_accepted(a, net, new, old, before_old, tmpa, 0);
|
rt_notify_accepted(a, net, new, old, before_old, tmpa, 0);
|
||||||
|
@ -1108,6 +1110,69 @@ rt_examine(rtable *t, ip_addr prefix, int pxlen, struct proto *p, struct filter
|
||||||
return v > 0;
|
return v > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* rt_refresh_begin - start a refresh cycle
|
||||||
|
* @t: related routing table
|
||||||
|
* @ah: related announce hook
|
||||||
|
*
|
||||||
|
* This function starts a refresh cycle for given routing table and announce
|
||||||
|
* hook. The refresh cycle is a sequence where the protocol sends all its valid
|
||||||
|
* routes to the routing table (by rte_update()). After that, all protocol
|
||||||
|
* routes (more precisely routes with @ah as @sender) not sent during the
|
||||||
|
* refresh cycle but still in the table from the past are pruned. This is
|
||||||
|
* implemented by marking all related routes as stale by REF_STALE flag in
|
||||||
|
* rt_refresh_begin(), then marking all related stale routes with REF_DISCARD
|
||||||
|
* flag in rt_refresh_end() and then removing such routes in the prune loop.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
rt_refresh_begin(rtable *t, struct announce_hook *ah)
|
||||||
|
{
|
||||||
|
net *n;
|
||||||
|
rte *e;
|
||||||
|
|
||||||
|
FIB_WALK(&t->fib, fn)
|
||||||
|
{
|
||||||
|
n = (net *) fn;
|
||||||
|
for (e = n->routes; e; e = e->next)
|
||||||
|
if (e->sender == ah)
|
||||||
|
e->flags |= REF_STALE;
|
||||||
|
}
|
||||||
|
FIB_WALK_END;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* rt_refresh_end - end a refresh cycle
|
||||||
|
* @t: related routing table
|
||||||
|
* @ah: related announce hook
|
||||||
|
*
|
||||||
|
* This function ends a refresh cycle for given routing table and announce
|
||||||
|
* hook. See rt_refresh_begin() for description of refresh cycles.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
rt_refresh_end(rtable *t, struct announce_hook *ah)
|
||||||
|
{
|
||||||
|
int prune = 0;
|
||||||
|
net *n;
|
||||||
|
rte *e;
|
||||||
|
|
||||||
|
FIB_WALK(&t->fib, fn)
|
||||||
|
{
|
||||||
|
n = (net *) fn;
|
||||||
|
for (e = n->routes; e; e = e->next)
|
||||||
|
if ((e->sender == ah) && (e->flags & REF_STALE))
|
||||||
|
{
|
||||||
|
e->flags |= REF_DISCARD;
|
||||||
|
prune = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FIB_WALK_END;
|
||||||
|
|
||||||
|
if (prune)
|
||||||
|
rt_schedule_prune(t);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* rte_dump - dump a route
|
* rte_dump - dump a route
|
||||||
* @e: &rte to be dumped
|
* @e: &rte to be dumped
|
||||||
|
@ -1169,6 +1234,13 @@ rt_dump_all(void)
|
||||||
rt_dump(t);
|
rt_dump(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
rt_schedule_prune(rtable *tab)
|
||||||
|
{
|
||||||
|
rt_mark_for_prune(tab);
|
||||||
|
ev_schedule(tab->rt_event);
|
||||||
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
rt_schedule_gc(rtable *tab)
|
rt_schedule_gc(rtable *tab)
|
||||||
{
|
{
|
||||||
|
@ -1199,6 +1271,7 @@ rt_schedule_nhu(rtable *tab)
|
||||||
tab->nhu_state |= 1;
|
tab->nhu_state |= 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
rt_prune_nets(rtable *tab)
|
rt_prune_nets(rtable *tab)
|
||||||
{
|
{
|
||||||
|
@ -1242,6 +1315,14 @@ rt_event(void *ptr)
|
||||||
if (tab->nhu_state)
|
if (tab->nhu_state)
|
||||||
rt_next_hop_update(tab);
|
rt_next_hop_update(tab);
|
||||||
|
|
||||||
|
if (tab->prune_state)
|
||||||
|
if (!rt_prune_table(tab))
|
||||||
|
{
|
||||||
|
/* Table prune unfinished */
|
||||||
|
ev_schedule(tab->rt_event);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (tab->gc_scheduled)
|
if (tab->gc_scheduled)
|
||||||
{
|
{
|
||||||
rt_prune_nets(tab);
|
rt_prune_nets(tab);
|
||||||
|
@ -1283,8 +1364,8 @@ rt_init(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline int
|
static int
|
||||||
rt_prune_step(rtable *tab, int step, int *max_feed)
|
rt_prune_step(rtable *tab, int step, int *limit)
|
||||||
{
|
{
|
||||||
static struct rate_limit rl_flush;
|
static struct rate_limit rl_flush;
|
||||||
struct fib_iterator *fit = &tab->prune_fit;
|
struct fib_iterator *fit = &tab->prune_fit;
|
||||||
|
@ -1294,13 +1375,13 @@ rt_prune_step(rtable *tab, int step, int *max_feed)
|
||||||
fib_check(&tab->fib);
|
fib_check(&tab->fib);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (tab->prune_state == 0)
|
if (tab->prune_state == RPS_NONE)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
if (tab->prune_state == 1)
|
if (tab->prune_state == RPS_SCHEDULED)
|
||||||
{
|
{
|
||||||
FIB_ITERATE_INIT(fit, &tab->fib);
|
FIB_ITERATE_INIT(fit, &tab->fib);
|
||||||
tab->prune_state = 2;
|
tab->prune_state = RPS_RUNNING;
|
||||||
}
|
}
|
||||||
|
|
||||||
again:
|
again:
|
||||||
|
@ -1312,9 +1393,10 @@ again:
|
||||||
rescan:
|
rescan:
|
||||||
for (e=n->routes; e; e=e->next)
|
for (e=n->routes; e; e=e->next)
|
||||||
if (e->sender->proto->flushing ||
|
if (e->sender->proto->flushing ||
|
||||||
|
(e->flags & REF_DISCARD) ||
|
||||||
(step && e->attrs->src->proto->flushing))
|
(step && e->attrs->src->proto->flushing))
|
||||||
{
|
{
|
||||||
if (*max_feed <= 0)
|
if (*limit <= 0)
|
||||||
{
|
{
|
||||||
FIB_ITERATE_PUT(fit, fn);
|
FIB_ITERATE_PUT(fit, fn);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -1325,7 +1407,7 @@ again:
|
||||||
n->n.prefix, n->n.pxlen, e->attrs->src->proto->name, tab->name);
|
n->n.prefix, n->n.pxlen, e->attrs->src->proto->name, tab->name);
|
||||||
|
|
||||||
rte_discard(tab, e);
|
rte_discard(tab, e);
|
||||||
(*max_feed)--;
|
(*limit)--;
|
||||||
|
|
||||||
goto rescan;
|
goto rescan;
|
||||||
}
|
}
|
||||||
|
@ -1342,41 +1424,60 @@ again:
|
||||||
fib_check(&tab->fib);
|
fib_check(&tab->fib);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
tab->prune_state = 0;
|
tab->prune_state = RPS_NONE;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* rt_prune_table - prune a routing table
|
||||||
|
*
|
||||||
|
* This function scans the routing table @tab and removes routes belonging to
|
||||||
|
* flushing protocols, discarded routes and also stale network entries, in a
|
||||||
|
* similar fashion like rt_prune_loop(). Returns 1 when all such routes are
|
||||||
|
* pruned. Contrary to rt_prune_loop(), this function is not a part of the
|
||||||
|
* protocol flushing loop, but it is called from rt_event() for just one routing
|
||||||
|
* table.
|
||||||
|
*
|
||||||
|
* Note that rt_prune_table() and rt_prune_loop() share (for each table) the
|
||||||
|
* prune state (@prune_state) and also the pruning iterator (@prune_fit).
|
||||||
|
*/
|
||||||
|
static inline int
|
||||||
|
rt_prune_table(rtable *tab)
|
||||||
|
{
|
||||||
|
int limit = 512;
|
||||||
|
return rt_prune_step(tab, 0, &limit);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* rt_prune_loop - prune routing tables
|
* rt_prune_loop - prune routing tables
|
||||||
*
|
*
|
||||||
* The prune loop scans routing tables and removes routes belonging to
|
* The prune loop scans routing tables and removes routes belonging to flushing
|
||||||
* flushing protocols and also stale network entries. Returns 1 when
|
* protocols, discarded routes and also stale network entries. Returns 1 when
|
||||||
* all such routes are pruned. It is a part of the protocol flushing
|
* all such routes are pruned. It is a part of the protocol flushing loop.
|
||||||
* loop.
|
|
||||||
*
|
*
|
||||||
* The prune loop runs in two steps. In the first step it prunes just
|
* The prune loop runs in two steps. In the first step it prunes just the routes
|
||||||
* the routes with flushing senders (in explicitly marked tables) so
|
* with flushing senders (in explicitly marked tables) so the route removal is
|
||||||
* the route removal is propagated as usual. In the second step, all
|
* propagated as usual. In the second step, all remaining relevant routes are
|
||||||
* remaining relevant routes are removed. Ideally, there shouldn't be
|
* removed. Ideally, there shouldn't be any, but it happens when pipe filters
|
||||||
* any, but it happens when pipe filters are changed.
|
* are changed.
|
||||||
*/
|
*/
|
||||||
int
|
int
|
||||||
rt_prune_loop(void)
|
rt_prune_loop(void)
|
||||||
{
|
{
|
||||||
static int step = 0;
|
static int step = 0;
|
||||||
int max_feed = 512;
|
int limit = 512;
|
||||||
rtable *t;
|
rtable *t;
|
||||||
|
|
||||||
again:
|
again:
|
||||||
WALK_LIST(t, routing_tables)
|
WALK_LIST(t, routing_tables)
|
||||||
if (! rt_prune_step(t, step, &max_feed))
|
if (! rt_prune_step(t, step, &limit))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (step == 0)
|
if (step == 0)
|
||||||
{
|
{
|
||||||
/* Prepare for the second step */
|
/* Prepare for the second step */
|
||||||
WALK_LIST(t, routing_tables)
|
WALK_LIST(t, routing_tables)
|
||||||
t->prune_state = 1;
|
t->prune_state = RPS_SCHEDULED;
|
||||||
|
|
||||||
step = 1;
|
step = 1;
|
||||||
goto again;
|
goto again;
|
||||||
|
@ -1721,7 +1822,7 @@ again:
|
||||||
(p->accept_ra_types == RA_ACCEPTED))
|
(p->accept_ra_types == RA_ACCEPTED))
|
||||||
if (rte_is_valid(e))
|
if (rte_is_valid(e))
|
||||||
{
|
{
|
||||||
if (p->core_state != FS_FEEDING)
|
if (p->export_state != ES_FEEDING)
|
||||||
return 1; /* In the meantime, the protocol fell down. */
|
return 1; /* In the meantime, the protocol fell down. */
|
||||||
do_feed_baby(p, p->accept_ra_types, h, n, e);
|
do_feed_baby(p, p->accept_ra_types, h, n, e);
|
||||||
max_feed--;
|
max_feed--;
|
||||||
|
@ -1730,7 +1831,7 @@ again:
|
||||||
if (p->accept_ra_types == RA_ANY)
|
if (p->accept_ra_types == RA_ANY)
|
||||||
for(e = n->routes; rte_is_valid(e); e = e->next)
|
for(e = n->routes; rte_is_valid(e); e = e->next)
|
||||||
{
|
{
|
||||||
if (p->core_state != FS_FEEDING)
|
if (p->export_state != ES_FEEDING)
|
||||||
return 1; /* In the meantime, the protocol fell down. */
|
return 1; /* In the meantime, the protocol fell down. */
|
||||||
do_feed_baby(p, RA_ANY, h, n, e);
|
do_feed_baby(p, RA_ANY, h, n, e);
|
||||||
max_feed--;
|
max_feed--;
|
||||||
|
@ -2223,9 +2324,7 @@ rt_show_cont(struct cli *c)
|
||||||
cli_printf(c, 8004, "Stopped due to reconfiguration");
|
cli_printf(c, 8004, "Stopped due to reconfiguration");
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
if (d->export_protocol &&
|
if (d->export_protocol && (d->export_protocol->export_state == ES_DOWN))
|
||||||
d->export_protocol->core_state != FS_HAPPY &&
|
|
||||||
d->export_protocol->core_state != FS_FEEDING)
|
|
||||||
{
|
{
|
||||||
cli_printf(c, 8005, "Protocol is down");
|
cli_printf(c, 8005, "Protocol is down");
|
||||||
goto done;
|
goto done;
|
||||||
|
|
146
proto/bgp/bgp.c
146
proto/bgp/bgp.c
|
@ -51,6 +51,16 @@
|
||||||
* and bgp_encode_attrs() which does the converse. Both functions are built around a
|
* and bgp_encode_attrs() which does the converse. Both functions are built around a
|
||||||
* @bgp_attr_table array describing all important characteristics of all known attributes.
|
* @bgp_attr_table array describing all important characteristics of all known attributes.
|
||||||
* Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
|
* Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
|
||||||
|
*
|
||||||
|
* BGP protocol implements graceful restart in both restarting (local restart)
|
||||||
|
* and receiving (neighbor restart) roles. The first is handled mostly by the
|
||||||
|
* graceful restart code in the nest; the BGP protocol just handles capabilities,
|
||||||
|
* sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
|
||||||
|
* The second is implemented by internal restart of the BGP state to %BS_IDLE
|
||||||
|
* and protocol state to %PS_START, but keeping the protocol up from the core
|
||||||
|
* point of view and therefore maintaining received routes. Routing table
|
||||||
|
* refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
|
||||||
|
* stale routes after reestablishment of BGP session during graceful restart.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#undef LOCAL_DEBUG
|
#undef LOCAL_DEBUG
|
||||||
|
@ -319,6 +329,7 @@ bgp_decision(void *vp)
|
||||||
DBG("BGP: Decision start\n");
|
DBG("BGP: Decision start\n");
|
||||||
if ((p->p.proto_state == PS_START)
|
if ((p->p.proto_state == PS_START)
|
||||||
&& (p->outgoing_conn.state == BS_IDLE)
|
&& (p->outgoing_conn.state == BS_IDLE)
|
||||||
|
&& (p->incoming_conn.state != BS_OPENCONFIRM)
|
||||||
&& (!p->cf->passive))
|
&& (!p->cf->passive))
|
||||||
bgp_active(p);
|
bgp_active(p);
|
||||||
|
|
||||||
|
@ -363,7 +374,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
|
||||||
|
|
||||||
/* For multi-hop BGP sessions */
|
/* For multi-hop BGP sessions */
|
||||||
if (ipa_zero(p->source_addr))
|
if (ipa_zero(p->source_addr))
|
||||||
p->source_addr = conn->sk->saddr;
|
p->source_addr = conn->sk->saddr;
|
||||||
|
|
||||||
p->conn = conn;
|
p->conn = conn;
|
||||||
p->last_error_class = 0;
|
p->last_error_class = 0;
|
||||||
|
@ -371,6 +382,20 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
|
||||||
bgp_init_bucket_table(p);
|
bgp_init_bucket_table(p);
|
||||||
bgp_init_prefix_table(p, 8);
|
bgp_init_prefix_table(p, 8);
|
||||||
|
|
||||||
|
int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART);
|
||||||
|
|
||||||
|
if (p->p.gr_recovery && !peer_gr_ready)
|
||||||
|
proto_graceful_restart_unlock(&p->p);
|
||||||
|
|
||||||
|
if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
|
||||||
|
p->p.gr_wait = 1;
|
||||||
|
|
||||||
|
if (p->gr_active)
|
||||||
|
tm_stop(p->gr_timer);
|
||||||
|
|
||||||
|
if (p->gr_active && (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
|
||||||
|
bgp_graceful_restart_done(p);
|
||||||
|
|
||||||
bgp_conn_set_state(conn, BS_ESTABLISHED);
|
bgp_conn_set_state(conn, BS_ESTABLISHED);
|
||||||
proto_notify_state(&p->p, PS_UP);
|
proto_notify_state(&p->p, PS_UP);
|
||||||
}
|
}
|
||||||
|
@ -416,16 +441,86 @@ bgp_conn_enter_idle_state(struct bgp_conn *conn)
|
||||||
bgp_conn_leave_established_state(p);
|
bgp_conn_leave_established_state(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* bgp_handle_graceful_restart - handle detected BGP graceful restart
|
||||||
|
* @p: BGP instance
|
||||||
|
*
|
||||||
|
* This function is called when a BGP graceful restart of the neighbor is
|
||||||
|
* detected (when the TCP connection fails or when a new TCP connection
|
||||||
|
* appears). The function activates processing of the restart - starts routing
|
||||||
|
* table refresh cycle and activates BGP restart timer. The protocol state goes
|
||||||
|
* back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
|
||||||
|
* caller.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
bgp_handle_graceful_restart(struct bgp_proto *p)
|
||||||
|
{
|
||||||
|
ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
|
||||||
|
|
||||||
|
BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
|
||||||
|
p->gr_active ? " - already pending" : "");
|
||||||
|
proto_notify_state(&p->p, PS_START);
|
||||||
|
|
||||||
|
if (p->gr_active)
|
||||||
|
rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
|
||||||
|
|
||||||
|
p->gr_active = 1;
|
||||||
|
bgp_start_timer(p->gr_timer, p->conn->peer_gr_time);
|
||||||
|
rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* bgp_graceful_restart_done - finish active BGP graceful restart
|
||||||
|
* @p: BGP instance
|
||||||
|
*
|
||||||
|
* This function is called when the active BGP graceful restart of the neighbor
|
||||||
|
* should be finished - either successfully (the neighbor sends all paths and
|
||||||
|
* reports end-of-RIB on the new session) or unsuccessfully (the neighbor does
|
||||||
|
* not support BGP graceful restart on the new session). The function ends
|
||||||
|
* routing table refresh cycle and stops BGP restart timer.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
bgp_graceful_restart_done(struct bgp_proto *p)
|
||||||
|
{
|
||||||
|
BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
|
||||||
|
p->gr_active = 0;
|
||||||
|
tm_stop(p->gr_timer);
|
||||||
|
rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
|
||||||
|
* @t: timer
|
||||||
|
*
|
||||||
|
* This function is a timeout hook for @gr_timer, implementing BGP restart time
|
||||||
|
* limit for reestablishment of the BGP session after the graceful restart. When
|
||||||
|
* fired, we just proceed with the usual protocol restart.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static void
|
||||||
|
bgp_graceful_restart_timeout(timer *t)
|
||||||
|
{
|
||||||
|
struct bgp_proto *p = t->data;
|
||||||
|
|
||||||
|
BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
|
||||||
|
bgp_stop(p, 0);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
bgp_send_open(struct bgp_conn *conn)
|
bgp_send_open(struct bgp_conn *conn)
|
||||||
{
|
{
|
||||||
conn->start_state = conn->bgp->start_state;
|
conn->start_state = conn->bgp->start_state;
|
||||||
|
|
||||||
// Default values, possibly changed by receiving capabilities.
|
// Default values, possibly changed by receiving capabilities.
|
||||||
|
conn->advertised_as = 0;
|
||||||
conn->peer_refresh_support = 0;
|
conn->peer_refresh_support = 0;
|
||||||
conn->peer_as4_support = 0;
|
conn->peer_as4_support = 0;
|
||||||
conn->peer_add_path = 0;
|
conn->peer_add_path = 0;
|
||||||
conn->advertised_as = 0;
|
conn->peer_gr_aware = 0;
|
||||||
|
conn->peer_gr_able = 0;
|
||||||
|
conn->peer_gr_time = 0;
|
||||||
|
conn->peer_gr_flags = 0;
|
||||||
|
conn->peer_gr_aflags = 0;
|
||||||
|
|
||||||
DBG("BGP: Sending open\n");
|
DBG("BGP: Sending open\n");
|
||||||
conn->sk->rx_hook = bgp_rx;
|
conn->sk->rx_hook = bgp_rx;
|
||||||
|
@ -484,6 +579,9 @@ bgp_sock_err(sock *sk, int err)
|
||||||
else
|
else
|
||||||
BGP_TRACE(D_EVENTS, "Connection closed");
|
BGP_TRACE(D_EVENTS, "Connection closed");
|
||||||
|
|
||||||
|
if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
|
||||||
|
bgp_handle_graceful_restart(p);
|
||||||
|
|
||||||
bgp_conn_enter_idle_state(conn);
|
bgp_conn_enter_idle_state(conn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -649,6 +747,14 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED)
|
||||||
int acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
|
int acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
|
||||||
(p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
|
(p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
|
||||||
|
|
||||||
|
if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
|
||||||
|
{
|
||||||
|
bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
|
||||||
|
bgp_handle_graceful_restart(p);
|
||||||
|
bgp_conn_enter_idle_state(p->conn);
|
||||||
|
acc = 1;
|
||||||
|
}
|
||||||
|
|
||||||
BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
|
BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
|
||||||
sk->daddr, ipa_has_link_scope(sk->daddr) ? sk->iface : NULL,
|
sk->daddr, ipa_has_link_scope(sk->daddr) ? sk->iface : NULL,
|
||||||
sk->dport, acc ? "accepted" : "rejected");
|
sk->dport, acc ? "accepted" : "rejected");
|
||||||
|
@ -817,6 +923,17 @@ bgp_reload_routes(struct proto *P)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
bgp_feed_done(struct proto *P)
|
||||||
|
{
|
||||||
|
struct bgp_proto *p = (struct bgp_proto *) P;
|
||||||
|
if (!p->conn || !p->cf->gr_mode || p->p.refeeding)
|
||||||
|
return;
|
||||||
|
|
||||||
|
p->send_end_mark = 1;
|
||||||
|
bgp_schedule_packet(p->conn, PKT_UPDATE);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
bgp_start_locked(struct object_lock *lock)
|
bgp_start_locked(struct object_lock *lock)
|
||||||
{
|
{
|
||||||
|
@ -867,6 +984,8 @@ bgp_start(struct proto *P)
|
||||||
p->incoming_conn.state = BS_IDLE;
|
p->incoming_conn.state = BS_IDLE;
|
||||||
p->neigh = NULL;
|
p->neigh = NULL;
|
||||||
p->bfd_req = NULL;
|
p->bfd_req = NULL;
|
||||||
|
p->gr_ready = 0;
|
||||||
|
p->gr_active = 0;
|
||||||
|
|
||||||
rt_lock_table(p->igp_table);
|
rt_lock_table(p->igp_table);
|
||||||
|
|
||||||
|
@ -878,6 +997,10 @@ bgp_start(struct proto *P)
|
||||||
p->startup_timer->hook = bgp_startup_timeout;
|
p->startup_timer->hook = bgp_startup_timeout;
|
||||||
p->startup_timer->data = p;
|
p->startup_timer->data = p;
|
||||||
|
|
||||||
|
p->gr_timer = tm_new(p->p.pool);
|
||||||
|
p->gr_timer->hook = bgp_graceful_restart_timeout;
|
||||||
|
p->gr_timer->data = p;
|
||||||
|
|
||||||
p->local_id = proto_get_router_id(P->cf);
|
p->local_id = proto_get_router_id(P->cf);
|
||||||
if (p->rr_client)
|
if (p->rr_client)
|
||||||
p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
|
p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
|
||||||
|
@ -885,6 +1008,9 @@ bgp_start(struct proto *P)
|
||||||
p->remote_id = 0;
|
p->remote_id = 0;
|
||||||
p->source_addr = p->cf->source_addr;
|
p->source_addr = p->cf->source_addr;
|
||||||
|
|
||||||
|
if (p->p.gr_recovery && p->cf->gr_mode)
|
||||||
|
proto_graceful_restart_lock(P);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Before attempting to create the connection, we need to lock the
|
* Before attempting to create the connection, we need to lock the
|
||||||
* port, so that we are sure we're the only instance attempting to talk
|
* port, so that we are sure we're the only instance attempting to talk
|
||||||
|
@ -985,6 +1111,7 @@ bgp_init(struct proto_config *C)
|
||||||
P->import_control = bgp_import_control;
|
P->import_control = bgp_import_control;
|
||||||
P->neigh_notify = bgp_neigh_notify;
|
P->neigh_notify = bgp_neigh_notify;
|
||||||
P->reload_routes = bgp_reload_routes;
|
P->reload_routes = bgp_reload_routes;
|
||||||
|
P->feed_done = bgp_feed_done;
|
||||||
P->rte_better = bgp_rte_better;
|
P->rte_better = bgp_rte_better;
|
||||||
P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
|
P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
|
||||||
|
|
||||||
|
@ -1164,7 +1291,7 @@ bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
|
||||||
|
|
||||||
static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };
|
static char *bgp_state_names[] = { "Idle", "Connect", "Active", "OpenSent", "OpenConfirm", "Established", "Close" };
|
||||||
static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
|
static char *bgp_err_classes[] = { "", "Error: ", "Socket: ", "Received: ", "BGP Error: ", "Automatic shutdown: ", ""};
|
||||||
static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "BFD session down" };
|
static char *bgp_misc_errors[] = { "", "Neighbor lost", "Invalid next hop", "Kernel MD5 auth failed", "No listening socket", "BFD session down", "Graceful restart"};
|
||||||
static char *bgp_auto_errors[] = { "", "Route limit exceeded"};
|
static char *bgp_auto_errors[] = { "", "Route limit exceeded"};
|
||||||
|
|
||||||
static const char *
|
static const char *
|
||||||
|
@ -1225,25 +1352,32 @@ bgp_show_proto_info(struct proto *P)
|
||||||
cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
|
cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
|
||||||
cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
|
cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
|
||||||
|
|
||||||
|
if (p->gr_active)
|
||||||
|
cli_msg(-1006, " Neighbor graceful restart active");
|
||||||
|
|
||||||
if (P->proto_state == PS_START)
|
if (P->proto_state == PS_START)
|
||||||
{
|
{
|
||||||
struct bgp_conn *oc = &p->outgoing_conn;
|
struct bgp_conn *oc = &p->outgoing_conn;
|
||||||
|
|
||||||
if ((p->start_state < BSS_CONNECT) &&
|
if ((p->start_state < BSS_CONNECT) &&
|
||||||
(p->startup_timer->expires))
|
(p->startup_timer->expires))
|
||||||
cli_msg(-1006, " Error wait: %d/%d",
|
cli_msg(-1006, " Error wait: %d/%d",
|
||||||
p->startup_timer->expires - now, p->startup_delay);
|
p->startup_timer->expires - now, p->startup_delay);
|
||||||
|
|
||||||
if ((oc->state == BS_ACTIVE) &&
|
if ((oc->state == BS_ACTIVE) &&
|
||||||
(oc->connect_retry_timer->expires))
|
(oc->connect_retry_timer->expires))
|
||||||
cli_msg(-1006, " Start delay: %d/%d",
|
cli_msg(-1006, " Start delay: %d/%d",
|
||||||
oc->connect_retry_timer->expires - now, p->cf->start_delay_time);
|
oc->connect_retry_timer->expires - now, p->cf->start_delay_time);
|
||||||
|
|
||||||
|
if (p->gr_active && p->gr_timer->expires)
|
||||||
|
cli_msg(-1006, " Restart timer: %d/-", p->gr_timer->expires - now);
|
||||||
}
|
}
|
||||||
else if (P->proto_state == PS_UP)
|
else if (P->proto_state == PS_UP)
|
||||||
{
|
{
|
||||||
cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
|
cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
|
||||||
cli_msg(-1006, " Neighbor caps: %s%s%s%s",
|
cli_msg(-1006, " Neighbor caps: %s%s%s%s%s",
|
||||||
c->peer_refresh_support ? " refresh" : "",
|
c->peer_refresh_support ? " refresh" : "",
|
||||||
|
c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
|
||||||
c->peer_as4_support ? " AS4" : "",
|
c->peer_as4_support ? " AS4" : "",
|
||||||
(c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
|
(c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
|
||||||
(c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "");
|
(c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "");
|
||||||
|
|
|
@ -48,6 +48,8 @@ struct bgp_config {
|
||||||
int secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
|
int secondary; /* Accept also non-best routes (i.e. RA_ACCEPTED) */
|
||||||
int add_path; /* Use ADD-PATH extension [draft] */
|
int add_path; /* Use ADD-PATH extension [draft] */
|
||||||
int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */
|
int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */
|
||||||
|
int gr_mode; /* Graceful restart mode (BGP_GR_*) */
|
||||||
|
unsigned gr_time; /* Graceful restart timeout */
|
||||||
unsigned connect_retry_time;
|
unsigned connect_retry_time;
|
||||||
unsigned hold_time, initial_hold_time;
|
unsigned hold_time, initial_hold_time;
|
||||||
unsigned keepalive_time;
|
unsigned keepalive_time;
|
||||||
|
@ -73,6 +75,15 @@ struct bgp_config {
|
||||||
#define ADD_PATH_TX 2
|
#define ADD_PATH_TX 2
|
||||||
#define ADD_PATH_FULL 3
|
#define ADD_PATH_FULL 3
|
||||||
|
|
||||||
|
#define BGP_GR_ABLE 1
|
||||||
|
#define BGP_GR_AWARE 2
|
||||||
|
|
||||||
|
/* For peer_gr_flags */
|
||||||
|
#define BGP_GRF_RESTART 0x80
|
||||||
|
|
||||||
|
/* For peer_gr_aflags */
|
||||||
|
#define BGP_GRF_FORWARDING 0x80
|
||||||
|
|
||||||
|
|
||||||
struct bgp_conn {
|
struct bgp_conn {
|
||||||
struct bgp_proto *bgp;
|
struct bgp_proto *bgp;
|
||||||
|
@ -90,6 +101,11 @@ struct bgp_conn {
|
||||||
u8 peer_refresh_support; /* Peer supports route refresh [RFC2918] */
|
u8 peer_refresh_support; /* Peer supports route refresh [RFC2918] */
|
||||||
u8 peer_as4_support; /* Peer supports 4B AS numbers [RFC4893] */
|
u8 peer_as4_support; /* Peer supports 4B AS numbers [RFC4893] */
|
||||||
u8 peer_add_path; /* Peer supports ADD-PATH [draft] */
|
u8 peer_add_path; /* Peer supports ADD-PATH [draft] */
|
||||||
|
u8 peer_gr_aware;
|
||||||
|
u8 peer_gr_able;
|
||||||
|
u16 peer_gr_time;
|
||||||
|
u8 peer_gr_flags;
|
||||||
|
u8 peer_gr_aflags;
|
||||||
unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */
|
unsigned hold_time, keepalive_time; /* Times calculated from my and neighbor's requirements */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -107,6 +123,8 @@ struct bgp_proto {
|
||||||
u32 rr_cluster_id; /* Route reflector cluster ID */
|
u32 rr_cluster_id; /* Route reflector cluster ID */
|
||||||
int rr_client; /* Whether neighbor is RR client of me */
|
int rr_client; /* Whether neighbor is RR client of me */
|
||||||
int rs_client; /* Whether neighbor is RS client of me */
|
int rs_client; /* Whether neighbor is RS client of me */
|
||||||
|
u8 gr_ready; /* Neighbor could do graceful restart */
|
||||||
|
u8 gr_active; /* Neighbor is doing graceful restart */
|
||||||
struct bgp_conn *conn; /* Connection we have established */
|
struct bgp_conn *conn; /* Connection we have established */
|
||||||
struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */
|
struct bgp_conn outgoing_conn; /* Outgoing connection we're working with */
|
||||||
struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */
|
struct bgp_conn incoming_conn; /* Incoming connection we have neither accepted nor rejected yet */
|
||||||
|
@ -117,12 +135,14 @@ struct bgp_proto {
|
||||||
rtable *igp_table; /* Table used for recursive next hop lookups */
|
rtable *igp_table; /* Table used for recursive next hop lookups */
|
||||||
struct event *event; /* Event for respawning and shutting process */
|
struct event *event; /* Event for respawning and shutting process */
|
||||||
struct timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */
|
struct timer *startup_timer; /* Timer used to delay protocol startup due to previous errors (startup_delay) */
|
||||||
|
struct timer *gr_timer; /* Timer waiting for reestablishment after graceful restart */
|
||||||
struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */
|
struct bgp_bucket **bucket_hash; /* Hash table of attribute buckets */
|
||||||
unsigned int hash_size, hash_count, hash_limit;
|
unsigned int hash_size, hash_count, hash_limit;
|
||||||
HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
|
HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
|
||||||
slab *prefix_slab; /* Slab holding prefix nodes */
|
slab *prefix_slab; /* Slab holding prefix nodes */
|
||||||
list bucket_queue; /* Queue of buckets to send */
|
list bucket_queue; /* Queue of buckets to send */
|
||||||
struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
|
struct bgp_bucket *withdraw_bucket; /* Withdrawn routes */
|
||||||
|
unsigned send_end_mark; /* End-of-RIB mark scheduled for transmit */
|
||||||
unsigned startup_delay; /* Time to delay protocol startup by due to errors */
|
unsigned startup_delay; /* Time to delay protocol startup by due to errors */
|
||||||
bird_clock_t last_proto_error; /* Time of last error that leads to protocol stop */
|
bird_clock_t last_proto_error; /* Time of last error that leads to protocol stop */
|
||||||
u8 last_error_class; /* Error class of last error */
|
u8 last_error_class; /* Error class of last error */
|
||||||
|
@ -172,6 +192,8 @@ void bgp_conn_enter_openconfirm_state(struct bgp_conn *conn);
|
||||||
void bgp_conn_enter_established_state(struct bgp_conn *conn);
|
void bgp_conn_enter_established_state(struct bgp_conn *conn);
|
||||||
void bgp_conn_enter_close_state(struct bgp_conn *conn);
|
void bgp_conn_enter_close_state(struct bgp_conn *conn);
|
||||||
void bgp_conn_enter_idle_state(struct bgp_conn *conn);
|
void bgp_conn_enter_idle_state(struct bgp_conn *conn);
|
||||||
|
void bgp_handle_graceful_restart(struct bgp_proto *p);
|
||||||
|
void bgp_graceful_restart_done(struct bgp_proto *p);
|
||||||
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
|
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
|
||||||
void bgp_stop(struct bgp_proto *p, unsigned subcode);
|
void bgp_stop(struct bgp_proto *p, unsigned subcode);
|
||||||
|
|
||||||
|
@ -313,6 +335,7 @@ void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsi
|
||||||
#define BEM_INVALID_MD5 3 /* MD5 authentication kernel request failed (possibly not supported) */
|
#define BEM_INVALID_MD5 3 /* MD5 authentication kernel request failed (possibly not supported) */
|
||||||
#define BEM_NO_SOCKET 4
|
#define BEM_NO_SOCKET 4
|
||||||
#define BEM_BFD_DOWN 5
|
#define BEM_BFD_DOWN 5
|
||||||
|
#define BEM_GRACEFUL_RESTART 6
|
||||||
|
|
||||||
/* Automatic shutdown error codes */
|
/* Automatic shutdown error codes */
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY,
|
||||||
PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH,
|
PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH,
|
||||||
INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP,
|
INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP,
|
||||||
TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC,
|
TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC,
|
||||||
SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX)
|
SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE)
|
||||||
|
|
||||||
CF_GRAMMAR
|
CF_GRAMMAR
|
||||||
|
|
||||||
|
@ -50,6 +50,8 @@ bgp_proto_start: proto_start BGP {
|
||||||
BGP_CFG->advertise_ipv4 = 1;
|
BGP_CFG->advertise_ipv4 = 1;
|
||||||
BGP_CFG->interpret_communities = 1;
|
BGP_CFG->interpret_communities = 1;
|
||||||
BGP_CFG->default_local_pref = 100;
|
BGP_CFG->default_local_pref = 100;
|
||||||
|
BGP_CFG->gr_mode = BGP_GR_AWARE;
|
||||||
|
BGP_CFG->gr_time = 120;
|
||||||
}
|
}
|
||||||
;
|
;
|
||||||
|
|
||||||
|
@ -115,6 +117,9 @@ bgp_proto:
|
||||||
| bgp_proto ADD PATHS bool ';' { BGP_CFG->add_path = $4 ? ADD_PATH_FULL : 0; }
|
| bgp_proto ADD PATHS bool ';' { BGP_CFG->add_path = $4 ? ADD_PATH_FULL : 0; }
|
||||||
| bgp_proto ALLOW LOCAL AS ';' { BGP_CFG->allow_local_as = -1; }
|
| bgp_proto ALLOW LOCAL AS ';' { BGP_CFG->allow_local_as = -1; }
|
||||||
| bgp_proto ALLOW LOCAL AS expr ';' { BGP_CFG->allow_local_as = $5; }
|
| bgp_proto ALLOW LOCAL AS expr ';' { BGP_CFG->allow_local_as = $5; }
|
||||||
|
| bgp_proto GRACEFUL RESTART bool ';' { BGP_CFG->gr_mode = $4; }
|
||||||
|
| bgp_proto GRACEFUL RESTART AWARE ';' { BGP_CFG->gr_mode = BGP_GR_AWARE; }
|
||||||
|
| bgp_proto GRACEFUL RESTART TIME expr ';' { BGP_CFG->gr_time = $5; }
|
||||||
| bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; }
|
| bgp_proto IGP TABLE rtable ';' { BGP_CFG->igp_table = $4; }
|
||||||
| bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; }
|
| bgp_proto TTL SECURITY bool ';' { BGP_CFG->ttl_security = $4; }
|
||||||
| bgp_proto BFD bool ';' { BGP_CFG->bfd = $3; cf_check_bfd($3); }
|
| bgp_proto BFD bool ';' { BGP_CFG->bfd = $3; cf_check_bfd($3); }
|
||||||
|
|
|
@ -122,7 +122,7 @@ bgp_create_notification(struct bgp_conn *conn, byte *buf)
|
||||||
|
|
||||||
#ifdef IPV6
|
#ifdef IPV6
|
||||||
static byte *
|
static byte *
|
||||||
bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
|
bgp_put_cap_ipv6(struct bgp_proto *p UNUSED, byte *buf)
|
||||||
{
|
{
|
||||||
*buf++ = 1; /* Capability 1: Multiprotocol extensions */
|
*buf++ = 1; /* Capability 1: Multiprotocol extensions */
|
||||||
*buf++ = 4; /* Capability data length */
|
*buf++ = 4; /* Capability data length */
|
||||||
|
@ -136,7 +136,7 @@ bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
|
||||||
#else
|
#else
|
||||||
|
|
||||||
static byte *
|
static byte *
|
||||||
bgp_put_cap_ipv4(struct bgp_conn *conn UNUSED, byte *buf)
|
bgp_put_cap_ipv4(struct bgp_proto *p UNUSED, byte *buf)
|
||||||
{
|
{
|
||||||
*buf++ = 1; /* Capability 1: Multiprotocol extensions */
|
*buf++ = 1; /* Capability 1: Multiprotocol extensions */
|
||||||
*buf++ = 4; /* Capability data length */
|
*buf++ = 4; /* Capability data length */
|
||||||
|
@ -149,7 +149,7 @@ bgp_put_cap_ipv4(struct bgp_conn *conn UNUSED, byte *buf)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static byte *
|
static byte *
|
||||||
bgp_put_cap_rr(struct bgp_conn *conn UNUSED, byte *buf)
|
bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf)
|
||||||
{
|
{
|
||||||
*buf++ = 2; /* Capability 2: Support for route refresh */
|
*buf++ = 2; /* Capability 2: Support for route refresh */
|
||||||
*buf++ = 0; /* Capability data length */
|
*buf++ = 0; /* Capability data length */
|
||||||
|
@ -157,16 +157,44 @@ bgp_put_cap_rr(struct bgp_conn *conn UNUSED, byte *buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
static byte *
|
static byte *
|
||||||
bgp_put_cap_as4(struct bgp_conn *conn, byte *buf)
|
bgp_put_cap_gr1(struct bgp_proto *p, byte *buf)
|
||||||
|
{
|
||||||
|
*buf++ = 64; /* Capability 64: Support for graceful restart */
|
||||||
|
*buf++ = 6; /* Capability data length */
|
||||||
|
|
||||||
|
put_u16(buf, p->cf->gr_time);
|
||||||
|
if (p->p.gr_recovery)
|
||||||
|
buf[0] |= BGP_GRF_RESTART;
|
||||||
|
buf += 2;
|
||||||
|
|
||||||
|
*buf++ = 0; /* Appropriate AF */
|
||||||
|
*buf++ = BGP_AF;
|
||||||
|
*buf++ = 1; /* and SAFI 1 */
|
||||||
|
*buf++ = p->p.gr_recovery ? BGP_GRF_FORWARDING : 0;
|
||||||
|
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
static byte *
|
||||||
|
bgp_put_cap_gr2(struct bgp_proto *p, byte *buf)
|
||||||
|
{
|
||||||
|
*buf++ = 64; /* Capability 64: Support for graceful restart */
|
||||||
|
*buf++ = 2; /* Capability data length */
|
||||||
|
put_u16(buf, 0);
|
||||||
|
return buf + 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
static byte *
|
||||||
|
bgp_put_cap_as4(struct bgp_proto *p, byte *buf)
|
||||||
{
|
{
|
||||||
*buf++ = 65; /* Capability 65: Support for 4-octet AS number */
|
*buf++ = 65; /* Capability 65: Support for 4-octet AS number */
|
||||||
*buf++ = 4; /* Capability data length */
|
*buf++ = 4; /* Capability data length */
|
||||||
put_u32(buf, conn->bgp->local_as);
|
put_u32(buf, p->local_as);
|
||||||
return buf + 4;
|
return buf + 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
static byte *
|
static byte *
|
||||||
bgp_put_cap_add_path(struct bgp_conn *conn, byte *buf)
|
bgp_put_cap_add_path(struct bgp_proto *p, byte *buf)
|
||||||
{
|
{
|
||||||
*buf++ = 69; /* Capability 69: Support for ADD-PATH */
|
*buf++ = 69; /* Capability 69: Support for ADD-PATH */
|
||||||
*buf++ = 4; /* Capability data length */
|
*buf++ = 4; /* Capability data length */
|
||||||
|
@ -175,7 +203,7 @@ bgp_put_cap_add_path(struct bgp_conn *conn, byte *buf)
|
||||||
*buf++ = BGP_AF;
|
*buf++ = BGP_AF;
|
||||||
*buf++ = 1; /* SAFI 1 */
|
*buf++ = 1; /* SAFI 1 */
|
||||||
|
|
||||||
*buf++ = conn->bgp->cf->add_path;
|
*buf++ = p->cf->add_path;
|
||||||
|
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
@ -206,21 +234,26 @@ bgp_create_open(struct bgp_conn *conn, byte *buf)
|
||||||
|
|
||||||
#ifndef IPV6
|
#ifndef IPV6
|
||||||
if (p->cf->advertise_ipv4)
|
if (p->cf->advertise_ipv4)
|
||||||
cap = bgp_put_cap_ipv4(conn, cap);
|
cap = bgp_put_cap_ipv4(p, cap);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef IPV6
|
#ifdef IPV6
|
||||||
cap = bgp_put_cap_ipv6(conn, cap);
|
cap = bgp_put_cap_ipv6(p, cap);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (p->cf->enable_refresh)
|
if (p->cf->enable_refresh)
|
||||||
cap = bgp_put_cap_rr(conn, cap);
|
cap = bgp_put_cap_rr(p, cap);
|
||||||
|
|
||||||
|
if (p->cf->gr_mode == BGP_GR_ABLE)
|
||||||
|
cap = bgp_put_cap_gr1(p, cap);
|
||||||
|
else if (p->cf->gr_mode == BGP_GR_AWARE)
|
||||||
|
cap = bgp_put_cap_gr2(p, cap);
|
||||||
|
|
||||||
if (p->cf->enable_as4)
|
if (p->cf->enable_as4)
|
||||||
cap = bgp_put_cap_as4(conn, cap);
|
cap = bgp_put_cap_as4(p, cap);
|
||||||
|
|
||||||
if (p->cf->add_path)
|
if (p->cf->add_path)
|
||||||
cap = bgp_put_cap_add_path(conn, cap);
|
cap = bgp_put_cap_add_path(p, cap);
|
||||||
|
|
||||||
cap_len = cap - buf - 12;
|
cap_len = cap - buf - 12;
|
||||||
if (cap_len > 0)
|
if (cap_len > 0)
|
||||||
|
@ -351,6 +384,16 @@ bgp_create_update(struct bgp_conn *conn, byte *buf)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Build the End-of-RIB marker for the IPv4 variant: four zero bytes stored
 * by one put_u32().  The receive side recognises the marker as an update
 * whose withdrawn, attribute and NLRI lengths are all zero.  Returns the
 * end of the written data.
 */
static byte *
bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;	/* presumably referenced by BGP_TRACE() */
  byte *end = buf + 4;

  BGP_TRACE(D_PACKETS, "Sending End-of-RIB");

  put_u32(buf, 0);

  return end;
}
|
||||||
|
|
||||||
#else /* IPv6 version */
|
#else /* IPv6 version */
|
||||||
|
|
||||||
static inline int
|
static inline int
|
||||||
|
@ -520,6 +563,26 @@ bgp_create_update(struct bgp_conn *conn, byte *buf)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Build the End-of-RIB marker for the IPv6 variant: a zero 16-bit field, a
 * 16-bit length of 6, and then a 6-byte MP_UNREACH_NLRI attribute that names
 * only the address family (BGP_AF_IPV6, SAFI 1) and carries no prefixes.
 * Returns the end of the written data (buf + 10).
 */
static byte *
bgp_create_end_mark(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;	/* presumably referenced by BGP_TRACE() */
  byte *attr = buf + 4;			/* Attribute starts after both length fields */

  BGP_TRACE(D_PACKETS, "Sending End-of-RIB");

  put_u16(buf + 0, 0);
  put_u16(buf + 2, 6);		/* length 4-9 */

  /* Empty MP_UNREACH_NLRI attribute */
  attr[0] = BAF_OPTIONAL;
  attr[1] = BA_MP_UNREACH_NLRI;
  attr[2] = 3;			/* Length 7-9 */
  attr[3] = 0;			/* AFI */
  attr[4] = BGP_AF_IPV6;
  attr[5] = 1;			/* SAFI */

  return attr + 6;
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static byte *
|
static byte *
|
||||||
|
@ -606,10 +669,16 @@ bgp_fire_tx(struct bgp_conn *conn)
|
||||||
{
|
{
|
||||||
end = bgp_create_update(conn, pkt);
|
end = bgp_create_update(conn, pkt);
|
||||||
type = PKT_UPDATE;
|
type = PKT_UPDATE;
|
||||||
|
|
||||||
if (!end)
|
if (!end)
|
||||||
{
|
{
|
||||||
conn->packets_to_send = 0;
|
conn->packets_to_send = 0;
|
||||||
return 0;
|
|
||||||
|
if (!p->send_end_mark)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
p->send_end_mark = 0;
|
||||||
|
end = bgp_create_end_mark(conn, pkt);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -678,6 +747,22 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
|
||||||
conn->peer_refresh_support = 1;
|
conn->peer_refresh_support = 1;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 64: /* Graceful restart capability, RFC 4724 */
|
||||||
|
if (cl % 4 != 2)
|
||||||
|
goto err;
|
||||||
|
conn->peer_gr_aware = 1;
|
||||||
|
conn->peer_gr_able = 0;
|
||||||
|
conn->peer_gr_time = get_u16(opt + 2) & 0x0fff;
|
||||||
|
conn->peer_gr_flags = opt[2] & 0xf0;
|
||||||
|
conn->peer_gr_aflags = 0;
|
||||||
|
for (i = 2; i < cl; i += 4)
|
||||||
|
if (opt[2+i+0] == 0 && opt[2+i+1] == BGP_AF && opt[2+i+2] == 1) /* Match AFI/SAFI */
|
||||||
|
{
|
||||||
|
conn->peer_gr_able = 1;
|
||||||
|
conn->peer_gr_aflags = opt[2+i+3];
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case 65: /* AS4 capability, RFC 4893 */
|
case 65: /* AS4 capability, RFC 4893 */
|
||||||
if (cl != 4)
|
if (cl != 4)
|
||||||
goto err;
|
goto err;
|
||||||
|
@ -704,7 +789,7 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
|
||||||
err:
|
err:
|
||||||
bgp_error(conn, 2, 0, NULL, 0);
|
bgp_error(conn, 2, 0, NULL, 0);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -807,12 +892,17 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
|
||||||
other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
|
other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
|
||||||
switch (other->state)
|
switch (other->state)
|
||||||
{
|
{
|
||||||
case BS_IDLE:
|
|
||||||
case BS_CONNECT:
|
case BS_CONNECT:
|
||||||
case BS_ACTIVE:
|
case BS_ACTIVE:
|
||||||
|
/* Stop outgoing connection attempts */
|
||||||
|
bgp_conn_enter_idle_state(other);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case BS_IDLE:
|
||||||
case BS_OPENSENT:
|
case BS_OPENSENT:
|
||||||
case BS_CLOSE:
|
case BS_CLOSE:
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case BS_OPENCONFIRM:
|
case BS_OPENCONFIRM:
|
||||||
if ((p->local_id < id) == (conn == &p->incoming_conn))
|
if ((p->local_id < id) == (conn == &p->incoming_conn))
|
||||||
{
|
{
|
||||||
|
@ -838,6 +928,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
|
||||||
p->as4_session = p->cf->enable_as4 && conn->peer_as4_support;
|
p->as4_session = p->cf->enable_as4 && conn->peer_as4_support;
|
||||||
p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX);
|
p->add_path_rx = (p->cf->add_path & ADD_PATH_RX) && (conn->peer_add_path & ADD_PATH_TX);
|
||||||
p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX);
|
p->add_path_tx = (p->cf->add_path & ADD_PATH_TX) && (conn->peer_add_path & ADD_PATH_RX);
|
||||||
|
p->gr_ready = p->cf->gr_mode && conn->peer_gr_able;
|
||||||
|
|
||||||
if (p->add_path_tx)
|
if (p->add_path_tx)
|
||||||
p->p.accept_ra_types = RA_ANY;
|
p->p.accept_ra_types = RA_ANY;
|
||||||
|
@ -849,6 +940,20 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
|
||||||
bgp_conn_enter_openconfirm_state(conn);
|
bgp_conn_enter_openconfirm_state(conn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
bgp_rx_end_mark(struct bgp_proto *p)
|
||||||
|
{
|
||||||
|
BGP_TRACE(D_PACKETS, "Got End-of-RIB");
|
||||||
|
|
||||||
|
if (p->p.gr_recovery)
|
||||||
|
proto_graceful_restart_unlock(&p->p);
|
||||||
|
|
||||||
|
if (p->gr_active)
|
||||||
|
bgp_graceful_restart_done(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#define DECODE_PREFIX(pp, ll) do { \
|
#define DECODE_PREFIX(pp, ll) do { \
|
||||||
if (p->add_path_rx) \
|
if (p->add_path_rx) \
|
||||||
{ \
|
{ \
|
||||||
|
@ -983,6 +1088,13 @@ bgp_do_rx_update(struct bgp_conn *conn,
|
||||||
u32 path_id = 0;
|
u32 path_id = 0;
|
||||||
u32 last_id = 0;
|
u32 last_id = 0;
|
||||||
|
|
||||||
|
/* Check for End-of-RIB marker */
|
||||||
|
if (!withdrawn_len && !attr_len && !nlri_len)
|
||||||
|
{
|
||||||
|
bgp_rx_end_mark(p);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/* Withdraw routes */
|
/* Withdraw routes */
|
||||||
while (withdrawn_len)
|
while (withdrawn_len)
|
||||||
{
|
{
|
||||||
|
@ -1088,6 +1200,14 @@ bgp_do_rx_update(struct bgp_conn *conn,
|
||||||
if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
|
if (conn->state != BS_ESTABLISHED) /* fatal error during decoding */
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
/* Check for End-of-RIB marker */
|
||||||
|
if ((attr_len < 8) && !withdrawn_len && !attr_len &&
|
||||||
|
(p->mp_unreach_len == 3) && (get_u16(p->mp_unreach_start) == BGP_AF_IPV6))
|
||||||
|
{
|
||||||
|
bgp_rx_end_mark(p);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
DO_NLRI(mp_unreach)
|
DO_NLRI(mp_unreach)
|
||||||
{
|
{
|
||||||
while (len)
|
while (len)
|
||||||
|
|
|
@ -17,7 +17,7 @@ CF_DEFINES
|
||||||
|
|
||||||
CF_DECLS
|
CF_DECLS
|
||||||
|
|
||||||
CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, KRT_SOURCE, KRT_METRIC)
|
CF_KEYWORDS(KERNEL, PERSIST, SCAN, TIME, LEARN, DEVICE, ROUTES, GRACEFUL, RESTART, KRT_SOURCE, KRT_METRIC)
|
||||||
|
|
||||||
CF_GRAMMAR
|
CF_GRAMMAR
|
||||||
|
|
||||||
|
@ -46,6 +46,7 @@ kern_item:
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
| DEVICE ROUTES bool { THIS_KRT->devroutes = $3; }
|
| DEVICE ROUTES bool { THIS_KRT->devroutes = $3; }
|
||||||
|
| GRACEFUL RESTART bool { THIS_KRT->graceful_restart = $3; }
|
||||||
;
|
;
|
||||||
|
|
||||||
/* Kernel interface protocol */
|
/* Kernel interface protocol */
|
||||||
|
|
|
@ -653,6 +653,13 @@ krt_got_route(struct krt_proto *p, rte *e)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!p->ready)
|
||||||
|
{
|
||||||
|
/* We wait for the initial feed to have correct KRF_INSTALLED flag */
|
||||||
|
verdict = KRF_IGNORE;
|
||||||
|
goto sentenced;
|
||||||
|
}
|
||||||
|
|
||||||
old = net->routes;
|
old = net->routes;
|
||||||
if ((net->n.flags & KRF_INSTALLED) && rte_is_valid(old))
|
if ((net->n.flags & KRF_INSTALLED) && rte_is_valid(old))
|
||||||
{
|
{
|
||||||
|
@ -779,7 +786,9 @@ krt_prune(struct krt_proto *p)
|
||||||
if (KRT_CF->learn)
|
if (KRT_CF->learn)
|
||||||
krt_learn_prune(p);
|
krt_learn_prune(p);
|
||||||
#endif
|
#endif
|
||||||
p->initialized = 1;
|
|
||||||
|
if (p->ready)
|
||||||
|
p->initialized = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -852,7 +861,7 @@ krt_scan_timer_start(struct krt_proto *p)
|
||||||
|
|
||||||
krt_scan_count++;
|
krt_scan_count++;
|
||||||
|
|
||||||
tm_start(krt_scan_timer, 0);
|
tm_start(krt_scan_timer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -867,6 +876,12 @@ krt_scan_timer_stop(struct krt_proto *p)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
krt_scan_timer_kick(struct krt_proto *p UNUSED)
|
||||||
|
{
|
||||||
|
tm_start(krt_scan_timer, 0);
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -885,7 +900,7 @@ static void
|
||||||
krt_scan_timer_start(struct krt_proto *p)
|
krt_scan_timer_start(struct krt_proto *p)
|
||||||
{
|
{
|
||||||
p->scan_timer = tm_new_set(p->p.pool, krt_scan, p, 0, KRT_CF->scan_time);
|
p->scan_timer = tm_new_set(p->p.pool, krt_scan, p, 0, KRT_CF->scan_time);
|
||||||
tm_start(p->scan_timer, 0);
|
tm_start(p->scan_timer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -894,6 +909,12 @@ krt_scan_timer_stop(struct krt_proto *p)
|
||||||
tm_stop(p->scan_timer);
|
tm_stop(p->scan_timer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
krt_scan_timer_kick(struct krt_proto *p UNUSED)
|
||||||
|
{
|
||||||
|
tm_start(p->scan_timer, 0);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
@ -970,6 +991,16 @@ krt_notify(struct proto *P, struct rtable *table UNUSED, net *net,
|
||||||
krt_replace_rte(p, net, new, old, eattrs);
|
krt_replace_rte(p, net, new, old, eattrs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
krt_feed_done(struct proto *P)
|
||||||
|
{
|
||||||
|
struct krt_proto *p = (struct krt_proto *) P;
|
||||||
|
|
||||||
|
p->ready = 1;
|
||||||
|
krt_scan_timer_kick(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
krt_rte_same(rte *a, rte *b)
|
krt_rte_same(rte *a, rte *b)
|
||||||
{
|
{
|
||||||
|
@ -992,6 +1023,7 @@ krt_init(struct proto_config *c)
|
||||||
p->p.accept_ra_types = RA_OPTIMAL;
|
p->p.accept_ra_types = RA_OPTIMAL;
|
||||||
p->p.import_control = krt_import_control;
|
p->p.import_control = krt_import_control;
|
||||||
p->p.rt_notify = krt_notify;
|
p->p.rt_notify = krt_notify;
|
||||||
|
p->p.feed_done = krt_feed_done;
|
||||||
p->p.make_tmp_attrs = krt_make_tmp_attrs;
|
p->p.make_tmp_attrs = krt_make_tmp_attrs;
|
||||||
p->p.store_tmp_attrs = krt_store_tmp_attrs;
|
p->p.store_tmp_attrs = krt_store_tmp_attrs;
|
||||||
p->p.rte_same = krt_rte_same;
|
p->p.rte_same = krt_rte_same;
|
||||||
|
@ -1015,6 +1047,9 @@ krt_start(struct proto *P)
|
||||||
|
|
||||||
krt_scan_timer_start(p);
|
krt_scan_timer_start(p);
|
||||||
|
|
||||||
|
if (P->gr_recovery && KRT_CF->graceful_restart)
|
||||||
|
P->gr_wait = 1;
|
||||||
|
|
||||||
return PS_UP;
|
return PS_UP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1029,6 +1064,9 @@ krt_shutdown(struct proto *P)
|
||||||
if (p->initialized && !KRT_CF->persist)
|
if (p->initialized && !KRT_CF->persist)
|
||||||
krt_flush_routes(p);
|
krt_flush_routes(p);
|
||||||
|
|
||||||
|
p->ready = 0;
|
||||||
|
p->initialized = 0;
|
||||||
|
|
||||||
krt_sys_shutdown(p);
|
krt_sys_shutdown(p);
|
||||||
|
|
||||||
rem_node(&p->krt_node);
|
rem_node(&p->krt_node);
|
||||||
|
@ -1045,7 +1083,7 @@ krt_reconfigure(struct proto *p, struct proto_config *new)
|
||||||
if (!krt_sys_reconfigure((struct krt_proto *) p, n, o))
|
if (!krt_sys_reconfigure((struct krt_proto *) p, n, o))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/* persist needn't be the same */
|
/* persist, graceful restart need not be the same */
|
||||||
return o->scan_time == n->scan_time && o->learn == n->learn && o->devroutes == n->devroutes;
|
return o->scan_time == n->scan_time && o->learn == n->learn && o->devroutes == n->devroutes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -48,6 +48,7 @@ struct krt_config {
|
||||||
int scan_time; /* How often we re-scan routes */
|
int scan_time; /* How often we re-scan routes */
|
||||||
int learn; /* Learn routes from other sources */
|
int learn; /* Learn routes from other sources */
|
||||||
int devroutes; /* Allow export of device routes */
|
int devroutes; /* Allow export of device routes */
|
||||||
|
int graceful_restart; /* Regard graceful restart recovery */
|
||||||
};
|
};
|
||||||
|
|
||||||
struct krt_proto {
|
struct krt_proto {
|
||||||
|
@ -63,7 +64,8 @@ struct krt_proto {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
node krt_node; /* Node in krt_proto_list */
|
node krt_node; /* Node in krt_proto_list */
|
||||||
int initialized; /* First scan has already been finished */
|
byte ready; /* Initial feed has been finished */
|
||||||
|
byte initialized; /* First scan has been finished */
|
||||||
};
|
};
|
||||||
|
|
||||||
extern pool *krt_pool;
|
extern pool *krt_pool;
|
||||||
|
|
|
@ -602,7 +602,7 @@ signal_init(void)
|
||||||
* Parsing of command-line arguments
|
* Parsing of command-line arguments
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static char *opt_list = "c:dD:ps:P:u:g:f";
|
static char *opt_list = "c:dD:ps:P:u:g:fR";
|
||||||
static int parse_and_exit;
|
static int parse_and_exit;
|
||||||
char *bird_name;
|
char *bird_name;
|
||||||
static char *use_user;
|
static char *use_user;
|
||||||
|
@ -612,7 +612,7 @@ static int run_in_foreground = 0;
|
||||||
static void
|
static void
|
||||||
usage(void)
|
usage(void)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "Usage: %s [-c <config-file>] [-d] [-D <debug-file>] [-p] [-s <control-socket>] [-P <pid-file>] [-u <user>] [-g <group>] [-f]\n", bird_name);
|
fprintf(stderr, "Usage: %s [-c <config-file>] [-d] [-D <debug-file>] [-p] [-s <control-socket>] [-P <pid-file>] [-u <user>] [-g <group>] [-f] [-R]\n", bird_name);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -723,6 +723,9 @@ parse_args(int argc, char **argv)
|
||||||
case 'f':
|
case 'f':
|
||||||
run_in_foreground = 1;
|
run_in_foreground = 1;
|
||||||
break;
|
break;
|
||||||
|
case 'R':
|
||||||
|
graceful_restart_recovery();
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
usage();
|
usage();
|
||||||
}
|
}
|
||||||
|
@ -805,6 +808,8 @@ main(int argc, char **argv)
|
||||||
|
|
||||||
config_commit(conf, RECONFIG_HARD, 0);
|
config_commit(conf, RECONFIG_HARD, 0);
|
||||||
|
|
||||||
|
graceful_restart_init();
|
||||||
|
|
||||||
#ifdef LOCAL_DEBUG
|
#ifdef LOCAL_DEBUG
|
||||||
async_dump_flag = 1;
|
async_dump_flag = 1;
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in a new issue