Core multipath support.
This commit is contained in:
parent
01427d3f2b
commit
7e95c05d88
5 changed files with 140 additions and 55 deletions
|
@ -51,7 +51,7 @@ CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIREC
|
|||
RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE)
|
||||
CF_ENUM(T_ENUM_SCOPE, SCOPE_, HOST, LINK, SITE, ORGANIZATION, UNIVERSE)
|
||||
CF_ENUM(T_ENUM_RTC, RTC_, UNICAST, BROADCAST, MULTICAST, ANYCAST)
|
||||
CF_ENUM(T_ENUM_RTD, RTD_, ROUTER, DEVICE, BLACKHOLE, UNREACHABLE, PROHIBIT)
|
||||
CF_ENUM(T_ENUM_RTD, RTD_, ROUTER, DEVICE, BLACKHOLE, UNREACHABLE, PROHIBIT, MULTIPATH)
|
||||
|
||||
%type <i32> idval
|
||||
%type <f> imexport
|
||||
|
|
32
nest/route.h
32
nest/route.h
|
@ -170,7 +170,7 @@ struct hostentry {
|
|||
struct hostentry *next; /* Next in hash chain */
|
||||
unsigned hash_key; /* Hash key */
|
||||
unsigned uc; /* Use count */
|
||||
struct iface *iface; /* Chosen outgoing interface */
|
||||
struct rta *src; /* Source rta entry */
|
||||
ip_addr gw; /* Chosen next hop */
|
||||
byte dest; /* Chosen route destination type (RTD_...) */
|
||||
u32 igp_metric; /* Chosen route IGP metric */
|
||||
|
@ -266,6 +266,14 @@ void rt_show(struct rt_show_data *);
|
|||
* construction of BGP route attribute lists.
|
||||
*/
|
||||
|
||||
/* Multipath next-hop */
|
||||
struct mpnh {
|
||||
ip_addr gw; /* Next hop */
|
||||
struct iface *iface; /* Outgoing interface */
|
||||
struct mpnh *next;
|
||||
unsigned char weight;
|
||||
};
|
||||
|
||||
typedef struct rta {
|
||||
struct rta *next, **pprev; /* Hash chain */
|
||||
struct proto *proto; /* Protocol instance that originally created the route */
|
||||
|
@ -282,6 +290,7 @@ typedef struct rta {
|
|||
ip_addr from; /* Advertising router */
|
||||
struct hostentry *hostentry; /* Hostentry for recursive next-hops */
|
||||
struct iface *iface; /* Outgoing interface */
|
||||
struct mpnh *nexthops; /* Next-hops for multipath routes */
|
||||
struct ea_list *eattrs; /* Extended Attribute chain */
|
||||
} rta;
|
||||
|
||||
|
@ -309,7 +318,8 @@ typedef struct rta {
|
|||
#define RTD_BLACKHOLE 2 /* Silently drop packets */
|
||||
#define RTD_UNREACHABLE 3 /* Reject as unreachable */
|
||||
#define RTD_PROHIBIT 4 /* Administratively prohibited */
|
||||
#define RTD_NONE 5 /* Invalid RTD */
|
||||
#define RTD_MULTIPATH 5 /* Multipath route (nexthops != NULL) */
|
||||
#define RTD_NONE 6 /* Invalid RTD */
|
||||
|
||||
#define RTAF_CACHED 1 /* This is a cached rta */
|
||||
|
||||
|
@ -387,6 +397,10 @@ void ea_format(eattr *e, byte *buf);
|
|||
#define EA_FORMAT_BUF_SIZE 256
|
||||
ea_list *ea_append(ea_list *to, ea_list *what);
|
||||
|
||||
/* Out-of-line, element-by-element comparison of two next-hop lists */
int mpnh__same(struct mpnh *x, struct mpnh *y);

/*
 * Compare two multipath next-hop lists; returns 1 when they match, 0 otherwise.
 * Identical pointers (including two NULLs) are accepted without walking the
 * lists; otherwise the decision is left to mpnh__same().
 */
static inline int
mpnh_same(struct mpnh *x, struct mpnh *y)
{
  if (x == y)
    return 1;

  return mpnh__same(x, y) != 0;
}
|
||||
|
||||
void rta_init(void);
|
||||
rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */
|
||||
/* Take one more reference to @r: bump its use count and hand the same pointer back */
static inline rta *
rta_clone(rta *r)
{
  r->uc += 1;
  return r;
}
|
||||
|
@ -403,12 +417,14 @@ void rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw, i
|
|||
* count. Cached rta locks its hostentry (increases its use count),
|
||||
* uncached rta does not lock it. Hostentry with zero use count is
|
||||
* removed asynchronously during host cache update, therefore it is
|
||||
* safe to hold such hostentry temporarily. There is no need to hold
|
||||
* a lock for hostentry->dep table, because that table contains routes
|
||||
* responsible for that hostentry, and therefore is non-empty if given
|
||||
* hostentry has non-zero use count. The protocol responsible for routes
|
||||
* with recursive next hops should also hold a lock for a table governing
|
||||
* those routes (argument tab to rta_set_recursive_next_hop()).
|
||||
* safe to hold such hostentry temporarily. Hostentry holds a lock for
|
||||
* a 'source' rta, mainly to share multipath nexthops. There is no
|
||||
* need to hold a lock for hostentry->dep table, because that table
|
||||
* contains routes responsible for that hostentry, and therefore is
|
||||
* non-empty if given hostentry has non-zero use count. The protocol
|
||||
* responsible for routes with recursive next hops should also hold a
|
||||
* lock for a table governing those routes (argument tab to
|
||||
* rta_set_recursive_next_hop()).
|
||||
*/
|
||||
|
||||
/* Lock a hostentry by incrementing its use count; a NULL @he is silently ignored */
static inline void
rt_lock_hostentry(struct hostentry *he)
{
  if (!he)
    return;

  he->uc++;
}
|
||||
|
|
|
@ -57,9 +57,65 @@
|
|||
pool *rta_pool;
|
||||
|
||||
static slab *rta_slab;
|
||||
static slab *mpnh_slab;
|
||||
|
||||
struct protocol *attr_class_to_protocol[EAP_MAX];
|
||||
|
||||
static inline unsigned int
|
||||
mpnh_hash(struct mpnh *x)
|
||||
{
|
||||
unsigned int h = 0;
|
||||
for (; x; x = x->next)
|
||||
h ^= ipa_hash(x->gw);
|
||||
|
||||
return h;
|
||||
}
|
||||
|
||||
int
|
||||
mpnh__same(struct mpnh *x, struct mpnh *y)
|
||||
{
|
||||
for (; x && y; x = x->next, y = y->next)
|
||||
if (!ipa_equal(x->gw, y->gw) || (x->iface != y->iface) || (x->weight != y->weight))
|
||||
return 0;
|
||||
|
||||
return x == y;
|
||||
}
|
||||
|
||||
static struct mpnh *
|
||||
mpnh_copy(struct mpnh *o)
|
||||
{
|
||||
struct mpnh *first = NULL;
|
||||
struct mpnh **last = &first;
|
||||
|
||||
for (; o; o = o->next)
|
||||
{
|
||||
struct mpnh *n = sl_alloc(mpnh_slab);
|
||||
n->gw = o->gw;
|
||||
n->iface = o->iface;
|
||||
n->next = NULL;
|
||||
n->weight = o->weight;
|
||||
|
||||
*last = n;
|
||||
last = &(n->next);
|
||||
}
|
||||
|
||||
return first;
|
||||
}
|
||||
|
||||
static void
|
||||
mpnh_free(struct mpnh *o)
|
||||
{
|
||||
struct mpnh *n;
|
||||
|
||||
while (o)
|
||||
{
|
||||
n = o->next;
|
||||
sl_free(mpnh_slab, o);
|
||||
o = n;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Extended Attributes
|
||||
*/
|
||||
|
@ -587,7 +643,8 @@ rta_alloc_hash(void)
|
|||
static inline unsigned int
|
||||
rta_hash(rta *a)
|
||||
{
|
||||
return (a->proto->hash_key ^ ipa_hash(a->gw) ^ ea_hash(a->eattrs)) & 0xffff;
|
||||
return (a->proto->hash_key ^ ipa_hash(a->gw) ^
|
||||
mpnh_hash(a->nexthops) ^ ea_hash(a->eattrs)) & 0xffff;
|
||||
}
|
||||
|
||||
static inline int
|
||||
|
@ -604,6 +661,7 @@ rta_same(rta *x, rta *y)
|
|||
ipa_equal(x->from, y->from) &&
|
||||
x->iface == y->iface &&
|
||||
x->hostentry == y->hostentry &&
|
||||
mpnh_same(x->nexthops, y->nexthops) &&
|
||||
ea_same(x->eattrs, y->eattrs));
|
||||
}
|
||||
|
||||
|
@ -614,6 +672,7 @@ rta_copy(rta *o)
|
|||
|
||||
memcpy(r, o, sizeof(rta));
|
||||
r->uc = 1;
|
||||
r->nexthops = mpnh_copy(o->nexthops);
|
||||
r->eattrs = ea_list_copy(o->eattrs);
|
||||
return r;
|
||||
}
|
||||
|
@ -707,6 +766,7 @@ rta__free(rta *a)
|
|||
a->next->pprev = a->pprev;
|
||||
a->aflags = 0; /* Poison the entry */
|
||||
rt_unlock_hostentry(a->hostentry);
|
||||
mpnh_free(a->nexthops);
|
||||
ea_free(a->eattrs);
|
||||
sl_free(rta_slab, a);
|
||||
}
|
||||
|
@ -798,6 +858,7 @@ rta_init(void)
|
|||
{
|
||||
rta_pool = rp_new(&root_pool, "Attributes");
|
||||
rta_slab = sl_new(rta_pool, sizeof(rta));
|
||||
mpnh_slab = sl_new(rta_pool, sizeof(struct mpnh));
|
||||
rta_alloc_hash();
|
||||
}
|
||||
|
||||
|
|
|
@ -962,29 +962,31 @@ rt_preconfig(struct config *c)
|
|||
* triggered by rt_schedule_nhu().
|
||||
*/
|
||||
|
||||
static inline int
|
||||
hostentry_diff(struct hostentry *he, struct iface *iface, ip_addr gw,
|
||||
byte dest, u32 igp_metric)
|
||||
{
|
||||
return (he->iface != iface) || !ipa_equal(he->gw, gw) ||
|
||||
(he->dest != dest) || (he->igp_metric != igp_metric);
|
||||
}
|
||||
|
||||
static inline int
|
||||
rta_next_hop_outdated(rta *a)
|
||||
{
|
||||
struct hostentry *he = a->hostentry;
|
||||
return he && hostentry_diff(he, a->iface, a->gw, a->dest, a->igp_metric);
|
||||
|
||||
if (!he)
|
||||
return 0;
|
||||
|
||||
if (!he->src)
|
||||
return a->dest != RTD_UNREACHABLE;
|
||||
|
||||
return (a->iface != he->src->iface) || !ipa_equal(a->gw, he->gw) ||
|
||||
(a->dest != he->dest) || (a->igp_metric != he->igp_metric) ||
|
||||
!mpnh_same(a->nexthops, he->src->nexthops);
|
||||
}
|
||||
|
||||
static inline void
|
||||
rta_apply_hostentry(rta *a, struct hostentry *he)
|
||||
{
|
||||
a->hostentry = he;
|
||||
a->iface = he->iface;
|
||||
a->iface = he->src ? he->src->iface : NULL;
|
||||
a->gw = he->gw;
|
||||
a->dest = he->dest;
|
||||
a->igp_metric = he->igp_metric;
|
||||
a->nexthops = he->src ? he->src->nexthops : NULL;
|
||||
}
|
||||
|
||||
static inline rte *
|
||||
|
@ -1388,6 +1390,7 @@ hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsig
|
|||
he->tab = dep;
|
||||
he->hash_key = k;
|
||||
he->uc = 0;
|
||||
he->src = NULL;
|
||||
|
||||
add_tail(&hc->hostentries, &he->ln);
|
||||
hc_insert(hc, he);
|
||||
|
@ -1402,6 +1405,8 @@ hc_new_hostentry(struct hostcache *hc, ip_addr a, ip_addr ll, rtable *dep, unsig
|
|||
static void
|
||||
hc_delete_hostentry(struct hostcache *hc, struct hostentry *he)
|
||||
{
|
||||
rta_free(he->src);
|
||||
|
||||
rem_node(&he->ln);
|
||||
hc_remove(hc, he);
|
||||
sl_free(hc->slab, he);
|
||||
|
@ -1436,6 +1441,8 @@ rt_free_hostcache(rtable *tab)
|
|||
WALK_LIST(n, hc->hostentries)
|
||||
{
|
||||
struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
|
||||
rta_free(he->src);
|
||||
|
||||
if (he->uc)
|
||||
log(L_ERR "Hostcache is not empty in table %s", tab->name);
|
||||
}
|
||||
|
@ -1488,7 +1495,7 @@ rt_get_igp_metric(rte *rt)
|
|||
return rt->u.rip.metric;
|
||||
|
||||
/* Device routes */
|
||||
if (a->dest != RTD_ROUTER)
|
||||
if ((a->dest != RTD_ROUTER) && (a->dest != RTD_MULTIPATH))
|
||||
return 0;
|
||||
|
||||
return IGP_METRIC_UNKNOWN;
|
||||
|
@ -1497,12 +1504,15 @@ rt_get_igp_metric(rte *rt)
|
|||
static int
|
||||
rt_update_hostentry(rtable *tab, struct hostentry *he)
|
||||
{
|
||||
struct iface *old_iface = he->iface;
|
||||
ip_addr old_gw = he->gw;
|
||||
byte old_dest = he->dest;
|
||||
u32 old_metric = he->igp_metric;
|
||||
rta *old_src = he->src;
|
||||
int pxlen = 0;
|
||||
|
||||
/* Reset the hostentry */
|
||||
he->src = NULL;
|
||||
he->gw = IPA_NONE;
|
||||
he->dest = RTD_UNREACHABLE;
|
||||
he->igp_metric = 0;
|
||||
|
||||
net *n = net_route(tab, he->addr, MAX_PREFIX_LENGTH);
|
||||
if (n)
|
||||
{
|
||||
|
@ -1513,53 +1523,41 @@ rt_update_hostentry(rtable *tab, struct hostentry *he)
|
|||
{
|
||||
/* Recursive route should not depend on another recursive route */
|
||||
log(L_WARN "Next hop address %I resolvable through recursive route for %I/%d",
|
||||
he->addr, n->n.prefix, n->n.pxlen);
|
||||
he->iface = NULL;
|
||||
he->gw = IPA_NONE;
|
||||
he->dest = RTD_UNREACHABLE;
|
||||
he->addr, n->n.prefix, pxlen);
|
||||
goto done;
|
||||
}
|
||||
else if (a->dest == RTD_DEVICE)
|
||||
|
||||
if (a->dest == RTD_DEVICE)
|
||||
{
|
||||
if (if_local_addr(he->addr, a->iface))
|
||||
{
|
||||
/* The host address is a local address, this is not valid */
|
||||
log(L_WARN "Next hop address %I is a local address of iface %s",
|
||||
he->addr, a->iface->name);
|
||||
he->iface = NULL;
|
||||
he->gw = IPA_NONE;
|
||||
he->dest = RTD_UNREACHABLE;
|
||||
goto done;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
/* The host is directly reachable, use link as a gateway */
|
||||
he->iface = a->iface;
|
||||
he->gw = he->link;
|
||||
he->dest = RTD_ROUTER;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* The host is reachable through some route entry */
|
||||
he->iface = a->iface;
|
||||
he->gw = a->gw;
|
||||
he->dest = a->dest;
|
||||
}
|
||||
|
||||
he->igp_metric = he->iface ? rt_get_igp_metric(n->routes) : 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* The host is unreachable */
|
||||
he->iface = NULL;
|
||||
he->gw = IPA_NONE;
|
||||
he->dest = RTD_UNREACHABLE;
|
||||
he->igp_metric = 0;
|
||||
he->src = rta_clone(a);
|
||||
he->igp_metric = rt_get_igp_metric(n->routes);
|
||||
}
|
||||
|
||||
done:
|
||||
/* Add a prefix range to the trie */
|
||||
trie_add_prefix(tab->hostcache->trie, he->addr, MAX_PREFIX_LENGTH, pxlen, MAX_PREFIX_LENGTH);
|
||||
|
||||
return hostentry_diff(he, old_iface, old_gw, old_dest, old_metric);
|
||||
rta_free(old_src);
|
||||
return old_src != he->src;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1630,6 +1628,7 @@ rt_format_via(rte *e, byte *via)
|
|||
case RTD_BLACKHOLE: bsprintf(via, "blackhole"); break;
|
||||
case RTD_UNREACHABLE: bsprintf(via, "unreachable"); break;
|
||||
case RTD_PROHIBIT: bsprintf(via, "prohibited"); break;
|
||||
case RTD_MULTIPATH: bsprintf(via, "multipath"); break;
|
||||
default: bsprintf(via, "???");
|
||||
}
|
||||
}
|
||||
|
@ -1641,6 +1640,7 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tm
|
|||
byte tm[TM_DATETIME_BUFFER_SIZE], info[256];
|
||||
rta *a = e->attrs;
|
||||
int primary = (e->net->routes == e);
|
||||
struct mpnh *nh;
|
||||
|
||||
rt_format_via(e, via);
|
||||
tm_format_datetime(tm, &config->tf_route, e->lastmod);
|
||||
|
@ -1663,6 +1663,8 @@ rt_show_rte(struct cli *c, byte *ia, rte *e, struct rt_show_data *d, ea_list *tm
|
|||
bsprintf(info, " (%d)", e->pref);
|
||||
cli_printf(c, -1007, "%-18s %s [%s %s%s]%s%s", ia, via, a->proto->name,
|
||||
tm, from, primary ? " *" : "", info);
|
||||
for (nh = a->nexthops; nh; nh = nh->next)
|
||||
cli_printf(c, -1007, "\tvia %I on %s weight %d", nh->gw, nh->iface->name, nh->weight + 1);
|
||||
if (d->verbose)
|
||||
rta_show(c, a, tmpa);
|
||||
}
|
||||
|
|
|
@ -1015,6 +1015,13 @@ bgp_get_neighbor(rte *r)
|
|||
return ((struct bgp_proto *) r->attrs->proto)->remote_as;
|
||||
}
|
||||
|
||||
static inline int
|
||||
rte_resolvable(rte *rt)
|
||||
{
|
||||
int rd = rt->attrs->dest;
|
||||
return (rd == RTD_ROUTER) || (rd == RTD_DEVICE) || (rd == RTD_MULTIPATH);
|
||||
}
|
||||
|
||||
int
|
||||
bgp_rte_better(rte *new, rte *old)
|
||||
{
|
||||
|
@ -1024,9 +1031,8 @@ bgp_rte_better(rte *new, rte *old)
|
|||
u32 n, o;
|
||||
|
||||
/* RFC 4271 9.1.2.1. Route resolvability test */
|
||||
/* non-NULL iface means it is either RTD_ROUTER or RTD_DEVICE route */
|
||||
n = new->attrs->iface != NULL;
|
||||
o = old->attrs->iface != NULL;
|
||||
n = rte_resolvable(new);
|
||||
o = rte_resolvable(old);
|
||||
if (n > o)
|
||||
return 1;
|
||||
if (n < o)
|
||||
|
@ -1502,7 +1508,7 @@ bgp_get_route_info(rte *e, byte *buf, ea_list *attrs)
|
|||
buf += bsprintf(buf, " (%d", e->pref);
|
||||
if (e->attrs->hostentry)
|
||||
{
|
||||
if (!e->attrs->iface)
|
||||
if (!rte_resolvable(e))
|
||||
buf += bsprintf(buf, "/-");
|
||||
else if (e->attrs->igp_metric >= IGP_METRIC_UNKNOWN)
|
||||
buf += bsprintf(buf, "/?");
|
||||
|
|
Loading…
Reference in a new issue