KRT: Support for IPv6 ECMP
Linux represents IPv6 ECMP routes as a sequence of unipath routes with the same prefix. We have to translate between our representation (one route with a multipath next hop) and the Linux representation in both directions. Proper learning of alien IPv6 ECMP routes is still not supported. Thanks to Mikhail Sennikovskii for the original patch.
This commit is contained in:
parent
84cac51a51
commit
2feaa6931b
1 changed files with 277 additions and 77 deletions
|
@ -19,7 +19,6 @@
|
||||||
#include "nest/route.h"
|
#include "nest/route.h"
|
||||||
#include "nest/protocol.h"
|
#include "nest/protocol.h"
|
||||||
#include "nest/iface.h"
|
#include "nest/iface.h"
|
||||||
#include "lib/alloca.h"
|
|
||||||
#include "lib/timer.h"
|
#include "lib/timer.h"
|
||||||
#include "lib/unix.h"
|
#include "lib/unix.h"
|
||||||
#include "lib/krt.h"
|
#include "lib/krt.h"
|
||||||
|
@ -51,6 +50,49 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * krt_ecmp6(X) is a compile-time switch: it expands to 1 when BIRD is built
 * with IPV6 defined and to 0 otherwise; the argument is ignored. It gates the
 * code paths that handle ECMP routes as a sequence of unipath routes.
 */
#ifdef IPV6
|
||||||
|
#define krt_ecmp6(X) 1
|
||||||
|
#else
|
||||||
|
#define krt_ecmp6(X) 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Structure nl_parse_state keeps state of received route processing. Ideally,
|
||||||
|
* we could just independently parse received Netlink messages and immediately
|
||||||
|
* propagate received routes to the rest of BIRD, but Linux kernel represents
|
||||||
|
* and announces IPv6 ECMP routes not as one route with multiple next hops (like
|
||||||
|
* RTA_MULTIPATH in IPv4 ECMP), but as a set of routes with the same prefix.
|
||||||
|
*
|
||||||
|
* Therefore, BIRD keeps currently processed route in nl_parse_state structure
|
||||||
|
* and postpones its propagation until we expect it to be final; i.e., when
|
||||||
|
* non-matching route is received or when the scan ends. When another matching
|
||||||
|
* route is received, it is merged with the already processed route to form an
|
||||||
|
* ECMP route. Note that merging is done only for IPv6 (merge == 1), but the
|
||||||
|
* postponing is done in both cases (for simplicity). All IPv4 routes are just
|
||||||
|
* considered non-matching.
|
||||||
|
*
|
||||||
|
* This is ignored for asynchronous notifications (every notification is handled
|
||||||
|
* as a separate route). It is not an issue for our routes, as we ignore such
|
||||||
|
* notifications anyways. But importing alien IPv6 ECMP routes does not work
|
||||||
|
* properly.
|
||||||
|
*/
|
||||||
|
|
||||||
|
struct nl_parse_state
|
||||||
|
{
|
||||||
|
struct linpool *pool; /* Linpool for data allocated while parsing; flushed in nl_announce_route() */
|
||||||
|
int scan; /* Nonzero: route is passed to krt_got_route(); zero: to krt_got_route_async() */
|
||||||
|
int merge; /* Nonzero: matching consecutive routes may be merged (see nl_mergable_route()) */
|
||||||
|
|
||||||
|
net *net; /* Network of the postponed route; NULL when no route is stored */
|
||||||
|
rta *attrs; /* Attributes of the postponed route */
|
||||||
|
struct krt_proto *proto; /* Kernel protocol instance the postponed route belongs to */
|
||||||
|
s8 new; /* Passed to krt_got_route_async(); presumably nonzero for RTM_NEWROUTE - confirm in nl_parse_route() */
|
||||||
|
s8 krt_src; /* Route source class (e.g. KRT_SRC_BIRD) chosen in nl_parse_route() */
|
||||||
|
u8 krt_type; /* rtm_type of the stored route */
|
||||||
|
u8 krt_proto; /* rtm_protocol of the stored route */
|
||||||
|
u32 krt_metric; /* RTA_PRIORITY of the stored route, 0 if the attribute was absent */
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Synchronous Netlink interface
|
* Synchronous Netlink interface
|
||||||
*/
|
*/
|
||||||
|
@ -66,6 +108,13 @@ struct nl_sock
|
||||||
|
|
||||||
#define NL_RX_SIZE 8192
|
#define NL_RX_SIZE 8192
|
||||||
|
|
||||||
|
/*
 * Operation codes for nl_send_route(). A nonzero code selects RTM_NEWROUTE and
 * is OR'ed into nlmsg_flags of the request; NL_OP_DELETE (zero) selects
 * RTM_DELROUTE.
 */
#define NL_OP_DELETE 0
|
||||||
|
#define NL_OP_ADD (NLM_F_CREATE|NLM_F_EXCL)
|
||||||
|
#define NL_OP_REPLACE (NLM_F_CREATE|NLM_F_REPLACE)
|
||||||
|
#define NL_OP_APPEND (NLM_F_CREATE|NLM_F_APPEND)
|
||||||
|
|
||||||
|
static linpool *nl_linpool; /* Created in krt_sys_io_init(); used as the pool of every nl_parse_state */
|
||||||
|
|
||||||
static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
|
static struct nl_sock nl_scan = {.fd = -1}; /* Netlink socket for synchronous scan */
|
||||||
static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
|
static struct nl_sock nl_req = {.fd = -1}; /* Netlink socket for requests */
|
||||||
|
|
||||||
|
@ -169,7 +218,7 @@ nl_get_reply(struct nl_sock *nl)
|
||||||
static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
|
static struct tbf rl_netlink_err = TBF_DEFAULT_LOG_LIMITS;
|
||||||
|
|
||||||
static int
|
static int
|
||||||
nl_error(struct nlmsghdr *h)
|
nl_error(struct nlmsghdr *h, int ignore_esrch)
|
||||||
{
|
{
|
||||||
struct nlmsgerr *e;
|
struct nlmsgerr *e;
|
||||||
int ec;
|
int ec;
|
||||||
|
@ -181,7 +230,7 @@ nl_error(struct nlmsghdr *h)
|
||||||
}
|
}
|
||||||
e = (struct nlmsgerr *) NLMSG_DATA(h);
|
e = (struct nlmsgerr *) NLMSG_DATA(h);
|
||||||
ec = -e->error;
|
ec = -e->error;
|
||||||
if (ec)
|
if (ec && !(ignore_esrch && (ec == ESRCH)))
|
||||||
log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
|
log_rl(&rl_netlink_err, L_WARN "Netlink: %s", strerror(ec));
|
||||||
return ec;
|
return ec;
|
||||||
}
|
}
|
||||||
|
@ -195,14 +244,14 @@ nl_get_scan(void)
|
||||||
return NULL;
|
return NULL;
|
||||||
if (h->nlmsg_type == NLMSG_ERROR)
|
if (h->nlmsg_type == NLMSG_ERROR)
|
||||||
{
|
{
|
||||||
nl_error(h);
|
nl_error(h, 0);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
nl_exchange(struct nlmsghdr *pkt)
|
nl_exchange(struct nlmsghdr *pkt, int ignore_esrch)
|
||||||
{
|
{
|
||||||
struct nlmsghdr *h;
|
struct nlmsghdr *h;
|
||||||
|
|
||||||
|
@ -214,7 +263,7 @@ nl_exchange(struct nlmsghdr *pkt)
|
||||||
break;
|
break;
|
||||||
log(L_WARN "nl_exchange: Unexpected reply received");
|
log(L_WARN "nl_exchange: Unexpected reply received");
|
||||||
}
|
}
|
||||||
return nl_error(h) ? -1 : 0;
|
return nl_error(h, ignore_esrch) ? -1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -826,7 +875,7 @@ nh_bufsize(struct mpnh *nh)
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
|
nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int op, int dest, ip_addr gw, struct iface *iface)
|
||||||
{
|
{
|
||||||
eattr *ea;
|
eattr *ea;
|
||||||
net *net = e->net;
|
net *net = e->net;
|
||||||
|
@ -837,13 +886,13 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
|
||||||
char buf[128 + KRT_METRICS_MAX*8 + nh_bufsize(a->nexthops)];
|
char buf[128 + KRT_METRICS_MAX*8 + nh_bufsize(a->nexthops)];
|
||||||
} r;
|
} r;
|
||||||
|
|
||||||
DBG("nl_send_route(%I/%d,new=%d)\n", net->n.prefix, net->n.pxlen, new);
|
DBG("nl_send_route(%I/%d,op=%x)\n", net->n.prefix, net->n.pxlen, op);
|
||||||
|
|
||||||
bzero(&r.h, sizeof(r.h));
|
bzero(&r.h, sizeof(r.h));
|
||||||
bzero(&r.r, sizeof(r.r));
|
bzero(&r.r, sizeof(r.r));
|
||||||
r.h.nlmsg_type = new ? RTM_NEWROUTE : RTM_DELROUTE;
|
r.h.nlmsg_type = op ? RTM_NEWROUTE : RTM_DELROUTE;
|
||||||
r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
|
r.h.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
|
||||||
r.h.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | (new ? NLM_F_CREATE|NLM_F_EXCL : 0);
|
r.h.nlmsg_flags = op | NLM_F_REQUEST | NLM_F_ACK;
|
||||||
|
|
||||||
r.r.rtm_family = BIRD_AF;
|
r.r.rtm_family = BIRD_AF;
|
||||||
r.r.rtm_dst_len = net->n.pxlen;
|
r.r.rtm_dst_len = net->n.pxlen;
|
||||||
|
@ -851,15 +900,21 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
|
||||||
r.r.rtm_scope = RT_SCOPE_UNIVERSE;
|
r.r.rtm_scope = RT_SCOPE_UNIVERSE;
|
||||||
nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net->n.prefix);
|
nl_add_attr_ipa(&r.h, sizeof(r), RTA_DST, net->n.prefix);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Strange behavior for RTM_DELROUTE:
|
||||||
|
* 1) rtm_family is ignored in IPv6, works for IPv4
|
||||||
|
* 2) not setting RTA_PRIORITY is different from setting default value (on IPv6)
|
||||||
|
* 3) not setting RTA_PRIORITY is equivalent to setting 0, which is wildcard
|
||||||
|
*/
|
||||||
|
|
||||||
if (krt_table_id(p) < 256)
|
if (krt_table_id(p) < 256)
|
||||||
r.r.rtm_table = krt_table_id(p);
|
r.r.rtm_table = krt_table_id(p);
|
||||||
else
|
else
|
||||||
nl_add_attr_u32(&r.h, sizeof(r), RTA_TABLE, krt_table_id(p));
|
nl_add_attr_u32(&r.h, sizeof(r), RTA_TABLE, krt_table_id(p));
|
||||||
|
|
||||||
/* For route delete, we do not specify route attributes */
|
/* For route delete, we do not specify remaining route attributes */
|
||||||
if (!new)
|
if (op == NL_OP_DELETE)
|
||||||
return nl_exchange(&r.h);
|
goto dest;
|
||||||
|
|
||||||
|
|
||||||
if (ea = ea_find(eattrs, EA_KRT_METRIC))
|
if (ea = ea_find(eattrs, EA_KRT_METRIC))
|
||||||
nl_add_attr_u32(&r.h, sizeof(r), RTA_PRIORITY, ea->u.data);
|
nl_add_attr_u32(&r.h, sizeof(r), RTA_PRIORITY, ea->u.data);
|
||||||
|
@ -886,18 +941,18 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
|
||||||
nl_add_metrics(&r.h, sizeof(r), metrics, KRT_METRICS_MAX);
|
nl_add_metrics(&r.h, sizeof(r), metrics, KRT_METRICS_MAX);
|
||||||
|
|
||||||
|
|
||||||
|
dest:
|
||||||
/* a->iface != NULL checked in krt_capable() for router and device routes */
|
/* a->iface != NULL checked in krt_capable() for router and device routes */
|
||||||
|
switch (dest)
|
||||||
switch (a->dest)
|
|
||||||
{
|
{
|
||||||
case RTD_ROUTER:
|
case RTD_ROUTER:
|
||||||
r.r.rtm_type = RTN_UNICAST;
|
r.r.rtm_type = RTN_UNICAST;
|
||||||
nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index);
|
nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, iface->index);
|
||||||
nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, a->gw);
|
nl_add_attr_ipa(&r.h, sizeof(r), RTA_GATEWAY, gw);
|
||||||
break;
|
break;
|
||||||
case RTD_DEVICE:
|
case RTD_DEVICE:
|
||||||
r.r.rtm_type = RTN_UNICAST;
|
r.r.rtm_type = RTN_UNICAST;
|
||||||
nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, a->iface->index);
|
nl_add_attr_u32(&r.h, sizeof(r), RTA_OIF, iface->index);
|
||||||
break;
|
break;
|
||||||
case RTD_BLACKHOLE:
|
case RTD_BLACKHOLE:
|
||||||
r.r.rtm_type = RTN_BLACKHOLE;
|
r.r.rtm_type = RTN_BLACKHOLE;
|
||||||
|
@ -912,11 +967,50 @@ nl_send_route(struct krt_proto *p, rte *e, struct ea_list *eattrs, int new)
|
||||||
r.r.rtm_type = RTN_UNICAST;
|
r.r.rtm_type = RTN_UNICAST;
|
||||||
nl_add_multipath(&r.h, sizeof(r), a->nexthops);
|
nl_add_multipath(&r.h, sizeof(r), a->nexthops);
|
||||||
break;
|
break;
|
||||||
|
case RTD_NONE:
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
bug("krt_capable inconsistent with nl_send_route");
|
bug("krt_capable inconsistent with nl_send_route");
|
||||||
}
|
}
|
||||||
|
|
||||||
return nl_exchange(&r.h);
|
/* Ignore missing for DELETE */
|
||||||
|
return nl_exchange(&r.h, (op == NL_OP_DELETE));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * nl_add_rte - install route @e into the kernel
 *
 * When krt_ecmp6() is enabled and the route is multipath, the route is sent as
 * a sequence of unipath RTD_ROUTER requests, one per next hop: NL_OP_ADD for
 * the first next hop and NL_OP_APPEND for each following one. Any other route
 * is sent as a single NL_OP_ADD request using its own dest, gw and iface.
 *
 * Returns the result of nl_send_route(); in the multipath case the value is
 * negative if the first request or any of the appends failed.
 */
static inline int
|
||||||
|
nl_add_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
|
||||||
|
{
|
||||||
|
rta *a = e->attrs;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
if (krt_ecmp6(p) && (a->dest == RTD_MULTIPATH))
|
||||||
|
{
|
||||||
|
struct mpnh *nh = a->nexthops;
|
||||||
|
|
||||||
|
/* The first next hop creates the route; give up at once if that fails */
err = nl_send_route(p, e, eattrs, NL_OP_ADD, RTD_ROUTER, nh->gw, nh->iface);
|
||||||
|
if (err < 0)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
/* Remaining next hops are appended; failures are accumulated, not fatal */
for (nh = nh->next; nh; nh = nh->next)
|
||||||
|
err += nl_send_route(p, e, eattrs, NL_OP_APPEND, RTD_ROUTER, nh->gw, nh->iface);
|
||||||
|
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
return nl_send_route(p, e, eattrs, NL_OP_ADD, a->dest, a->gw, a->iface);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * nl_delete_rte - remove route @e from the kernel
 *
 * Sends an NL_OP_DELETE request without any next hop (RTD_NONE). When
 * krt_ecmp6() is enabled, the request is repeated as long as it succeeds, so
 * the loop ends only when a request fails and the returned value is then
 * nonzero. Returns the result of the last nl_send_route() call.
 */
static inline int
|
||||||
|
nl_delete_rte(struct krt_proto *p, rte *e, struct ea_list *eattrs)
|
||||||
|
{
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
/* For IPv6, we just repeatedly request DELETE until we get an error */
|
||||||
|
do
|
||||||
|
err = nl_send_route(p, e, eattrs, NL_OP_DELETE, RTD_NONE, IPA_NONE, NULL);
|
||||||
|
while (krt_ecmp6(p) && !err);
|
||||||
|
|
||||||
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -925,17 +1019,21 @@ krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* NULL for eattr of the old route is a little hack, but we don't
|
* We could use NL_OP_REPLACE, but route replace on Linux has some problems:
|
||||||
* get proper eattrs for old in rt_notify() anyway. NULL means no
|
*
|
||||||
* extended route attributes and therefore matches if the kernel
|
* 1) Does not check for matching rtm_protocol
|
||||||
* route has any of them.
|
* 2) Has broken semantics for IPv6 ECMP
|
||||||
|
* 3) Crashes some kernel versions when used for IPv6 ECMP
|
||||||
|
*
|
||||||
|
* So we use NL_OP_DELETE and then NL_OP_ADD. We also do not trust the old
|
||||||
|
* route value, so we do not try to optimize IPv6 ECMP reconfigurations.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (old)
|
if (old)
|
||||||
nl_send_route(p, old, NULL, 0);
|
nl_delete_rte(p, old, eattrs);
|
||||||
|
|
||||||
if (new)
|
if (new)
|
||||||
err = nl_send_route(p, new, eattrs, 1);
|
err = nl_add_rte(p, new, eattrs);
|
||||||
|
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
n->n.flags |= KRF_SYNC_ERROR;
|
n->n.flags |= KRF_SYNC_ERROR;
|
||||||
|
@ -944,10 +1042,80 @@ krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old, struct ea_list
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * nl_alloc_mpnh - allocate one multipath next hop from the parse state pool
 *
 * The new entry is allocated from @s->pool, filled with @gw, @iface and
 * @weight, and has its next pointer set to NULL (it is not linked anywhere).
 */
static inline struct mpnh *
|
||||||
|
nl_alloc_mpnh(struct nl_parse_state *s, ip_addr gw, struct iface *iface, byte weight)
|
||||||
|
{
|
||||||
|
struct mpnh *nh = lp_alloc(s->pool, sizeof(struct mpnh));
|
||||||
|
|
||||||
|
nh->gw = gw;
|
||||||
|
nh->iface = iface;
|
||||||
|
nh->next = NULL;
|
||||||
|
nh->weight = weight;
|
||||||
|
|
||||||
|
return nh;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * nl_mergable_route - decide whether a newly received route can be merged
 * with the route stored in @s
 *
 * @net, @p, @priority and @krt_type describe the newly received route.
 * Returns 1 when merging is enabled, both routes share network, protocol
 * instance and priority, and both are RTN_UNICAST; returns 0 otherwise.
 */
static int
|
||||||
|
nl_mergable_route(struct nl_parse_state *s, net *net, struct krt_proto *p, uint priority, uint krt_type)
|
||||||
|
{
|
||||||
|
/* Route merging must be active */
|
||||||
|
if (!s->merge)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Saved and new route must have same network, proto/table, and priority */
|
||||||
|
if ((s->net != net) || (s->proto != p) || (s->krt_metric != priority))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Both must be regular unicast routes */
|
||||||
|
if ((s->krt_type != RTN_UNICAST) || (krt_type != RTN_UNICAST))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * nl_announce_route - hand the route stored in @s over to the kernel protocol
 *
 * Builds a temporary rte from the stored attributes and kernel-specific
 * fields, passes it to krt_got_route() (scan) or krt_got_route_async()
 * (notification), then clears the stored route and flushes the pool.
 * The function reads s->attrs, s->net and s->proto without checking them;
 * the callers visible here invoke it only when s->net is set.
 */
static void
|
||||||
|
nl_announce_route(struct nl_parse_state *s)
|
||||||
|
{
|
||||||
|
rte *e = rte_get_temp(s->attrs);
|
||||||
|
e->net = s->net;
|
||||||
|
e->u.krt.src = s->krt_src;
|
||||||
|
e->u.krt.proto = s->krt_proto;
|
||||||
|
e->u.krt.seen = 0;
|
||||||
|
e->u.krt.best = 0;
|
||||||
|
e->u.krt.metric = s->krt_metric;
|
||||||
|
|
||||||
|
if (s->scan)
|
||||||
|
krt_got_route(s->proto, e);
|
||||||
|
else
|
||||||
|
krt_got_route_async(s->proto, e, s->new);
|
||||||
|
|
||||||
|
/* Mark the state as empty (s->net == NULL means no stored route) */
s->net = NULL;
|
||||||
|
s->attrs = NULL;
|
||||||
|
s->proto = NULL;
|
||||||
|
/* Release everything allocated from the pool for the announced route */
lp_flush(s->pool);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * nl_parse_begin - initialize parse state @s
 *
 * Zeroes the whole structure (so no route is stored), attaches the shared
 * nl_linpool and records the @scan and @merge flags.
 */
static inline void
|
||||||
|
nl_parse_begin(struct nl_parse_state *s, int scan, int merge)
|
||||||
|
{
|
||||||
|
memset(s, 0, sizeof (struct nl_parse_state));
|
||||||
|
s->pool = nl_linpool;
|
||||||
|
s->scan = scan;
|
||||||
|
s->merge = merge;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * nl_parse_end - finish parsing; announce the stored route, if there is one
 */
static inline void
|
||||||
|
nl_parse_end(struct nl_parse_state *s)
|
||||||
|
{
|
||||||
|
if (s->net)
|
||||||
|
nl_announce_route(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
|
#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
|
||||||
|
|
||||||
static void
|
static void
|
||||||
nl_parse_route(struct nlmsghdr *h, int scan)
|
nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
|
||||||
{
|
{
|
||||||
struct krt_proto *p;
|
struct krt_proto *p;
|
||||||
struct rtmsg *i;
|
struct rtmsg *i;
|
||||||
|
@ -957,6 +1125,7 @@ nl_parse_route(struct nlmsghdr *h, int scan)
|
||||||
ip_addr dst = IPA_NONE;
|
ip_addr dst = IPA_NONE;
|
||||||
u32 oif = ~0;
|
u32 oif = ~0;
|
||||||
u32 table;
|
u32 table;
|
||||||
|
u32 priority = 0;
|
||||||
int src;
|
int src;
|
||||||
|
|
||||||
if (!(i = nl_checkin(h, sizeof(*i))))
|
if (!(i = nl_checkin(h, sizeof(*i))))
|
||||||
|
@ -999,7 +1168,6 @@ nl_parse_route(struct nlmsghdr *h, int scan)
|
||||||
if (!p)
|
if (!p)
|
||||||
SKIP("unknown table %d\n", table);
|
SKIP("unknown table %d\n", table);
|
||||||
|
|
||||||
|
|
||||||
#ifdef IPV6
|
#ifdef IPV6
|
||||||
if (a[RTA_IIF])
|
if (a[RTA_IIF])
|
||||||
SKIP("IIF set\n");
|
SKIP("IIF set\n");
|
||||||
|
@ -1008,9 +1176,12 @@ nl_parse_route(struct nlmsghdr *h, int scan)
|
||||||
SKIP("TOS %02x\n", i->rtm_tos);
|
SKIP("TOS %02x\n", i->rtm_tos);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (scan && !new)
|
if (s->scan && !new)
|
||||||
SKIP("RTM_DELROUTE in scan\n");
|
SKIP("RTM_DELROUTE in scan\n");
|
||||||
|
|
||||||
|
if (a[RTA_PRIORITY])
|
||||||
|
priority = rta_get_u32(a[RTA_PRIORITY]);
|
||||||
|
|
||||||
int c = ipa_classify_net(dst);
|
int c = ipa_classify_net(dst);
|
||||||
if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
|
if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
|
||||||
SKIP("strange class/scope\n");
|
SKIP("strange class/scope\n");
|
||||||
|
@ -1019,6 +1190,7 @@ nl_parse_route(struct nlmsghdr *h, int scan)
|
||||||
// if (i->rtm_scope != RT_SCOPE_UNIVERSE)
|
// if (i->rtm_scope != RT_SCOPE_UNIVERSE)
|
||||||
// SKIP("scope %u\n", i->rtm_scope);
|
// SKIP("scope %u\n", i->rtm_scope);
|
||||||
|
|
||||||
|
|
||||||
switch (i->rtm_protocol)
|
switch (i->rtm_protocol)
|
||||||
{
|
{
|
||||||
case RTPROT_UNSPEC:
|
case RTPROT_UNSPEC:
|
||||||
|
@ -1033,7 +1205,7 @@ nl_parse_route(struct nlmsghdr *h, int scan)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
case RTPROT_BIRD:
|
case RTPROT_BIRD:
|
||||||
if (!scan)
|
if (!s->scan)
|
||||||
SKIP("echo\n");
|
SKIP("echo\n");
|
||||||
src = KRT_SRC_BIRD;
|
src = KRT_SRC_BIRD;
|
||||||
break;
|
break;
|
||||||
|
@ -1045,12 +1217,14 @@ nl_parse_route(struct nlmsghdr *h, int scan)
|
||||||
|
|
||||||
net *net = net_get(p->p.table, dst, i->rtm_dst_len);
|
net *net = net_get(p->p.table, dst, i->rtm_dst_len);
|
||||||
|
|
||||||
rta ra = {
|
if (s->net && !nl_mergable_route(s, net, p, priority, i->rtm_type))
|
||||||
.src= p->p.main_source,
|
nl_announce_route(s);
|
||||||
.source = RTS_INHERIT,
|
|
||||||
.scope = SCOPE_UNIVERSE,
|
rta *ra = lp_allocz(s->pool, sizeof(rta));
|
||||||
.cast = RTC_UNICAST
|
ra->src = p->p.main_source;
|
||||||
};
|
ra->source = RTS_INHERIT;
|
||||||
|
ra->scope = SCOPE_UNIVERSE;
|
||||||
|
ra->cast = RTC_UNICAST;
|
||||||
|
|
||||||
switch (i->rtm_type)
|
switch (i->rtm_type)
|
||||||
{
|
{
|
||||||
|
@ -1058,9 +1232,9 @@ nl_parse_route(struct nlmsghdr *h, int scan)
|
||||||
|
|
||||||
if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET))
|
if (a[RTA_MULTIPATH] && (i->rtm_family == AF_INET))
|
||||||
{
|
{
|
||||||
ra.dest = RTD_MULTIPATH;
|
ra->dest = RTD_MULTIPATH;
|
||||||
ra.nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);
|
ra->nexthops = nl_parse_multipath(p, a[RTA_MULTIPATH]);
|
||||||
if (!ra.nexthops)
|
if (!ra->nexthops)
|
||||||
{
|
{
|
||||||
log(L_ERR "KRT: Received strange multipath route %I/%d",
|
log(L_ERR "KRT: Received strange multipath route %I/%d",
|
||||||
net->n.prefix, net->n.pxlen);
|
net->n.prefix, net->n.pxlen);
|
||||||
|
@ -1070,8 +1244,8 @@ nl_parse_route(struct nlmsghdr *h, int scan)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
ra.iface = if_find_by_index(oif);
|
ra->iface = if_find_by_index(oif);
|
||||||
if (!ra.iface)
|
if (!ra->iface)
|
||||||
{
|
{
|
||||||
log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u",
|
log(L_ERR "KRT: Received route %I/%d with unknown ifindex %u",
|
||||||
net->n.prefix, net->n.pxlen, oif);
|
net->n.prefix, net->n.pxlen, oif);
|
||||||
|
@ -1081,39 +1255,39 @@ nl_parse_route(struct nlmsghdr *h, int scan)
|
||||||
if (a[RTA_GATEWAY])
|
if (a[RTA_GATEWAY])
|
||||||
{
|
{
|
||||||
neighbor *ng;
|
neighbor *ng;
|
||||||
ra.dest = RTD_ROUTER;
|
ra->dest = RTD_ROUTER;
|
||||||
memcpy(&ra.gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra.gw));
|
memcpy(&ra->gw, RTA_DATA(a[RTA_GATEWAY]), sizeof(ra->gw));
|
||||||
ipa_ntoh(ra.gw);
|
ipa_ntoh(ra->gw);
|
||||||
|
|
||||||
#ifdef IPV6
|
#ifdef IPV6
|
||||||
/* Silently skip strange 6to4 routes */
|
/* Silently skip strange 6to4 routes */
|
||||||
if (ipa_in_net(ra.gw, IPA_NONE, 96))
|
if (ipa_in_net(ra->gw, IPA_NONE, 96))
|
||||||
return;
|
return;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ng = neigh_find2(&p->p, &ra.gw, ra.iface,
|
ng = neigh_find2(&p->p, &ra->gw, ra->iface,
|
||||||
(i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
|
(i->rtm_flags & RTNH_F_ONLINK) ? NEF_ONLINK : 0);
|
||||||
if (!ng || (ng->scope == SCOPE_HOST))
|
if (!ng || (ng->scope == SCOPE_HOST))
|
||||||
{
|
{
|
||||||
log(L_ERR "KRT: Received route %I/%d with strange next-hop %I",
|
log(L_ERR "KRT: Received route %I/%d with strange next-hop %I",
|
||||||
net->n.prefix, net->n.pxlen, ra.gw);
|
net->n.prefix, net->n.pxlen, ra->gw);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ra.dest = RTD_DEVICE;
|
ra->dest = RTD_DEVICE;
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
case RTN_BLACKHOLE:
|
case RTN_BLACKHOLE:
|
||||||
ra.dest = RTD_BLACKHOLE;
|
ra->dest = RTD_BLACKHOLE;
|
||||||
break;
|
break;
|
||||||
case RTN_UNREACHABLE:
|
case RTN_UNREACHABLE:
|
||||||
ra.dest = RTD_UNREACHABLE;
|
ra->dest = RTD_UNREACHABLE;
|
||||||
break;
|
break;
|
||||||
case RTN_PROHIBIT:
|
case RTN_PROHIBIT:
|
||||||
ra.dest = RTD_PROHIBIT;
|
ra->dest = RTD_PROHIBIT;
|
||||||
break;
|
break;
|
||||||
/* FIXME: What about RTN_THROW? */
|
/* FIXME: What about RTN_THROW? */
|
||||||
default:
|
default:
|
||||||
|
@ -1121,41 +1295,30 @@ nl_parse_route(struct nlmsghdr *h, int scan)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
rte *e = rte_get_temp(&ra);
|
|
||||||
e->net = net;
|
|
||||||
e->u.krt.src = src;
|
|
||||||
e->u.krt.proto = i->rtm_protocol;
|
|
||||||
e->u.krt.seen = 0;
|
|
||||||
e->u.krt.best = 0;
|
|
||||||
e->u.krt.metric = 0;
|
|
||||||
|
|
||||||
if (a[RTA_PRIORITY])
|
|
||||||
e->u.krt.metric = rta_get_u32(a[RTA_PRIORITY]);
|
|
||||||
|
|
||||||
if (a[RTA_PREFSRC])
|
if (a[RTA_PREFSRC])
|
||||||
{
|
{
|
||||||
ip_addr ps;
|
ip_addr ps;
|
||||||
memcpy(&ps, RTA_DATA(a[RTA_PREFSRC]), sizeof(ps));
|
memcpy(&ps, RTA_DATA(a[RTA_PREFSRC]), sizeof(ps));
|
||||||
ipa_ntoh(ps);
|
ipa_ntoh(ps);
|
||||||
|
|
||||||
ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));
|
ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
|
||||||
ea->next = ra.eattrs;
|
ea->next = ra->eattrs;
|
||||||
ra.eattrs = ea;
|
ra->eattrs = ea;
|
||||||
ea->flags = EALF_SORTED;
|
ea->flags = EALF_SORTED;
|
||||||
ea->count = 1;
|
ea->count = 1;
|
||||||
ea->attrs[0].id = EA_KRT_PREFSRC;
|
ea->attrs[0].id = EA_KRT_PREFSRC;
|
||||||
ea->attrs[0].flags = 0;
|
ea->attrs[0].flags = 0;
|
||||||
ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
|
ea->attrs[0].type = EAF_TYPE_IP_ADDRESS;
|
||||||
ea->attrs[0].u.ptr = alloca(sizeof(struct adata) + sizeof(ps));
|
ea->attrs[0].u.ptr = lp_alloc(s->pool, sizeof(struct adata) + sizeof(ps));
|
||||||
ea->attrs[0].u.ptr->length = sizeof(ps);
|
ea->attrs[0].u.ptr->length = sizeof(ps);
|
||||||
memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
|
memcpy(ea->attrs[0].u.ptr->data, &ps, sizeof(ps));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (a[RTA_FLOW])
|
if (a[RTA_FLOW])
|
||||||
{
|
{
|
||||||
ea_list *ea = alloca(sizeof(ea_list) + sizeof(eattr));
|
ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + sizeof(eattr));
|
||||||
ea->next = ra.eattrs;
|
ea->next = ra->eattrs;
|
||||||
ra.eattrs = ea;
|
ra->eattrs = ea;
|
||||||
ea->flags = EALF_SORTED;
|
ea->flags = EALF_SORTED;
|
||||||
ea->count = 1;
|
ea->count = 1;
|
||||||
ea->attrs[0].id = EA_KRT_REALM;
|
ea->attrs[0].id = EA_KRT_REALM;
|
||||||
|
@ -1167,7 +1330,7 @@ nl_parse_route(struct nlmsghdr *h, int scan)
|
||||||
if (a[RTA_METRICS])
|
if (a[RTA_METRICS])
|
||||||
{
|
{
|
||||||
u32 metrics[KRT_METRICS_MAX];
|
u32 metrics[KRT_METRICS_MAX];
|
||||||
ea_list *ea = alloca(sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
|
ea_list *ea = lp_alloc(s->pool, sizeof(ea_list) + KRT_METRICS_MAX * sizeof(eattr));
|
||||||
int t, n = 0;
|
int t, n = 0;
|
||||||
|
|
||||||
if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
|
if (nl_parse_metrics(a[RTA_METRICS], metrics, ARRAY_SIZE(metrics)) < 0)
|
||||||
|
@ -1189,30 +1352,62 @@ nl_parse_route(struct nlmsghdr *h, int scan)
|
||||||
|
|
||||||
if (n > 0)
|
if (n > 0)
|
||||||
{
|
{
|
||||||
ea->next = ra.eattrs;
|
ea->next = ra->eattrs;
|
||||||
ea->flags = EALF_SORTED;
|
ea->flags = EALF_SORTED;
|
||||||
ea->count = n;
|
ea->count = n;
|
||||||
ra.eattrs = ea;
|
ra->eattrs = ea;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (scan)
|
/*
|
||||||
krt_got_route(p, e);
|
* Ideally, now we would send the received route to the rest of kernel code.
|
||||||
|
* But IPv6 ECMP routes are sent as a sequence of routes, so we postpone it
|
||||||
|
* and merge next hops until the end of the sequence.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (!s->net)
|
||||||
|
{
|
||||||
|
/* Store the new route */
|
||||||
|
s->net = net;
|
||||||
|
s->attrs = ra;
|
||||||
|
s->proto = p;
|
||||||
|
s->new = new;
|
||||||
|
s->krt_src = src;
|
||||||
|
s->krt_type = i->rtm_type;
|
||||||
|
s->krt_proto = i->rtm_protocol;
|
||||||
|
s->krt_metric = priority;
|
||||||
|
}
|
||||||
else
|
else
|
||||||
krt_got_route_async(p, e, new);
|
{
|
||||||
|
/* Merge next hops with the stored route */
|
||||||
|
rta *a = s->attrs;
|
||||||
|
|
||||||
|
if (a->dest != RTD_MULTIPATH)
|
||||||
|
{
|
||||||
|
a->dest = RTD_MULTIPATH;
|
||||||
|
a->nexthops = nl_alloc_mpnh(s, a->gw, a->iface, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
mpnh_insert(&a->nexthops, nl_alloc_mpnh(s, ra->gw, ra->iface, 0));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
/*
 * krt_do_scan - scan kernel routes
 *
 * Issues an RTM_GETROUTE dump request for BIRD_AF and feeds every received
 * RTM_NEWROUTE / RTM_DELROUTE message to nl_parse_route() using one parse
 * state in scan mode, with merging controlled by krt_ecmp6(). The final
 * nl_parse_end() announces the last postponed route. Other message types are
 * only logged.
 */
void
|
||||||
krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
|
krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
|
||||||
{
|
{
|
||||||
struct nlmsghdr *h;
|
struct nlmsghdr *h;
|
||||||
|
struct nl_parse_state s;
|
||||||
|
|
||||||
|
nl_parse_begin(&s, 1, krt_ecmp6(p));
|
||||||
|
|
||||||
nl_request_dump(BIRD_AF, RTM_GETROUTE);
|
nl_request_dump(BIRD_AF, RTM_GETROUTE);
|
||||||
while (h = nl_get_scan())
|
while (h = nl_get_scan())
|
||||||
if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
|
if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
|
||||||
nl_parse_route(h, 1);
|
nl_parse_route(&s, h);
|
||||||
else
|
else
|
||||||
log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
|
log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
|
||||||
|
|
||||||
|
nl_parse_end(&s);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1225,12 +1420,16 @@ static byte *nl_async_rx_buffer; /* Receive buffer */
|
||||||
static void
|
static void
|
||||||
nl_async_msg(struct nlmsghdr *h)
|
nl_async_msg(struct nlmsghdr *h)
|
||||||
{
|
{
|
||||||
|
struct nl_parse_state s;
|
||||||
|
|
||||||
switch (h->nlmsg_type)
|
switch (h->nlmsg_type)
|
||||||
{
|
{
|
||||||
case RTM_NEWROUTE:
|
case RTM_NEWROUTE:
|
||||||
case RTM_DELROUTE:
|
case RTM_DELROUTE:
|
||||||
DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
|
DBG("KRT: Received async route notification (%d)\n", h->nlmsg_type);
|
||||||
nl_parse_route(h, 0);
|
nl_parse_begin(&s, 0, 0);
|
||||||
|
nl_parse_route(&s, h);
|
||||||
|
nl_parse_end(&s);
|
||||||
break;
|
break;
|
||||||
case RTM_NEWLINK:
|
case RTM_NEWLINK:
|
||||||
case RTM_DELLINK:
|
case RTM_DELLINK:
|
||||||
|
@ -1353,6 +1552,7 @@ nl_open_async(void)
|
||||||
void
|
/*
 * krt_sys_io_init - one-time initialization of the Netlink backend state
 *
 * Creates nl_linpool in krt_pool and initializes the nl_table_map hash.
 */
void
|
||||||
krt_sys_io_init(void)
|
krt_sys_io_init(void)
|
||||||
{
|
{
|
||||||
|
nl_linpool = lp_new(krt_pool, 4080); /* NOTE(review): 4080 is presumably the linpool block size - confirm against lp_new() */
|
||||||
HASH_INIT(nl_table_map, krt_pool, 6);
|
HASH_INIT(nl_table_map, krt_pool, 6);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue