BGP: Support for MPLS labels and VPN SAFI

Basic support for SAFI 4 and 128 (MPLS labeled IP and VPN) for IPv4 and
IPv6. Should work for route reflector, but does not properly handle
originating routes with next hop self.

Based on patches from Jan Matejka.
This commit is contained in:
Ondrej Zajicek (work) 2017-03-22 15:00:07 +01:00
parent ead7b8f498
commit 1e37e35c3e
9 changed files with 639 additions and 52 deletions

View file

@ -35,6 +35,8 @@
#define NB_MPLS (1 << NET_MPLS) #define NB_MPLS (1 << NET_MPLS)
#define NB_IP (NB_IP4 | NB_IP6) #define NB_IP (NB_IP4 | NB_IP6)
#define NB_VPN (NB_VPN4 | NB_VPN6)
#define NB_FLOW (NB_FLOW4 | NB_FLOW6)
#define NB_ANY 0xffffffff #define NB_ANY 0xffffffff
@ -481,6 +483,12 @@ static inline void net_normalize_ip4(net_addr_ip4 *n)
static inline void net_normalize_ip6(net_addr_ip6 *n) static inline void net_normalize_ip6(net_addr_ip6 *n)
{ n->prefix = ip6_and(n->prefix, ip6_mkmask(n->pxlen)); } { n->prefix = ip6_and(n->prefix, ip6_mkmask(n->pxlen)); }
/*
 * Normalize a VPNv4 network by handing the same object to the plain IPv4
 * normalizer.
 * NOTE(review): relies on net_addr_vpn4 sharing its leading layout with
 * net_addr_ip4 - confirm against the struct definitions.
 */
static inline void
net_normalize_vpn4(net_addr_vpn4 *n)
{
  net_addr_ip4 *as_ip4 = (net_addr_ip4 *) n;

  net_normalize_ip4(as_ip4);
}
/*
 * Normalize a VPNv6 network by handing the same object to the plain IPv6
 * normalizer.
 * NOTE(review): relies on net_addr_vpn6 sharing its leading layout with
 * net_addr_ip6 - confirm against the struct definitions.
 */
static inline void
net_normalize_vpn6(net_addr_vpn6 *n)
{
  net_addr_ip6 *as_ip6 = (net_addr_ip6 *) n;

  net_normalize_ip6(as_ip6);
}
void net_normalize(net_addr *N); void net_normalize(net_addr *N);

View file

@ -28,6 +28,13 @@ get_u16(const void *p)
return ntohs(x); return ntohs(x);
} }
/*
 * Read the three bytes at P as one unsigned 24-bit value, most significant
 * byte first.
 */
static inline u32
get_u24(const void *P)
{
  const byte *src = P;

  /* The three shifted bytes occupy disjoint bit ranges, so OR equals the sum */
  return (src[0] << 16) | (src[1] << 8) | src[2];
}
static inline u32 static inline u32
get_u32(const void *p) get_u32(const void *p)
{ {
@ -52,6 +59,13 @@ put_u16(void *p, u16 x)
memcpy(p, &x, 2); memcpy(p, &x, 2);
} }
/*
 * Store the low 24 bits of x at p, most significant byte first.
 * Bits above the low 24 are dropped.
 */
static inline void
put_u24(void *p, u32 x)
{
  u32 be = htonl(x);
  const char *raw = (const char *) &be;

  /* Skip the leading (most significant) byte of the big-endian word */
  memcpy(p, raw + 1, 3);
}
static inline void static inline void
put_u32(void *p, u32 x) put_u32(void *p, u32 x)
{ {

View file

@ -551,7 +551,15 @@ static inline rta * rta_cow(rta *r, linpool *lp) { return rta_is_cached(r) ? rta
void rta_dump(rta *); void rta_dump(rta *);
void rta_dump_all(void); void rta_dump_all(void);
void rta_show(struct cli *, rta *, ea_list *); void rta_show(struct cli *, rta *, ea_list *);
void rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls);
struct hostentry * rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep);
void rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls);
/*
 * Convenience wrapper: look up (or create) the hostentry for gateway gw /
 * link-local ll in table tab on behalf of dependent table dep, then apply it
 * to the route attributes a together with the optional label stack mls.
 */
static inline void
rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls)
{
  struct hostentry *he = rt_get_hostentry(tab, gw, ll, dep);

  rta_apply_hostentry(a, he, mls);
}
/* /*
* rta_set_recursive_next_hop() acquires hostentry from hostcache and fills * rta_set_recursive_next_hop() acquires hostentry from hostcache and fills

View file

@ -1766,7 +1766,7 @@ rta_next_hop_outdated(rta *a)
(!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh)); (!he->nexthop_linkable) || !nexthop_same(&(a->nh), &(he->src->nh));
} }
static inline void void
rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls) rta_apply_hostentry(rta *a, struct hostentry *he, mpls_label_stack *mls)
{ {
a->hostentry = he; a->hostentry = he;
@ -1794,7 +1794,7 @@ no_nexthop:
struct nexthop *nhp = NULL, *nhr = NULL; struct nexthop *nhp = NULL, *nhr = NULL;
int skip_nexthop = 0; int skip_nexthop = 0;
for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next) for (struct nexthop *nh = &(he->src->nh); nh; nh = nh->next)
{ {
if (skip_nexthop) if (skip_nexthop)
@ -2475,7 +2475,7 @@ rt_update_hostcache(rtable *tab)
tab->hcu_scheduled = 0; tab->hcu_scheduled = 0;
} }
static struct hostentry * struct hostentry *
rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep) rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
{ {
struct hostentry *he; struct hostentry *he;
@ -2489,17 +2489,11 @@ rt_get_hostentry(rtable *tab, ip_addr a, ip_addr ll, rtable *dep)
if (ipa_equal(he->addr, a) && (he->tab == dep)) if (ipa_equal(he->addr, a) && (he->tab == dep))
return he; return he;
he = hc_new_hostentry(hc, a, ll, dep, k); he = hc_new_hostentry(hc, a, ipa_zero(ll) ? a : ll, dep, k);
rt_update_hostentry(tab, he); rt_update_hostentry(tab, he);
return he; return he;
} }
void
rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr gw, ip_addr ll, mpls_label_stack *mls)
{
rta_apply_hostentry(a, rt_get_hostentry(tab, gw, ipa_zero(ll) ? gw : ll, dep), mls);
}
/* /*
* CLI commands * CLI commands

View file

@ -629,6 +629,75 @@ bgp_decode_large_community(struct bgp_parse_state *s, uint code UNUSED, uint fla
bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad); bgp_set_attr_ptr(to, s->pool, BA_LARGE_COMMUNITY, flags, ad);
} }
/*
 * Export-time validation of the internal MPLS label stack attribute.
 *
 * The attribute payload is read as an array of u32 labels. The route is
 * withdrawn (via WITHDRAW) when the channel is not an MPLS one, when the
 * stack is empty, when labels + optional route distinguisher + prefix would
 * not fit in the 8-bit NLRI length, or when a label exceeds 20 bits.
 */
static void
bgp_export_mpls_label_stack(struct bgp_export_state *s, eattr *a)
{
  net_addr *n = s->route->net->n.addr;
  u32 *stack = (u32 *) a->u.ptr->data;
  uint count = a->u.ptr->length / 4;

  /* Perhaps we should just ignore it? */
  if (!s->mpls)
    WITHDRAW("Unexpected MPLS stack");

  /* Empty MPLS stack is not allowed */
  if (count == 0)
    WITHDRAW("Malformed MPLS stack - empty");

  /* This is ugly, but we must ensure that labels fit into NLRI field */
  uint rd_bits = net_is_vpn(n) ? 64 : 0;
  uint nlri_bits = 24*count + rd_bits + net_pxlen(n);
  if (nlri_bits > 255)
    WITHDRAW("Malformed MPLS stack - too many labels (%u)", count);

  for (u32 *lp = stack; lp < stack + count; lp++)
  {
    if (*lp > 0xfffff)
      WITHDRAW("Malformed MPLS stack - invalid label (%u)", *lp);

    /* TODO: Check for special-purpose label values? */
  }
}
/*
 * Encode hook for the MPLS label stack attribute.
 *
 * Nothing is written to the attribute buffer: labels travel inside the NLRI
 * of MP_REACH_NLRI, so the payload is only remembered in the write state and
 * emitted later by the AFI-specific NLRI encoders. Always returns 0.
 */
static int
bgp_encode_mpls_label_stack(struct bgp_write_state *s, eattr *a, byte *buf UNUSED, uint size UNUSED)
{
  adata *stack = a->u.ptr;

  s->mpls_labels = stack;
  return 0;
}
/*
 * Decode hook for the MPLS label stack attribute. The received payload is
 * never inspected (all data arguments are UNUSED); the hook only invokes
 * DISCARD with a fixed message.
 * NOTE(review): the message says "#0" although BA_MPLS_LABEL_STACK is 0xfe -
 * confirm whether that is intended.
 */
static void
bgp_decode_mpls_label_stack(struct bgp_parse_state *s, uint code UNUSED, uint flags UNUSED, byte *data UNUSED, uint len UNUSED, ea_list **to UNUSED)
{
DISCARD("Discarding received attribute #0");
}
/*
 * Format the MPLS label stack attribute as "l1/l2/.../ln" into buf.
 *
 * @a:    attribute whose payload is read as an array of u32 labels
 * @buf:  output buffer
 * @size: space available in @buf
 *
 * When fewer than 20 bytes remain before a label is written, "..." is
 * emitted in its place and formatting stops. An empty stack yields an empty
 * string.
 */
static void
bgp_format_mpls_label_stack(eattr *a, byte *buf, uint size)
{
  u32 *labels = (u32 *) a->u.ptr->data;
  uint lnum = a->u.ptr->length / 4;
  char *pos = (char *) buf;

  for (uint i = 0; i < lnum; i++)
  {
    if (size < 20)
    {
      bsprintf(pos, "...");
      return;
    }

    /* Labels are unsigned, so print them with %u (same as the export hook) */
    uint l = bsprintf(pos, "%u/", labels[i]);
    ADVANCE(pos, size, l);
  }

  /* Clear last slash or terminate empty string */
  pos[lnum ? -1 : 0] = 0;
}
static inline void static inline void
bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to) bgp_decode_unknown(struct bgp_parse_state *s, uint code, uint flags, byte *data, uint len, ea_list **to)
{ {
@ -763,6 +832,14 @@ static const struct bgp_attr_desc bgp_attr_table[] = {
.encode = bgp_encode_u32s, .encode = bgp_encode_u32s,
.decode = bgp_decode_large_community, .decode = bgp_decode_large_community,
}, },
[BA_MPLS_LABEL_STACK] = {
.name = "mpls_label_stack",
.type = EAF_TYPE_INT_SET,
.export = bgp_export_mpls_label_stack,
.encode = bgp_encode_mpls_label_stack,
.decode = bgp_decode_mpls_label_stack,
.format = bgp_format_mpls_label_stack,
},
}; };
static inline int static inline int
@ -849,7 +926,6 @@ bgp_export_attrs(struct bgp_export_state *s, ea_list *attrs)
return NULL; return NULL;
return new; return new;
} }
@ -1340,7 +1416,7 @@ bgp_update_attrs(struct bgp_proto *p, struct bgp_channel *c, rte *e, ea_list *at
{ {
struct proto *SRC = e->attrs->src->proto; struct proto *SRC = e->attrs->src->proto;
struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL; struct bgp_proto *src = (SRC->proto == &proto_bgp) ? (void *) SRC : NULL;
struct bgp_export_state s = { .proto = p, .channel =c, .pool = pool, .src = src, .route = e }; struct bgp_export_state s = { .proto = p, .channel = c, .pool = pool, .src = src, .route = e, .mpls = c->desc->mpls };
ea_list *attrs = attrs0; ea_list *attrs = attrs0;
eattr *a; eattr *a;
adata *ad; adata *ad;
@ -1453,13 +1529,13 @@ bgp_rt_notify(struct proto *P, struct channel *C, net *n, rte *new, rte *old, ea
if (new) if (new)
{ {
attrs = bgp_update_attrs(p, c, new, attrs, bgp_linpool); attrs = bgp_update_attrs(p, c, new, attrs, bgp_linpool2);
/* If attributes are invalid, we fail back to withdraw */ /* If attributes are invalid, we fail back to withdraw */
buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c); buck = attrs ? bgp_get_bucket(c, attrs) : bgp_get_withdraw_bucket(c);
path = new->attrs->src->global_id; path = new->attrs->src->global_id;
lp_flush(bgp_linpool); lp_flush(bgp_linpool2);
} }
else else
{ {

View file

@ -86,6 +86,7 @@
struct linpool *bgp_linpool; /* Global temporary pool */ struct linpool *bgp_linpool; /* Global temporary pool */
struct linpool *bgp_linpool2; /* Global temporary pool for bgp_rt_notify() */
static list bgp_sockets; /* Global list of listening sockets */ static list bgp_sockets; /* Global list of listening sockets */
@ -151,7 +152,10 @@ bgp_open(struct bgp_proto *p)
add_tail(&bgp_sockets, &bs->n); add_tail(&bgp_sockets, &bs->n);
if (!bgp_linpool) if (!bgp_linpool)
bgp_linpool = lp_new(proto_pool, 4080); {
bgp_linpool = lp_new(proto_pool, 4080);
bgp_linpool2 = lp_new(proto_pool, 4080);
}
return 0; return 0;
@ -187,6 +191,9 @@ bgp_close(struct bgp_proto *p)
rfree(bgp_linpool); rfree(bgp_linpool);
bgp_linpool = NULL; bgp_linpool = NULL;
rfree(bgp_linpool2);
bgp_linpool2 = NULL;
} }
static inline int static inline int
@ -1970,7 +1977,7 @@ struct protocol proto_bgp = {
.template = "bgp%d", .template = "bgp%d",
.attr_class = EAP_BGP, .attr_class = EAP_BGP,
.preference = DEF_PREF_BGP, .preference = DEF_PREF_BGP,
.channel_mask = NB_IP | NB_FLOW4 | NB_FLOW6, .channel_mask = NB_IP | NB_VPN | NB_FLOW,
.proto_size = sizeof(struct bgp_proto), .proto_size = sizeof(struct bgp_proto),
.config_size = sizeof(struct bgp_config), .config_size = sizeof(struct bgp_config),
.postconfig = bgp_postconfig, .postconfig = bgp_postconfig,

View file

@ -31,6 +31,8 @@ struct eattr;
#define BGP_SAFI_UNICAST 1 #define BGP_SAFI_UNICAST 1
#define BGP_SAFI_MULTICAST 2 #define BGP_SAFI_MULTICAST 2
#define BGP_SAFI_MPLS 4
#define BGP_SAFI_MPLS_VPN 128
#define BGP_SAFI_FLOW 133 #define BGP_SAFI_FLOW 133
/* Internal AF codes */ /* Internal AF codes */
@ -43,6 +45,10 @@ struct eattr;
#define BGP_AF_IPV6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_UNICAST ) #define BGP_AF_IPV6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_UNICAST )
#define BGP_AF_IPV4_MC BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MULTICAST ) #define BGP_AF_IPV4_MC BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MULTICAST )
#define BGP_AF_IPV6_MC BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MULTICAST ) #define BGP_AF_IPV6_MC BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MULTICAST )
#define BGP_AF_IPV4_MPLS BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MPLS )
#define BGP_AF_IPV6_MPLS BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MPLS )
#define BGP_AF_VPN4_MPLS BGP_AF( BGP_AFI_IPV4, BGP_SAFI_MPLS_VPN )
#define BGP_AF_VPN6_MPLS BGP_AF( BGP_AFI_IPV6, BGP_SAFI_MPLS_VPN )
#define BGP_AF_FLOW4 BGP_AF( BGP_AFI_IPV4, BGP_SAFI_FLOW ) #define BGP_AF_FLOW4 BGP_AF( BGP_AFI_IPV4, BGP_SAFI_FLOW )
#define BGP_AF_FLOW6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_FLOW ) #define BGP_AF_FLOW6 BGP_AF( BGP_AFI_IPV6, BGP_SAFI_FLOW )
@ -55,6 +61,7 @@ struct bgp_bucket;
struct bgp_af_desc { struct bgp_af_desc {
u32 afi; u32 afi;
u32 net; u32 net;
int mpls;
const char *name; const char *name;
uint (*encode_nlri)(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size); uint (*encode_nlri)(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size);
void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, rta *a); void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, rta *a);
@ -308,6 +315,7 @@ struct bgp_export_state {
struct bgp_proto *src; struct bgp_proto *src;
rte *route; rte *route;
int mpls;
u32 attrs_seen[1]; u32 attrs_seen[1];
uint err_withdraw; uint err_withdraw;
@ -320,8 +328,10 @@ struct bgp_write_state {
int as4_session; int as4_session;
int add_path; int add_path;
int mpls;
eattr *mp_next_hop; eattr *mp_next_hop;
adata *mpls_labels;
}; };
struct bgp_parse_state { struct bgp_parse_state {
@ -331,14 +341,13 @@ struct bgp_parse_state {
int as4_session; int as4_session;
int add_path; int add_path;
int mpls;
u32 attrs_seen[256/32]; u32 attrs_seen[256/32];
u32 mp_reach_af; u32 mp_reach_af;
u32 mp_unreach_af; u32 mp_unreach_af;
mpls_label_stack mls;
uint attr_len; uint attr_len;
uint ip_reach_len; uint ip_reach_len;
uint ip_unreach_len; uint ip_unreach_len;
@ -359,6 +368,9 @@ struct bgp_parse_state {
uint err_subcode; uint err_subcode;
jmp_buf err_jmpbuf; jmp_buf err_jmpbuf;
struct hostentry *hostentry;
adata *mpls_labels;
/* Cached state for bgp_rte_update() */ /* Cached state for bgp_rte_update() */
u32 last_id; u32 last_id;
struct rte_src *last_src; struct rte_src *last_src;
@ -392,6 +404,7 @@ bgp_parse_error(struct bgp_parse_state *s, uint subcode)
} }
extern struct linpool *bgp_linpool; extern struct linpool *bgp_linpool;
extern struct linpool *bgp_linpool2;
void bgp_start_timer(struct timer *t, int value); void bgp_start_timer(struct timer *t, int value);
@ -528,6 +541,9 @@ void bgp_update_next_hop(struct bgp_export_state *s, eattr *a, ea_list **to);
#define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */ #define BA_AS4_AGGREGATOR 0x12 /* RFC 6793 */
#define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */ #define BA_LARGE_COMMUNITY 0x20 /* RFC 8092 */
/* Bird's private internal BGP attributes */
#define BA_MPLS_LABEL_STACK 0xfe /* MPLS label stack transfer attribute */
/* BGP connection states */ /* BGP connection states */
#define BS_IDLE 0 #define BS_IDLE 0

View file

@ -139,6 +139,10 @@ bgp_afi:
| IPV6 { $$ = BGP_AF_IPV6; } | IPV6 { $$ = BGP_AF_IPV6; }
| IPV4 MULTICAST { $$ = BGP_AF_IPV4_MC; } | IPV4 MULTICAST { $$ = BGP_AF_IPV4_MC; }
| IPV6 MULTICAST { $$ = BGP_AF_IPV6_MC; } | IPV6 MULTICAST { $$ = BGP_AF_IPV6_MC; }
| IPV4 MPLS { $$ = BGP_AF_IPV4_MPLS; }
| IPV6 MPLS { $$ = BGP_AF_IPV6_MPLS; }
| VPN4 MPLS { $$ = BGP_AF_VPN4_MPLS; }
| VPN6 MPLS { $$ = BGP_AF_VPN6_MPLS; }
| FLOW4 { $$ = BGP_AF_FLOW4; } | FLOW4 { $$ = BGP_AF_FLOW4; }
| FLOW6 { $$ = BGP_AF_FLOW6; } | FLOW6 { $$ = BGP_AF_FLOW6; }
; ;

View file

@ -32,6 +32,13 @@
#define BGP_RR_BEGIN 1 #define BGP_RR_BEGIN 1
#define BGP_RR_END 2 #define BGP_RR_END 2
#define BGP_NLRI_MAX (4 + 1 + 32)
#define BGP_MPLS_BOS 1 /* Bottom-of-stack bit */
#define BGP_MPLS_MAX 10 /* Max number of labels that 24*n <= 255 */
#define BGP_MPLS_NULL 3 /* Implicit NULL label */
#define BGP_MPLS_MAGIC 0x800000 /* Magic withdraw label value, RFC 3107 3 */
static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS; static struct tbf rl_rcv_update = TBF_DEFAULT_LOG_LIMITS;
static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS; static struct tbf rl_snd_update = TBF_DEFAULT_LOG_LIMITS;
@ -282,8 +289,8 @@ bgp_write_capabilities(struct bgp_conn *conn, byte *buf)
/* Create capability list in buffer */ /* Create capability list in buffer */
/* /*
* Note that max length is ~ 20+14*af_count. With max 6 channels that is * Note that max length is ~ 20+14*af_count. With max 10 channels that is
* 104. Option limit is 253 and buffer size is 4096, so we cannot overflow * 160. Option limit is 253 and buffer size is 4096, so we cannot overflow
* unless we add new capabilities or more AFs. * unless we add new capabilities or more AFs.
*/ */
@ -722,6 +729,7 @@ bgp_rx_open(struct bgp_conn *conn, byte *pkt, uint len)
#define BAD_AFI "Unexpected AF <%u/%u> in UPDATE" #define BAD_AFI "Unexpected AF <%u/%u> in UPDATE"
#define BAD_NEXT_HOP "Invalid NEXT_HOP attribute" #define BAD_NEXT_HOP "Invalid NEXT_HOP attribute"
#define NO_NEXT_HOP "Missing NEXT_HOP attribute" #define NO_NEXT_HOP "Missing NEXT_HOP attribute"
#define NO_LABEL_STACK "Missing MPLS stack"
static void static void
@ -744,19 +752,56 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
WITHDRAW(BAD_NEXT_HOP); WITHDRAW(BAD_NEXT_HOP);
a->dest = RTD_UNICAST; a->dest = RTD_UNICAST;
a->nh = (struct nexthop){ .gw = nbr->addr, .iface = nbr->iface }; a->nh.gw = nbr->addr;
a->hostentry = NULL; a->nh.iface = nbr->iface;
a->igp_metric = 0;
} }
else /* GW_RECURSIVE */ else /* GW_RECURSIVE */
{ {
if (ipa_zero(gw)) if (ipa_zero(gw))
WITHDRAW(BAD_NEXT_HOP); WITHDRAW(BAD_NEXT_HOP);
rta_set_recursive_next_hop(c->c.table, a, c->igp_table, gw, ll, &(s->mls)); s->hostentry = rt_get_hostentry(c->igp_table, gw, ll, c->c.table);
if (!s->mpls)
rta_apply_hostentry(a, s->hostentry, NULL);
/* With MPLS, hostentry is applied later in bgp_apply_mpls_labels() */
} }
} }
/*
 * Attach a received MPLS label stack to the route attributes.
 *
 * @s:      parse state (supplies the channel config and the saved hostentry)
 * @a:      route attributes to update
 * @labels: decoded label values
 * @lnum:   number of entries in @labels
 *
 * A stack longer than MPLS_MAX_LABEL_STACK is reported and the route is made
 * unreachable. A single implicit-NULL label is treated as an empty stack.
 * In direct gateway mode the labels are copied into the next hop; otherwise
 * they are applied together with the hostentry saved in s->hostentry.
 */
static void
bgp_apply_mpls_labels(struct bgp_parse_state *s, rta *a, u32 *labels, uint lnum)
{
  if (lnum > MPLS_MAX_LABEL_STACK)
  {
    /* Fixed format specifier: was "($u)", which printed a literal "$u" */
    REPORT("Too many MPLS labels (%u)", lnum);

    a->dest = RTD_UNREACHABLE;
    a->hostentry = NULL;
    a->nh = (struct nexthop) { };
    return;
  }

  /* Handle implicit NULL as empty MPLS stack */
  if ((lnum == 1) && (labels[0] == BGP_MPLS_NULL))
    lnum = 0;

  if (s->channel->cf->gw_mode == GW_DIRECT)
  {
    a->nh.labels = lnum;
    memcpy(a->nh.label, labels, 4*lnum);
  }
  else /* GW_RECURSIVE */
  {
    mpls_label_stack ms;

    ms.len = lnum;
    memcpy(ms.stack, labels, 4*lnum);
    rta_apply_hostentry(a, s->hostentry, &ms);
  }
}
static inline int static inline int
bgp_use_next_hop(struct bgp_export_state *s, eattr *a) bgp_use_next_hop(struct bgp_export_state *s, eattr *a)
{ {
@ -810,13 +855,26 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
{ {
if (bgp_use_gateway(s)) if (bgp_use_gateway(s))
{ {
ip_addr nh[1] = { s->route->attrs->nh.gw }; rta *ra = s->route->attrs;
ip_addr nh[1] = { ra->nh.gw };
bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16); bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, 16);
if (s->mpls)
{
u32 implicit_null = BGP_MPLS_NULL;
u32 *labels = ra->nh.labels ? ra->nh.label : &implicit_null;
uint lnum = ra->nh.labels ? ra->nh.labels : 1;
bgp_set_attr_data(to, s->pool, BA_MPLS_LABEL_STACK, 0, labels, lnum * 4);
}
} }
else else
{ {
ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr }; ip_addr nh[2] = { s->channel->next_hop_addr, s->channel->link_addr };
bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16); bgp_set_attr_data(to, s->pool, BA_NEXT_HOP, 0, nh, ipa_nonzero(nh[1]) ? 32 : 16);
/* TODO: Use local MPLS assigned label */
if (s->mpls)
bgp_unset_attr(to, s->pool, BA_MPLS_LABEL_STACK);
} }
} }
@ -834,6 +892,10 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1]))) if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1])))
WITHDRAW(BAD_NEXT_HOP); WITHDRAW(BAD_NEXT_HOP);
/* Just check that an MPLS label stack is present */
if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK))
WITHDRAW(NO_LABEL_STACK);
} }
static uint static uint
@ -905,14 +967,76 @@ bgp_rte_update(struct bgp_parse_state *s, net_addr *n, u32 path_id, rta *a0)
rte_update2(&s->channel->c, n, e, s->last_src); rte_update2(&s->channel->c, n, e, s->last_src);
} }
/*
 * Write an MPLS label stack into an NLRI entry.
 *
 * Each label is emitted as three bytes holding the value shifted left by
 * four bits; the last byte written then gets the bottom-of-stack flag.
 * When no stack is supplied (mpls == NULL), a single zero label is written.
 * The cursor *pos and the remaining *size are advanced, and *pxlen (the
 * already written NLRI length octet) grows by 24 bits per label.
 */
static void
bgp_encode_mpls_labels(struct bgp_write_state *s UNUSED, adata *mpls, byte **pos, uint *size, byte *pxlen)
{
  u32 zero_label = 0;
  u32 *stack = &zero_label;
  uint count = 1;

  if (mpls)
  {
    stack = (u32 *) mpls->data;
    count = mpls->length / 4;
  }

  for (uint k = 0; k < count; k++)
  {
    put_u24(*pos, stack[k] << 4);
    ADVANCE(*pos, *size, 3);
  }

  /* Add bottom-of-stack flag */
  (*pos)[-1] |= BGP_MPLS_BOS;

  *pxlen += 24 * count;
}
/*
 * Parse the MPLS label stack at the head of an NLRI entry.
 *
 * Three-byte entries are consumed until one carries the bottom-of-stack bit
 * (or, for withdraws, the first entry equals the magic withdraw value).
 * Every entry advances *pos, shrinks *len by 3 and takes 24 bits off *pxlen;
 * running out of prefix bits is a parse error. With a == NULL the labels are
 * only skipped. Otherwise they are stored in the shared label-stack
 * attribute, applied to the next hop of a, and the cached rta is dropped.
 */
static void
bgp_decode_mpls_labels(struct bgp_parse_state *s, byte **pos, uint *len, uint *pxlen, rta *a)
{
  u32 labels[BGP_MPLS_MAX];
  u32 label;
  uint lnum = 0;

  for (;;)
  {
    if (*pxlen < 24)
      bgp_parse_error(s, 1);

    label = get_u24(*pos);
    labels[lnum++] = label >> 4;
    ADVANCE(*pos, *len, 3);
    *pxlen -= 24;

    /* Withdraw: Magic label stack value 0x800000 according to RFC 3107, section 3, last paragraph */
    if (!a && !s->err_withdraw && (lnum == 1) && (label == BGP_MPLS_MAGIC))
      break;

    /* Bottom of stack reached */
    if (label & BGP_MPLS_BOS)
      break;
  }

  /* Nothing to attach when there are no route attributes */
  if (!a)
    return;

  /* Attach MPLS attribute unless we already have one */
  if (!s->mpls_labels)
  {
    s->mpls_labels = lp_alloc_adata(s->pool, 4*BGP_MPLS_MAX);
    bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_MPLS_LABEL_STACK, 0, s->mpls_labels);
  }

  /* Overwrite data in the attribute */
  s->mpls_labels->length = 4*lnum;
  memcpy(s->mpls_labels->data, labels, 4*lnum);

  /* Update next hop entry in rta */
  bgp_apply_mpls_labels(s, a, labels, lnum);

  /* Attributes were changed, invalidate cached entry */
  rta_free(s->cached_rta);
  s->cached_rta = NULL;
}
static uint static uint
bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{ {
byte *pos = buf; byte *pos = buf;
while (!EMPTY_LIST(buck->prefixes) && (size >= (5 + sizeof(ip4_addr)))) while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
{ {
struct bgp_prefix *px = HEAD(buck->prefixes); struct bgp_prefix *px = HEAD(buck->prefixes);
struct net_addr_ip4 *net = (void *) px->net; struct net_addr_ip4 *net = (void *) px->net;
@ -924,14 +1048,17 @@ bgp_encode_nlri_ip4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
ADVANCE(pos, size, 4); ADVANCE(pos, size, 4);
} }
ip4_addr a = ip4_hton(net->prefix);
uint b = (net->pxlen + 7) / 8;
/* Encode prefix length */ /* Encode prefix length */
*pos = net->pxlen; *pos = net->pxlen;
ADVANCE(pos, size, 1); ADVANCE(pos, size, 1);
/* Encode MPLS labels */
if (s->mpls)
bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
/* Encode prefix body */ /* Encode prefix body */
ip4_addr a = ip4_hton(net->prefix);
uint b = (net->pxlen + 7) / 8;
memcpy(pos, &a, b); memcpy(pos, &a, b);
ADVANCE(pos, size, b); ADVANCE(pos, size, b);
@ -961,17 +1088,21 @@ bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
/* Decode prefix length */ /* Decode prefix length */
uint l = *pos; uint l = *pos;
uint b = (l + 7) / 8;
ADVANCE(pos, len, 1); ADVANCE(pos, len, 1);
if (len < ((l + 7) / 8))
bgp_parse_error(s, 1);
/* Decode MPLS labels */
if (s->mpls)
bgp_decode_mpls_labels(s, &pos, &len, &l, a);
if (l > IP4_MAX_PREFIX_LENGTH) if (l > IP4_MAX_PREFIX_LENGTH)
bgp_parse_error(s, 10); bgp_parse_error(s, 10);
if (len < b)
bgp_parse_error(s, 1);
/* Decode prefix body */ /* Decode prefix body */
ip4_addr addr = IP4_NONE; ip4_addr addr = IP4_NONE;
uint b = (l + 7) / 8;
memcpy(&addr, pos, b); memcpy(&addr, pos, b);
ADVANCE(pos, len, b); ADVANCE(pos, len, b);
@ -1016,7 +1147,7 @@ bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
{ {
byte *pos = buf; byte *pos = buf;
while (!EMPTY_LIST(buck->prefixes) && (size >= (5 + sizeof(ip6_addr)))) while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
{ {
struct bgp_prefix *px = HEAD(buck->prefixes); struct bgp_prefix *px = HEAD(buck->prefixes);
struct net_addr_ip6 *net = (void *) px->net; struct net_addr_ip6 *net = (void *) px->net;
@ -1028,14 +1159,17 @@ bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *bu
ADVANCE(pos, size, 4); ADVANCE(pos, size, 4);
} }
ip6_addr a = ip6_hton(net->prefix);
uint b = (net->pxlen + 7) / 8;
/* Encode prefix length */ /* Encode prefix length */
*pos = net->pxlen; *pos = net->pxlen;
ADVANCE(pos, size, 1); ADVANCE(pos, size, 1);
/* Encode MPLS labels */
if (s->mpls)
bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);
/* Encode prefix body */ /* Encode prefix body */
ip6_addr a = ip6_hton(net->prefix);
uint b = (net->pxlen + 7) / 8;
memcpy(pos, &a, b); memcpy(pos, &a, b);
ADVANCE(pos, size, b); ADVANCE(pos, size, b);
@ -1065,17 +1199,21 @@ bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
/* Decode prefix length */ /* Decode prefix length */
uint l = *pos; uint l = *pos;
uint b = (l + 7) / 8;
ADVANCE(pos, len, 1); ADVANCE(pos, len, 1);
if (len < ((l + 7) / 8))
bgp_parse_error(s, 1);
/* Decode MPLS labels */
if (s->mpls)
bgp_decode_mpls_labels(s, &pos, &len, &l, a);
if (l > IP6_MAX_PREFIX_LENGTH) if (l > IP6_MAX_PREFIX_LENGTH)
bgp_parse_error(s, 10); bgp_parse_error(s, 10);
if (len < b)
bgp_parse_error(s, 1);
/* Decode prefix body */ /* Decode prefix body */
ip6_addr addr = IP6_NONE; ip6_addr addr = IP6_NONE;
uint b = (l + 7) / 8;
memcpy(&addr, pos, b); memcpy(&addr, pos, b);
ADVANCE(pos, len, b); ADVANCE(pos, len, b);
@ -1135,6 +1273,282 @@ bgp_decode_next_hop_ip6(struct bgp_parse_state *s, byte *data, uint len, rta *a)
} }
/*
 * Encode VPNv4 NLRI entries from a bucket into buf.
 *
 * @s:    write state (ADD-PATH flag, label stack, channel)
 * @buck: bucket whose prefixes are consumed and freed as they are encoded
 * @buf:  output buffer
 * @size: space available in @buf
 *
 * Each entry is: optional 4-byte path ID, one length octet (in bits),
 * MPLS labels, 8-byte route distinguisher, and the prefix bytes.
 * Encoding stops when the bucket is empty or fewer than BGP_NLRI_MAX bytes
 * remain. Returns the number of bytes written.
 */
static uint
bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
  byte *pos = buf;

  while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
  {
    struct bgp_prefix *px = HEAD(buck->prefixes);
    struct net_addr_vpn4 *net = (void *) px->net;

    /* Encode path ID */
    if (s->add_path)
    {
      put_u32(pos, px->path_id);
      ADVANCE(pos, size, 4);
    }

    /*
     * Encode prefix length. The length octet covers labels + route
     * distinguisher + prefix; the 64 RD bits are counted here (the decoder
     * subtracts them again) and the label encoder adds 24 bits per label.
     */
    *pos = 64 + net->pxlen;
    ADVANCE(pos, size, 1);

    /* Encode MPLS labels */
    bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);

    /* Encode route distinguisher */
    put_u64(pos, net->rd);
    ADVANCE(pos, size, 8);

    /* Encode prefix body */
    ip4_addr a = ip4_hton(net->prefix);
    uint b = (net->pxlen + 7) / 8;
    memcpy(pos, &a, b);
    ADVANCE(pos, size, b);

    bgp_free_prefix(s->channel, px);
  }

  return pos - buf;
}
/*
 * Decode a sequence of VPNv4 NLRI entries and pass each one to
 * bgp_rte_update() with the attributes a.
 *
 * Per entry: optional 4-byte path ID, a length octet (bits of labels + route
 * distinguisher + prefix), the MPLS labels, the 8-byte route distinguisher
 * and the prefix bytes. Malformed input is signalled via bgp_parse_error().
 */
static void
bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
{
  while (len)
  {
    net_addr_vpn4 vpn;
    u32 path_id = 0;

    /* Decode path ID */
    if (s->add_path)
    {
      if (len < 5)
        bgp_parse_error(s, 1);

      path_id = get_u32(pos);
      ADVANCE(pos, len, 4);
    }

    /* Decode prefix length */
    uint bits = *pos;
    ADVANCE(pos, len, 1);

    /* The whole entry must be present in the remaining data */
    if (len < ((bits + 7) / 8))
      bgp_parse_error(s, 1);

    /* Decode MPLS labels */
    bgp_decode_mpls_labels(s, &pos, &len, &bits, a);

    /* Decode route distinguisher */
    if (bits < 64)
      bgp_parse_error(s, 1);

    u64 rd = get_u64(pos);
    ADVANCE(pos, len, 8);
    bits -= 64;

    if (bits > IP4_MAX_PREFIX_LENGTH)
      bgp_parse_error(s, 10);

    /* Decode prefix body */
    ip4_addr addr = IP4_NONE;
    uint nbytes = (bits + 7) / 8;
    memcpy(&addr, pos, nbytes);
    ADVANCE(pos, len, nbytes);

    vpn = NET_ADDR_VPN4(ip4_ntoh(addr), bits, rd);
    net_normalize_vpn4(&vpn);

    // XXXX validate prefix

    bgp_rte_update(s, (net_addr *) &vpn, path_id, a);
  }
}
/*
 * Encode a VPNv4 next hop: an all-zero 8-byte route distinguisher followed
 * by the IPv4 address taken from the attribute. Returns the encoded length
 * (12 bytes).
 */
static uint
bgp_encode_next_hop_vpn4(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED)
{
  /* This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP */

  ASSERT(a->u.ptr->length == sizeof(ip_addr));

  ip_addr *nh = (ip_addr *) a->u.ptr->data;

  put_u64(buf, 0); /* VPN RD is 0 */
  put_ip4(buf + 8, ipa_to_ip4(*nh));

  return 12;
}
/*
 * Decode a VPNv4 next hop. The field must be exactly 12 bytes with a zero
 * route distinguisher; the trailing IPv4 address is stored as the NEXT_HOP
 * attribute and applied to the route attributes.
 */
static void
bgp_decode_next_hop_vpn4(struct bgp_parse_state *s, byte *data, uint len, rta *a)
{
  if (len != 12)
    bgp_parse_error(s, 9);

  /* XXXX which error */
  u64 rd = get_u64(data);
  if (rd != 0)
    bgp_parse_error(s, 9);

  ip_addr nh = ipa_from_ip4(get_ip4(data + 8));

  // XXXX validate next hop

  bgp_set_attr_data(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, &nh, sizeof(nh));
  bgp_apply_next_hop(s, a, nh, IPA_NONE);
}
/*
 * Encode VPNv6 NLRI entries from a bucket into buf.
 *
 * @s:    write state (ADD-PATH flag, label stack, channel)
 * @buck: bucket whose prefixes are consumed and freed as they are encoded
 * @buf:  output buffer
 * @size: space available in @buf
 *
 * Each entry is: optional 4-byte path ID, one length octet (in bits),
 * MPLS labels, 8-byte route distinguisher, and the prefix bytes.
 * Encoding stops when the bucket is empty or fewer than BGP_NLRI_MAX bytes
 * remain. Returns the number of bytes written.
 */
static uint
bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
  byte *pos = buf;

  while (!EMPTY_LIST(buck->prefixes) && (size >= BGP_NLRI_MAX))
  {
    struct bgp_prefix *px = HEAD(buck->prefixes);
    struct net_addr_vpn6 *net = (void *) px->net;

    /* Encode path ID */
    if (s->add_path)
    {
      put_u32(pos, px->path_id);
      ADVANCE(pos, size, 4);
    }

    /*
     * Encode prefix length. The length octet covers labels + route
     * distinguisher + prefix; the 64 RD bits are counted here (the decoder
     * subtracts them again) and the label encoder adds 24 bits per label.
     */
    *pos = 64 + net->pxlen;
    ADVANCE(pos, size, 1);

    /* Encode MPLS labels */
    bgp_encode_mpls_labels(s, s->mpls_labels, &pos, &size, pos - 1);

    /* Encode route distinguisher */
    put_u64(pos, net->rd);
    ADVANCE(pos, size, 8);

    /* Encode prefix body */
    ip6_addr a = ip6_hton(net->prefix);
    uint b = (net->pxlen + 7) / 8;
    memcpy(pos, &a, b);
    ADVANCE(pos, size, b);

    bgp_free_prefix(s->channel, px);
  }

  return pos - buf;
}
/*
 * Decode a sequence of VPNv6 NLRI entries and pass each one to
 * bgp_rte_update() with the attributes a.
 *
 * Per entry: optional 4-byte path ID, a length octet (bits of labels + route
 * distinguisher + prefix), the MPLS labels (when the channel is an MPLS
 * one), the 8-byte route distinguisher and the prefix bytes. Malformed input
 * is signalled via bgp_parse_error().
 */
static void
bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
{
  while (len)
  {
    net_addr_vpn6 vpn;
    u32 path_id = 0;

    /* Decode path ID */
    if (s->add_path)
    {
      if (len < 5)
        bgp_parse_error(s, 1);

      path_id = get_u32(pos);
      ADVANCE(pos, len, 4);
    }

    /* Decode prefix length */
    uint bits = *pos;
    ADVANCE(pos, len, 1);

    /* The whole entry must be present in the remaining data */
    if (len < ((bits + 7) / 8))
      bgp_parse_error(s, 1);

    /* Decode MPLS labels */
    if (s->mpls)
      bgp_decode_mpls_labels(s, &pos, &len, &bits, a);

    /* Decode route distinguisher */
    if (bits < 64)
      bgp_parse_error(s, 1);

    u64 rd = get_u64(pos);
    ADVANCE(pos, len, 8);
    bits -= 64;

    if (bits > IP6_MAX_PREFIX_LENGTH)
      bgp_parse_error(s, 10);

    /* Decode prefix body */
    ip6_addr addr = IP6_NONE;
    uint nbytes = (bits + 7) / 8;
    memcpy(&addr, pos, nbytes);
    ADVANCE(pos, len, nbytes);

    vpn = NET_ADDR_VPN6(ip6_ntoh(addr), bits, rd);
    net_normalize_vpn6(&vpn);

    // XXXX validate prefix

    bgp_rte_update(s, (net_addr *) &vpn, path_id, a);
  }
}
/*
 * Encode a VPNv6 next hop. Every address is preceded by an all-zero 8-byte
 * route distinguisher. A 16-byte attribute yields one address (24 bytes
 * written); a 32-byte attribute yields two (48 bytes written). Returns the
 * number of bytes written.
 */
static uint
bgp_encode_next_hop_vpn6(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED)
{
  ip_addr *nh = (void *) a->u.ptr->data;
  uint len = a->u.ptr->length;
  uint written = 24;

  ASSERT((len == 16) || (len == 32));

  put_u64(buf, 0); /* VPN RD is 0 */
  put_ip6(buf + 8, ipa_to_ip6(nh[0]));

  if (len != 16)
  {
    /* Second address present */
    put_u64(buf + 24, 0); /* VPN RD is 0 */
    put_ip6(buf + 32, ipa_to_ip6(nh[1]));
    written = 48;
  }

  return written;
}
/*
 * Decode a VPNv6 next hop of 24 or 48 bytes (one or two addresses, each
 * preceded by a route distinguisher that must be zero).
 *
 * If the first address is link-local it is moved to the second slot and the
 * first becomes IPA_NONE; a second address that is not link-local is
 * dropped. The result is stored as the NEXT_HOP attribute (16 bytes when no
 * second address remains) and applied to the route attributes.
 */
static void
bgp_decode_next_hop_vpn6(struct bgp_parse_state *s, byte *data, uint len, rta *a)
{
  struct adata *ad = lp_alloc_adata(s->pool, 32);
  ip_addr *nh = (void *) ad->data;

  if (!((len == 24) || (len == 48)))
    bgp_parse_error(s, 9);

  /* XXXX which error */
  if ((get_u64(data) != 0) || ((len == 48) && (get_u64(data+24) != 0)))
    bgp_parse_error(s, 9);

  nh[0] = ipa_from_ip6(get_ip6(data+8));

  if (len == 48)
    nh[1] = ipa_from_ip6(get_ip6(data+32));
  else
    nh[1] = IPA_NONE;

  /* A link-local first address belongs in the second slot */
  if (ip6_is_link_local(nh[0]))
  {
    nh[1] = nh[0];
    nh[0] = IPA_NONE;
  }

  /* Only a link-local address is kept as the second one */
  if (!ip6_is_link_local(nh[1]))
    nh[1] = IPA_NONE;

  if (ipa_zero(nh[1]))
    ad->length = 16;

  // XXXX validate next hop

  bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
  bgp_apply_next_hop(s, a, nh[0], nh[1]);
}
static uint static uint
bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size) bgp_encode_nlri_flow4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{ {
@ -1341,14 +1755,15 @@ static const struct bgp_af_desc bgp_af_table[] = {
.update_next_hop = bgp_update_next_hop_ip, .update_next_hop = bgp_update_next_hop_ip,
}, },
{ {
.afi = BGP_AF_FLOW4, .afi = BGP_AF_IPV4_MPLS,
.net = NET_FLOW4, .net = NET_IP4,
.name = "flow4", .mpls = 1,
.encode_nlri = bgp_encode_nlri_flow4, .name = "ipv4-mpls",
.decode_nlri = bgp_decode_nlri_flow4, .encode_nlri = bgp_encode_nlri_ip4,
.encode_next_hop = bgp_encode_next_hop_none, .decode_nlri = bgp_decode_nlri_ip4,
.decode_next_hop = bgp_decode_next_hop_none, .encode_next_hop = bgp_encode_next_hop_ip4,
.update_next_hop = bgp_update_next_hop_none, .decode_next_hop = bgp_decode_next_hop_ip4,
.update_next_hop = bgp_update_next_hop_ip,
}, },
{ {
.afi = BGP_AF_IPV6, .afi = BGP_AF_IPV6,
@ -1370,6 +1785,49 @@ static const struct bgp_af_desc bgp_af_table[] = {
.decode_next_hop = bgp_decode_next_hop_ip6, .decode_next_hop = bgp_decode_next_hop_ip6,
.update_next_hop = bgp_update_next_hop_ip, .update_next_hop = bgp_update_next_hop_ip,
}, },
{
.afi = BGP_AF_IPV6_MPLS,
.net = NET_IP6,
.mpls = 1,
.name = "ipv6-mpls",
.encode_nlri = bgp_encode_nlri_ip6,
.decode_nlri = bgp_decode_nlri_ip6,
.encode_next_hop = bgp_encode_next_hop_ip6,
.decode_next_hop = bgp_decode_next_hop_ip6,
.update_next_hop = bgp_update_next_hop_ip,
},
{
.afi = BGP_AF_VPN4_MPLS,
.net = NET_VPN4,
.mpls = 1,
.name = "vpn4-mpls",
.encode_nlri = bgp_encode_nlri_vpn4,
.decode_nlri = bgp_decode_nlri_vpn4,
.encode_next_hop = bgp_encode_next_hop_vpn4,
.decode_next_hop = bgp_decode_next_hop_vpn4,
.update_next_hop = bgp_update_next_hop_ip,
},
{
.afi = BGP_AF_VPN6_MPLS,
.net = NET_VPN6,
.mpls = 1,
.name = "vpn6-mpls",
.encode_nlri = bgp_encode_nlri_vpn6,
.decode_nlri = bgp_decode_nlri_vpn6,
.encode_next_hop = bgp_encode_next_hop_vpn6,
.decode_next_hop = bgp_decode_next_hop_vpn6,
.update_next_hop = bgp_update_next_hop_ip,
},
{
.afi = BGP_AF_FLOW4,
.net = NET_FLOW4,
.name = "flow4",
.encode_nlri = bgp_encode_nlri_flow4,
.decode_nlri = bgp_decode_nlri_flow4,
.encode_next_hop = bgp_encode_next_hop_none,
.decode_next_hop = bgp_decode_next_hop_none,
.update_next_hop = bgp_update_next_hop_none,
},
{ {
.afi = BGP_AF_FLOW6, .afi = BGP_AF_FLOW6,
.net = NET_FLOW6, .net = NET_FLOW6,
@ -1566,6 +2024,8 @@ bgp_create_update(struct bgp_channel *c, byte *buf)
byte *end = buf + (bgp_max_packet_length(p->conn) - BGP_HEADER_LENGTH); byte *end = buf + (bgp_max_packet_length(p->conn) - BGP_HEADER_LENGTH);
byte *res = NULL; byte *res = NULL;
again: ;
/* Initialize write state */ /* Initialize write state */
struct bgp_write_state s = { struct bgp_write_state s = {
.proto = p, .proto = p,
@ -1573,10 +2033,9 @@ bgp_create_update(struct bgp_channel *c, byte *buf)
.pool = bgp_linpool, .pool = bgp_linpool,
.as4_session = p->as4_session, .as4_session = p->as4_session,
.add_path = c->add_path_tx, .add_path = c->add_path_tx,
.mpls = c->desc->mpls,
}; };
again:
/* Try unreachable bucket */ /* Try unreachable bucket */
if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes)) if ((buck = c->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
{ {
@ -1692,6 +2151,7 @@ bgp_decode_nlri(struct bgp_parse_state *s, u32 afi, byte *nlri, uint len, ea_lis
s->channel = c; s->channel = c;
s->add_path = c->add_path_rx; s->add_path = c->add_path_rx;
s->mpls = c->desc->mpls;
s->last_id = 0; s->last_id = 0;
s->last_src = s->proto->p.main_source; s->last_src = s->proto->p.main_source;