diff --git a/doc/bird.sgml b/doc/bird.sgml index 86df0456..5e5aeee4 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -318,8 +318,9 @@ protocol rip {
include " This statement causes inclusion of a new file. log " Set logging of messages having the given class (either returns the last ASN (the source ASN) in path returns the last ASN in the non-aggregated part of the path returns the length of path enable extended messages + The BGP protocol uses maximum message length of 4096 bytes. This option + provides an extension to allow extended messages with length up + to 65535 bytes. Default: off. + capabilities Use capability advertisement to advertise optional capabilities. This is standard behavior for newer BGP implementations, but there might be some @@ -2054,7 +2063,7 @@ protocol bgp { multihop; # ... which is connected indirectly export filter { # We use non-trivial export rules if source = RTS_STATIC then { # Export only static routes - # Assign our community + # Assign our community bgp_community.add((65000,64501)); # Artificially increase path length # by advertising local AS number twice @@ -2263,7 +2272,7 @@ these attributes: ip (Linux) The preferred source address. Used in source address selection for - outgoing packets. Has to be one of the IP addresses of the router. + outgoing packets. Has to be one of the IP addresses of the router. int (Linux) The realm of the route. Can be used for traffic classification. @@ -2608,8 +2617,8 @@ protocol ospf <name> { updates. Default value is 5. priority num - On every multiple access network (e.g., the Ethernet) Designed Router - and Backup Designed router are elected. These routers have some special + On every multiple access network (e.g., the Ethernet) Designated Router + and Backup Designated router are elected. These routers have some special functions in the flooding process. Higher priority increases preferences in this election. Routers with priority 0 are not eligible. Default value is 1. 
diff --git a/filter/config.Y b/filter/config.Y index 3bb00c13..3e70a63e 100644 --- a/filter/config.Y +++ b/filter/config.Y @@ -282,7 +282,7 @@ CF_KEYWORDS(FUNCTION, PRINT, PRINTN, UNSET, RETURN, LEN, DEFINED, ADD, DELETE, CONTAINS, RESET, - PREPEND, FIRST, LAST, MATCH, + PREPEND, FIRST, LAST, LAST_NONAGGREGATED, MATCH, EMPTY, FILTER, WHERE, EVAL) @@ -743,6 +743,7 @@ term: | term '.' MASK '(' term ')' { $$ = f_new_inst(); $$->code = P('i','M'); $$->a1.p = $1; $$->a2.p = $5; } | term '.' FIRST { $$ = f_new_inst(); $$->code = P('a','f'); $$->a1.p = $1; } | term '.' LAST { $$ = f_new_inst(); $$->code = P('a','l'); $$->a1.p = $1; } + | term '.' LAST_NONAGGREGATED { $$ = f_new_inst(); $$->code = P('a','L'); $$->a1.p = $1; } /* Communities */ /* This causes one shift/reduce conflict diff --git a/filter/filter.c b/filter/filter.c index 6ab0cc93..cc1bb3dc 100644 --- a/filter/filter.c +++ b/filter/filter.c @@ -1056,6 +1056,14 @@ interpret(struct f_inst *what) res.type = T_INT; res.val.i = as; break; + case P('a','L'): /* Get last ASN from non-aggregated part of AS PATH */ + ONEARG; + if (v1.type != T_PATH) + runtime( "AS path expected" ); + + res.type = T_INT; + res.val.i = as_path_get_last_nonaggregated(v1.val.ad); + break; case 'r': ONEARG; res = v1; diff --git a/lib/lists.c b/lib/lists.c index d323a4b6..12ef3cc6 100644 --- a/lib/lists.c +++ b/lib/lists.c @@ -41,7 +41,7 @@ add_tail(list *l, node *n) { node *z = l->tail; - n->next = (node *) &l->null; + n->next = &l->tail_node; n->prev = z; z->next = n; l->tail = n; @@ -60,7 +60,7 @@ add_head(list *l, node *n) node *z = l->head; n->next = z; - n->prev = (node *) &l->head; + n->prev = &l->head_node; z->prev = n; l->head = n; } @@ -88,7 +88,7 @@ insert_node(node *n, node *after) * rem_node - remove a node from a list * @n: node to be removed * - * Removes a node @n from the list it's linked in. + * Removes a node @n from the list it's linked in. Afterwards, node @n is cleared. 
*/ LIST_INLINE void rem_node(node *n) @@ -96,23 +96,6 @@ rem_node(node *n) node *z = n->prev; node *x = n->next; - z->next = x; - x->prev = z; -} - -/** - * rem2_node - remove a node from a list, with cleanup - * @n: node to be removed - * - * Removes a node @n from the list it's linked in and resets its pointers to NULL. - * Useful if you want to distinguish between linked and unlinked nodes. - */ -LIST_INLINE void -rem2_node(node *n) -{ - node *z = n->prev; - node *x = n->next; - z->next = x; x->prev = z; n->next = NULL; @@ -150,9 +133,9 @@ replace_node(node *old, node *new) LIST_INLINE void init_list(list *l) { - l->head = (node *) &l->null; + l->head = &l->tail_node; l->null = NULL; - l->tail = (node *) &l->head; + l->tail = &l->head_node; } /** @@ -172,6 +155,6 @@ add_tail_list(list *to, list *l) p->next = q; q->prev = p; q = l->tail; - q->next = (node *) &to->null; + q->next = &to->tail_node; to->tail = q; } diff --git a/lib/lists.h b/lib/lists.h index d75f033d..46b33446 100644 --- a/lib/lists.h +++ b/lib/lists.h @@ -26,10 +26,23 @@ typedef struct node { struct node *next, *prev; } node; -typedef struct list { /* In fact two overlayed nodes */ - struct node *head, *null, *tail; +typedef union list { /* In fact two overlayed nodes */ + struct { /* Head node */ + struct node head_node; + void *head_padding; + }; + struct { /* Tail node */ + void *tail_padding; + struct node tail_node; + }; + struct { /* Split to separate pointers */ + struct node *head; + struct node *null; + struct node *tail; + }; } list; + #define NODE (node *) #define HEAD(list) ((void *)((list).head)) #define TAIL(list) ((void *)((list).tail)) @@ -64,7 +77,6 @@ typedef struct list { /* In fact two overlayed nodes */ void add_tail(list *, node *); void add_head(list *, node *); void rem_node(node *); -void rem2_node(node *); void add_tail_list(list *, list *); void init_list(list *); void insert_node(node *, node *); diff --git a/lib/resource.c b/lib/resource.c index 64f9a39c..68718dfb 
100644 --- a/lib/resource.c +++ b/lib/resource.c @@ -163,6 +163,7 @@ rfree(void *res) if (r->n.next) rem_node(&r->n); r->class->free(r); + r->class = NULL; xfree(r); } @@ -383,16 +384,9 @@ mb_allocz(pool *p, unsigned size) void * mb_realloc(void *m, unsigned size) { - struct mblock *ob = NULL; + struct mblock *b = SKIP_BACK(struct mblock, data, m); - if (m) - { - ob = SKIP_BACK(struct mblock, data, m); - if (ob->r.n.next) - rem_node(&ob->r.n); - } - - struct mblock *b = xrealloc(ob, sizeof(struct mblock) + size); + b = xrealloc(b, sizeof(struct mblock) + size); replace_node(&b->r.n, &b->r.n); b->size = size; return b->data; diff --git a/lib/socket.h b/lib/socket.h index 1b03098d..91ae9db3 100644 --- a/lib/socket.h +++ b/lib/socket.h @@ -27,6 +27,7 @@ typedef struct birdsock { struct iface *iface; /* Interface; specify this for broad/multicast sockets */ byte *rbuf, *rpos; /* NULL=allocate automatically */ + uint fast_rx; /* RX has higher priority in event loop */ uint rbsize; int (*rx_hook)(struct birdsock *, int size); /* NULL=receiving turned off, returns 1 to clear rx buffer */ diff --git a/nest/a-path.c b/nest/a-path.c index c9c5aefb..32e2d27e 100644 --- a/nest/a-path.c +++ b/nest/a-path.c @@ -220,7 +220,7 @@ as_path_get_last(struct adata *path, u32 *orig_as) p += BS * len; } break; - default: bug("as_path_get_first: Invalid path segment"); + default: bug("Invalid path segment"); } } @@ -229,6 +229,35 @@ as_path_get_last(struct adata *path, u32 *orig_as) return found; } +u32 +as_path_get_last_nonaggregated(struct adata *path) +{ + u8 *p = path->data; + u8 *q = p+path->length; + u32 res = 0; + int len; + + while (pclass == (SYM_CONSTANT | T_IP)) && ipa_is_ip4(SYM_VAL($1).ip)) $$ = ipa_to_u32(SYM_VAL($1).ip); else - cf_error("Number of IPv4 address constant expected"); + cf_error("Number or IPv4 address constant expected"); } ; diff --git a/nest/proto.c b/nest/proto.c index f712fe5f..df4952b7 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -264,6 +264,7 @@ 
channel_stop_export(struct channel *c) rt_feed_channel_abort(c); c->export_state = ES_DOWN; + c->stats.exp_routes = 0; } static void @@ -299,7 +300,7 @@ channel_do_flush(struct channel *c) static void channel_do_down(struct channel *c) { - rem2_node(&c->table_node); + rem_node(&c->table_node); rt_unlock_table(c->table); c->proto->active_channels--; diff --git a/nest/route.h b/nest/route.h index 22fca331..11b08ce5 100644 --- a/nest/route.h +++ b/nest/route.h @@ -232,8 +232,8 @@ typedef struct rte { struct { /* Routes generated by krt sync (both temporary and inherited ones) */ s8 src; /* Alleged route source (see krt.h) */ u8 proto; /* Kernel source protocol ID */ - u8 type; /* Kernel route type */ u8 seen; /* Seen during last scan */ + u8 best; /* Best route in network, propagated to core */ u32 metric; /* Kernel metric */ } krt; } u; diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index 62752e21..f966161c 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -872,7 +872,7 @@ bfd_notify_hook(sock *sk, int len) WALK_LIST_FIRST(s, tmp_list) { bfd_lock_sessions(p); - rem2_node(&s->n); + rem_node(&s->n); state = s->loc_state; diag = s->loc_diag; bfd_unlock_sessions(p); diff --git a/proto/bfd/io.c b/proto/bfd/io.c index fb150040..79ed9af7 100644 --- a/proto/bfd/io.c +++ b/proto/bfd/io.c @@ -576,7 +576,7 @@ sockets_close_fds(struct birdloop *loop) loop->close_scheduled = 0; } -int sk_read(sock *s); +int sk_read(sock *s, int revents); int sk_write(sock *s); static void @@ -605,7 +605,7 @@ sockets_fire(struct birdloop *loop) if (pfd->revents & POLLIN) while (e && *psk && (*psk)->rx_hook) - e = sk_read(*psk); + e = sk_read(*psk, 0); e = 1; if (pfd->revents & POLLOUT) diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index cb5b108c..61b5cba2 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -374,6 +374,8 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) if (ipa_zero(p->source_addr)) p->source_addr = conn->sk->saddr; + conn->sk->fast_rx = 0; + p->conn = conn; 
p->last_error_class = 0; p->last_error_code = 0; @@ -666,6 +668,10 @@ bgp_keepalive_timeout(timer *t) DBG("BGP: Keepalive timer\n"); bgp_schedule_packet(conn, PKT_KEEPALIVE); + + /* Kick TX a bit faster */ + if (ev_active(conn->tx_ev)) + ev_run(conn->tx_ev); } static void @@ -696,6 +702,7 @@ bgp_setup_sk(struct bgp_conn *conn, sock *s) { s->data = conn; s->err_hook = bgp_sock_err; + s->fast_rx = 1; conn->sk = s; } @@ -813,7 +820,13 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED) return 0; } - /* We are in proper state and there is no other incoming connection */ + /* + * BIRD should keep multiple incoming connections in OpenSent state (for + * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming + * connections are rejected instead. The exception is the case where an + * incoming connection triggers a graceful restart. + */ + acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) && (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk); @@ -823,6 +836,10 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED) bgp_handle_graceful_restart(p); bgp_conn_enter_idle_state(p->conn); acc = 1; + + /* There might be separate incoming connection in OpenSent state */ + if (p->incoming_conn.state > BS_ACTIVE) + bgp_close_conn(&p->incoming_conn); } BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s", diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index ed99f623..72ca3728 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -163,6 +163,14 @@ bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf) return buf; } +static byte * +bgp_put_cap_ext_msg(struct bgp_proto *p UNUSED, byte *buf) +{ + *buf++ = 6; /* Capability 6: Support for extended messages */ + *buf++ = 0; /* Capability data length */ + return buf; +} + static byte * bgp_put_cap_gr1(struct bgp_proto *p, byte *buf) { @@ -223,14 +231,6 @@ bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf) return buf; } -static byte * -bgp_put_cap_ext_msg(struct bgp_proto 
*p UNUSED, byte *buf) -{ - *buf++ = 230; /* Capability TBD: Support for extended messages */ - *buf++ = 0; /* Capability data length */ - return buf; -} - static byte * bgp_create_open(struct bgp_conn *conn, byte *buf) @@ -827,6 +827,12 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) conn->peer_refresh_support = 1; break; + case 6: /* Extended message length capability, draft */ + if (cl != 0) + goto err; + conn->peer_ext_messages_support = 1; + break; + case 64: /* Graceful restart capability, RFC 4724 */ if (cl % 4 != 2) goto err; @@ -867,12 +873,6 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) conn->peer_enhanced_refresh_support = 1; break; - case 230: /* Extended message length capability, draft, cap number TBD */ - if (cl != 0) - goto err; - conn->peer_ext_messages_support = 1; - break; - /* We can safely ignore all other capabilities */ } len -= 2 + cl; diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index 6001ac26..4548f6da 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -595,10 +595,10 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i if (ospf_is_v2(p) && (ifa->type == OSPF_IT_NBMA) && (addr->flags & IA_PEER)) ifa->type = OSPF_IT_PTMP; - if ((ifa->type == OSPF_IT_BCAST) && !(iface->flags & if_multi_flag)) + if ((ifa->type == OSPF_IT_BCAST) && !(iface->flags & if_multi_flag) && !ifa->stub) ifa->type = OSPF_IT_NBMA; - if ((ifa->type == OSPF_IT_PTP) && !(iface->flags & if_multi_flag)) + if ((ifa->type == OSPF_IT_PTP) && !(iface->flags & if_multi_flag) && !ifa->stub) ifa->type = OSPF_IT_PTMP; if (ifa->type != old_type) diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c index b30b0438..b68ba6f4 100644 --- a/proto/ospf/neighbor.c +++ b/proto/ospf/neighbor.c @@ -108,6 +108,7 @@ ospf_neigh_down(struct ospf_neighbor *n) { struct ospf_iface *ifa = n->ifa; struct ospf_proto *p = ifa->oa->po; + u32 rid = n->rid; if ((ifa->type == OSPF_IT_NBMA) || (ifa->type == 
OSPF_IT_PTMP)) { @@ -121,7 +122,7 @@ ospf_neigh_down(struct ospf_neighbor *n) rem_node(NODE n); rfree(n->pool); - OSPF_TRACE(D_EVENTS, "Neighbor %R on %s removed", n->rid, ifa->ifname); + OSPF_TRACE(D_EVENTS, "Neighbor %R on %s removed", rid, ifa->ifname); } /** diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index 9d0a93c7..86e39d75 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -278,7 +278,7 @@ ospf_originate_lsa(struct ospf_proto *p, struct ospf_new_lsa *lsa) if (!SNODE_VALID(en)) s_add_tail(&p->lsal, SNODE en); - if (en->lsa_body == NULL) + if (!en->nf || !en->lsa_body) en->nf = lsa->nf; if (en->nf != lsa->nf) diff --git a/proto/rip/config.Y b/proto/rip/config.Y index 79e57741..3c8cd0f2 100644 --- a/proto/rip/config.Y +++ b/proto/rip/config.Y @@ -137,7 +137,7 @@ rip_iface_item: | TIMEOUT TIME expr { RIP_IFACE->timeout_time = $3; if ($3<=0) cf_error("Timeout time must be positive"); } | GARBAGE TIME expr { RIP_IFACE->garbage_time = $3; if ($3<=0) cf_error("Garbage time must be positive"); } | ECMP WEIGHT expr { RIP_IFACE->ecmp_weight = $3 - 1; if (($3<1) || ($3>256)) cf_error("ECMP weight must be in range 1-256"); } - | RX BUFFER expr { RIP_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("TX length must be in range 256-65535"); } + | RX BUFFER expr { RIP_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("RX length must be in range 256-65535"); } | TX LENGTH expr { RIP_IFACE->tx_length = $3; if (($3<256) || ($3>65535)) cf_error("TX length must be in range 256-65535"); } | TX tos { RIP_IFACE->tx_tos = $2; } | TX PRIORITY expr { RIP_IFACE->tx_priority = $3; } diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c index 5f2f1309..9f84b3f5 100644 --- a/sysdep/bsd/krt-sock.c +++ b/sysdep/bsd/krt-sock.c @@ -528,9 +528,8 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) e->net = net; e->u.krt.src = src; e->u.krt.proto = src2; - - /* These are probably too Linux-specific */ - e->u.krt.type = 0; + 
e->u.krt.seen = 0; + e->u.krt.best = 0; e->u.krt.metric = 0; if (scan) diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index 6240c177..c398a7f6 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -1204,7 +1204,8 @@ nl_parse_route(struct nlmsghdr *h, int scan) e->net = net; e->u.krt.src = src; e->u.krt.proto = i->rtm_protocol; - e->u.krt.type = i->rtm_type; + e->u.krt.seen = 0; + e->u.krt.best = 0; e->u.krt.metric = 0; if (a[RTA_PRIORITY]) diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index cbfb47d5..37e26c9b 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -41,12 +42,12 @@ #include "lib/sysio.h" /* Maximum number of calls of tx handler for one socket in one - * select iteration. Should be small enough to not monopolize CPU by + * poll iteration. Should be small enough to not monopolize CPU by * one protocol instance. */ #define MAX_STEPS 4 -/* Maximum number of calls of rx handler for all sockets in one select +/* Maximum number of calls of rx handler for all sockets in one poll iteration. RX callbacks are often much more costly so we limit this to gen small latencies */ #define MAX_RX_STEPS 4 @@ -1023,7 +1024,6 @@ sk_log_error(sock *s, const char *p) static list sock_list; static struct birdsock *current_sock; static struct birdsock *stored_sock; -static int sock_recalc_fdsets_p; static inline sock * sk_next(sock *s) @@ -1079,7 +1079,6 @@ sk_free(resource *r) if (s == stored_sock) stored_sock = sk_next(s); rem_node(&s->n); - sock_recalc_fdsets_p = 1; } } @@ -1277,7 +1276,6 @@ static void sk_insert(sock *s) { add_tail(&sock_list, &s->n); - sock_recalc_fdsets_p = 1; } static void @@ -1329,18 +1327,6 @@ sk_passive_connected(sock *s, int type) log(L_WARN "SOCK: Cannot get remote IP address for TCP<"); } - if (fd >= FD_SETSIZE) - { - /* FIXME: Call err_hook instead ? 
*/ - log(L_ERR "SOCK: Incoming connection from %I%J (port %d) %s", - t->daddr, ipa_is_link_local(t->daddr) ? t->iface : NULL, - t->dport, "rejected due to FD_SETSIZE limit"); - close(fd); - t->fd = -1; - rfree(t); - return 1; - } - if (sk_setup(t) < 0) { /* FIXME: Call err_hook instead ? */ @@ -1416,9 +1402,6 @@ sk_open(sock *s) if (fd < 0) ERR("socket"); - if (fd >= FD_SETSIZE) - ERR2("FD_SETSIZE limit reached"); - s->fd = fd; if (sk_setup(s) < 0) @@ -1696,19 +1679,12 @@ sk_maybe_write(sock *s) int sk_rx_ready(sock *s) { - fd_set rd, wr; - struct timeval timo; int rv; - - FD_ZERO(&rd); - FD_ZERO(&wr); - FD_SET(s->fd, &rd); - - timo.tv_sec = 0; - timo.tv_usec = 0; + struct pollfd pfd = { .fd = s->fd }; + pfd.events |= POLLIN; redo: - rv = select(s->fd+1, &rd, &wr, NULL, &timo); + rv = poll(&pfd, 1, 0); if ((rv < 0) && (errno == EINTR || errno == EAGAIN)) goto redo; @@ -1777,7 +1753,7 @@ sk_send_full(sock *s, unsigned len, struct iface *ifa, /* sk_read() and sk_write() are called from BFD's event loop */ int -sk_read(sock *s) +sk_read(sock *s, int revents) { switch (s->type) { @@ -1796,6 +1772,11 @@ sk_read(sock *s) { if (errno != EINTR && errno != EAGAIN) s->err_hook(s, errno); + else if (errno == EAGAIN && !(revents & POLLIN)) + { + log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents); + s->err_hook(s, 0); + } } else if (!c) s->err_hook(s, 0); @@ -2068,62 +2049,63 @@ static int short_loops = 0; void io_loop(void) { - fd_set rd, wr; - struct timeval timo; + int poll_tout; time_t tout; - int hi, events; + int nfds, events, pout; sock *s; node *n; + int fdmax = 256; + struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd)); watchdog_start1(); - sock_recalc_fdsets_p = 1; for(;;) { events = ev_run_list(&global_event_list); + timers: update_times(); tout = tm_first_shot(); if (tout <= now) { tm_shot(); - continue; + goto timers; } - timo.tv_sec = events ? 0 : MIN(tout - now, 3); - timo.tv_usec = 0; + poll_tout = (events ? 
0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */ io_close_event(); - if (sock_recalc_fdsets_p) - { - sock_recalc_fdsets_p = 0; - FD_ZERO(&rd); - FD_ZERO(&wr); - } - - hi = 0; + nfds = 0; WALK_LIST(n, sock_list) { + pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */ s = SKIP_BACK(sock, n, n); if (s->rx_hook) { - FD_SET(s->fd, &rd); - if (s->fd > hi) - hi = s->fd; + pfd[nfds].fd = s->fd; + pfd[nfds].events |= POLLIN; } - else - FD_CLR(s->fd, &rd); if (s->tx_hook && s->ttx != s->tpos) { - FD_SET(s->fd, &wr); - if (s->fd > hi) - hi = s->fd; + pfd[nfds].fd = s->fd; + pfd[nfds].events |= POLLOUT; + } + if (pfd[nfds].fd != -1) + { + s->index = nfds; + nfds++; } else - FD_CLR(s->fd, &wr); + s->index = -1; + + if (nfds >= fdmax) + { + fdmax *= 2; + pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd)); + } } /* * Yes, this is racy. But even if the signal comes before this test - * and entering select(), it gets caught on the next timer tick. + * and entering poll(), it gets caught on the next timer tick. 
*/ if (async_config_flag) @@ -2148,18 +2130,18 @@ io_loop(void) continue; } - /* And finally enter select() to find active sockets */ + /* And finally enter poll() to find active sockets */ watchdog_stop(); - hi = select(hi+1, &rd, &wr, NULL, &timo); + pout = poll(pfd, nfds, poll_tout); watchdog_start(); - if (hi < 0) + if (pout < 0) { if (errno == EINTR || errno == EAGAIN) continue; - die("select: %m"); + die("poll: %m"); } - if (hi) + if (pout) { /* guaranteed to be non-empty */ current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); @@ -2167,23 +2149,29 @@ io_loop(void) while (current_sock) { sock *s = current_sock; + if (s->index == -1) + { + current_sock = sk_next(s); + goto next; + } + int e; int steps; steps = MAX_STEPS; - if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook) + if (s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook) do { steps--; io_log_event(s->rx_hook, s->data); - e = sk_read(s); + e = sk_read(s, pfd[s->index].revents); if (s != current_sock) goto next; } while (e && s->rx_hook && steps); steps = MAX_STEPS; - if (FD_ISSET(s->fd, &wr)) + if (pfd[s->index].revents & POLLOUT) do { steps--; @@ -2210,13 +2198,17 @@ io_loop(void) while (current_sock && count < MAX_RX_STEPS) { sock *s = current_sock; - int e UNUSED; + if (s->index == -1) + { + current_sock = sk_next(s); + goto next2; + } - if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook) + if (!s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook) { count++; io_log_event(s->rx_hook, s->data); - e = sk_read(s); + sk_read(s, pfd[s->index].revents); if (s != current_sock) goto next2; } diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 6b3b4eee..b0a96613 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -412,46 +412,58 @@ again: { rte *e, **ee, *best, **pbest, *old_best; - old_best = n->routes; + /* + * Note that old_best may be NULL even if there was an old best route in + * the previous step, because 
it might be replaced in krt_learn_scan(). + * But in that case there is a new valid best route. + */ + + old_best = NULL; best = NULL; pbest = NULL; ee = &n->routes; while (e = *ee) { + if (e->u.krt.best) + old_best = e; + if (!e->u.krt.seen) { *ee = e->next; rte_free(e); continue; } + if (!best || best->u.krt.metric > e->u.krt.metric) { best = e; pbest = ee; } + e->u.krt.seen = 0; + e->u.krt.best = 0; ee = &e->next; } if (!n->routes) { DBG("%I/%d: deleting\n", n->n.prefix, n->n.pxlen); if (old_best) - { - krt_learn_announce_delete(p, n); - n->n.flags &= ~KRF_INSTALLED; - } + krt_learn_announce_delete(p, n); + FIB_ITERATE_PUT(&fit); fib_delete(fib, n); goto again; } + + best->u.krt.best = 1; *pbest = best->next; best->next = n->routes; n->routes = best; - if (best != old_best || !(n->n.flags & KRF_INSTALLED) || p->reload) + + if ((best != old_best) || p->reload) { DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric); krt_learn_announce_update(p, best); - n->n.flags |= KRF_INSTALLED; } else DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric); @@ -510,31 +522,31 @@ krt_learn_async(struct krt_proto *p, rte *e, int new) best = n->routes; bestp = &n->routes; for(gg=&n->routes; g=*gg; gg=&g->next) + { if (best->u.krt.metric > g->u.krt.metric) { best = g; bestp = gg; } + + g->u.krt.best = 0; + } + if (best) { + best->u.krt.best = 1; *bestp = best->next; best->next = n->routes; n->routes = best; } + if (best != old_best) { DBG("krt_learn_async: distributing change\n"); if (best) - { - krt_learn_announce_update(p, best); - n->n.flags |= KRF_INSTALLED; - } + krt_learn_announce_update(p, best); else - { - n->routes = NULL; - krt_learn_announce_delete(p, n); - n->n.flags &= ~KRF_INSTALLED; - } + krt_learn_announce_delete(p, n); } } @@ -559,7 +571,7 @@ krt_dump(struct proto *P) static void krt_dump_attrs(rte *e) { - debug(" [m=%d,p=%d,t=%d]", e->u.krt.metric, e->u.krt.proto, e->u.krt.type); + debug(" [m=%d,p=%d]", 
e->u.krt.metric, e->u.krt.proto); } #endif diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c index 7cb26360..b90bbbd2 100644 --- a/sysdep/unix/log.c +++ b/sysdep/unix/log.c @@ -284,17 +284,18 @@ log_switch(int debug, list *l, char *new_syslog_name) current_log_list = l; #ifdef HAVE_SYSLOG - if (current_syslog_name && new_syslog_name && - !strcmp(current_syslog_name, new_syslog_name)) + char *old_syslog_name = current_syslog_name; + current_syslog_name = new_syslog_name; + + if (old_syslog_name && new_syslog_name && + !strcmp(old_syslog_name, new_syslog_name)) return; - if (current_syslog_name) + if (old_syslog_name) closelog(); if (new_syslog_name) openlog(new_syslog_name, LOG_CONS | LOG_NDELAY, LOG_DAEMON); - - current_syslog_name = new_syslog_name; #endif } diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index 8796ab9c..f95bd968 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -450,6 +450,7 @@ cli_connect(sock *s, int size UNUSED) s->err_hook = cli_err; s->data = c = cli_new(s); s->pool = c->pool; /* We need to have all the socket buffers allocated in the cli pool */ + s->fast_rx = 1; c->rx_pos = c->rx_buf; c->rx_aux = NULL; rmove(s, c->pool); @@ -466,6 +467,7 @@ cli_init_unix(uid_t use_uid, gid_t use_gid) s->type = SK_UNIX_PASSIVE; s->rx_hook = cli_connect; s->rbsize = 1024; + s->fast_rx = 1; /* Return value intentionally ignored */ unlink(path_control_socket);
class == (SYM_CONSTANT | T_IP)) && ipa_is_ip4(SYM_VAL($1).ip)) $$ = ipa_to_u32(SYM_VAL($1).ip); else - cf_error("Number of IPv4 address constant expected"); + cf_error("Number or IPv4 address constant expected"); } ; diff --git a/nest/proto.c b/nest/proto.c index f712fe5f..df4952b7 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -264,6 +264,7 @@ channel_stop_export(struct channel *c) rt_feed_channel_abort(c); c->export_state = ES_DOWN; + c->stats.exp_routes = 0; } static void @@ -299,7 +300,7 @@ channel_do_flush(struct channel *c) static void channel_do_down(struct channel *c) { - rem2_node(&c->table_node); + rem_node(&c->table_node); rt_unlock_table(c->table); c->proto->active_channels--; diff --git a/nest/route.h b/nest/route.h index 22fca331..11b08ce5 100644 --- a/nest/route.h +++ b/nest/route.h @@ -232,8 +232,8 @@ typedef struct rte { struct { /* Routes generated by krt sync (both temporary and inherited ones) */ s8 src; /* Alleged route source (see krt.h) */ u8 proto; /* Kernel source protocol ID */ - u8 type; /* Kernel route type */ u8 seen; /* Seen during last scan */ + u8 best; /* Best route in network, propagated to core */ u32 metric; /* Kernel metric */ } krt; } u; diff --git a/proto/bfd/bfd.c b/proto/bfd/bfd.c index 62752e21..f966161c 100644 --- a/proto/bfd/bfd.c +++ b/proto/bfd/bfd.c @@ -872,7 +872,7 @@ bfd_notify_hook(sock *sk, int len) WALK_LIST_FIRST(s, tmp_list) { bfd_lock_sessions(p); - rem2_node(&s->n); + rem_node(&s->n); state = s->loc_state; diag = s->loc_diag; bfd_unlock_sessions(p); diff --git a/proto/bfd/io.c b/proto/bfd/io.c index fb150040..79ed9af7 100644 --- a/proto/bfd/io.c +++ b/proto/bfd/io.c @@ -576,7 +576,7 @@ sockets_close_fds(struct birdloop *loop) loop->close_scheduled = 0; } -int sk_read(sock *s); +int sk_read(sock *s, int revents); int sk_write(sock *s); static void @@ -605,7 +605,7 @@ sockets_fire(struct birdloop *loop) if (pfd->revents & POLLIN) while (e && *psk && (*psk)->rx_hook) - e = sk_read(*psk); + e = sk_read(*psk, 0); 
e = 1; if (pfd->revents & POLLOUT) diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index cb5b108c..61b5cba2 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -374,6 +374,8 @@ bgp_conn_enter_established_state(struct bgp_conn *conn) if (ipa_zero(p->source_addr)) p->source_addr = conn->sk->saddr; + conn->sk->fast_rx = 0; + p->conn = conn; p->last_error_class = 0; p->last_error_code = 0; @@ -666,6 +668,10 @@ bgp_keepalive_timeout(timer *t) DBG("BGP: Keepalive timer\n"); bgp_schedule_packet(conn, PKT_KEEPALIVE); + + /* Kick TX a bit faster */ + if (ev_active(conn->tx_ev)) + ev_run(conn->tx_ev); } static void @@ -696,6 +702,7 @@ bgp_setup_sk(struct bgp_conn *conn, sock *s) { s->data = conn; s->err_hook = bgp_sock_err; + s->fast_rx = 1; conn->sk = s; } @@ -813,7 +820,13 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED) return 0; } - /* We are in proper state and there is no other incoming connection */ + /* + * BIRD should keep multiple incoming connections in OpenSent state (for + * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming + * connections are rejected instead. The exception is the case where an + * incoming connection triggers a graceful restart. 
+ */ + acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) && (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk); @@ -823,6 +836,10 @@ bgp_incoming_connection(sock *sk, int dummy UNUSED) bgp_handle_graceful_restart(p); bgp_conn_enter_idle_state(p->conn); acc = 1; + + /* There might be separate incoming connection in OpenSent state */ + if (p->incoming_conn.state > BS_ACTIVE) + bgp_close_conn(&p->incoming_conn); } BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s", diff --git a/proto/bgp/packets.c b/proto/bgp/packets.c index ed99f623..72ca3728 100644 --- a/proto/bgp/packets.c +++ b/proto/bgp/packets.c @@ -163,6 +163,14 @@ bgp_put_cap_rr(struct bgp_proto *p UNUSED, byte *buf) return buf; } +static byte * +bgp_put_cap_ext_msg(struct bgp_proto *p UNUSED, byte *buf) +{ + *buf++ = 6; /* Capability 6: Support for extended messages */ + *buf++ = 0; /* Capability data length */ + return buf; +} + static byte * bgp_put_cap_gr1(struct bgp_proto *p, byte *buf) { @@ -223,14 +231,6 @@ bgp_put_cap_err(struct bgp_proto *p UNUSED, byte *buf) return buf; } -static byte * -bgp_put_cap_ext_msg(struct bgp_proto *p UNUSED, byte *buf) -{ - *buf++ = 230; /* Capability TBD: Support for extended messages */ - *buf++ = 0; /* Capability data length */ - return buf; -} - static byte * bgp_create_open(struct bgp_conn *conn, byte *buf) @@ -827,6 +827,12 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) conn->peer_refresh_support = 1; break; + case 6: /* Extended message length capability, draft */ + if (cl != 0) + goto err; + conn->peer_ext_messages_support = 1; + break; + case 64: /* Graceful restart capability, RFC 4724 */ if (cl % 4 != 2) goto err; @@ -867,12 +873,6 @@ bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len) conn->peer_enhanced_refresh_support = 1; break; - case 230: /* Extended message length capability, draft, cap number TBD */ - if (cl != 0) - goto err; - conn->peer_ext_messages_support = 1; - break; - /* We 
can safely ignore all other capabilities */ } len -= 2 + cl; diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index 6001ac26..4548f6da 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -595,10 +595,10 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i if (ospf_is_v2(p) && (ifa->type == OSPF_IT_NBMA) && (addr->flags & IA_PEER)) ifa->type = OSPF_IT_PTMP; - if ((ifa->type == OSPF_IT_BCAST) && !(iface->flags & if_multi_flag)) + if ((ifa->type == OSPF_IT_BCAST) && !(iface->flags & if_multi_flag) && !ifa->stub) ifa->type = OSPF_IT_NBMA; - if ((ifa->type == OSPF_IT_PTP) && !(iface->flags & if_multi_flag)) + if ((ifa->type == OSPF_IT_PTP) && !(iface->flags & if_multi_flag) && !ifa->stub) ifa->type = OSPF_IT_PTMP; if (ifa->type != old_type) diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c index b30b0438..b68ba6f4 100644 --- a/proto/ospf/neighbor.c +++ b/proto/ospf/neighbor.c @@ -108,6 +108,7 @@ ospf_neigh_down(struct ospf_neighbor *n) { struct ospf_iface *ifa = n->ifa; struct ospf_proto *p = ifa->oa->po; + u32 rid = n->rid; if ((ifa->type == OSPF_IT_NBMA) || (ifa->type == OSPF_IT_PTMP)) { @@ -121,7 +122,7 @@ ospf_neigh_down(struct ospf_neighbor *n) rem_node(NODE n); rfree(n->pool); - OSPF_TRACE(D_EVENTS, "Neighbor %R on %s removed", n->rid, ifa->ifname); + OSPF_TRACE(D_EVENTS, "Neighbor %R on %s removed", rid, ifa->ifname); } /** diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index 9d0a93c7..86e39d75 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -278,7 +278,7 @@ ospf_originate_lsa(struct ospf_proto *p, struct ospf_new_lsa *lsa) if (!SNODE_VALID(en)) s_add_tail(&p->lsal, SNODE en); - if (en->lsa_body == NULL) + if (!en->nf || !en->lsa_body) en->nf = lsa->nf; if (en->nf != lsa->nf) diff --git a/proto/rip/config.Y b/proto/rip/config.Y index 79e57741..3c8cd0f2 100644 --- a/proto/rip/config.Y +++ b/proto/rip/config.Y @@ -137,7 +137,7 @@ rip_iface_item: | TIMEOUT TIME expr { 
RIP_IFACE->timeout_time = $3; if ($3<=0) cf_error("Timeout time must be positive"); } | GARBAGE TIME expr { RIP_IFACE->garbage_time = $3; if ($3<=0) cf_error("Garbage time must be positive"); } | ECMP WEIGHT expr { RIP_IFACE->ecmp_weight = $3 - 1; if (($3<1) || ($3>256)) cf_error("ECMP weight must be in range 1-256"); } - | RX BUFFER expr { RIP_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("TX length must be in range 256-65535"); } + | RX BUFFER expr { RIP_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("RX length must be in range 256-65535"); } | TX LENGTH expr { RIP_IFACE->tx_length = $3; if (($3<256) || ($3>65535)) cf_error("TX length must be in range 256-65535"); } | TX tos { RIP_IFACE->tx_tos = $2; } | TX PRIORITY expr { RIP_IFACE->tx_priority = $3; } diff --git a/sysdep/bsd/krt-sock.c b/sysdep/bsd/krt-sock.c index 5f2f1309..9f84b3f5 100644 --- a/sysdep/bsd/krt-sock.c +++ b/sysdep/bsd/krt-sock.c @@ -528,9 +528,8 @@ krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan) e->net = net; e->u.krt.src = src; e->u.krt.proto = src2; - - /* These are probably too Linux-specific */ - e->u.krt.type = 0; + e->u.krt.seen = 0; + e->u.krt.best = 0; e->u.krt.metric = 0; if (scan) diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index 6240c177..c398a7f6 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -1204,7 +1204,8 @@ nl_parse_route(struct nlmsghdr *h, int scan) e->net = net; e->u.krt.src = src; e->u.krt.proto = i->rtm_protocol; - e->u.krt.type = i->rtm_type; + e->u.krt.seen = 0; + e->u.krt.best = 0; e->u.krt.metric = 0; if (a[RTA_PRIORITY]) diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index cbfb47d5..37e26c9b 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -41,12 +42,12 @@ #include "lib/sysio.h" /* Maximum number of calls of tx handler for one socket in one - * select iteration. 
Should be small enough to not monopolize CPU by + * poll iteration. Should be small enough to not monopolize CPU by * one protocol instance. */ #define MAX_STEPS 4 -/* Maximum number of calls of rx handler for all sockets in one select +/* Maximum number of calls of rx handler for all sockets in one poll iteration. RX callbacks are often much more costly so we limit this to gen small latencies */ #define MAX_RX_STEPS 4 @@ -1023,7 +1024,6 @@ sk_log_error(sock *s, const char *p) static list sock_list; static struct birdsock *current_sock; static struct birdsock *stored_sock; -static int sock_recalc_fdsets_p; static inline sock * sk_next(sock *s) @@ -1079,7 +1079,6 @@ sk_free(resource *r) if (s == stored_sock) stored_sock = sk_next(s); rem_node(&s->n); - sock_recalc_fdsets_p = 1; } } @@ -1277,7 +1276,6 @@ static void sk_insert(sock *s) { add_tail(&sock_list, &s->n); - sock_recalc_fdsets_p = 1; } static void @@ -1329,18 +1327,6 @@ sk_passive_connected(sock *s, int type) log(L_WARN "SOCK: Cannot get remote IP address for TCP<"); } - if (fd >= FD_SETSIZE) - { - /* FIXME: Call err_hook instead ? */ - log(L_ERR "SOCK: Incoming connection from %I%J (port %d) %s", - t->daddr, ipa_is_link_local(t->daddr) ? t->iface : NULL, - t->dport, "rejected due to FD_SETSIZE limit"); - close(fd); - t->fd = -1; - rfree(t); - return 1; - } - if (sk_setup(t) < 0) { /* FIXME: Call err_hook instead ? 
*/ @@ -1416,9 +1402,6 @@ sk_open(sock *s) if (fd < 0) ERR("socket"); - if (fd >= FD_SETSIZE) - ERR2("FD_SETSIZE limit reached"); - s->fd = fd; if (sk_setup(s) < 0) @@ -1696,19 +1679,12 @@ sk_maybe_write(sock *s) int sk_rx_ready(sock *s) { - fd_set rd, wr; - struct timeval timo; int rv; - - FD_ZERO(&rd); - FD_ZERO(&wr); - FD_SET(s->fd, &rd); - - timo.tv_sec = 0; - timo.tv_usec = 0; + struct pollfd pfd = { .fd = s->fd }; + pfd.events |= POLLIN; redo: - rv = select(s->fd+1, &rd, &wr, NULL, &timo); + rv = poll(&pfd, 1, 0); if ((rv < 0) && (errno == EINTR || errno == EAGAIN)) goto redo; @@ -1777,7 +1753,7 @@ sk_send_full(sock *s, unsigned len, struct iface *ifa, /* sk_read() and sk_write() are called from BFD's event loop */ int -sk_read(sock *s) +sk_read(sock *s, int revents) { switch (s->type) { @@ -1796,6 +1772,11 @@ sk_read(sock *s) { if (errno != EINTR && errno != EAGAIN) s->err_hook(s, errno); + else if (errno == EAGAIN && !(revents & POLLIN)) + { + log(L_ERR "Got EAGAIN from read when revents=%x (without POLLIN)", revents); + s->err_hook(s, 0); + } } else if (!c) s->err_hook(s, 0); @@ -2068,62 +2049,63 @@ static int short_loops = 0; void io_loop(void) { - fd_set rd, wr; - struct timeval timo; + int poll_tout; time_t tout; - int hi, events; + int nfds, events, pout; sock *s; node *n; + int fdmax = 256; + struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd)); watchdog_start1(); - sock_recalc_fdsets_p = 1; for(;;) { events = ev_run_list(&global_event_list); + timers: update_times(); tout = tm_first_shot(); if (tout <= now) { tm_shot(); - continue; + goto timers; } - timo.tv_sec = events ? 0 : MIN(tout - now, 3); - timo.tv_usec = 0; + poll_tout = (events ? 
0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */ io_close_event(); - if (sock_recalc_fdsets_p) - { - sock_recalc_fdsets_p = 0; - FD_ZERO(&rd); - FD_ZERO(&wr); - } - - hi = 0; + nfds = 0; WALK_LIST(n, sock_list) { + pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */ s = SKIP_BACK(sock, n, n); if (s->rx_hook) { - FD_SET(s->fd, &rd); - if (s->fd > hi) - hi = s->fd; + pfd[nfds].fd = s->fd; + pfd[nfds].events |= POLLIN; } - else - FD_CLR(s->fd, &rd); if (s->tx_hook && s->ttx != s->tpos) { - FD_SET(s->fd, &wr); - if (s->fd > hi) - hi = s->fd; + pfd[nfds].fd = s->fd; + pfd[nfds].events |= POLLOUT; + } + if (pfd[nfds].fd != -1) + { + s->index = nfds; + nfds++; } else - FD_CLR(s->fd, &wr); + s->index = -1; + + if (nfds >= fdmax) + { + fdmax *= 2; + pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd)); + } } /* * Yes, this is racy. But even if the signal comes before this test - * and entering select(), it gets caught on the next timer tick. + * and entering poll(), it gets caught on the next timer tick. 
*/ if (async_config_flag) @@ -2148,18 +2130,18 @@ io_loop(void) continue; } - /* And finally enter select() to find active sockets */ + /* And finally enter poll() to find active sockets */ watchdog_stop(); - hi = select(hi+1, &rd, &wr, NULL, &timo); + pout = poll(pfd, nfds, poll_tout); watchdog_start(); - if (hi < 0) + if (pout < 0) { if (errno == EINTR || errno == EAGAIN) continue; - die("select: %m"); + die("poll: %m"); } - if (hi) + if (pout) { /* guaranteed to be non-empty */ current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); @@ -2167,23 +2149,29 @@ io_loop(void) while (current_sock) { sock *s = current_sock; + if (s->index == -1) + { + current_sock = sk_next(s); + goto next; + } + int e; int steps; steps = MAX_STEPS; - if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook) + if (s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook) do { steps--; io_log_event(s->rx_hook, s->data); - e = sk_read(s); + e = sk_read(s, pfd[s->index].revents); if (s != current_sock) goto next; } while (e && s->rx_hook && steps); steps = MAX_STEPS; - if (FD_ISSET(s->fd, &wr)) + if (pfd[s->index].revents & POLLOUT) do { steps--; @@ -2210,13 +2198,17 @@ io_loop(void) while (current_sock && count < MAX_RX_STEPS) { sock *s = current_sock; - int e UNUSED; + if (s->index == -1) + { + current_sock = sk_next(s); + goto next2; + } - if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook) + if (!s->fast_rx && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook) { count++; io_log_event(s->rx_hook, s->data); - e = sk_read(s); + sk_read(s, pfd[s->index].revents); if (s != current_sock) goto next2; } diff --git a/sysdep/unix/krt.c b/sysdep/unix/krt.c index 6b3b4eee..b0a96613 100644 --- a/sysdep/unix/krt.c +++ b/sysdep/unix/krt.c @@ -412,46 +412,58 @@ again: { rte *e, **ee, *best, **pbest, *old_best; - old_best = n->routes; + /* + * Note that old_best may be NULL even if there was an old best route in + * the previous step, because 
it might be replaced in krt_learn_scan(). + * But in that case there is a new valid best route. + */ + + old_best = NULL; best = NULL; pbest = NULL; ee = &n->routes; while (e = *ee) { + if (e->u.krt.best) + old_best = e; + if (!e->u.krt.seen) { *ee = e->next; rte_free(e); continue; } + if (!best || best->u.krt.metric > e->u.krt.metric) { best = e; pbest = ee; } + e->u.krt.seen = 0; + e->u.krt.best = 0; ee = &e->next; } if (!n->routes) { DBG("%I/%d: deleting\n", n->n.prefix, n->n.pxlen); if (old_best) - { - krt_learn_announce_delete(p, n); - n->n.flags &= ~KRF_INSTALLED; - } + krt_learn_announce_delete(p, n); + FIB_ITERATE_PUT(&fit); fib_delete(fib, n); goto again; } + + best->u.krt.best = 1; *pbest = best->next; best->next = n->routes; n->routes = best; - if (best != old_best || !(n->n.flags & KRF_INSTALLED) || p->reload) + + if ((best != old_best) || p->reload) { DBG("%I/%d: announcing (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric); krt_learn_announce_update(p, best); - n->n.flags |= KRF_INSTALLED; } else DBG("%I/%d: uptodate (metric=%d)\n", n->n.prefix, n->n.pxlen, best->u.krt.metric); @@ -510,31 +522,31 @@ krt_learn_async(struct krt_proto *p, rte *e, int new) best = n->routes; bestp = &n->routes; for(gg=&n->routes; g=*gg; gg=&g->next) + { if (best->u.krt.metric > g->u.krt.metric) { best = g; bestp = gg; } + + g->u.krt.best = 0; + } + if (best) { + best->u.krt.best = 1; *bestp = best->next; best->next = n->routes; n->routes = best; } + if (best != old_best) { DBG("krt_learn_async: distributing change\n"); if (best) - { - krt_learn_announce_update(p, best); - n->n.flags |= KRF_INSTALLED; - } + krt_learn_announce_update(p, best); else - { - n->routes = NULL; - krt_learn_announce_delete(p, n); - n->n.flags &= ~KRF_INSTALLED; - } + krt_learn_announce_delete(p, n); } } @@ -559,7 +571,7 @@ krt_dump(struct proto *P) static void krt_dump_attrs(rte *e) { - debug(" [m=%d,p=%d,t=%d]", e->u.krt.metric, e->u.krt.proto, e->u.krt.type); + debug(" [m=%d,p=%d]", 
e->u.krt.metric, e->u.krt.proto); } #endif diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c index 7cb26360..b90bbbd2 100644 --- a/sysdep/unix/log.c +++ b/sysdep/unix/log.c @@ -284,17 +284,18 @@ log_switch(int debug, list *l, char *new_syslog_name) current_log_list = l; #ifdef HAVE_SYSLOG - if (current_syslog_name && new_syslog_name && - !strcmp(current_syslog_name, new_syslog_name)) + char *old_syslog_name = current_syslog_name; + current_syslog_name = new_syslog_name; + + if (old_syslog_name && new_syslog_name && + !strcmp(old_syslog_name, new_syslog_name)) return; - if (current_syslog_name) + if (old_syslog_name) closelog(); if (new_syslog_name) openlog(new_syslog_name, LOG_CONS | LOG_NDELAY, LOG_DAEMON); - - current_syslog_name = new_syslog_name; #endif } diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index 8796ab9c..f95bd968 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -450,6 +450,7 @@ cli_connect(sock *s, int size UNUSED) s->err_hook = cli_err; s->data = c = cli_new(s); s->pool = c->pool; /* We need to have all the socket buffers allocated in the cli pool */ + s->fast_rx = 1; c->rx_pos = c->rx_buf; c->rx_aux = NULL; rmove(s, c->pool); @@ -466,6 +467,7 @@ cli_init_unix(uid_t use_uid, gid_t use_gid) s->type = SK_UNIX_PASSIVE; s->rx_hook = cli_connect; s->rbsize = 1024; + s->fast_rx = 1; /* Return value intentionally ignored */ unlink(path_control_socket);