From: Benjamin Cama Date: Wed, 17 Aug 2011 15:23:07 +0000 (+0200) Subject: Merge branch 'use-netlink' into fdn-mods X-Git-Tag: debian/2.2.1-1fdn2~2^2 X-Git-Url: http://git.sameswireless.fr/l2tpns.git/commitdiff_plain/ec7ce23c79c375acc6a8930fc03dcef76488ba7e?hp=-c Merge branch 'use-netlink' into fdn-mods Signed-off-by: Benjamin Cama --- ec7ce23c79c375acc6a8930fc03dcef76488ba7e diff --combined bgp.c index 80a445e,f1d0ec2..4b8cb64 --- a/bgp.c +++ b/bgp.c @@@ -29,15 -29,11 +29,15 @@@ char const *cvs_id_bgp = "$Id: bgp.c,v static void bgp_clear(struct bgp_peer *peer); static void bgp_set_retry(struct bgp_peer *peer); - static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx); + static void bgp_cidr(in_addr_t ip, int prefixlen, struct bgp_ip_prefix *pfx); static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head, struct bgp_route_list *new); +static struct bgp_route6_list *bgp_insert_route6(struct bgp_route6_list *head, + struct bgp_route6_list *new); +static void bgp_process_timers(struct bgp_peer *peer); static void bgp_free_routes(struct bgp_route_list *routes); +static void bgp_free_routes6(struct bgp_route6_list *routes); static char const *bgp_msg_type_str(uint8_t type); static int bgp_connect(struct bgp_peer *peer); static int bgp_handle_connect(struct bgp_peer *peer); @@@ -47,13 -43,11 +47,13 @@@ static int bgp_handle_input(struct bgp_ static int bgp_send_open(struct bgp_peer *peer); static int bgp_send_keepalive(struct bgp_peer *peer); static int bgp_send_update(struct bgp_peer *peer); +static int bgp_send_update6(struct bgp_peer *peer); static int bgp_send_notification(struct bgp_peer *peer, uint8_t code, uint8_t subcode); static uint16_t our_as; static struct bgp_route_list *bgp_routes = 0; +static struct bgp_route6_list *bgp_routes6 = 0; int bgp_configured = 0; struct bgp_peer *bgp_peers = 0; @@@ -94,7 -88,6 +94,7 @@@ int bgp_setup(int as return 0; bgp_routes = 0; + bgp_routes6 = 0; bgp_configured = 0; /* set by bgp_start */ return 1; @@@ -102,7 -95,7 +102,7 @@@ /* start connection with a peer */ int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive, - int hold, int enable) + int hold, struct in_addr update_source, int enable) { struct hostent *h; int ibgp; @@@ -131,7 -124,6 +131,7 @@@ } memcpy(&peer->addr, h->h_addr, sizeof(peer->addr)); + peer->source_addr = update_source.s_addr; peer->as = as > 0 ? as : our_as; ibgp = peer->as == our_as; @@@ -199,6 -191,15 +199,6 @@@ ADD_ATTRIBUTE(); - /* NEXT_HOP */ - a.flags = BGP_PATH_ATTR_FLAG_TRANS; - a.code = BGP_PATH_ATTR_CODE_NEXT_HOP; - ip = my_address; /* we're it */ - a.data.s.len = sizeof(ip); - memcpy(a.data.s.value, &ip, sizeof(ip)); - - ADD_ATTRIBUTE(); - /* MULTI_EXIT_DISC */ a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL; a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC; @@@ -228,25 -229,6 +228,25 @@@ ADD_ATTRIBUTE(); + /* remember the len before adding NEXT_HOP */ + peer->path_attr_len_without_nexthop = peer->path_attr_len; + + /* NEXT_HOP */ + a.flags = BGP_PATH_ATTR_FLAG_TRANS; + a.code = BGP_PATH_ATTR_CODE_NEXT_HOP; + if (config->nexthop_address) + { + ip = config->nexthop_address; + } + else + { + ip = my_address; /* we're it */ + } + a.data.s.len = sizeof(ip); + memcpy(a.data.s.value, &ip, sizeof(ip)); + + ADD_ATTRIBUTE(); + if (!(peer->path_attrs = malloc(peer->path_attr_len))) { LOG(0, 0, 0, "Can't allocate path_attrs for %s (%s)\n", @@@ -257,53 -239,6 +257,53 @@@ memcpy(peer->path_attrs, path_attrs, peer->path_attr_len); + /* multiprotocol attributes initialization */ + if (config->ipv6_prefix.s6_addr[0]) + { + struct bgp_attr_mp_reach_nlri_partial mp_reach_nlri_partial; + struct bgp_attr_mp_unreach_nlri_partial mp_unreach_nlri_partial; + + a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL; + a.code = BGP_PATH_ATTR_CODE_MP_REACH_NLRI; + a.data.s.len = 0; /* will be set on UPDATE */ + + mp_reach_nlri_partial.afi = htons(BGP_MP_AFI_IPv6); + mp_reach_nlri_partial.safi = BGP_MP_SAFI_UNICAST; + mp_reach_nlri_partial.reserved = 0; + mp_reach_nlri_partial.next_hop_len = 16; + + /* use the defined nexthop6, or our address in ipv6_prefix */ + if (config->nexthop6_address.s6_addr[0]) + memcpy(&mp_reach_nlri_partial.next_hop, + &config->nexthop6_address.s6_addr, 16); + else + { + /* our address is ipv6prefix::1 */ + memcpy(&mp_reach_nlri_partial.next_hop, + &config->ipv6_prefix.s6_addr, 16); + mp_reach_nlri_partial.next_hop[15] = 1; + } + + memcpy(&a.data.s.value, &mp_reach_nlri_partial, + sizeof(struct bgp_attr_mp_reach_nlri_partial)); + memcpy(&peer->mp_reach_nlri_partial, &a, + BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE); + + a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_EXTLEN; + a.code = BGP_PATH_ATTR_CODE_MP_UNREACH_NLRI; + a.data.e.len = 0; /* will be set on UPDATE */ + + mp_unreach_nlri_partial.afi = htons(BGP_MP_AFI_IPv6); + mp_unreach_nlri_partial.safi = BGP_MP_SAFI_UNICAST; + + memcpy(&a.data.e.value, &mp_unreach_nlri_partial, + sizeof(struct bgp_attr_mp_unreach_nlri_partial)); + memcpy(&peer->mp_unreach_nlri_partial, &a, + BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE); + } + + peer->mp_handling = HandlingUnknown; + LOG(4, 0, 0, "Initiating BGP connection to %s (routing %s)\n", name, enable ? "enabled" : "suspended"); @@@ -332,8 -267,6 +332,8 @@@ static void bgp_clear(struct bgp_peer * bgp_free_routes(peer->routes); peer->routes = 0; + bgp_free_routes6(peer->routes6); + peer->routes6 = 0; peer->outbuf->packet.header.len = 0; peer->outbuf->done = 0; @@@ -394,26 -327,6 +394,6 @@@ static void bgp_set_retry(struct bgp_pe bgp_halt(peer); /* give up */ } - /* convert ip/mask to CIDR notation */ - static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx) - { - int i; - uint32_t b; - - /* convert to prefix notation */ - pfx->len = 32; - pfx->prefix = ip; - - if (!mask) /* bogus */ - mask = 0xffffffff; - - for (i = 0; i < 32 && ((b = ntohl(1 << i)), !(mask & b)); i++) - { - pfx->len--; - pfx->prefix &= ~b; - } - } - /* insert route into list; sorted */ static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head, struct bgp_route_list *new) @@@ -441,33 -354,6 +421,33 @@@ return head; } +/* insert route6 into list; sorted */ +static struct bgp_route6_list *bgp_insert_route6(struct bgp_route6_list *head, + struct bgp_route6_list *new) +{ + struct bgp_route6_list *p = head; + struct bgp_route6_list *e = 0; + + while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0) + { + e = p; + p = p->next; + } + + if (e) + { + new->next = e->next; + e->next = new; + } + else + { + new->next = head; + head = new; + } + + return head; +} + /* add route to list for peers */ /* * Note: this doesn't do route aggregation, nor drop routes if a less @@@ -475,13 -361,14 +455,14 @@@ * that if that route is later deleted we don't have to be concerned * about adding back the more specific one). */ - int bgp_add_route(in_addr_t ip, in_addr_t mask) + int bgp_add_route(in_addr_t ip, int prefixlen) { struct bgp_route_list *r = bgp_routes; struct bgp_route_list add; int i; - bgp_cidr(ip, mask, &add.dest); + add.dest.prefix = ip; + add.dest.len = prefixlen; add.next = 0; /* check for duplicate */ @@@ -520,68 -407,16 +501,69 @@@ return 1; } +/* add route to list for peers */ +/* + * Note: same provisions as above + */ +int bgp_add_route6(struct in6_addr ip, int prefixlen) +{ + struct bgp_route6_list *r = bgp_routes6; + struct bgp_route6_list add; + int i; + char ipv6addr[INET6_ADDRSTRLEN]; + + memcpy(&add.dest.prefix, &ip.s6_addr, 16); + add.dest.len = prefixlen; + add.next = 0; + + /* check for duplicate */ + while (r) + { + i = memcmp(&r->dest, &add.dest, sizeof(r->dest)); + if (!i) + return 1; /* already covered */ + + if (i > 0) + break; + + r = r->next; + } + + /* insert into route list; sorted */ + if (!(r = malloc(sizeof(*r)))) + { + LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n", + inet_ntop(AF_INET6, &ip, ipv6addr, INET6_ADDRSTRLEN), add.dest.len, + strerror(errno)); + + return 0; + } + + memcpy(r, &add, sizeof(*r)); + bgp_routes6 = bgp_insert_route6(bgp_routes6, r); + + /* flag established peers for update */ + for (i = 0; i < BGP_NUM_PEERS; i++) + if (bgp_peers[i].state == Established + && bgp_peers[i].mp_handling == HandleIPv6Routes) + bgp_peers[i].update_routes6 = 1; + + LOG(4, 0, 0, "Registered BGP route %s/%d\n", + inet_ntop(AF_INET6, &ip, ipv6addr, INET6_ADDRSTRLEN), add.dest.len); + + return 1; +} + /* remove route from list for peers */ - int bgp_del_route(in_addr_t ip, in_addr_t mask) + int bgp_del_route(in_addr_t ip, int prefixlen) { struct bgp_route_list *r = bgp_routes; struct bgp_route_list *e = 0; struct bgp_route_list del; int i; - bgp_cidr(ip, mask, &del.dest); + del.dest.prefix = ip; + del.dest.len = prefixlen; del.next = 0; /* find entry in routes list and remove */ @@@ -622,58 -457,6 +604,58 @@@ return 1; } +/* remove route from list for peers */ +int bgp_del_route6(struct in6_addr ip, int prefixlen) +{ + struct bgp_route6_list *r = bgp_routes6; + struct bgp_route6_list *e = 0; + struct bgp_route6_list del; + int i; + char ipv6addr[INET6_ADDRSTRLEN]; + + memcpy(&del.dest.prefix, &ip.s6_addr, 16); + del.dest.len = prefixlen; + del.next = 0; + + /* find entry in routes list and remove */ + while (r) + { + i = memcmp(&r->dest, &del.dest, sizeof(r->dest)); + if (!i) + { + if (e) + e->next = r->next; + else + bgp_routes6 = r->next; + + free(r); + break; + } + + e = r; + + if (i > 0) + r = 0; /* stop */ + else + r = r->next; + } + + /* not found */ + if (!r) + return 1; + + /* flag established peers for update */ + for (i = 0; i < BGP_NUM_PEERS; i++) + if (bgp_peers[i].state == Established + && bgp_peers[i].mp_handling == HandleIPv6Routes) + bgp_peers[i].update_routes6 = 1; + + LOG(4, 0, 0, "Removed BGP route %s/%d\n", + inet_ntop(AF_INET6, &ip, ipv6addr, INET6_ADDRSTRLEN), del.dest.len); + + return 1; +} + /* enable or disable routing */ void bgp_enable_routing(int enable) { @@@ -818,68 -601,36 +800,68 @@@ int bgp_process(uint32_t events[] continue; } - /* process timers */ - if (peer->state == Established) + /* process pending IPv6 updates */ + if (peer->update_routes6 + && !peer->outbuf->packet.header.len) /* ditto */ { - if (time_now > peer->expire_time) - { - LOG(1, 0, 0, "No message from BGP peer %s in %ds\n", - peer->name, peer->hold); - - bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0); + if (!bgp_send_update6(peer)) continue; - } - - if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len) - bgp_send_keepalive(peer); - } - else if (peer->state == Idle) - { - if (time_now > peer->retry_time) - bgp_connect(peer); } - else if (time_now > peer->state_time + BGP_STATE_TIME) + + /* process timers */ + bgp_process_timers(peer); + } + + return 1; +} + +/* process bgp timers only */ +void bgp_process_peers_timers() +{ + int i; + + if (!bgp_configured) + return; + + for (i = 0; i < BGP_NUM_PEERS; i++) + { + struct bgp_peer *peer = &bgp_peers[i]; + + if (peer->state == Disabled) + continue; + + bgp_process_timers(peer); + } +} + +static void bgp_process_timers(struct bgp_peer *peer) +{ + if (peer->state == Established) + { + if (time_now > peer->expire_time) { - LOG(1, 0, 0, "%s timer expired for BGP peer %s\n", - bgp_state_str(peer->state), peer->name); + LOG(1, 0, 0, "No message from BGP peer %s in %ds\n", + peer->name, peer->hold); - bgp_restart(peer); + bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0); + return; } + + if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len) + bgp_send_keepalive(peer); } + else if (peer->state == Idle) + { + if (time_now > peer->retry_time) + bgp_connect(peer); + } + else if (time_now > peer->state_time + BGP_STATE_TIME) + { + LOG(1, 0, 0, "%s timer expired for BGP peer %s\n", + bgp_state_str(peer->state), peer->name); - return 1; + bgp_restart(peer); + } } static void bgp_free_routes(struct bgp_route_list *routes) @@@ -893,17 -644,6 +875,17 @@@ } } +static void bgp_free_routes6(struct bgp_route6_list *routes) +{ + struct bgp_route6_list *tmp; + + while ((tmp = routes)) + { + routes = tmp->next; + free(tmp); + } +} + char const *bgp_state_str(enum bgp_state state) { switch (state) @@@ -938,7 -678,6 +920,7 @@@ static int bgp_connect(struct bgp_peer { static int bgp_port = 0; struct sockaddr_in addr; + struct sockaddr_in source_addr; struct epoll_event ev; if (!bgp_port) @@@ -970,19 -709,6 +952,19 @@@ /* set to non-blocking */ fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK); + /* set source address */ + memset(&source_addr, 0, sizeof(source_addr)); + source_addr.sin_family = AF_INET; + source_addr.sin_addr.s_addr = peer->source_addr; /* defaults to INADDR_ANY */ + if (bind(peer->sock, (struct sockaddr *) &source_addr, sizeof(source_addr)) < 0) + { + LOG(1, 0, 0, "Can't set source address to %s: %s\n", + inet_ntoa(source_addr.sin_addr), strerror(errno)); + + bgp_set_retry(peer); + return 0; + } + /* try connect */ memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; @@@ -1158,12 -884,6 +1140,12 @@@ static int bgp_handle_input(struct bgp_ struct bgp_data_open data; int hold; int i; + off_t param_offset, capability_offset; + struct bgp_opt_param *param; + uint8_t capabilities_len; + char *capabilities = NULL; + struct bgp_capability *capability; + struct bgp_mp_cap_param *mp_cap; for (i = 0; i < sizeof(p->header.marker); i++) { @@@ -1226,93 -946,10 +1208,93 @@@ if (peer->keepalive * 3 > peer->hold) peer->keepalive = peer->hold / 3; + /* check for optional parameters */ + /* 2 is for the size of type + len (both uint8_t) */ + for (param_offset = 0; + param_offset < data.opt_len; + param_offset += 2 + param->len) + { + param = (struct bgp_opt_param *)((char *)&data.opt_params + param_offset); + + /* sensible check */ + if (data.opt_len - param_offset < 2 + || param->len > data.opt_len - param_offset - 2) + { + LOG(1, 0, 0, "Malformed Optional Parameter list from BGP peer %s\n", + peer->name); + + bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC); + return 0; + } + + /* we know only one parameter type */ + if (param->type != BGP_PARAM_TYPE_CAPABILITY) + { + LOG(1, 0, 0, "Unsupported Optional Parameter type %d from BGP peer %s\n", + param->type, peer->name); + + bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_UNSUP_PARAM); + return 0; + } + + capabilities_len = param->len; + capabilities = (char *)¶m->value; + + /* look for BGP multiprotocol capability */ + for (capability_offset = 0; + capability_offset < capabilities_len; + capability_offset += 2 + capability->len) + { + capability = (struct bgp_capability *)(capabilities + capability_offset); + + /* sensible check */ + if (capabilities_len - capability_offset < 2 + || capability->len > capabilities_len - capability_offset - 2) + { + LOG(1, 0, 0, "Malformed Capabilities list from BGP peer %s\n", + peer->name); + + bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC); + return 0; + } + + /* we only know one capability code */ + if (capability->code != BGP_CAP_CODE_MP + && capability->len != sizeof(struct bgp_mp_cap_param)) + { + LOG(4, 0, 0, "Unsupported Capability code %d from BGP peer %s\n", + capability->code, peer->name); + + /* we don't terminate, still; we just jump to the next one */ + continue; + } + + mp_cap = (struct bgp_mp_cap_param *)&capability->value; + /* the only tuple we support */ + if (ntohs(mp_cap->afi) != BGP_MP_AFI_IPv6 && mp_cap->safi != BGP_MP_SAFI_UNICAST) + { + LOG(4, 0, 0, "Unsupported multiprotocol AFI %d and SAFI %d from BGP peer %s\n", + mp_cap->afi, mp_cap->safi, peer->name); + + /* we don't terminate, still; we just jump to the next one */ + continue; + } + + /* yes it can! */ + peer->mp_handling = HandleIPv6Routes; + } + } + + if (peer->mp_handling != HandleIPv6Routes) + { + peer->mp_handling = DoesntHandleIPv6Routes; + if (config->ipv6_prefix.s6_addr[0]) + LOG(1, 0, 0, "Warning: BGP peer %s doesn't handle IPv6 prefixes updates\n", + peer->name); + } + /* next transition requires an exchange of keepalives */ bgp_send_keepalive(peer); - - /* FIXME: may need to check for optional params */ } break; @@@ -1341,30 -978,10 +1323,30 @@@ if (notification->error_code == BGP_ERR_CEASE) { LOG(4, 0, 0, "BGP peer %s sent CEASE\n", peer->name); - bgp_restart(peer); + bgp_set_retry(peer); return 0; } + if (notification->error_code == BGP_ERR_OPEN + && notification->error_subcode == BGP_ERR_OPN_UNSUP_PARAM) + { + LOG(4, 0, 0, "BGP peer %s doesn't support BGP Capabilities\n", peer->name); + peer->mp_handling = DoesntHandleIPv6Routes; + bgp_set_retry(peer); + return 0; + } + + if (notification->error_code == BGP_ERR_OPEN + && notification->error_subcode == BGP_ERR_OPN_UNSUP_CAP) + { + /* the only capability we advertise is this one, so upon receiving + an "unsupported capability" message, we disable IPv6 routes for + this peer */ + LOG(4, 0, 0, "BGP peer %s doesn't support IPv6 routes advertisement\n", peer->name); + peer->mp_handling = DoesntHandleIPv6Routes; + break; + } + /* FIXME: should handle more notifications */ LOG(4, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n", peer->name, (int) notification->error_code); @@@ -1395,9 -1012,6 +1377,9 @@@ static int bgp_send_open(struct bgp_peer *peer) { struct bgp_data_open data; + struct bgp_mp_cap_param mp_ipv6 = { htons(BGP_MP_AFI_IPv6), 0, BGP_MP_SAFI_UNICAST }; + struct bgp_capability cap_mp_ipv6; + struct bgp_opt_param param_cap_mp_ipv6; uint16_t len = sizeof(peer->outbuf->packet.header); memset(peer->outbuf->packet.header.marker, 0xff, @@@ -1408,35 -1022,11 +1390,35 @@@ data.version = BGP_VERSION; data.as = htons(our_as); data.hold_time = htons(peer->hold); - data.identifier = my_address; - data.opt_len = 0; + /* use the source IP we use as identifier, if available */ + if (peer->source_addr != INADDR_ANY) + data.identifier = peer->source_addr; + else + data.identifier = my_address; + + /* if we know peer doesn't support MP (mp_handling == DoesntHandleIPv6Routes) + then don't add this parameter */ + if (config->ipv6_prefix.s6_addr[0] + && (peer->mp_handling == HandlingUnknown + || peer->mp_handling == HandleIPv6Routes)) + { + /* construct the param and capability */ + cap_mp_ipv6.code = BGP_CAP_CODE_MP; + cap_mp_ipv6.len = sizeof(mp_ipv6); + memcpy(&cap_mp_ipv6.value, &mp_ipv6, cap_mp_ipv6.len); + + param_cap_mp_ipv6.type = BGP_PARAM_TYPE_CAPABILITY; + param_cap_mp_ipv6.len = 2 + sizeof(mp_ipv6); + memcpy(¶m_cap_mp_ipv6.value, &cap_mp_ipv6, param_cap_mp_ipv6.len); + + data.opt_len = 2 + param_cap_mp_ipv6.len; + memcpy(&data.opt_params, ¶m_cap_mp_ipv6, data.opt_len); + } + else + data.opt_len = 0; - memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE); - len += BGP_DATA_OPEN_SIZE; + memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE + data.opt_len); + len += BGP_DATA_OPEN_SIZE + data.opt_len; peer->outbuf->packet.header.len = htons(len); peer->outbuf->done = 0; @@@ -1603,179 -1193,6 +1585,179 @@@ static int bgp_send_update(struct bgp_p return bgp_write(peer); } +/* send/buffer UPDATE message for IPv6 routes */ +static int bgp_send_update6(struct bgp_peer *peer) +{ + uint16_t attr_len; + uint16_t unreach_len = 0; + char *unreach_len_pos; + uint8_t reach_len; + uint16_t len = sizeof(peer->outbuf->packet.header); + struct bgp_route6_list *have = peer->routes6; + struct bgp_route6_list *want = peer->routing ? bgp_routes6 : 0; + struct bgp_route6_list *e = 0; + struct bgp_route6_list *add = 0; + int s; + char ipv6addr[INET6_ADDRSTRLEN]; + + char *data = (char *) &peer->outbuf->packet.data; + + /* need leave room for attr_len, bgp_path_attrs and one prefix */ + char *max = (char *) &peer->outbuf->packet.data + + sizeof(peer->outbuf->packet.data) + - sizeof(attr_len) - peer->path_attr_len_without_nexthop + - BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE - sizeof(struct bgp_ip6_prefix); + + memset(peer->outbuf->packet.header.marker, 0xff, + sizeof(peer->outbuf->packet.header.marker)); + + peer->outbuf->packet.header.type = BGP_MSG_UPDATE; + + /* insert non-MP unfeasible routes length */ + memcpy(data, &unreach_len, sizeof(unreach_len)); + /* skip over it and attr_len too; it will be filled when known */ + data += sizeof(unreach_len) + sizeof(attr_len); + len += sizeof(unreach_len) + sizeof(attr_len); + + /* copy usual attributes */ + memcpy(data, peer->path_attrs, peer->path_attr_len_without_nexthop); + data += peer->path_attr_len_without_nexthop; + attr_len = peer->path_attr_len_without_nexthop; + + /* copy MP unreachable NLRI heading */ + memcpy(data, peer->mp_unreach_nlri_partial, + BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE); + /* remember where to update this attr len */ + unreach_len_pos = data + 2; + data += BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE; + attr_len += BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE; + + peer->update_routes6 = 0; /* tentatively clear */ + + /* find differences */ + while ((have || want) && data < (max - sizeof(struct bgp_ip6_prefix))) + { + if (have) + s = want + ? memcmp(&have->dest, &want->dest, sizeof(have->dest)) + : -1; + else + s = 1; + + if (s < 0) /* found one to delete */ + { + struct bgp_route6_list *tmp = have; + have = have->next; + + s = BGP_IP_PREFIX_SIZE(tmp->dest); + memcpy(data, &tmp->dest, s); + data += s; + unreach_len += s; + attr_len += s; + + LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n", + inet_ntop(AF_INET6, &tmp->dest.prefix, ipv6addr, INET6_ADDRSTRLEN), + tmp->dest.len, peer->name); + + free(tmp); + + if (e) + e->next = have; + else + peer->routes6 = have; + } + else + { + if (!s) /* same */ + { + e = have; /* stash the last found to relink above */ + have = have->next; + want = want->next; + } + else if (s > 0) /* addition reqd. */ + { + if (add) + { + peer->update_routes6 = 1; /* only one add per packet */ + if (!have) + break; + } + else + add = want; + + if (want) + want = want->next; + } + } + } + + if (have || want) + peer->update_routes6 = 1; /* more to do */ + + /* anything changed? */ + if (!(unreach_len || add)) + return 1; + + if (unreach_len) + { + /* go back and insert MP unreach_len */ + unreach_len += sizeof(struct bgp_attr_mp_unreach_nlri_partial); + unreach_len = htons(unreach_len); + memcpy(unreach_len_pos, &unreach_len, sizeof(unreach_len)); + } + else + { + /* we can remove this attribute, then */ + data -= BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE; + attr_len -= BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE; + } + + if (add) + { + if (!(e = malloc(sizeof(*e)))) + { + LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n", + inet_ntop(AF_INET6, &add->dest.prefix, ipv6addr, INET6_ADDRSTRLEN), + add->dest.len, strerror(errno)); + + return 0; + } + + memcpy(e, add, sizeof(*e)); + e->next = 0; + peer->routes6 = bgp_insert_route6(peer->routes6, e); + + /* copy MP reachable NLRI heading */ + memcpy(data, peer->mp_reach_nlri_partial, + BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE); + /* with proper len */ + reach_len = BGP_IP_PREFIX_SIZE(add->dest); + data[2] = sizeof(struct bgp_attr_mp_reach_nlri_partial) + reach_len; + data += BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE; + attr_len += BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE; + + memcpy(data, &add->dest, reach_len); + data += reach_len; + attr_len += reach_len; + + LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n", + inet_ntop(AF_INET6, &add->dest.prefix, ipv6addr, INET6_ADDRSTRLEN), + add->dest.len, peer->name); + } + + /* update len with attributes we added */ + len += attr_len; + + /* go back and insert attr_len */ + attr_len = htons(attr_len); + memcpy((char *)&peer->outbuf->packet.data + 2, &attr_len, sizeof(attr_len)); + + peer->outbuf->packet.header.len = htons(len); + peer->outbuf->done = 0; + + return bgp_write(peer); +} + /* send/buffer NOTIFICATION message */ static int bgp_send_notification(struct bgp_peer *peer, uint8_t code, uint8_t subcode) diff --combined bgp.h index 44bad0d,f8b52d6..bc5a0e9 --- a/bgp.h +++ b/bgp.h @@@ -43,51 -43,11 +43,51 @@@ struct bgp_data_open char opt_params[sizeof(((struct bgp_packet *)0)->data) - BGP_DATA_OPEN_SIZE]; /* variable */ } __attribute__ ((packed)); +struct bgp_opt_param { + uint8_t type; + uint8_t len; +#define BGP_MAX_OPT_PARAM_SIZE 256 + char value[BGP_MAX_OPT_PARAM_SIZE]; +} __attribute__ ((packed)); + +#define BGP_PARAM_TYPE_CAPABILITY 2 +struct bgp_capability { + uint8_t code; + uint8_t len; +#define BGP_MAX_CAPABILITY_SIZE 256 + char value[BGP_MAX_CAPABILITY_SIZE]; +} __attribute__ ((packed)); + +/* RFC4760 Multiprotocol extension */ +#define BGP_CAP_CODE_MP 1 + +struct bgp_mp_cap_param { + uint16_t afi; /* sa_family_t */ + uint8_t reserved; /* SHOULD be 0 */ + uint8_t safi; +} __attribute__ ((packed)); + +/* bgp_mp_cap_param.afi */ +#define BGP_MP_AFI_RESERVED 0 +#define BGP_MP_AFI_IPv4 1 +#define BGP_MP_AFI_IPv6 2 +/* bgp_mp_cap_param.safi */ +#define BGP_MP_SAFI_UNICAST 1 +#define BGP_MP_SAFI_MULTICAST 2 + +struct bgp_ip6_prefix { + uint8_t len; + uint8_t prefix[16]; /* variable */ +} __attribute__ ((packed)); + +/* end of RFC4760 specific definitions */ + struct bgp_ip_prefix { uint8_t len; uint32_t prefix; /* variable */ } __attribute__ ((packed)); +/* works for both IPv4 and IPv6 prefixes */ #define BGP_IP_PREFIX_SIZE(p) (1 + ((p).len / 8) + ((p).len % 8 != 0)) struct bgp_path_attr { @@@ -105,22 -65,6 +105,22 @@@ } data; /* variable */ } __attribute__ ((packed)); +struct bgp_attr_mp_reach_nlri_partial { + uint16_t afi; /* sa_family_t */ + uint8_t safi; + uint8_t next_hop_len; + uint8_t next_hop[16]; + uint8_t reserved; +} __attribute__ ((packed)); +#define BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE (3 + sizeof(struct bgp_attr_mp_reach_nlri_partial)) + +struct bgp_attr_mp_unreach_nlri_partial { + uint16_t afi; /* sa_family_t */ + uint8_t safi; +} __attribute__ ((packed)); +/* we use it as an extended attribute */ +#define BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE (4 + sizeof(struct bgp_attr_mp_unreach_nlri_partial)) + /* bgp_path_attr.flags (bitfields) */ #define BGP_PATH_ATTR_FLAG_OPTIONAL (1 << 7) #define BGP_PATH_ATTR_FLAG_TRANS (1 << 6) @@@ -141,11 -85,9 +141,11 @@@ #define BGP_PATH_ATTR_CODE_ATOMIC_AGGREGATE 6 /* well-known, discretionary */ #define BGP_PATH_ATTR_CODE_AGGREGATOR 7 /* optional, transitive */ #define BGP_PATH_ATTR_CODE_COMMUNITIES 8 /* optional, transitive (RFC1997) */ +#define BGP_PATH_ATTR_CODE_MP_REACH_NLRI 14 /* optional, non-transitive (RFC4760) */ +#define BGP_PATH_ATTR_CODE_MP_UNREACH_NLRI 15 /* optional, non-transitive (RFC4760) */ #define BGP_PATH_ATTR_SIZE(p) ((((p).flags & BGP_PATH_ATTR_FLAG_EXTLEN) \ - ? ((p).data.e.len + 1) : (p).data.s.len) + 3) + ? ((p).data.e.len + 4) : (p).data.s.len) + 3) /* well known COMMUNITIES */ #define BGP_COMMUNITY_NO_EXPORT 0xffffff01 /* don't advertise outside confederation */ @@@ -159,7 -101,6 +159,7 @@@ struct bgp_data_notification } __attribute__ ((packed)); /* bgp_data_notification.error_code, .error_subcode */ +#define BGP_ERR_UNSPEC 0 #define BGP_ERR_HEADER 1 # define BGP_ERR_HDR_NOT_SYNC 1 # define BGP_ERR_HDR_BAD_LEN 2 @@@ -171,7 -112,6 +171,7 @@@ # define BGP_ERR_OPN_UNSUP_PARAM 4 # define BGP_ERR_OPN_AUTH_FAILURE 5 # define BGP_ERR_OPN_HOLD_TIME 6 +# define BGP_ERR_OPN_UNSUP_CAP 7 #define BGP_ERR_UPDATE 3 # define BGP_ERR_UPD_BAD_ATTR_LIST 1 # define BGP_ERR_UPD_UNKN_WK_ATTR 2 @@@ -198,11 -138,6 +198,11 @@@ enum bgp_state Established, /* established */ }; +struct bgp_route6_list { + struct bgp_ip6_prefix dest; + struct bgp_route6_list *next; +}; + struct bgp_route_list { struct bgp_ip_prefix dest; struct bgp_route_list *next; @@@ -213,17 -148,10 +213,17 @@@ struct bgp_buf size_t done; /* bytes sent/recvd */ }; +enum bgp_mp_handling { + HandleIPv6Routes, + DoesntHandleIPv6Routes, + HandlingUnknown, +}; + /* state */ struct bgp_peer { char name[32]; /* peer name */ in_addr_t addr; /* peer address */ + in_addr_t source_addr; /* our source address */ int as; /* AS number */ int sock; enum bgp_state state; /* FSM state */ @@@ -245,14 -173,8 +245,14 @@@ int cli_flag; /* updates requested from CLI */ char *path_attrs; /* path attrs to send in UPDATE message */ int path_attr_len; /* length of path attrs */ + int path_attr_len_without_nexthop; /* length of path attrs without NEXT_HOP */ uint32_t events; /* events to poll */ struct event_data edata; /* poll data */ + enum bgp_mp_handling mp_handling; /* how it handles IPv6 routes advertisements */ + int update_routes6; /* UPDATE required for IPv6 routes */ + struct bgp_route6_list *routes6; /* IPv6 routes known by this peer */ + char mp_reach_nlri_partial[BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE]; + char mp_unreach_nlri_partial[BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE]; }; /* bgp_peer.cli_flag */ @@@ -266,19 -188,16 +266,19 @@@ extern int bgp_configured /* actions */ int bgp_setup(int as); int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive, - int hold, int enable); + int hold, struct in_addr update_source, int enable); void bgp_stop(struct bgp_peer *peer); void bgp_halt(struct bgp_peer *peer); int bgp_restart(struct bgp_peer *peer); - int bgp_add_route(in_addr_t ip, in_addr_t mask); + int bgp_add_route(in_addr_t ip, int prefixlen); +int bgp_add_route6(struct in6_addr ip, int prefixlen); - int bgp_del_route(in_addr_t ip, in_addr_t mask); + int bgp_del_route(in_addr_t ip, int prefixlen); +int bgp_del_route6(struct in6_addr ip, int prefixlen); void bgp_enable_routing(int enable); int bgp_set_poll(void); int bgp_process(uint32_t events[]); +void bgp_process_peers_timers(); char const *bgp_state_str(enum bgp_state state); extern char const *cvs_id_bgp; diff --combined l2tpns.c index f2a60c9,c34dc63..581198d --- a/l2tpns.c +++ b/l2tpns.c @@@ -14,7 -14,6 +14,6 @@@ char const *cvs_id_l2tpns = "$Id: l2tpn #define SYSLOG_NAMES #include #include - #include #include #include #include @@@ -39,6 -38,8 +38,8 @@@ #include #include #include + #include + #include #include "md5.h" #include "l2tpns.h" @@@ -56,6 -57,7 +57,7 @@@ // Globals configt *config = NULL; // all configuration + int nlfd = -1; // netlink socket int tunfd = -1; // tun interface file handle. (network device) int udpfd = -1; // UDP file handle int controlfd = -1; // Control signal handle @@@ -63,14 -65,14 +65,14 @@@ int clifd = -1; // Socket listening f int daefd = -1; // Socket listening for DAE connections. int snoopfd = -1; // UDP file handle for sending out intercept data int *radfds = NULL; // RADIUS requests file handles - int ifrfd = -1; // File descriptor for routing, etc - int ifr6fd = -1; // File descriptor for IPv6 routing, etc int rand_fd = -1; // Random data source int cluster_sockfd = -1; // Intra-cluster communications socket. int epollfd = -1; // event polling time_t basetime = 0; // base clock -char hostname[1000] = ""; // us. +char hostname[MAXHOSTNAME] = ""; // us. static int tunidx; // ifr_ifindex of tun device + int nlseqnum = 0; // netlink sequence number + int min_initok_nlseqnum = 0; // minimun seq number for messages after init is ok static int syslog_log = 0; // are we logging to syslog static FILE *log_stream = 0; // file handle for direct logging (i.e. direct into file, not via syslog). uint32_t last_id = 0; // Unique ID for radius accounting @@@ -157,10 -159,6 +159,10 @@@ config_descriptt config_values[] = CONFIG("cluster_hb_timeout", cluster_hb_timeout, INT), CONFIG("cluster_master_min_adv", cluster_master_min_adv, INT), CONFIG("ipv6_prefix", ipv6_prefix, IPv6), + CONFIG("cli_bind_address", cli_bind_address, IPv4), + CONFIG("hostname", hostname, STRING), + CONFIG("nexthop_address", nexthop_address, IPv4), + CONFIG("nexthop6_address", nexthop6_address, IPv6), { NULL, 0, 0, 0 }, }; @@@ -168,6 -166,8 +170,6 @@@ static char *plugin_functions[] = NULL, "plugin_pre_auth", "plugin_post_auth", - "plugin_packet_rx", - "plugin_packet_tx", "plugin_timer", "plugin_new_session", "plugin_kill_session", @@@ -199,6 -199,8 +201,8 @@@ struct Tstats *_statistics = NULL struct Tringbuffer *ringbuffer = NULL; #endif + static ssize_t netlink_send(struct nlmsghdr *nh); + static void netlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen); static void cache_ipmap(in_addr_t ip, sessionidt s); static void uncache_ipmap(in_addr_t ip); static void cache_ipv6map(struct in6_addr ip, int prefixlen, sessionidt s); @@@ -418,43 -420,61 +422,61 @@@ void random_data(uint8_t *buf, int len // via BGP if enabled, and stuffs it into the // 'sessionbyip' cache. // - // 'ip' and 'mask' must be in _host_ order. + // 'ip' must be in _host_ order. // - static void routeset(sessionidt s, in_addr_t ip, in_addr_t mask, in_addr_t gw, int add) + static void routeset(sessionidt s, in_addr_t ip, int prefixlen, in_addr_t gw, int add) { - struct rtentry r; + struct { + struct nlmsghdr nh; + struct rtmsg rt; + char buf[32]; + } req; int i; + in_addr_t n_ip; - if (!mask) mask = 0xffffffff; + if (!prefixlen) prefixlen = 32; - ip &= mask; // Force the ip to be the first one in the route. + ip &= 0xffffffff << (32 - prefixlen);; // Force the ip to be the first one in the route. - memset(&r, 0, sizeof(r)); - r.rt_dev = config->tundevice; - r.rt_dst.sa_family = AF_INET; - *(uint32_t *) & (((struct sockaddr_in *) &r.rt_dst)->sin_addr.s_addr) = htonl(ip); - r.rt_gateway.sa_family = AF_INET; - *(uint32_t *) & (((struct sockaddr_in *) &r.rt_gateway)->sin_addr.s_addr) = htonl(gw); - r.rt_genmask.sa_family = AF_INET; - *(uint32_t *) & (((struct sockaddr_in *) &r.rt_genmask)->sin_addr.s_addr) = htonl(mask); - r.rt_flags = (RTF_UP | RTF_STATIC); + memset(&req, 0, sizeof(req)); + + if (add) + { + req.nh.nlmsg_type = RTM_NEWROUTE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE; + } + else + req.nh.nlmsg_type = RTM_DELROUTE; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt)); + + req.rt.rtm_family = AF_INET; + req.rt.rtm_dst_len = prefixlen; + req.rt.rtm_table = RT_TABLE_MAIN; + req.rt.rtm_protocol = RTPROT_BOOT; // XXX + req.rt.rtm_scope = RT_SCOPE_LINK; + req.rt.rtm_type = RTN_UNICAST; + + netlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int)); + n_ip = htonl(ip); + netlink_addattr(&req.nh, RTA_DST, &n_ip, sizeof(n_ip)); if (gw) - r.rt_flags |= RTF_GATEWAY; - else if (mask == 0xffffffff) - r.rt_flags |= RTF_HOST; + { + n_ip = htonl(gw); + netlink_addattr(&req.nh, RTA_GATEWAY, &n_ip, sizeof(n_ip)); + } - LOG(1, s, 0, "Route %s %s/%s%s%s\n", add ? "add" : "del", - fmtaddr(htonl(ip), 0), fmtaddr(htonl(mask), 1), + LOG(1, s, 0, "Route %s %s/%d%s%s\n", add ? "add" : "del", + fmtaddr(htonl(ip), 0), prefixlen, gw ? " via" : "", gw ? fmtaddr(htonl(gw), 2) : ""); - if (ioctl(ifrfd, add ? SIOCADDRT : SIOCDELRT, (void *) &r) < 0) - LOG(0, 0, 0, "routeset() error in ioctl: %s\n", strerror(errno)); + if (netlink_send(&req.nh) < 0) + LOG(0, 0, 0, "routeset() error in sending netlink message: %s\n", strerror(errno)); #ifdef BGP if (add) - bgp_add_route(htonl(ip), htonl(mask)); + bgp_add_route(htonl(ip), prefixlen); else - bgp_del_route(htonl(ip), htonl(mask)); + bgp_del_route(htonl(ip), prefixlen); #endif /* BGP */ // Add/Remove the IPs to the 'sessionbyip' cache. @@@ -470,45 -490,59 +492,64 @@@ if (!add) // Are we deleting a route? s = 0; // Caching the session as '0' is the same as uncaching. - for (i = ip; (i&mask) == (ip&mask) ; ++i) + for (i = ip; i < ip+(1<<(32-prefixlen)) ; ++i) cache_ipmap(i, s); } } void route6set(sessionidt s, struct in6_addr ip, int prefixlen, int add) { - struct in6_rtmsg rt; + struct { + struct nlmsghdr nh; + struct rtmsg rt; + char buf[64]; + } req; + int metric; char ipv6addr[INET6_ADDRSTRLEN]; - if (ifr6fd < 0) + if (!config->ipv6_prefix.s6_addr[0]) { LOG(0, 0, 0, "Asked to set IPv6 route, but IPv6 not setup.\n"); return; } - memset(&rt, 0, sizeof(rt)); + memset(&req, 0, sizeof(req)); - memcpy(&rt.rtmsg_dst, &ip, sizeof(struct in6_addr)); - rt.rtmsg_dst_len = prefixlen; - rt.rtmsg_metric = 1; - rt.rtmsg_flags = RTF_UP; - rt.rtmsg_ifindex = tunidx; + if (add) + { + req.nh.nlmsg_type = RTM_NEWROUTE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE; + } + else + req.nh.nlmsg_type = RTM_DELROUTE; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt)); + + req.rt.rtm_family = AF_INET6; + req.rt.rtm_dst_len = prefixlen; + req.rt.rtm_table = RT_TABLE_MAIN; + req.rt.rtm_protocol = RTPROT_BOOT; // XXX + req.rt.rtm_scope = RT_SCOPE_LINK; + req.rt.rtm_type = RTN_UNICAST; + + netlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int)); + netlink_addattr(&req.nh, RTA_DST, &ip, sizeof(ip)); + metric = 1; + netlink_addattr(&req.nh, RTA_METRICS, &metric, sizeof(metric)); LOG(1, 0, 0, "Route %s %s/%d\n", add ? "add" : "del", inet_ntop(AF_INET6, &ip, ipv6addr, INET6_ADDRSTRLEN), prefixlen); - if (ioctl(ifr6fd, add ? SIOCADDRT : SIOCDELRT, (void *) &rt) < 0) - LOG(0, 0, 0, "route6set() error in ioctl: %s\n", - strerror(errno)); + if (netlink_send(&req.nh) < 0) + LOG(0, 0, 0, "route6set() error in sending netlink message: %s\n", strerror(errno)); - // FIXME: need to add BGP routing (RFC2858) +#ifdef BGP + if (add) + bgp_add_route6(ip, prefixlen); + else + bgp_del_route6(ip, prefixlen); +#endif /* BGP */ if (s) { @@@ -521,21 -555,95 +562,95 @@@ return; } - // defined in linux/ipv6.h, but tricky to include from user-space - // TODO: move routing to use netlink rather than ioctl - struct in6_ifreq { - struct in6_addr ifr6_addr; - __u32 ifr6_prefixlen; - unsigned int ifr6_ifindex; + // + // Set up netlink socket + static void initnetlink(void) + { + struct sockaddr_nl nladdr; + + nlfd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (nlfd < 0) + { + LOG(0, 0, 0, "Can't create netlink socket: %s\n", strerror(errno)); + exit(1); + } + + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + nladdr.nl_pid = getpid(); + + if (bind(nlfd, (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0) + { + LOG(0, 0, 0, "Can't bind netlink socket: %s\n", strerror(errno)); + exit(1); + } + } + + static ssize_t netlink_send(struct nlmsghdr *nh) + { + struct sockaddr_nl nladdr; + struct iovec iov; + struct msghdr msg; + + nh->nlmsg_pid = getpid(); + nh->nlmsg_seq = ++nlseqnum; + + // set kernel address + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + + iov = (struct iovec){ (void *)nh, nh->nlmsg_len }; + msg = (struct msghdr){ (void *)&nladdr, sizeof(nladdr), &iov, 1, NULL, 0, 0 }; + + return sendmsg(nlfd, &msg, 0); + } + + static ssize_t netlink_recv(void *buf, ssize_t len) + { + struct sockaddr_nl nladdr; + struct iovec iov; + struct msghdr msg; + + // set kernel address + memset(&nladdr, 0, sizeof(nladdr)); + nladdr.nl_family = AF_NETLINK; + + iov = (struct iovec){ buf, len }; + msg = (struct msghdr){ (void *)&nladdr, sizeof(nladdr), &iov, 1, NULL, 0, 0 }; + + return recvmsg(nlfd, &msg, 0); + } + + /* adapted from iproute2 */ + static void netlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen) + { + int len = RTA_LENGTH(alen); + struct rtattr *rta; + + rta = (struct rtattr *)(((void *)nh) + NLMSG_ALIGN(nh->nlmsg_len)); + rta->rta_type = type; + rta->rta_len = len; + memcpy(RTA_DATA(rta), data, alen); + nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(len); + } + + // messages corresponding to different phases seq number + static char *tun_nl_phase_msg[] = { + "initialized", + "getting tun interface index", + "setting tun interface parameters", + "setting tun IPv4 address", + "setting tun LL IPv6 address", + "setting tun global IPv6 address", }; // // Set up TUN interface static void inittun(void) { + struct ifinfomsg ifinfo; struct ifreq ifr; - struct in6_ifreq ifr6; - struct sockaddr_in sin = {0}; + memset(&ifr, 0, sizeof(ifr)); ifr.ifr_flags = IFF_TUN; @@@ -554,75 -662,163 +669,163 @@@ LOG(0, 0, 0, "Can't set tun interface: %s\n", strerror(errno)); exit(1); } - assert(strlen(ifr.ifr_name) < sizeof(config->tundevice)); - strncpy(config->tundevice, ifr.ifr_name, sizeof(config->tundevice) - 1); - ifrfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); - - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = config->bind_address ? config->bind_address : 0x01010101; // 1.1.1.1 - memcpy(&ifr.ifr_addr, &sin, sizeof(struct sockaddr)); + assert(strlen(ifr.ifr_name) < sizeof(config->tundevice) - 1); + strncpy(config->tundevice, ifr.ifr_name, sizeof(config->tundevice)); - if (ioctl(ifrfd, SIOCSIFADDR, (void *) &ifr) < 0) { - LOG(0, 0, 0, "Error setting tun address: %s\n", strerror(errno)); - exit(1); - } - /* Bump up the qlen to deal with bursts from the network */ - ifr.ifr_qlen = 1000; - if (ioctl(ifrfd, SIOCSIFTXQLEN, (void *) &ifr) < 0) - { - LOG(0, 0, 0, "Error setting tun queue length: %s\n", strerror(errno)); - exit(1); - } - /* set MTU to modem MRU */ - ifr.ifr_mtu = MRU; - if (ioctl(ifrfd, SIOCSIFMTU, (void *) &ifr) < 0) - { - LOG(0, 0, 0, "Error setting tun MTU: %s\n", strerror(errno)); - exit(1); - } - ifr.ifr_flags = IFF_UP; - if (ioctl(ifrfd, SIOCSIFFLAGS, (void *) &ifr) < 0) - { - LOG(0, 0, 0, "Error setting tun flags: %s\n", strerror(errno)); - exit(1); - } - if (ioctl(ifrfd, SIOCGIFINDEX, (void *) &ifr) < 0) - { - LOG(0, 0, 0, "Error getting tun ifindex: %s\n", strerror(errno)); - exit(1); - } - tunidx = ifr.ifr_ifindex; + // get the interface index + struct { + struct nlmsghdr nh; + struct ifinfomsg ifinfo; + } req; + char buf[4096]; + ssize_t len; + struct nlmsghdr *resp_nh; + + req.nh.nlmsg_type = RTM_GETLINK; + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifinfo)); - // Only setup IPv6 on the tun device if we have a configured prefix - if (config->ipv6_prefix.s6_addr[0]) { - ifr6fd = socket(PF_INET6, SOCK_DGRAM, 0); + req.ifinfo.ifi_family = AF_UNSPEC; // as the man says - // Link local address is FE80::1 - memset(&ifr6.ifr6_addr, 0, sizeof(ifr6.ifr6_addr)); - ifr6.ifr6_addr.s6_addr[0] = 0xFE; - ifr6.ifr6_addr.s6_addr[1] = 0x80; - ifr6.ifr6_addr.s6_addr[15] = 1; - ifr6.ifr6_prefixlen = 64; - ifr6.ifr6_ifindex = ifr.ifr_ifindex; - if (ioctl(ifr6fd, SIOCSIFADDR, (void *) &ifr6) < 0) + netlink_addattr(&req.nh, IFLA_IFNAME, config->tundevice, strlen(config->tundevice)+1); + + if(netlink_send(&req.nh) < 0 || (len = netlink_recv(buf, sizeof(buf))) < 0) { - LOG(0, 0, 0, "Error setting tun IPv6 link local address:" - " %s\n", strerror(errno)); + LOG(0, 0, 0, "Error getting tun ifindex: %s\n", strerror(errno)); + exit(1); } - // Global address is prefix::1 - memset(&ifr6.ifr6_addr, 0, sizeof(ifr6.ifr6_addr)); - ifr6.ifr6_addr = config->ipv6_prefix; - ifr6.ifr6_addr.s6_addr[15] = 1; - ifr6.ifr6_prefixlen = 64; - ifr6.ifr6_ifindex = ifr.ifr_ifindex; - if (ioctl(ifr6fd, SIOCSIFADDR, (void *) &ifr6) < 0) + resp_nh = (struct nlmsghdr *)buf; + if (!NLMSG_OK (resp_nh, len)) { - LOG(0, 0, 0, "Error setting tun IPv6 global address: %s\n", - strerror(errno)); + LOG(0, 0, 0, "Malformed answer getting tun ifindex %ld\n", len); + exit(1); + } + + memcpy(&ifinfo, NLMSG_DATA(resp_nh), sizeof(ifinfo)); + // got index + tunidx = ifinfo.ifi_index; + } + { + struct { + // interface setting + struct nlmsghdr nh; + union { + struct ifinfomsg ifinfo; + struct ifaddrmsg ifaddr; + } ifmsg; + char rtdata[32]; // 32 should be enough + } req; + uint32_t txqlen, mtu; + in_addr_t ip; + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = RTM_SETLINK; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_MULTI; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifinfo)); + + req.ifmsg.ifinfo = ifinfo; + req.ifmsg.ifinfo.ifi_flags |= IFF_UP; // set interface up + req.ifmsg.ifinfo.ifi_change = IFF_UP; // only change this flag + + /* Bump up the qlen to deal with bursts from the network */ + txqlen = 1000; + netlink_addattr(&req.nh, IFLA_TXQLEN, &txqlen, sizeof(txqlen)); + /* set MTU to modem MRU */ + mtu = MRU; + netlink_addattr(&req.nh, IFLA_MTU, &mtu, sizeof(mtu)); + + if (netlink_send(&req.nh) < 0) + goto senderror; + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = RTM_NEWADDR; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr)); + + req.ifmsg.ifaddr.ifa_family = AF_INET; + req.ifmsg.ifaddr.ifa_prefixlen = 32; + req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_UNIVERSE; + req.ifmsg.ifaddr.ifa_index = ifinfo.ifi_index; + + if (config->bind_address) + ip = config->bind_address; + else + ip = 0x01010101; // 1.1.1.1 + netlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip)); + + if (netlink_send(&req.nh) < 0) + goto senderror; + + // Only setup IPv6 on the tun device if we have a configured prefix + if (config->ipv6_prefix.s6_addr[0]) { + struct in6_addr ip6; + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = RTM_NEWADDR; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr)); + + req.ifmsg.ifaddr.ifa_family = AF_INET6; + req.ifmsg.ifaddr.ifa_prefixlen = 64; + req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_LINK; + req.ifmsg.ifaddr.ifa_index = ifinfo.ifi_index; + + // Link local address is FE80::1 + memset(&ip6, 0, sizeof(ip6)); + ip6.s6_addr[0] = 0xFE; + ip6.s6_addr[1] = 0x80; + ip6.s6_addr[15] = 1; + netlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6)); + + if (netlink_send(&req.nh) < 0) + goto senderror; + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = RTM_NEWADDR; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI; + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr)); + + req.ifmsg.ifaddr.ifa_family = AF_INET6; + req.ifmsg.ifaddr.ifa_prefixlen = 64; + req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_UNIVERSE; + req.ifmsg.ifaddr.ifa_index = ifinfo.ifi_index; + + // Global address is prefix::1 + ip6 = config->ipv6_prefix; + ip6.s6_addr[15] = 1; + netlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6)); + + if (netlink_send(&req.nh) < 0) + goto senderror; } + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_type = NLMSG_DONE; + req.nh.nlmsg_len = NLMSG_LENGTH(0); + + if (netlink_send(&req.nh) < 0) + goto senderror; + + // if we get an error for seqnum < min_initok_nlseqnum, + // we must exit as initialization went wrong + if (config->ipv6_prefix.s6_addr[0]) + min_initok_nlseqnum = 5 + 1; // idx + if + addr + 2*addr6 + else + min_initok_nlseqnum = 3 + 1; // idx + if + addr } + + return; + + senderror: + LOG(0, 0, 0, "Error while setting up tun device: %s\n", strerror(errno)); + exit(1); } // set up UDP ports @@@ -1768,11 -1964,11 +1971,11 @@@ void sessionshutdown(sessionidt s, cha int routed = 0; for (r = 0; r < MAXROUTE && session[s].route[r].ip; r++) { - if ((session[s].ip & session[s].route[r].mask) == - (session[s].route[r].ip & session[s].route[r].mask)) + if ((session[s].ip >> (32-session[s].route[r].prefixlen)) == + (session[s].route[r].ip >> (32-session[s].route[r].prefixlen))) routed++; - if (del_routes) routeset(s, session[s].route[r].ip, session[s].route[r].mask, 0, 0); + if (del_routes) routeset(s, session[s].route[r].ip, session[s].route[r].prefixlen, 0, 0); session[s].route[r].ip = 0; } @@@ -3404,36 -3600,35 +3607,36 @@@ static int still_busy(void static clockt last_talked = 0; static clockt start_busy_wait = 0; - if (!config->cluster_iam_master) - { #ifdef BGP - static time_t stopped_bgp = 0; - if (bgp_configured) + static time_t stopped_bgp = 0; + if (bgp_configured) + { + if (!stopped_bgp) { - if (!stopped_bgp) - { - LOG(1, 0, 0, "Shutting down in %d seconds, stopping BGP...\n", QUIT_DELAY); + LOG(1, 0, 0, "Shutting down in %d seconds, stopping BGP...\n", QUIT_DELAY); - for (i = 0; i < BGP_NUM_PEERS; i++) - if (bgp_peers[i].state == Established) - bgp_stop(&bgp_peers[i]); + for (i = 0; i < BGP_NUM_PEERS; i++) + if (bgp_peers[i].state == Established) + bgp_stop(&bgp_peers[i]); - stopped_bgp = time_now; + stopped_bgp = time_now; + if (!config->cluster_iam_master) + { // we don't want to become master cluster_send_ping(0); return 1; } - - if (time_now < (stopped_bgp + QUIT_DELAY)) - return 1; } + + if (!config->cluster_iam_master && time_now < (stopped_bgp + QUIT_DELAY)) + return 1; + } #endif /* BGP */ + if (!config->cluster_iam_master) return 0; - } if (main_quit == QUIT_SHUTDOWN) { @@@ -3499,8 -3694,8 +3702,8 @@@ # include "fake_epoll.h" #endif - // the base set of fds polled: cli, cluster, tun, udp, control, dae - #define BASE_FDS 6 + // the base set of fds polled: cli, cluster, tun, udp, control, dae, netlink + #define BASE_FDS 7 // additional polled fds #ifdef BGP @@@ -3524,8 -3719,8 +3727,8 @@@ static void mainloop(void exit(1); } - LOG(4, 0, 0, "Beginning of main loop. clifd=%d, cluster_sockfd=%d, tunfd=%d, udpfd=%d, controlfd=%d, daefd=%d\n", - clifd, cluster_sockfd, tunfd, udpfd, controlfd, daefd); + LOG(4, 0, 0, "Beginning of main loop. clifd=%d, cluster_sockfd=%d, tunfd=%d, udpfd=%d, controlfd=%d, daefd=%d, nlfd=%d\n", + clifd, cluster_sockfd, tunfd, udpfd, controlfd, daefd, nlfd); /* setup our fds to poll for input */ { @@@ -3561,6 -3756,10 +3764,10 @@@ d[i].type = FD_TYPE_DAE; e.data.ptr = &d[i++]; epoll_ctl(epollfd, EPOLL_CTL_ADD, daefd, &e); + + d[i].type = FD_TYPE_NETLINK; + e.data.ptr = &d[i++]; + epoll_ctl(epollfd, EPOLL_CTL_ADD, nlfd, &e); } #ifdef BGP @@@ -3574,8 -3773,7 +3781,8 @@@ if (config->neighbour[i].name[0]) bgp_start(&bgp_peers[i], config->neighbour[i].name, config->neighbour[i].as, config->neighbour[i].keepalive, - config->neighbour[i].hold, 0); /* 0 = routing disabled */ + config->neighbour[i].hold, config->neighbour[i].update_source, + 0); /* 0 = routing disabled */ } #endif /* BGP */ @@@ -3698,6 -3896,32 +3905,32 @@@ break; #endif /* BGP */ + case FD_TYPE_NETLINK: + { + struct nlmsghdr *nh = (struct nlmsghdr *)buf; + s = netlink_recv(buf, sizeof(buf)); + if (nh->nlmsg_type == NLMSG_ERROR) + { + struct nlmsgerr *errmsg = NLMSG_DATA(nh); + if (errmsg->error) + { + if (errmsg->msg.nlmsg_seq < min_initok_nlseqnum) + { + LOG(0, 0, 0, "Got a fatal netlink error (while %s): %s\n", tun_nl_phase_msg[nh->nlmsg_seq], strerror(-errmsg->error)); + exit(1); + } + else + + LOG(0, 0, 0, "Got a netlink error: %s\n", strerror(-errmsg->error)); + } + // else it's a ack + } + else + LOG(1, 0, 0, "Got a unknown netlink message: type %d seq %d flags %d\n", nh->nlmsg_type, nh->nlmsg_seq, nh->nlmsg_flags); + n--; + break; + } + default: LOG(0, 0, 0, "Unexpected fd type returned from epoll_wait: %d\n", d->type); } @@@ -3769,11 -3993,6 +4002,11 @@@ more++; } } +#ifdef BGP + else + /* no event received, but timers could still have expired */ + bgp_process_peers_timers(); +#endif /* BGP */ if (time_changed) { @@@ -4081,14 -4300,9 +4314,14 @@@ static void initdata(int optdebug, cha if (!*hostname) { - // Grab my hostname unless it's been specified - gethostname(hostname, sizeof(hostname)); - stripdomain(hostname); + if (!*config->hostname) + { + // Grab my hostname unless it's been specified + gethostname(hostname, sizeof(hostname)); + stripdomain(hostname); + } + else + strcpy(hostname, config->hostname); } _statistics->start_time = _statistics->last_reset = time(NULL); @@@ -4266,18 -4480,18 +4499,18 @@@ static void fix_address_pool(int sid // // Add a block of addresses to the IP pool to hand out. // - static void add_to_ip_pool(in_addr_t addr, in_addr_t mask) + static void add_to_ip_pool(in_addr_t addr, int prefixlen) { int i; - if (mask == 0) - mask = 0xffffffff; // Host route only. + if (prefixlen == 0) + prefixlen = 32; // Host route only. - addr &= mask; + addr &= 0xffffffff << (32 - prefixlen); if (ip_pool_size >= MAXIPPOOL) // Pool is full! return ; - for (i = addr ;(i & mask) == addr; ++i) + for (i = addr ; i < addr+(1<<(32-prefixlen)); ++i) { if ((i & 0xff) == 0 || (i&0xff) == 255) continue; // Skip 0 and broadcast addresses. @@@ -4335,7 -4549,7 +4568,7 @@@ static void initippool( { // It's a range int numbits = 0; - in_addr_t start = 0, mask = 0; + in_addr_t start = 0; LOG(2, 0, 0, "Adding IP address range %s\n", buf); *p++ = 0; @@@ -4345,15 -4559,14 +4578,14 @@@ continue; } start = ntohl(inet_addr(pool)); - mask = (in_addr_t) (pow(2, numbits) - 1) << (32 - numbits); // Add a static route for this pool - LOG(5, 0, 0, "Adding route for address pool %s/%u\n", - fmtaddr(htonl(start), 0), 32 + mask); + LOG(5, 0, 0, "Adding route for address pool %s/%d\n", + fmtaddr(htonl(start), 0), numbits); - routeset(0, start, mask, 0, 1); + routeset(0, start, numbits, 0, 1); - add_to_ip_pool(start, mask); + add_to_ip_pool(start, numbits); } else { @@@ -4549,6 -4762,8 +4781,8 @@@ int main(int argc, char *argv[] } } + initnetlink(); + /* Set up the cluster communications port. */ if (cluster_init() < 0) exit(1); @@@ -5000,11 -5215,11 +5234,11 @@@ int sessionsetup(sessionidt s, tunnelid // Add the route for this session. for (r = 0; r < MAXROUTE && session[s].route[r].ip; r++) { - if ((session[s].ip & session[s].route[r].mask) == - (session[s].route[r].ip & session[s].route[r].mask)) + if ((session[s].ip >> (32-session[s].route[r].prefixlen)) == + (session[s].route[r].ip >> (32-session[s].route[r].prefixlen))) routed++; - routeset(s, session[s].route[r].ip, session[s].route[r].mask, 0, 1); + routeset(s, session[s].route[r].ip, session[s].route[r].prefixlen, 0, 1); } // Static IPs need to be routed if not already @@@ -5075,7 -5290,7 +5309,7 @@@ int load_session(sessionidt s, session for (i = 0; !newip && i < MAXROUTE && (session[s].route[i].ip || new->route[i].ip); i++) if (new->route[i].ip != session[s].route[i].ip || - new->route[i].mask != session[s].route[i].mask) + new->route[i].prefixlen != session[s].route[i].prefixlen) newip++; // needs update @@@ -5086,11 -5301,11 +5320,11 @@@ // remove old routes... for (i = 0; i < MAXROUTE && session[s].route[i].ip; i++) { - if ((session[s].ip & session[s].route[i].mask) == - (session[s].route[i].ip & session[s].route[i].mask)) + if ((session[s].ip >> (32-session[s].route[i].prefixlen)) == + (session[s].route[i].ip >> (32-session[s].route[i].prefixlen))) routed++; - routeset(s, session[s].route[i].ip, session[s].route[i].mask, 0, 0); + routeset(s, session[s].route[i].ip, session[s].route[i].prefixlen, 0, 0); } // ...ip @@@ -5109,11 -5324,11 +5343,11 @@@ // add new routes... for (i = 0; i < MAXROUTE && new->route[i].ip; i++) { - if ((new->ip & new->route[i].mask) == - (new->route[i].ip & new->route[i].mask)) + if ((new->ip >> (32-new->route[i].prefixlen)) == + (new->route[i].ip >> (32-new->route[i].prefixlen))) routed++; - routeset(s, new->route[i].ip, new->route[i].mask, 0, 1); + routeset(s, new->route[i].ip, new->route[i].prefixlen, 0, 1); } // ...ip diff --combined l2tpns.h index 4f7fad9,bab2921..5cba32c --- a/l2tpns.h +++ b/l2tpns.h @@@ -38,7 -38,6 +38,7 @@@ #define PPPoE_MRU 1492 // maximum PPPoE MRU (rfc2516: 1500 less PPPoE header (6) and PPP protocol ID (2)) #define MAXETHER (MAXMTU+18) // max packet we try sending to tun #define MAXTEL 96 // telephone number +#define MAXHOSTNAME 256 // hostname #define MAXUSER 128 // username #define MAXPASS 128 // password #define MAXPLUGINS 20 // maximum number of plugins to load @@@ -234,7 -233,7 +234,7 @@@ struct cli_tunnel_actions typedef struct // route { in_addr_t ip; - in_addr_t mask; + int prefixlen; } routet; @@@ -617,6 -616,10 +617,10 @@@ struct Tstat #define SET_STAT(x, y) #endif + #ifndef IFNAMSIZ + # define IFNAMSIZ 16 + #endif + typedef struct { int debug; // debugging level @@@ -632,7 -635,7 +636,7 @@@ int reload_config; // flag to re-read config (set by cli) int multi_read_count; // amount of packets to read per fd in processing loop - char tundevice[10]; // tun device name + char tundevice[IFNAMSIZ]; // tun device name char log_filename[128]; char l2tp_secret[64]; // L2TP shared secret @@@ -713,8 -716,6 +717,8 @@@ int cluster_master_min_adv; // Master advertises routes while the number of up to date // slaves is less than this value. + in_addr_t cli_bind_address; // bind address for CLI + char hostname[MAXHOSTNAME]; // hostname (overridden by -h on command line) // Guest change char guest_user[MAXUSER]; // Guest account username @@@ -726,10 -727,7 +730,10 @@@ uint16_t as; int keepalive; int hold; + struct in_addr update_source; } neighbour[BGP_NUM_PEERS]; + in_addr_t nexthop_address; + struct in6_addr nexthop6_address; #endif } configt; @@@ -941,6 -939,7 +945,7 @@@ struct event_data FD_TYPE_DAE, FD_TYPE_RADIUS, FD_TYPE_BGP, + FD_TYPE_NETLINK, } type; int index; // for RADIUS, BGP };