static void bgp_clear(struct bgp_peer *peer);
static void bgp_set_retry(struct bgp_peer *peer);
- static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx);
+ static void bgp_cidr(in_addr_t ip, int prefixlen, struct bgp_ip_prefix *pfx);
static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
struct bgp_route_list *new);
+static struct bgp_route6_list *bgp_insert_route6(struct bgp_route6_list *head,
+ struct bgp_route6_list *new);
+static void bgp_process_timers(struct bgp_peer *peer);
static void bgp_free_routes(struct bgp_route_list *routes);
+static void bgp_free_routes6(struct bgp_route6_list *routes);
static char const *bgp_msg_type_str(uint8_t type);
static int bgp_connect(struct bgp_peer *peer);
static int bgp_handle_connect(struct bgp_peer *peer);
static int bgp_send_open(struct bgp_peer *peer);
static int bgp_send_keepalive(struct bgp_peer *peer);
static int bgp_send_update(struct bgp_peer *peer);
+static int bgp_send_update6(struct bgp_peer *peer);
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
uint8_t subcode);
static uint16_t our_as;
static struct bgp_route_list *bgp_routes = 0;
+static struct bgp_route6_list *bgp_routes6 = 0;
int bgp_configured = 0;
struct bgp_peer *bgp_peers = 0;
return 0;
bgp_routes = 0;
+ bgp_routes6 = 0;
bgp_configured = 0; /* set by bgp_start */
return 1;
/* start connection with a peer */
int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
- int hold, int enable)
+ int hold, struct in_addr update_source, int enable)
{
struct hostent *h;
int ibgp;
}
memcpy(&peer->addr, h->h_addr, sizeof(peer->addr));
+ peer->source_addr = update_source.s_addr;
peer->as = as > 0 ? as : our_as;
ibgp = peer->as == our_as;
ADD_ATTRIBUTE();
- /* NEXT_HOP */
- a.flags = BGP_PATH_ATTR_FLAG_TRANS;
- a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
- ip = my_address; /* we're it */
- a.data.s.len = sizeof(ip);
- memcpy(a.data.s.value, &ip, sizeof(ip));
-
- ADD_ATTRIBUTE();
-
/* MULTI_EXIT_DISC */
a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC;
ADD_ATTRIBUTE();
+ /* remember the len before adding NEXT_HOP */
+ peer->path_attr_len_without_nexthop = peer->path_attr_len;
+
+ /* NEXT_HOP */
+ a.flags = BGP_PATH_ATTR_FLAG_TRANS;
+ a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
+ if (config->nexthop_address)
+ {
+ ip = config->nexthop_address;
+ }
+ else
+ {
+ ip = my_address; /* we're it */
+ }
+ a.data.s.len = sizeof(ip);
+ memcpy(a.data.s.value, &ip, sizeof(ip));
+
+ ADD_ATTRIBUTE();
+
if (!(peer->path_attrs = malloc(peer->path_attr_len)))
{
LOG(0, 0, 0, "Can't allocate path_attrs for %s (%s)\n",
memcpy(peer->path_attrs, path_attrs, peer->path_attr_len);
+ /* multiprotocol attributes initialization */
+ if (config->ipv6_prefix.s6_addr[0])
+ {
+ struct bgp_attr_mp_reach_nlri_partial mp_reach_nlri_partial;
+ struct bgp_attr_mp_unreach_nlri_partial mp_unreach_nlri_partial;
+
+ a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
+ a.code = BGP_PATH_ATTR_CODE_MP_REACH_NLRI;
+ a.data.s.len = 0; /* will be set on UPDATE */
+
+ mp_reach_nlri_partial.afi = htons(BGP_MP_AFI_IPv6);
+ mp_reach_nlri_partial.safi = BGP_MP_SAFI_UNICAST;
+ mp_reach_nlri_partial.reserved = 0;
+ mp_reach_nlri_partial.next_hop_len = 16;
+
+ /* use the defined nexthop6, or our address in ipv6_prefix */
+ if (config->nexthop6_address.s6_addr[0])
+ memcpy(&mp_reach_nlri_partial.next_hop,
+ &config->nexthop6_address.s6_addr, 16);
+ else
+ {
+ /* our address is ipv6prefix::1 */
+ memcpy(&mp_reach_nlri_partial.next_hop,
+ &config->ipv6_prefix.s6_addr, 16);
+ mp_reach_nlri_partial.next_hop[15] = 1;
+ }
+
+ memcpy(&a.data.s.value, &mp_reach_nlri_partial,
+ sizeof(struct bgp_attr_mp_reach_nlri_partial));
+ memcpy(&peer->mp_reach_nlri_partial, &a,
+ BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE);
+
+ a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_EXTLEN;
+ a.code = BGP_PATH_ATTR_CODE_MP_UNREACH_NLRI;
+ a.data.e.len = 0; /* will be set on UPDATE */
+
+ mp_unreach_nlri_partial.afi = htons(BGP_MP_AFI_IPv6);
+ mp_unreach_nlri_partial.safi = BGP_MP_SAFI_UNICAST;
+
+ memcpy(&a.data.e.value, &mp_unreach_nlri_partial,
+ sizeof(struct bgp_attr_mp_unreach_nlri_partial));
+ memcpy(&peer->mp_unreach_nlri_partial, &a,
+ BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE);
+ }
+
+ peer->mp_handling = HandlingUnknown;
+
LOG(4, 0, 0, "Initiating BGP connection to %s (routing %s)\n",
name, enable ? "enabled" : "suspended");
bgp_free_routes(peer->routes);
peer->routes = 0;
+ bgp_free_routes6(peer->routes6);
+ peer->routes6 = 0;
peer->outbuf->packet.header.len = 0;
peer->outbuf->done = 0;
bgp_halt(peer); /* give up */
}
- /* convert ip/mask to CIDR notation */
- static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx)
- {
- int i;
- uint32_t b;
-
- /* convert to prefix notation */
- pfx->len = 32;
- pfx->prefix = ip;
-
- if (!mask) /* bogus */
- mask = 0xffffffff;
-
- for (i = 0; i < 32 && ((b = ntohl(1 << i)), !(mask & b)); i++)
- {
- pfx->len--;
- pfx->prefix &= ~b;
- }
- }
-
/* insert route into list; sorted */
static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
struct bgp_route_list *new)
return head;
}
+/*
+ * Link an IPv6 route into a sorted list.
+ *
+ * "new" is placed in front of the first entry whose dest does not compare
+ * (memcmp) lower than its own; the possibly changed list head is returned.
+ */
+static struct bgp_route6_list *bgp_insert_route6(struct bgp_route6_list *head,
+    struct bgp_route6_list *new)
+{
+    /* address of the link that will end up pointing at the new entry */
+    struct bgp_route6_list **link = &head;
+
+    while (*link && memcmp(&(*link)->dest, &new->dest, sizeof((*link)->dest)) < 0)
+        link = &(*link)->next;
+
+    new->next = *link;
+    *link = new;
+
+    return head;
+}
+
/* add route to list for peers */
/*
* Note: this doesn't do route aggregation, nor drop routes if a less
* that if that route is later deleted we don't have to be concerned
* about adding back the more specific one).
*/
- int bgp_add_route(in_addr_t ip, in_addr_t mask)
+ int bgp_add_route(in_addr_t ip, int prefixlen)
{
struct bgp_route_list *r = bgp_routes;
struct bgp_route_list add;
int i;
- bgp_cidr(ip, mask, &add.dest);
+ add.dest.prefix = ip;
+ add.dest.len = prefixlen;
add.next = 0;
/* check for duplicate */
return 1;
}
+/*
+ * Register an IPv6 route to be announced to peers.
+ *
+ * Same provisions as the IPv4 variant: no aggregation is done.
+ * Returns 1 when the route is registered (or was already), 0 when the
+ * list entry can't be allocated.
+ */
+int bgp_add_route6(struct in6_addr ip, int prefixlen)
+{
+    struct bgp_route6_list candidate;
+    struct bgp_route6_list *node;
+    char addrbuf[INET6_ADDRSTRLEN];
+    int cmp;
+    int idx;
+
+    memcpy(&candidate.dest.prefix, &ip.s6_addr, 16);
+    candidate.dest.len = prefixlen;
+    candidate.next = 0;
+
+    /* the list is sorted, so the scan can stop at the first larger entry */
+    for (node = bgp_routes6; node; node = node->next)
+    {
+        cmp = memcmp(&node->dest, &candidate.dest, sizeof(node->dest));
+        if (cmp == 0)
+            return 1; /* already covered */
+
+        if (cmp > 0)
+            break;
+    }
+
+    /* insert a heap copy into the route list; sorted */
+    node = malloc(sizeof(*node));
+    if (!node)
+    {
+        LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
+            inet_ntop(AF_INET6, &ip, addrbuf, INET6_ADDRSTRLEN), candidate.dest.len,
+            strerror(errno));
+
+        return 0;
+    }
+
+    memcpy(node, &candidate, sizeof(*node));
+    bgp_routes6 = bgp_insert_route6(bgp_routes6, node);
+
+    /* flag established, IPv6-capable peers for update */
+    for (idx = 0; idx < BGP_NUM_PEERS; idx++)
+    {
+        if (bgp_peers[idx].state != Established)
+            continue;
+
+        if (bgp_peers[idx].mp_handling != HandleIPv6Routes)
+            continue;
+
+        bgp_peers[idx].update_routes6 = 1;
+    }
+
+    LOG(4, 0, 0, "Registered BGP route %s/%d\n",
+        inet_ntop(AF_INET6, &ip, addrbuf, INET6_ADDRSTRLEN), candidate.dest.len);
+
+    return 1;
+}
+
/* remove route from list for peers */
- int bgp_del_route(in_addr_t ip, in_addr_t mask)
+ int bgp_del_route(in_addr_t ip, int prefixlen)
{
struct bgp_route_list *r = bgp_routes;
struct bgp_route_list *e = 0;
struct bgp_route_list del;
int i;
- bgp_cidr(ip, mask, &del.dest);
+ del.dest.prefix = ip;
+ del.dest.len = prefixlen;
del.next = 0;
/* find entry in routes list and remove */
return 1;
}
+/*
+ * Remove an IPv6 route from the list announced to peers.
+ *
+ * When the route is found it is unlinked and freed, and every established
+ * peer that handles IPv6 routes is flagged for an UPDATE.
+ * Always returns 1, whether or not the route was registered.
+ */
+int bgp_del_route6(struct in6_addr ip, int prefixlen)
+{
+    struct bgp_route6_list *r = bgp_routes6;
+    struct bgp_route6_list *e = 0;
+    struct bgp_route6_list del;
+    int found = 0;
+    int i;
+    char ipv6addr[INET6_ADDRSTRLEN];
+
+    memcpy(&del.dest.prefix, &ip.s6_addr, 16);
+    del.dest.len = prefixlen;
+    del.next = 0;
+
+    /* find entry in routes list and remove */
+    while (r)
+    {
+        i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
+        if (!i)
+        {
+            if (e)
+                e->next = r->next;
+            else
+                bgp_routes6 = r->next;
+
+            free(r);
+            /* record the hit explicitly: r must not be looked at once freed */
+            found = 1;
+            break;
+        }
+
+        if (i > 0)
+            break; /* list is sorted: the route can't be further down */
+
+        e = r;
+        r = r->next;
+    }
+
+    /* not found */
+    if (!found)
+        return 1;
+
+    /* flag established peers for update */
+    for (i = 0; i < BGP_NUM_PEERS; i++)
+        if (bgp_peers[i].state == Established
+            && bgp_peers[i].mp_handling == HandleIPv6Routes)
+            bgp_peers[i].update_routes6 = 1;
+
+    LOG(4, 0, 0, "Removed BGP route %s/%d\n",
+        inet_ntop(AF_INET6, &ip, ipv6addr, INET6_ADDRSTRLEN), del.dest.len);
+
+    return 1;
+}
+
/* enable or disable routing */
void bgp_enable_routing(int enable)
{
continue;
}
- /* process timers */
- if (peer->state == Established)
+ /* process pending IPv6 updates */
+ if (peer->update_routes6
+ && !peer->outbuf->packet.header.len) /* ditto */
{
- if (time_now > peer->expire_time)
- {
- LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
- peer->name, peer->hold);
-
- bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
+ if (!bgp_send_update6(peer))
continue;
- }
-
- if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
- bgp_send_keepalive(peer);
- }
- else if (peer->state == Idle)
- {
- if (time_now > peer->retry_time)
- bgp_connect(peer);
}
- else if (time_now > peer->state_time + BGP_STATE_TIME)
+
+ /* process timers */
+ bgp_process_timers(peer);
+ }
+
+ return 1;
+}
+
+/*
+ * Run the timer handling, and nothing else, for every peer that is not
+ * Disabled.  Does nothing while BGP is not configured.
+ */
+void bgp_process_peers_timers()
+{
+    int idx = 0;
+
+    if (!bgp_configured)
+        return;
+
+    while (idx < BGP_NUM_PEERS)
+    {
+        struct bgp_peer *peer = &bgp_peers[idx++];
+
+        if (peer->state != Disabled)
+            bgp_process_timers(peer);
+    }
+}
+
+static void bgp_process_timers(struct bgp_peer *peer)
+{
+ if (peer->state == Established)
+ {
+ if (time_now > peer->expire_time)
{
- LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
- bgp_state_str(peer->state), peer->name);
+ LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
+ peer->name, peer->hold);
- bgp_restart(peer);
+ bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
+ return;
}
+
+ if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
+ bgp_send_keepalive(peer);
}
+ else if (peer->state == Idle)
+ {
+ if (time_now > peer->retry_time)
+ bgp_connect(peer);
+ }
+ else if (time_now > peer->state_time + BGP_STATE_TIME)
+ {
+ LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
+ bgp_state_str(peer->state), peer->name);
- return 1;
+ bgp_restart(peer);
+ }
}
static void bgp_free_routes(struct bgp_route_list *routes)
}
}
+/* release every entry of an IPv6 route list */
+static void bgp_free_routes6(struct bgp_route6_list *routes)
+{
+    while (routes)
+    {
+        /* fetch the successor first: the entry is gone after free() */
+        struct bgp_route6_list *next = routes->next;
+
+        free(routes);
+        routes = next;
+    }
+}
+
char const *bgp_state_str(enum bgp_state state)
{
switch (state)
{
static int bgp_port = 0;
struct sockaddr_in addr;
+ struct sockaddr_in source_addr;
struct epoll_event ev;
if (!bgp_port)
/* set to non-blocking */
fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
+ /* set source address */
+ memset(&source_addr, 0, sizeof(source_addr));
+ source_addr.sin_family = AF_INET;
+ source_addr.sin_addr.s_addr = peer->source_addr; /* defaults to INADDR_ANY */
+ if (bind(peer->sock, (struct sockaddr *) &source_addr, sizeof(source_addr)) < 0)
+ {
+ LOG(1, 0, 0, "Can't set source address to %s: %s\n",
+ inet_ntoa(source_addr.sin_addr), strerror(errno));
+
+ bgp_set_retry(peer);
+ return 0;
+ }
+
/* try connect */
memset(&addr, 0, sizeof(addr));
addr.sin_family = AF_INET;
struct bgp_data_open data;
int hold;
int i;
+ off_t param_offset, capability_offset;
+ struct bgp_opt_param *param;
+ uint8_t capabilities_len;
+ char *capabilities = NULL;
+ struct bgp_capability *capability;
+ struct bgp_mp_cap_param *mp_cap;
for (i = 0; i < sizeof(p->header.marker); i++)
{
if (peer->keepalive * 3 > peer->hold)
peer->keepalive = peer->hold / 3;
+    /*
+     * Check for optional parameters: walk the list received in the OPEN
+     * looking for the multiprotocol capability announcing <IPv6, unicast>.
+     * 2 is for the size of type + len (both uint8_t).
+     */
+    for (param_offset = 0;
+        param_offset < data.opt_len;
+        param_offset += 2 + param->len)
+    {
+        param = (struct bgp_opt_param *)((char *)&data.opt_params + param_offset);
+
+        /* sensible check: the header and the announced value must fit in
+           what is left of the list */
+        if (data.opt_len - param_offset < 2
+            || param->len > data.opt_len - param_offset - 2)
+        {
+            LOG(1, 0, 0, "Malformed Optional Parameter list from BGP peer %s\n",
+                peer->name);
+
+            bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC);
+            return 0;
+        }
+
+        /* we know only one parameter type */
+        if (param->type != BGP_PARAM_TYPE_CAPABILITY)
+        {
+            LOG(1, 0, 0, "Unsupported Optional Parameter type %d from BGP peer %s\n",
+                param->type, peer->name);
+
+            bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_UNSUP_PARAM);
+            return 0;
+        }
+
+        capabilities_len = param->len;
+        capabilities = (char *)&param->value;
+
+        /* look for BGP multiprotocol capability */
+        for (capability_offset = 0;
+            capability_offset < capabilities_len;
+            capability_offset += 2 + capability->len)
+        {
+            capability = (struct bgp_capability *)(capabilities + capability_offset);
+
+            /* sensible check */
+            if (capabilities_len - capability_offset < 2
+                || capability->len > capabilities_len - capability_offset - 2)
+            {
+                LOG(1, 0, 0, "Malformed Capabilities list from BGP peer %s\n",
+                    peer->name);
+
+                bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC);
+                return 0;
+            }
+
+            /* we only know one capability code, and its value has a fixed
+               size: skip the entry when EITHER test fails, so that another
+               capability of the same length is never read as an MP one */
+            if (capability->code != BGP_CAP_CODE_MP
+                || capability->len != sizeof(struct bgp_mp_cap_param))
+            {
+                LOG(4, 0, 0, "Unsupported Capability code %d from BGP peer %s\n",
+                    capability->code, peer->name);
+
+                /* we don't terminate, still; we just jump to the next one */
+                continue;
+            }
+
+            mp_cap = (struct bgp_mp_cap_param *)&capability->value;
+            /* the only <AFI, SAFI> tuple we support: both members must
+               match, an IPv4 unicast announcement doesn't qualify */
+            if (ntohs(mp_cap->afi) != BGP_MP_AFI_IPv6
+                || mp_cap->safi != BGP_MP_SAFI_UNICAST)
+            {
+                LOG(4, 0, 0, "Unsupported multiprotocol AFI %d and SAFI %d from BGP peer %s\n",
+                    ntohs(mp_cap->afi), mp_cap->safi, peer->name);
+
+                /* we don't terminate, still; we just jump to the next one */
+                continue;
+            }
+
+            /* yes it can! */
+            peer->mp_handling = HandleIPv6Routes;
+        }
+    }
+
+    /* nothing suitable was announced: remember it, and warn when an IPv6
+       prefix is configured */
+    if (peer->mp_handling != HandleIPv6Routes)
+    {
+        peer->mp_handling = DoesntHandleIPv6Routes;
+        if (config->ipv6_prefix.s6_addr[0])
+            LOG(1, 0, 0, "Warning: BGP peer %s doesn't handle IPv6 prefixes updates\n",
+                peer->name);
+    }
+
/* next transition requires an exchange of keepalives */
bgp_send_keepalive(peer);
-
- /* FIXME: may need to check for optional params */
}
break;
if (notification->error_code == BGP_ERR_CEASE)
{
LOG(4, 0, 0, "BGP peer %s sent CEASE\n", peer->name);
- bgp_restart(peer);
+ bgp_set_retry(peer);
return 0;
}
+ if (notification->error_code == BGP_ERR_OPEN
+ && notification->error_subcode == BGP_ERR_OPN_UNSUP_PARAM)
+ {
+ LOG(4, 0, 0, "BGP peer %s doesn't support BGP Capabilities\n", peer->name);
+ peer->mp_handling = DoesntHandleIPv6Routes;
+ bgp_set_retry(peer);
+ return 0;
+ }
+
+ if (notification->error_code == BGP_ERR_OPEN
+ && notification->error_subcode == BGP_ERR_OPN_UNSUP_CAP)
+ {
+ /* the only capability we advertise is this one, so upon receiving
+ an "unsupported capability" message, we disable IPv6 routes for
+ this peer */
+ LOG(4, 0, 0, "BGP peer %s doesn't support IPv6 routes advertisement\n", peer->name);
+ peer->mp_handling = DoesntHandleIPv6Routes;
+ break;
+ }
+
/* FIXME: should handle more notifications */
LOG(4, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n",
peer->name, (int) notification->error_code);
static int bgp_send_open(struct bgp_peer *peer)
{
struct bgp_data_open data;
+ struct bgp_mp_cap_param mp_ipv6 = { htons(BGP_MP_AFI_IPv6), 0, BGP_MP_SAFI_UNICAST };
+ struct bgp_capability cap_mp_ipv6;
+ struct bgp_opt_param param_cap_mp_ipv6;
uint16_t len = sizeof(peer->outbuf->packet.header);
memset(peer->outbuf->packet.header.marker, 0xff,
data.version = BGP_VERSION;
data.as = htons(our_as);
data.hold_time = htons(peer->hold);
- data.identifier = my_address;
- data.opt_len = 0;
+    /* use the source IP we use as identifier, if available */
+    if (peer->source_addr != INADDR_ANY)
+        data.identifier = peer->source_addr;
+    else
+        data.identifier = my_address;
+
+    /* if we know peer doesn't support MP (mp_handling == DoesntHandleIPv6Routes)
+       then don't add this parameter */
+    if (config->ipv6_prefix.s6_addr[0]
+        && (peer->mp_handling == HandlingUnknown
+            || peer->mp_handling == HandleIPv6Routes))
+    {
+        /* construct the param and capability */
+        cap_mp_ipv6.code = BGP_CAP_CODE_MP;
+        cap_mp_ipv6.len = sizeof(mp_ipv6);
+        memcpy(&cap_mp_ipv6.value, &mp_ipv6, cap_mp_ipv6.len);
+
+        /* the parameter value is the capability: its 2 header bytes
+           (code + len) followed by the capability value */
+        param_cap_mp_ipv6.type = BGP_PARAM_TYPE_CAPABILITY;
+        param_cap_mp_ipv6.len = 2 + sizeof(mp_ipv6);
+        memcpy(&param_cap_mp_ipv6.value, &cap_mp_ipv6, param_cap_mp_ipv6.len);
+
+        /* likewise, 2 header bytes (type + len) precede the parameter value */
+        data.opt_len = 2 + param_cap_mp_ipv6.len;
+        memcpy(&data.opt_params, &param_cap_mp_ipv6, data.opt_len);
+    }
+    else
+        data.opt_len = 0;
- memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE);
- len += BGP_DATA_OPEN_SIZE;
+ memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE + data.opt_len);
+ len += BGP_DATA_OPEN_SIZE + data.opt_len;
peer->outbuf->packet.header.len = htons(len);
peer->outbuf->done = 0;
return bgp_write(peer);
}
+/* send/buffer UPDATE message for IPv6 routes
+ *
+ * Compares the routes recorded for this peer (peer->routes6) with the
+ * wanted set (bgp_routes6, or nothing when peer->routing is off) and
+ * builds one UPDATE in peer->outbuf: withdrawals are collected in an
+ * MP_UNREACH_NLRI attribute and at most one new prefix is put in an
+ * MP_REACH_NLRI attribute.  peer->update_routes6 is set again whenever
+ * some work is left for a later call.
+ *
+ * Returns 1 when nothing had to be sent, 0 when the record of the newly
+ * advertised route can't be allocated, otherwise the result of bgp_write().
+ */
+static int bgp_send_update6(struct bgp_peer *peer)
+{
+ uint16_t attr_len;
+ uint16_t unreach_len = 0;
+ char *unreach_len_pos;
+ uint8_t reach_len;
+ uint16_t len = sizeof(peer->outbuf->packet.header);
+ struct bgp_route6_list *have = peer->routes6;
+ struct bgp_route6_list *want = peer->routing ? bgp_routes6 : 0;
+ struct bgp_route6_list *e = 0;
+ struct bgp_route6_list *add = 0;
+ int s;
+ char ipv6addr[INET6_ADDRSTRLEN];
+
+ char *data = (char *) &peer->outbuf->packet.data;
+
+ /* need to leave room for attr_len, the path attributes (without NEXT_HOP),
+    the MP_REACH_NLRI heading and one prefix */
+ char *max = (char *) &peer->outbuf->packet.data
+ + sizeof(peer->outbuf->packet.data)
+ - sizeof(attr_len) - peer->path_attr_len_without_nexthop
+ - BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE - sizeof(struct bgp_ip6_prefix);
+
+ memset(peer->outbuf->packet.header.marker, 0xff,
+ sizeof(peer->outbuf->packet.header.marker));
+
+ peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
+
+ /* insert non-MP unfeasible routes length (unreach_len is still 0 here) */
+ memcpy(data, &unreach_len, sizeof(unreach_len));
+ /* skip over it and attr_len too; attr_len will be filled in when known */
+ data += sizeof(unreach_len) + sizeof(attr_len);
+ len += sizeof(unreach_len) + sizeof(attr_len);
+
+ /* copy usual attributes, up to the length recorded before NEXT_HOP */
+ memcpy(data, peer->path_attrs, peer->path_attr_len_without_nexthop);
+ data += peer->path_attr_len_without_nexthop;
+ attr_len = peer->path_attr_len_without_nexthop;
+
+ /* copy MP unreachable NLRI heading */
+ memcpy(data, peer->mp_unreach_nlri_partial,
+ BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE);
+ /* remember where to update this attr len */
+ unreach_len_pos = data + 2;
+ data += BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE;
+ attr_len += BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE;
+
+ peer->update_routes6 = 0; /* tentatively clear */
+
+ /* find differences; both lists are sorted, so they are walked in step
+    and compared with memcmp */
+ while ((have || want) && data < (max - sizeof(struct bgp_ip6_prefix)))
+ {
+ if (have)
+ s = want
+ ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
+ : -1;
+ else
+ s = 1;
+
+ if (s < 0) /* found one to delete */
+ {
+ struct bgp_route6_list *tmp = have;
+ have = have->next;
+
+ s = BGP_IP_PREFIX_SIZE(tmp->dest);
+ memcpy(data, &tmp->dest, s);
+ data += s;
+ unreach_len += s;
+ attr_len += s;
+
+ LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
+ inet_ntop(AF_INET6, &tmp->dest.prefix, ipv6addr, INET6_ADDRSTRLEN),
+ tmp->dest.len, peer->name);
+
+ free(tmp);
+
+ if (e)
+ e->next = have;
+ else
+ peer->routes6 = have;
+ }
+ else
+ {
+ if (!s) /* same */
+ {
+ e = have; /* stash the last found to relink above */
+ have = have->next;
+ want = want->next;
+ }
+ else if (s > 0) /* addition reqd. */
+ {
+ if (add)
+ {
+ peer->update_routes6 = 1; /* only one add per packet */
+ if (!have)
+ break;
+ }
+ else
+ add = want;
+
+ if (want)
+ want = want->next;
+ }
+ }
+ }
+
+ if (have || want)
+ peer->update_routes6 = 1; /* more to do */
+
+ /* anything changed? if not, nothing is sent */
+ if (!(unreach_len || add))
+ return 1;
+
+ if (unreach_len)
+ {
+ /* go back and insert MP unreach_len (prefixes plus the AFI/SAFI heading) */
+ unreach_len += sizeof(struct bgp_attr_mp_unreach_nlri_partial);
+ unreach_len = htons(unreach_len);
+ memcpy(unreach_len_pos, &unreach_len, sizeof(unreach_len));
+ }
+ else
+ {
+ /* nothing withdrawn: we can remove this attribute, then */
+ data -= BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE;
+ attr_len -= BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE;
+ }
+
+ if (add)
+ {
+ if (!(e = malloc(sizeof(*e))))
+ {
+ LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
+ inet_ntop(AF_INET6, &add->dest.prefix, ipv6addr, INET6_ADDRSTRLEN),
+ add->dest.len, strerror(errno));
+
+ return 0;
+ }
+
+ memcpy(e, add, sizeof(*e));
+ e->next = 0;
+ peer->routes6 = bgp_insert_route6(peer->routes6, e);
+
+ /* copy MP reachable NLRI heading */
+ memcpy(data, peer->mp_reach_nlri_partial,
+ BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE);
+ /* with proper len: the heading plus the one prefix that follows it */
+ reach_len = BGP_IP_PREFIX_SIZE(add->dest);
+ data[2] = sizeof(struct bgp_attr_mp_reach_nlri_partial) + reach_len;
+ data += BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE;
+ attr_len += BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE;
+
+ memcpy(data, &add->dest, reach_len);
+ data += reach_len;
+ attr_len += reach_len;
+
+ LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
+ inet_ntop(AF_INET6, &add->dest.prefix, ipv6addr, INET6_ADDRSTRLEN),
+ add->dest.len, peer->name);
+ }
+
+ /* update len with attributes we added */
+ len += attr_len;
+
+ /* go back and insert attr_len, right after the 2-byte unfeasible routes length */
+ attr_len = htons(attr_len);
+ memcpy((char *)&peer->outbuf->packet.data + 2, &attr_len, sizeof(attr_len));
+
+ peer->outbuf->packet.header.len = htons(len);
+ peer->outbuf->done = 0;
+
+ return bgp_write(peer);
+}
+
/* send/buffer NOTIFICATION message */
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
uint8_t subcode)
char opt_params[sizeof(((struct bgp_packet *)0)->data) - BGP_DATA_OPEN_SIZE]; /* variable */
} __attribute__ ((packed));
+struct bgp_opt_param { /* one entry of the optional parameters carried in an OPEN message */
+ uint8_t type; /* BGP_PARAM_TYPE_CAPABILITY is the only type handled */
+ uint8_t len; /* number of bytes of value[] in use */
+#define BGP_MAX_OPT_PARAM_SIZE 256
+ char value[BGP_MAX_OPT_PARAM_SIZE];
+} __attribute__ ((packed));
+
+#define BGP_PARAM_TYPE_CAPABILITY 2
+struct bgp_capability { /* one capability, as found in the value of a capability parameter */
+ uint8_t code; /* BGP_CAP_CODE_MP is the only code handled */
+ uint8_t len; /* number of bytes of value[] in use */
+#define BGP_MAX_CAPABILITY_SIZE 256
+ char value[BGP_MAX_CAPABILITY_SIZE];
+} __attribute__ ((packed));
+
+/* RFC4760 Multiprotocol extension */
+#define BGP_CAP_CODE_MP 1
+
+struct bgp_mp_cap_param { /* value of the multiprotocol capability */
+ uint16_t afi; /* BGP_MP_AFI_*, stored in network byte order */
+ uint8_t reserved; /* SHOULD be 0 */
+ uint8_t safi; /* BGP_MP_SAFI_* */
+} __attribute__ ((packed));
+
+/* bgp_mp_cap_param.afi */
+#define BGP_MP_AFI_RESERVED 0
+#define BGP_MP_AFI_IPv4 1
+#define BGP_MP_AFI_IPv6 2
+/* bgp_mp_cap_param.safi */
+#define BGP_MP_SAFI_UNICAST 1
+#define BGP_MP_SAFI_MULTICAST 2
+
+struct bgp_ip6_prefix {
+ uint8_t len; /* prefix length in bits */
+ uint8_t prefix[16]; /* variable: BGP_IP_PREFIX_SIZE() gives the bytes (len included) copied into an UPDATE */
+} __attribute__ ((packed));
+
+/* end of RFC4760 specific definitions */
+
struct bgp_ip_prefix {
uint8_t len;
uint32_t prefix; /* variable */
} __attribute__ ((packed));
+/* works for both IPv4 and IPv6 prefixes */
#define BGP_IP_PREFIX_SIZE(p) (1 + ((p).len / 8) + ((p).len % 8 != 0))
struct bgp_path_attr {
} data; /* variable */
} __attribute__ ((packed));
+struct bgp_attr_mp_reach_nlri_partial { /* fixed start of the MP_REACH_NLRI value; the prefix is appended when the UPDATE is built */
+ uint16_t afi; /* BGP_MP_AFI_*, stored in network byte order */
+ uint8_t safi; /* BGP_MP_SAFI_* */
+ uint8_t next_hop_len; /* bgp_start sets it to 16 */
+ uint8_t next_hop[16]; /* IPv6 next hop */
+ uint8_t reserved; /* bgp_start sets it to 0 */
+} __attribute__ ((packed));
+#define BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE (3 + sizeof(struct bgp_attr_mp_reach_nlri_partial))
+
+struct bgp_attr_mp_unreach_nlri_partial { /* fixed start of the MP_UNREACH_NLRI value; withdrawn prefixes are appended when the UPDATE is built */
+ uint16_t afi; /* BGP_MP_AFI_*, stored in network byte order */
+ uint8_t safi; /* BGP_MP_SAFI_* */
+} __attribute__ ((packed));
+/* we use it as an extended attribute */
+#define BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE (4 + sizeof(struct bgp_attr_mp_unreach_nlri_partial))
+
/* bgp_path_attr.flags (bitfields) */
#define BGP_PATH_ATTR_FLAG_OPTIONAL (1 << 7)
#define BGP_PATH_ATTR_FLAG_TRANS (1 << 6)
#define BGP_PATH_ATTR_CODE_ATOMIC_AGGREGATE 6 /* well-known, discretionary */
#define BGP_PATH_ATTR_CODE_AGGREGATOR 7 /* optional, transitive */
#define BGP_PATH_ATTR_CODE_COMMUNITIES 8 /* optional, transitive (RFC1997) */
+#define BGP_PATH_ATTR_CODE_MP_REACH_NLRI 14 /* optional, non-transitive (RFC4760) */
+#define BGP_PATH_ATTR_CODE_MP_UNREACH_NLRI 15 /* optional, non-transitive (RFC4760) */
#define BGP_PATH_ATTR_SIZE(p) ((((p).flags & BGP_PATH_ATTR_FLAG_EXTLEN) \
- ? ((p).data.e.len + 1) : (p).data.s.len) + 3)
+ ? ((p).data.e.len + 4) : (p).data.s.len) + 3)
/* well known COMMUNITIES */
#define BGP_COMMUNITY_NO_EXPORT 0xffffff01 /* don't advertise outside confederation */
} __attribute__ ((packed));
/* bgp_data_notification.error_code, .error_subcode */
+#define BGP_ERR_UNSPEC 0
#define BGP_ERR_HEADER 1
# define BGP_ERR_HDR_NOT_SYNC 1
# define BGP_ERR_HDR_BAD_LEN 2
# define BGP_ERR_OPN_UNSUP_PARAM 4
# define BGP_ERR_OPN_AUTH_FAILURE 5
# define BGP_ERR_OPN_HOLD_TIME 6
+# define BGP_ERR_OPN_UNSUP_CAP 7
#define BGP_ERR_UPDATE 3
# define BGP_ERR_UPD_BAD_ATTR_LIST 1
# define BGP_ERR_UPD_UNKN_WK_ATTR 2
Established, /* established */
};
+struct bgp_route6_list { /* singly linked list of IPv6 routes; bgp_insert_route6() keeps it sorted on dest */
+ struct bgp_ip6_prefix dest; /* the prefix itself */
+ struct bgp_route6_list *next; /* following entry, 0 at the end of the list */
+};
+
struct bgp_route_list {
struct bgp_ip_prefix dest;
struct bgp_route_list *next;
size_t done; /* bytes sent/recvd */
};
+enum bgp_mp_handling { /* what is known about a peer's support for IPv6 routes */
+ HandleIPv6Routes, /* the peer's OPEN announced the IPv6 unicast multiprotocol capability */
+ DoesntHandleIPv6Routes, /* no such capability in its OPEN, or it refused our parameter/capability */
+ HandlingUnknown, /* value given by bgp_start, before any OPEN is seen */
+};
+
/* state */
struct bgp_peer {
char name[32]; /* peer name */
in_addr_t addr; /* peer address */
+ in_addr_t source_addr; /* our source address */
int as; /* AS number */
int sock;
enum bgp_state state; /* FSM state */
int cli_flag; /* updates requested from CLI */
char *path_attrs; /* path attrs to send in UPDATE message */
int path_attr_len; /* length of path attrs */
+ int path_attr_len_without_nexthop; /* length of path attrs without NEXT_HOP */
uint32_t events; /* events to poll */
struct event_data edata; /* poll data */
+ enum bgp_mp_handling mp_handling; /* how it handles IPv6 routes advertisements */
+ int update_routes6; /* UPDATE required for IPv6 routes */
+ struct bgp_route6_list *routes6; /* IPv6 routes known by this peer */
+ char mp_reach_nlri_partial[BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE];
+ char mp_unreach_nlri_partial[BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE];
};
/* bgp_peer.cli_flag */
/* actions */
int bgp_setup(int as);
int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
- int hold, int enable);
+ int hold, struct in_addr update_source, int enable);
void bgp_stop(struct bgp_peer *peer);
void bgp_halt(struct bgp_peer *peer);
int bgp_restart(struct bgp_peer *peer);
- int bgp_add_route(in_addr_t ip, in_addr_t mask);
+ int bgp_add_route(in_addr_t ip, int prefixlen);
+int bgp_add_route6(struct in6_addr ip, int prefixlen);
- int bgp_del_route(in_addr_t ip, in_addr_t mask);
+ int bgp_del_route(in_addr_t ip, int prefixlen);
+int bgp_del_route6(struct in6_addr ip, int prefixlen);
void bgp_enable_routing(int enable);
int bgp_set_poll(void);
int bgp_process(uint32_t events[]);
+void bgp_process_peers_timers();
char const *bgp_state_str(enum bgp_state state);
extern char const *cvs_id_bgp;
#define SYSLOG_NAMES
#include <syslog.h>
#include <malloc.h>
- #include <math.h>
#include <net/route.h>
#include <sys/mman.h>
#include <netdb.h>
#include <sched.h>
#include <sys/sysinfo.h>
#include <libcli.h>
+ #include <linux/netlink.h>
+ #include <linux/rtnetlink.h>
#include "md5.h"
#include "l2tpns.h"
// Globals
configt *config = NULL; // all configuration
+ int nlfd = -1; // netlink socket
int tunfd = -1; // tun interface file handle. (network device)
int udpfd = -1; // UDP file handle
int controlfd = -1; // Control signal handle
int daefd = -1; // Socket listening for DAE connections.
int snoopfd = -1; // UDP file handle for sending out intercept data
int *radfds = NULL; // RADIUS requests file handles
- int ifrfd = -1; // File descriptor for routing, etc
- int ifr6fd = -1; // File descriptor for IPv6 routing, etc
int rand_fd = -1; // Random data source
int cluster_sockfd = -1; // Intra-cluster communications socket.
int epollfd = -1; // event polling
time_t basetime = 0; // base clock
-char hostname[1000] = ""; // us.
+char hostname[MAXHOSTNAME] = ""; // us.
static int tunidx; // ifr_ifindex of tun device
+ int nlseqnum = 0; // netlink sequence number
+ int min_initok_nlseqnum = 0; // minimum seq number for messages after init is ok
static int syslog_log = 0; // are we logging to syslog
static FILE *log_stream = 0; // file handle for direct logging (i.e. direct into file, not via syslog).
uint32_t last_id = 0; // Unique ID for radius accounting
CONFIG("cluster_hb_timeout", cluster_hb_timeout, INT),
CONFIG("cluster_master_min_adv", cluster_master_min_adv, INT),
CONFIG("ipv6_prefix", ipv6_prefix, IPv6),
+ CONFIG("cli_bind_address", cli_bind_address, IPv4),
+ CONFIG("hostname", hostname, STRING),
+ CONFIG("nexthop_address", nexthop_address, IPv4),
+ CONFIG("nexthop6_address", nexthop6_address, IPv6),
{ NULL, 0, 0, 0 },
};
NULL,
"plugin_pre_auth",
"plugin_post_auth",
- "plugin_packet_rx",
- "plugin_packet_tx",
"plugin_timer",
"plugin_new_session",
"plugin_kill_session",
struct Tringbuffer *ringbuffer = NULL;
#endif
+ static ssize_t netlink_send(struct nlmsghdr *nh);
+ static void netlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen);
static void cache_ipmap(in_addr_t ip, sessionidt s);
static void uncache_ipmap(in_addr_t ip);
static void cache_ipv6map(struct in6_addr ip, int prefixlen, sessionidt s);
// via BGP if enabled, and stuffs it into the
// 'sessionbyip' cache.
//
- // 'ip' and 'mask' must be in _host_ order.
+ // 'ip' must be in _host_ order.
//
- static void routeset(sessionidt s, in_addr_t ip, in_addr_t mask, in_addr_t gw, int add)
+ static void routeset(sessionidt s, in_addr_t ip, int prefixlen, in_addr_t gw, int add)
{
- struct rtentry r;
+ struct {
+ struct nlmsghdr nh;
+ struct rtmsg rt;
+ char buf[32];
+ } req;
int i;
+ in_addr_t n_ip;
- if (!mask) mask = 0xffffffff;
+ if (!prefixlen) prefixlen = 32;
- ip &= mask; // Force the ip to be the first one in the route.
+ ip &= 0xffffffff << (32 - prefixlen);; // Force the ip to be the first one in the route.
- memset(&r, 0, sizeof(r));
- r.rt_dev = config->tundevice;
- r.rt_dst.sa_family = AF_INET;
- *(uint32_t *) & (((struct sockaddr_in *) &r.rt_dst)->sin_addr.s_addr) = htonl(ip);
- r.rt_gateway.sa_family = AF_INET;
- *(uint32_t *) & (((struct sockaddr_in *) &r.rt_gateway)->sin_addr.s_addr) = htonl(gw);
- r.rt_genmask.sa_family = AF_INET;
- *(uint32_t *) & (((struct sockaddr_in *) &r.rt_genmask)->sin_addr.s_addr) = htonl(mask);
- r.rt_flags = (RTF_UP | RTF_STATIC);
+ memset(&req, 0, sizeof(req));
+
+ if (add)
+ {
+ req.nh.nlmsg_type = RTM_NEWROUTE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE;
+ }
+ else
+ req.nh.nlmsg_type = RTM_DELROUTE;
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt));
+
+ req.rt.rtm_family = AF_INET;
+ req.rt.rtm_dst_len = prefixlen;
+ req.rt.rtm_table = RT_TABLE_MAIN;
+ req.rt.rtm_protocol = RTPROT_BOOT; // XXX
+ req.rt.rtm_scope = RT_SCOPE_LINK;
+ req.rt.rtm_type = RTN_UNICAST;
+
+ netlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int));
+ n_ip = htonl(ip);
+ netlink_addattr(&req.nh, RTA_DST, &n_ip, sizeof(n_ip));
if (gw)
- r.rt_flags |= RTF_GATEWAY;
- else if (mask == 0xffffffff)
- r.rt_flags |= RTF_HOST;
+ {
+ n_ip = htonl(gw);
+ netlink_addattr(&req.nh, RTA_GATEWAY, &n_ip, sizeof(n_ip));
+ }
- LOG(1, s, 0, "Route %s %s/%s%s%s\n", add ? "add" : "del",
- fmtaddr(htonl(ip), 0), fmtaddr(htonl(mask), 1),
+ LOG(1, s, 0, "Route %s %s/%d%s%s\n", add ? "add" : "del",
+ fmtaddr(htonl(ip), 0), prefixlen,
gw ? " via" : "", gw ? fmtaddr(htonl(gw), 2) : "");
- if (ioctl(ifrfd, add ? SIOCADDRT : SIOCDELRT, (void *) &r) < 0)
- LOG(0, 0, 0, "routeset() error in ioctl: %s\n", strerror(errno));
+ if (netlink_send(&req.nh) < 0)
+ LOG(0, 0, 0, "routeset() error in sending netlink message: %s\n", strerror(errno));
#ifdef BGP
if (add)
- bgp_add_route(htonl(ip), htonl(mask));
+ bgp_add_route(htonl(ip), prefixlen);
else
- bgp_del_route(htonl(ip), htonl(mask));
+ bgp_del_route(htonl(ip), prefixlen);
#endif /* BGP */
// Add/Remove the IPs to the 'sessionbyip' cache.
if (!add) // Are we deleting a route?
s = 0; // Caching the session as '0' is the same as uncaching.
- for (i = ip; (i&mask) == (ip&mask) ; ++i)
+ for (i = ip; i < ip+(1<<(32-prefixlen)) ; ++i)
cache_ipmap(i, s);
}
}
+ // Add (add != 0) or delete an IPv6 route for ip/prefixlen through the tun
+ // interface (tunidx) using an rtnetlink RTM_NEWROUTE/RTM_DELROUTE request,
+ // then mirror the change to BGP when built with BGP support.
+ // Refuses to do anything when no IPv6 prefix is configured.
void route6set(sessionidt s, struct in6_addr ip, int prefixlen, int add)
{
- struct in6_rtmsg rt;
+ struct {
+ struct nlmsghdr nh;
+ struct rtmsg rt;
+ char buf[64]; // space for the RTA_* attributes appended below
+ } req;
+ int metric;
char ipv6addr[INET6_ADDRSTRLEN];
- if (ifr6fd < 0)
+ if (!config->ipv6_prefix.s6_addr[0])
{
LOG(0, 0, 0, "Asked to set IPv6 route, but IPv6 not setup.\n");
return;
}
- memset(&rt, 0, sizeof(rt));
+ memset(&req, 0, sizeof(req));
- memcpy(&rt.rtmsg_dst, &ip, sizeof(struct in6_addr));
- rt.rtmsg_dst_len = prefixlen;
- rt.rtmsg_metric = 1;
- rt.rtmsg_flags = RTF_UP;
- rt.rtmsg_ifindex = tunidx;
+ if (add)
+ {
+ req.nh.nlmsg_type = RTM_NEWROUTE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE;
+ }
+ else
+ req.nh.nlmsg_type = RTM_DELROUTE;
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt));
+
+ req.rt.rtm_family = AF_INET6;
+ req.rt.rtm_dst_len = prefixlen;
+ req.rt.rtm_table = RT_TABLE_MAIN;
+ req.rt.rtm_protocol = RTPROT_BOOT; // XXX
+ req.rt.rtm_scope = RT_SCOPE_LINK;
+ req.rt.rtm_type = RTN_UNICAST;
+
+ netlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int));
+ netlink_addattr(&req.nh, RTA_DST, &ip, sizeof(ip));
+ // The route metric (the old rtmsg_metric) travels in RTA_PRIORITY;
+ // RTA_METRICS is a nest of RTAX_* values (MTU, window, ...), not the metric.
+ metric = 1;
+ netlink_addattr(&req.nh, RTA_PRIORITY, &metric, sizeof(metric));
LOG(1, 0, 0, "Route %s %s/%d\n",
add ? "add" : "del",
inet_ntop(AF_INET6, &ip, ipv6addr, INET6_ADDRSTRLEN),
prefixlen);
- if (ioctl(ifr6fd, add ? SIOCADDRT : SIOCDELRT, (void *) &rt) < 0)
- LOG(0, 0, 0, "route6set() error in ioctl: %s\n",
- strerror(errno));
+ if (netlink_send(&req.nh) < 0)
+ LOG(0, 0, 0, "route6set() error in sending netlink message: %s\n", strerror(errno));
- // FIXME: need to add BGP routing (RFC2858)
+#ifdef BGP
+ if (add)
+ bgp_add_route6(ip, prefixlen);
+ else
+ bgp_del_route6(ip, prefixlen);
+#endif /* BGP */
if (s)
{
return;
}
- // defined in linux/ipv6.h, but tricky to include from user-space
- // TODO: move routing to use netlink rather than ioctl
- struct in6_ifreq {
- struct in6_addr ifr6_addr;
- __u32 ifr6_prefixlen;
- unsigned int ifr6_ifindex;
+ //
+ // Open the rtnetlink socket (nlfd) and bind it to our pid.
+ // Any failure is logged and terminates the process.
+ static void initnetlink(void)
+ {
+     // all members other than nl_family start out zero
+     struct sockaddr_nl local = { .nl_family = AF_NETLINK };
+
+     if ((nlfd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE)) < 0)
+     {
+         LOG(0, 0, 0, "Can't create netlink socket: %s\n", strerror(errno));
+         exit(1);
+     }
+
+     local.nl_pid = getpid();
+
+     if (bind(nlfd, (struct sockaddr *)&local, sizeof(local)) < 0)
+     {
+         LOG(0, 0, 0, "Can't bind netlink socket: %s\n", strerror(errno));
+         exit(1);
+     }
+ }
+
+ // Send one netlink message to the kernel over nlfd.
+ // nh is stamped with our pid and the next sequence number (++nlseqnum)
+ // before it is sent; nh->nlmsg_len bytes are transmitted.
+ // Returns the result of sendmsg(): bytes sent, or -1 with errno set.
+ static ssize_t netlink_send(struct nlmsghdr *nh)
+ {
+     // kernel address: everything but the family left at zero
+     struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+     struct iovec iov;
+     struct msghdr msg;
+
+     nh->nlmsg_pid = getpid();
+     nh->nlmsg_seq = ++nlseqnum;
+
+     // Designated initialisers: POSIX does not fix the member order of
+     // struct msghdr (some C libraries insert padding members), so a
+     // positional initialiser can silently fill the wrong fields.
+     iov = (struct iovec){ .iov_base = nh, .iov_len = nh->nlmsg_len };
+     msg = (struct msghdr){
+         .msg_name = &nladdr,
+         .msg_namelen = sizeof(nladdr),
+         .msg_iov = &iov,
+         .msg_iovlen = 1,
+     };
+
+     return sendmsg(nlfd, &msg, 0);
+ }
+
+ // Receive one datagram from the netlink socket (nlfd) into buf, which
+ // holds at most len bytes.
+ // Returns the result of recvmsg(): bytes received, or -1 with errno set.
+ static ssize_t netlink_recv(void *buf, ssize_t len)
+ {
+     // kernel address: everything but the family left at zero
+     struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+     struct iovec iov;
+     struct msghdr msg;
+
+     // Designated initialisers: POSIX does not fix the member order of
+     // struct msghdr (some C libraries insert padding members), so a
+     // positional initialiser can silently fill the wrong fields.
+     iov = (struct iovec){ .iov_base = buf, .iov_len = len };
+     msg = (struct msghdr){
+         .msg_name = &nladdr,
+         .msg_namelen = sizeof(nladdr),
+         .msg_iov = &iov,
+         .msg_iovlen = 1,
+     };
+
+     return recvmsg(nlfd, &msg, 0);
+ }
+
+ /* adapted from iproute2 */
+ // Append one routing attribute (type, alen bytes copied from data) at the
+ // aligned end of the message nh and grow nh->nlmsg_len accordingly.
+ // There is no bounds check: the caller must have reserved at least
+ // RTA_SPACE(alen) bytes after the current end of the message.
+ static void netlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen)
+ {
+     int len = RTA_LENGTH(alen);
+     struct rtattr *rta;
+
+     // byte offsets via char *: arithmetic on void * is a GNU extension
+     rta = (struct rtattr *)((char *)nh + NLMSG_ALIGN(nh->nlmsg_len));
+     rta->rta_type = type;
+     rta->rta_len = len;
+     memcpy(RTA_DATA(rta), data, alen);
+     nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(len);
+ }
+
+ // Descriptions of the tun setup phases, indexed by netlink request sequence number; looked up when a fatal netlink error is reported for an initialization request.
+ static char *tun_nl_phase_msg[] = {
+ "initialized", // seq 0: no request yet (assumes nlseqnum starts at 0 -- TODO confirm)
+ "getting tun interface index", // seq 1
+ "setting tun interface parameters", // seq 2
+ "setting tun IPv4 address", // seq 3
+ "setting tun LL IPv6 address", // seq 4 (only sent when an IPv6 prefix is configured)
+ "setting tun global IPv6 address", // seq 5 (only sent when an IPv6 prefix is configured)
};
//
// Set up TUN interface
+ // Records the tun device name in config->tundevice, then configures the
+ // interface over rtnetlink: looks up its index (stored in tunidx), brings
+ // it up with txqueuelen 1000 and MTU = MRU, and assigns the IPv4 address
+ // plus, when an IPv6 prefix is configured, the link-local and global IPv6
+ // addresses. Only the index lookup waits for its answer here; the other
+ // requests are just sent, and min_initok_nlseqnum records the first
+ // sequence number that no longer belongs to this initialization.
+ // Any failure is logged and terminates the process.
static void inittun(void)
{
+ struct ifinfomsg ifinfo;
struct ifreq ifr;
- struct in6_ifreq ifr6;
- struct sockaddr_in sin = {0};
+
memset(&ifr, 0, sizeof(ifr));
ifr.ifr_flags = IFF_TUN;
LOG(0, 0, 0, "Can't set tun interface: %s\n", strerror(errno));
exit(1);
}
- assert(strlen(ifr.ifr_name) < sizeof(config->tundevice));
- strncpy(config->tundevice, ifr.ifr_name, sizeof(config->tundevice) - 1);
- ifrfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
-
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = config->bind_address ? config->bind_address : 0x01010101; // 1.1.1.1
- memcpy(&ifr.ifr_addr, &sin, sizeof(struct sockaddr));
+ assert(strlen(ifr.ifr_name) < sizeof(config->tundevice) - 1);
+ strncpy(config->tundevice, ifr.ifr_name, sizeof(config->tundevice));
- if (ioctl(ifrfd, SIOCSIFADDR, (void *) &ifr) < 0)
{
- LOG(0, 0, 0, "Error setting tun address: %s\n", strerror(errno));
- exit(1);
- }
- /* Bump up the qlen to deal with bursts from the network */
- ifr.ifr_qlen = 1000;
- if (ioctl(ifrfd, SIOCSIFTXQLEN, (void *) &ifr) < 0)
- {
- LOG(0, 0, 0, "Error setting tun queue length: %s\n", strerror(errno));
- exit(1);
- }
- /* set MTU to modem MRU */
- ifr.ifr_mtu = MRU;
- if (ioctl(ifrfd, SIOCSIFMTU, (void *) &ifr) < 0)
- {
- LOG(0, 0, 0, "Error setting tun MTU: %s\n", strerror(errno));
- exit(1);
- }
- ifr.ifr_flags = IFF_UP;
- if (ioctl(ifrfd, SIOCSIFFLAGS, (void *) &ifr) < 0)
- {
- LOG(0, 0, 0, "Error setting tun flags: %s\n", strerror(errno));
- exit(1);
- }
- if (ioctl(ifrfd, SIOCGIFINDEX, (void *) &ifr) < 0)
- {
- LOG(0, 0, 0, "Error getting tun ifindex: %s\n", strerror(errno));
- exit(1);
- }
- tunidx = ifr.ifr_ifindex;
+ // get the interface index
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg ifinfo;
+ char ifname_attr[RTA_SPACE(sizeof(config->tundevice))]; // room for the IFLA_IFNAME attribute appended below
+ } req;
+ char buf[4096];
+ ssize_t len;
+ struct nlmsghdr *resp_nh;
+
+ // every byte of the request is sent to the kernel, so start from a
+ // zeroed message instead of leaving ifi_index/ifi_flags/... as garbage
+ memset(&req, 0, sizeof(req));
+
+ req.nh.nlmsg_type = RTM_GETLINK;
+ req.nh.nlmsg_flags = NLM_F_REQUEST;
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifinfo));
- // Only setup IPv6 on the tun device if we have a configured prefix
- if (config->ipv6_prefix.s6_addr[0]) {
- ifr6fd = socket(PF_INET6, SOCK_DGRAM, 0);
+ req.ifinfo.ifi_family = AF_UNSPEC; // as the man says
- // Link local address is FE80::1
- memset(&ifr6.ifr6_addr, 0, sizeof(ifr6.ifr6_addr));
- ifr6.ifr6_addr.s6_addr[0] = 0xFE;
- ifr6.ifr6_addr.s6_addr[1] = 0x80;
- ifr6.ifr6_addr.s6_addr[15] = 1;
- ifr6.ifr6_prefixlen = 64;
- ifr6.ifr6_ifindex = ifr.ifr_ifindex;
- if (ioctl(ifr6fd, SIOCSIFADDR, (void *) &ifr6) < 0)
+ netlink_addattr(&req.nh, IFLA_IFNAME, config->tundevice, strlen(config->tundevice)+1);
+
+ if(netlink_send(&req.nh) < 0 || (len = netlink_recv(buf, sizeof(buf))) < 0)
{
- LOG(0, 0, 0, "Error setting tun IPv6 link local address:"
- " %s\n", strerror(errno));
+ LOG(0, 0, 0, "Error getting tun ifindex: %s\n", strerror(errno));
+ exit(1);
}
- // Global address is prefix::1
- memset(&ifr6.ifr6_addr, 0, sizeof(ifr6.ifr6_addr));
- ifr6.ifr6_addr = config->ipv6_prefix;
- ifr6.ifr6_addr.s6_addr[15] = 1;
- ifr6.ifr6_prefixlen = 64;
- ifr6.ifr6_ifindex = ifr.ifr_ifindex;
- if (ioctl(ifr6fd, SIOCSIFADDR, (void *) &ifr6) < 0)
+ resp_nh = (struct nlmsghdr *)buf;
+ if (!NLMSG_OK (resp_nh, len))
{
- LOG(0, 0, 0, "Error setting tun IPv6 global address: %s\n",
- strerror(errno));
+ LOG(0, 0, 0, "Malformed answer getting tun ifindex %ld\n", (long) len);
+ exit(1);
+ }
+
+ // an error reply carries a struct nlmsgerr, not a struct ifinfomsg
+ if (resp_nh->nlmsg_type == NLMSG_ERROR)
+ {
+ struct nlmsgerr *errmsg = NLMSG_DATA(resp_nh);
+ LOG(0, 0, 0, "Error getting tun ifindex: %s\n", strerror(-errmsg->error));
+ exit(1);
+ }
+
+ memcpy(&ifinfo, NLMSG_DATA(resp_nh), sizeof(ifinfo));
+ // got index
+ tunidx = ifinfo.ifi_index;
+ }
+ {
+ struct {
+ // interface setting
+ struct nlmsghdr nh;
+ union {
+ struct ifinfomsg ifinfo;
+ struct ifaddrmsg ifaddr;
+ } ifmsg;
+ char rtdata[32]; // 32 should be enough
+ } req;
+ uint32_t txqlen, mtu;
+ in_addr_t ip;
+
+ memset(&req, 0, sizeof(req));
+
+ req.nh.nlmsg_type = RTM_SETLINK;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_MULTI;
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifinfo));
+
+ req.ifmsg.ifinfo = ifinfo;
+ req.ifmsg.ifinfo.ifi_flags |= IFF_UP; // set interface up
+ req.ifmsg.ifinfo.ifi_change = IFF_UP; // only change this flag
+
+ /* Bump up the qlen to deal with bursts from the network */
+ txqlen = 1000;
+ netlink_addattr(&req.nh, IFLA_TXQLEN, &txqlen, sizeof(txqlen));
+ /* set MTU to modem MRU */
+ mtu = MRU;
+ netlink_addattr(&req.nh, IFLA_MTU, &mtu, sizeof(mtu));
+
+ if (netlink_send(&req.nh) < 0)
+ goto senderror;
+
+ memset(&req, 0, sizeof(req));
+
+ req.nh.nlmsg_type = RTM_NEWADDR;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI;
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr));
+
+ req.ifmsg.ifaddr.ifa_family = AF_INET;
+ req.ifmsg.ifaddr.ifa_prefixlen = 32;
+ req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_UNIVERSE;
+ req.ifmsg.ifaddr.ifa_index = ifinfo.ifi_index;
+
+ if (config->bind_address)
+ ip = config->bind_address;
+ else
+ ip = 0x01010101; // 1.1.1.1
+ netlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip));
+
+ if (netlink_send(&req.nh) < 0)
+ goto senderror;
+
+ // Only setup IPv6 on the tun device if we have a configured prefix
+ if (config->ipv6_prefix.s6_addr[0]) {
+ struct in6_addr ip6;
+
+ memset(&req, 0, sizeof(req));
+
+ req.nh.nlmsg_type = RTM_NEWADDR;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI;
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr));
+
+ req.ifmsg.ifaddr.ifa_family = AF_INET6;
+ req.ifmsg.ifaddr.ifa_prefixlen = 64;
+ req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_LINK;
+ req.ifmsg.ifaddr.ifa_index = ifinfo.ifi_index;
+
+ // Link local address is FE80::1
+ memset(&ip6, 0, sizeof(ip6));
+ ip6.s6_addr[0] = 0xFE;
+ ip6.s6_addr[1] = 0x80;
+ ip6.s6_addr[15] = 1;
+ netlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6));
+
+ if (netlink_send(&req.nh) < 0)
+ goto senderror;
+
+ memset(&req, 0, sizeof(req));
+
+ req.nh.nlmsg_type = RTM_NEWADDR;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI;
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr));
+
+ req.ifmsg.ifaddr.ifa_family = AF_INET6;
+ req.ifmsg.ifaddr.ifa_prefixlen = 64;
+ req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_UNIVERSE;
+ req.ifmsg.ifaddr.ifa_index = ifinfo.ifi_index;
+
+ // Global address is prefix::1
+ ip6 = config->ipv6_prefix;
+ ip6.s6_addr[15] = 1;
+ netlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6));
+
+ if (netlink_send(&req.nh) < 0)
+ goto senderror;
}
+
+ memset(&req, 0, sizeof(req));
+
+ req.nh.nlmsg_type = NLMSG_DONE;
+ req.nh.nlmsg_len = NLMSG_LENGTH(0);
+
+ if (netlink_send(&req.nh) < 0)
+ goto senderror;
+
+ // if we get an error for seqnum < min_initok_nlseqnum,
+ // we must exit as initialization went wrong
+ if (config->ipv6_prefix.s6_addr[0])
+ min_initok_nlseqnum = 5 + 1; // idx + if + addr + 2*addr6
+ else
+ min_initok_nlseqnum = 3 + 1; // idx + if + addr
}
+
+ return;
+
+ senderror:
+ LOG(0, 0, 0, "Error while setting up tun device: %s\n", strerror(errno));
+ exit(1);
}
// set up UDP ports
int routed = 0;
for (r = 0; r < MAXROUTE && session[s].route[r].ip; r++)
{
- if ((session[s].ip & session[s].route[r].mask) ==
- (session[s].route[r].ip & session[s].route[r].mask))
+ if ((session[s].ip >> (32-session[s].route[r].prefixlen)) ==
+ (session[s].route[r].ip >> (32-session[s].route[r].prefixlen)))
routed++;
- if (del_routes) routeset(s, session[s].route[r].ip, session[s].route[r].mask, 0, 0);
+ if (del_routes) routeset(s, session[s].route[r].ip, session[s].route[r].prefixlen, 0, 0);
session[s].route[r].ip = 0;
}
static clockt last_talked = 0;
static clockt start_busy_wait = 0;
- if (!config->cluster_iam_master)
- {
#ifdef BGP
- static time_t stopped_bgp = 0;
- if (bgp_configured)
+ static time_t stopped_bgp = 0;
+ if (bgp_configured)
+ {
+ if (!stopped_bgp)
{
- if (!stopped_bgp)
- {
- LOG(1, 0, 0, "Shutting down in %d seconds, stopping BGP...\n", QUIT_DELAY);
+ LOG(1, 0, 0, "Shutting down in %d seconds, stopping BGP...\n", QUIT_DELAY);
- for (i = 0; i < BGP_NUM_PEERS; i++)
- if (bgp_peers[i].state == Established)
- bgp_stop(&bgp_peers[i]);
+ for (i = 0; i < BGP_NUM_PEERS; i++)
+ if (bgp_peers[i].state == Established)
+ bgp_stop(&bgp_peers[i]);
- stopped_bgp = time_now;
+ stopped_bgp = time_now;
+ if (!config->cluster_iam_master)
+ {
// we don't want to become master
cluster_send_ping(0);
return 1;
}
-
- if (time_now < (stopped_bgp + QUIT_DELAY))
- return 1;
}
+
+ if (!config->cluster_iam_master && time_now < (stopped_bgp + QUIT_DELAY))
+ return 1;
+ }
#endif /* BGP */
+ if (!config->cluster_iam_master)
return 0;
- }
if (main_quit == QUIT_SHUTDOWN)
{
# include "fake_epoll.h"
#endif
- // the base set of fds polled: cli, cluster, tun, udp, control, dae
- #define BASE_FDS 6
+ // the base set of fds polled: cli, cluster, tun, udp, control, dae, netlink
+ #define BASE_FDS 7
// additional polled fds
#ifdef BGP
exit(1);
}
- LOG(4, 0, 0, "Beginning of main loop. clifd=%d, cluster_sockfd=%d, tunfd=%d, udpfd=%d, controlfd=%d, daefd=%d\n",
- clifd, cluster_sockfd, tunfd, udpfd, controlfd, daefd);
+ LOG(4, 0, 0, "Beginning of main loop. clifd=%d, cluster_sockfd=%d, tunfd=%d, udpfd=%d, controlfd=%d, daefd=%d, nlfd=%d\n",
+ clifd, cluster_sockfd, tunfd, udpfd, controlfd, daefd, nlfd);
/* setup our fds to poll for input */
{
d[i].type = FD_TYPE_DAE;
e.data.ptr = &d[i++];
epoll_ctl(epollfd, EPOLL_CTL_ADD, daefd, &e);
+
+ d[i].type = FD_TYPE_NETLINK;
+ e.data.ptr = &d[i++];
+ epoll_ctl(epollfd, EPOLL_CTL_ADD, nlfd, &e);
}
#ifdef BGP
if (config->neighbour[i].name[0])
bgp_start(&bgp_peers[i], config->neighbour[i].name,
config->neighbour[i].as, config->neighbour[i].keepalive,
- config->neighbour[i].hold, 0); /* 0 = routing disabled */
+ config->neighbour[i].hold, config->neighbour[i].update_source,
+ 0); /* 0 = routing disabled */
}
#endif /* BGP */
break;
#endif /* BGP */
+ case FD_TYPE_NETLINK:
+ {
+     // Read one answer from the netlink socket. A non-zero NLMSG_ERROR for a
+     // request sent during tun initialization (sequence number below
+     // min_initok_nlseqnum) is fatal; later errors are only logged.
+     struct nlmsghdr *nh = (struct nlmsghdr *)buf;
+     ssize_t nl_len = netlink_recv(buf, sizeof(buf));
+
+     s = nl_len;
+     if (nl_len < 0)
+     {
+         // nothing was received: buf still holds stale data, don't parse it
+         LOG(0, 0, 0, "Error receiving netlink message: %s\n", strerror(errno));
+     }
+     else if (!NLMSG_OK(nh, nl_len))
+     {
+         LOG(0, 0, 0, "Malformed netlink message (%ld bytes)\n", (long) nl_len);
+     }
+     else if (nh->nlmsg_type == NLMSG_ERROR)
+     {
+         struct nlmsgerr *errmsg = NLMSG_DATA(nh);
+         if (errmsg->error)
+         {
+             if (errmsg->msg.nlmsg_seq < min_initok_nlseqnum)
+             {
+                 // index with the same sequence number that was just range-checked
+                 LOG(0, 0, 0, "Got a fatal netlink error (while %s): %s\n", tun_nl_phase_msg[errmsg->msg.nlmsg_seq], strerror(-errmsg->error));
+                 exit(1);
+             }
+             else
+             {
+                 LOG(0, 0, 0, "Got a netlink error: %s\n", strerror(-errmsg->error));
+             }
+         }
+         // else it's an ack
+     }
+     else
+     {
+         LOG(1, 0, 0, "Got a unknown netlink message: type %d seq %d flags %d\n", nh->nlmsg_type, nh->nlmsg_seq, nh->nlmsg_flags);
+     }
+     n--;
+     break;
+ }
+
default:
LOG(0, 0, 0, "Unexpected fd type returned from epoll_wait: %d\n", d->type);
}
more++;
}
}
+#ifdef BGP
+ else
+ /* no event received, but timers could still have expired */
+ bgp_process_peers_timers();
+#endif /* BGP */
if (time_changed)
{
if (!*hostname)
{
- // Grab my hostname unless it's been specified
- gethostname(hostname, sizeof(hostname));
- stripdomain(hostname);
+ if (!*config->hostname)
+ {
+ // Grab my hostname unless it's been specified
+ gethostname(hostname, sizeof(hostname));
+ stripdomain(hostname);
+ }
+ else
+ strcpy(hostname, config->hostname);
}
_statistics->start_time = _statistics->last_reset = time(NULL);
//
// Add a block of addresses to the IP pool to hand out.
//
- static void add_to_ip_pool(in_addr_t addr, in_addr_t mask)
+ static void add_to_ip_pool(in_addr_t addr, int prefixlen)
{
int i;
- if (mask == 0)
- mask = 0xffffffff; // Host route only.
+ if (prefixlen == 0)
+ prefixlen = 32; // Host route only.
- addr &= mask;
+ addr &= 0xffffffff << (32 - prefixlen);
if (ip_pool_size >= MAXIPPOOL) // Pool is full!
return ;
- for (i = addr ;(i & mask) == addr; ++i)
+ for (i = addr ; i < addr+(1<<(32-prefixlen)); ++i)
{
if ((i & 0xff) == 0 || (i&0xff) == 255)
continue; // Skip 0 and broadcast addresses.
{
// It's a range
int numbits = 0;
- in_addr_t start = 0, mask = 0;
+ in_addr_t start = 0;
LOG(2, 0, 0, "Adding IP address range %s\n", buf);
*p++ = 0;
continue;
}
start = ntohl(inet_addr(pool));
- mask = (in_addr_t) (pow(2, numbits) - 1) << (32 - numbits);
// Add a static route for this pool
- LOG(5, 0, 0, "Adding route for address pool %s/%u\n",
- fmtaddr(htonl(start), 0), 32 + mask);
+ LOG(5, 0, 0, "Adding route for address pool %s/%d\n",
+ fmtaddr(htonl(start), 0), numbits);
- routeset(0, start, mask, 0, 1);
+ routeset(0, start, numbits, 0, 1);
- add_to_ip_pool(start, mask);
+ add_to_ip_pool(start, numbits);
}
else
{
}
}
+ initnetlink();
+
/* Set up the cluster communications port. */
if (cluster_init() < 0)
exit(1);
// Add the route for this session.
for (r = 0; r < MAXROUTE && session[s].route[r].ip; r++)
{
- if ((session[s].ip & session[s].route[r].mask) ==
- (session[s].route[r].ip & session[s].route[r].mask))
+ if ((session[s].ip >> (32-session[s].route[r].prefixlen)) ==
+ (session[s].route[r].ip >> (32-session[s].route[r].prefixlen)))
routed++;
- routeset(s, session[s].route[r].ip, session[s].route[r].mask, 0, 1);
+ routeset(s, session[s].route[r].ip, session[s].route[r].prefixlen, 0, 1);
}
// Static IPs need to be routed if not already
for (i = 0; !newip && i < MAXROUTE && (session[s].route[i].ip || new->route[i].ip); i++)
if (new->route[i].ip != session[s].route[i].ip ||
- new->route[i].mask != session[s].route[i].mask)
+ new->route[i].prefixlen != session[s].route[i].prefixlen)
newip++;
// needs update
// remove old routes...
for (i = 0; i < MAXROUTE && session[s].route[i].ip; i++)
{
- if ((session[s].ip & session[s].route[i].mask) ==
- (session[s].route[i].ip & session[s].route[i].mask))
+ if ((session[s].ip >> (32-session[s].route[i].prefixlen)) ==
+ (session[s].route[i].ip >> (32-session[s].route[i].prefixlen)))
routed++;
- routeset(s, session[s].route[i].ip, session[s].route[i].mask, 0, 0);
+ routeset(s, session[s].route[i].ip, session[s].route[i].prefixlen, 0, 0);
}
// ...ip
// add new routes...
for (i = 0; i < MAXROUTE && new->route[i].ip; i++)
{
- if ((new->ip & new->route[i].mask) ==
- (new->route[i].ip & new->route[i].mask))
+ if ((new->ip >> (32-new->route[i].prefixlen)) ==
+ (new->route[i].ip >> (32-new->route[i].prefixlen)))
routed++;
- routeset(s, new->route[i].ip, new->route[i].mask, 0, 1);
+ routeset(s, new->route[i].ip, new->route[i].prefixlen, 0, 1);
}
// ...ip
#define PPPoE_MRU 1492 // maximum PPPoE MRU (rfc2516: 1500 less PPPoE header (6) and PPP protocol ID (2))
#define MAXETHER (MAXMTU+18) // max packet we try sending to tun
#define MAXTEL 96 // telephone number
+#define MAXHOSTNAME 256 // hostname
#define MAXUSER 128 // username
#define MAXPASS 128 // password
#define MAXPLUGINS 20 // maximum number of plugins to load
typedef struct // route
{
in_addr_t ip;
- in_addr_t mask;
+ int prefixlen; // prefix length in bits; routeset() treats 0 as 32 (host route)
}
routet;
#define SET_STAT(x, y)
#endif
+ #ifndef IFNAMSIZ
+ # define IFNAMSIZ 16
+ #endif
+
typedef struct
{
int debug; // debugging level
int reload_config; // flag to re-read config (set by cli)
int multi_read_count; // amount of packets to read per fd in processing loop
- char tundevice[10]; // tun device name
+ char tundevice[IFNAMSIZ]; // tun device name
char log_filename[128];
char l2tp_secret[64]; // L2TP shared secret
int cluster_master_min_adv; // Master advertises routes while the number of up to date
// slaves is less than this value.
+ in_addr_t cli_bind_address; // bind address for CLI
+ char hostname[MAXHOSTNAME]; // hostname (overridden by -h on command line)
// Guest change
char guest_user[MAXUSER]; // Guest account username
uint16_t as;
int keepalive;
int hold;
+ struct in_addr update_source;
} neighbour[BGP_NUM_PEERS];
+ in_addr_t nexthop_address;
+ struct in6_addr nexthop6_address;
#endif
} configt;
FD_TYPE_DAE,
FD_TYPE_RADIUS,
FD_TYPE_BGP,
+ FD_TYPE_NETLINK,
} type;
int index; // for RADIUS, BGP
};