Merge branch 'use-netlink' into fdn-mods

author Benjamin Cama <benoar@dolka.fr>

Wed, 17 Aug 2011 15:23:07 +0000 (17:23 +0200)

committer Benjamin Cama <benoar@dolka.fr>

Wed, 17 Aug 2011 15:23:07 +0000 (17:23 +0200)
author Benjamin Cama <benoar@dolka.fr>
Wed, 17 Aug 2011 15:23:07 +0000 (17:23 +0200)
committer Benjamin Cama <benoar@dolka.fr>
Wed, 17 Aug 2011 15:23:07 +0000 (17:23 +0200)
diff --combined bgp.c

index 80a445e,f1d0ec2..4b8cb64
--- 1/bgp.c
--- 2/bgp.c
+++ b/bgp.c
@@@ -29,15 -29,11 +29,15 @@@ char const *cvs_id_bgp = "$Id: bgp.c,v 
   
   static void bgp_clear(struct bgp_peer *peer);
   static void bgp_set_retry(struct bgp_peer *peer);
- static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx);
+ static void bgp_cidr(in_addr_t ip, int prefixlen, struct bgp_ip_prefix *pfx);
   static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
       struct bgp_route_list *new);
+ +static struct bgp_route6_list *bgp_insert_route6(struct bgp_route6_list *head,
+ +    struct bgp_route6_list *new);
   
+ +static void bgp_process_timers(struct bgp_peer *peer);
   static void bgp_free_routes(struct bgp_route_list *routes);
+ +static void bgp_free_routes6(struct bgp_route6_list *routes);
   static char const *bgp_msg_type_str(uint8_t type);
   static int bgp_connect(struct bgp_peer *peer);
   static int bgp_handle_connect(struct bgp_peer *peer);
@@@ -47,13 -43,11 +47,13 @@@ static int bgp_handle_input(struct bgp_
   static int bgp_send_open(struct bgp_peer *peer);
   static int bgp_send_keepalive(struct bgp_peer *peer);
   static int bgp_send_update(struct bgp_peer *peer);
+ +static int bgp_send_update6(struct bgp_peer *peer);
   static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
       uint8_t subcode);
   
   static uint16_t our_as;
   static struct bgp_route_list *bgp_routes = 0;
+ +static struct bgp_route6_list *bgp_routes6 = 0;
   
   int bgp_configured = 0;
   struct bgp_peer *bgp_peers = 0;
@@@ -94,7 -88,6 +94,7 @@@ int bgp_setup(int as
         return 0;
   
       bgp_routes = 0;
+ +    bgp_routes6 = 0;
       bgp_configured = 0; /* set by bgp_start */
   
       return 1;
@@@ -102,7 -95,7 +102,7 @@@
   
   /* start connection with a peer */
   int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
- -    int hold, int enable)
+ +    int hold, struct in_addr update_source, int enable)
   {
       struct hostent *h;
       int ibgp;
@@@ -131,7 -124,6 +131,7 @@@
       }
   
       memcpy(&peer->addr, h->h_addr, sizeof(peer->addr));
+ +    peer->source_addr = update_source.s_addr;
       peer->as = as > 0 ? as : our_as;
       ibgp = peer->as == our_as;
   
@@@ -199,6 -191,15 +199,6 @@@
   
       ADD_ATTRIBUTE();
   
- -    /* NEXT_HOP */
- -    a.flags = BGP_PATH_ATTR_FLAG_TRANS;
- -    a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
- -    ip = my_address; /* we're it */
- -    a.data.s.len = sizeof(ip);
- -    memcpy(a.data.s.value, &ip, sizeof(ip));
- -
- -    ADD_ATTRIBUTE();
- -
       /* MULTI_EXIT_DISC */
       a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
       a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC;
@@@ -228,25 -229,6 +228,25 @@@
   
       ADD_ATTRIBUTE();
   
+ +    /* remember the len before adding NEXT_HOP */
+ +    peer->path_attr_len_without_nexthop = peer->path_attr_len;
+ +
+ +    /* NEXT_HOP */
+ +    a.flags = BGP_PATH_ATTR_FLAG_TRANS;
+ +    a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
+ +    if (config->nexthop_address)
+ +    {
+ +      ip = config->nexthop_address;
+ +    }
+ +    else
+ +    {
+ +      ip = my_address; /* we're it */
+ +    }
+ +    a.data.s.len = sizeof(ip);
+ +    memcpy(a.data.s.value, &ip, sizeof(ip));
+ +
+ +    ADD_ATTRIBUTE();
+ +
       if (!(peer->path_attrs = malloc(peer->path_attr_len)))
       {
         LOG(0, 0, 0, "Can't allocate path_attrs for %s (%s)\n",
@@@ -257,53 -239,6 +257,53 @@@
   
       memcpy(peer->path_attrs, path_attrs, peer->path_attr_len);
   
+ +    /* multiprotocol attributes initialization */
+ +    if (config->ipv6_prefix.s6_addr[0])
+ +    {
+ +      struct bgp_attr_mp_reach_nlri_partial mp_reach_nlri_partial;
+ +      struct bgp_attr_mp_unreach_nlri_partial mp_unreach_nlri_partial;
+ +
+ +      a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
+ +      a.code = BGP_PATH_ATTR_CODE_MP_REACH_NLRI;
+ +      a.data.s.len = 0; /* will be set on UPDATE */
+ +
+ +      mp_reach_nlri_partial.afi = htons(BGP_MP_AFI_IPv6);
+ +      mp_reach_nlri_partial.safi = BGP_MP_SAFI_UNICAST;
+ +      mp_reach_nlri_partial.reserved = 0;
+ +      mp_reach_nlri_partial.next_hop_len = 16;
+ +
+ +      /* use the defined nexthop6, or our address in ipv6_prefix */
+ +      if (config->nexthop6_address.s6_addr[0])
+ +          memcpy(&mp_reach_nlri_partial.next_hop,
+ +                  &config->nexthop6_address.s6_addr, 16);
+ +      else
+ +      {
+ +          /* our address is ipv6prefix::1 */
+ +          memcpy(&mp_reach_nlri_partial.next_hop,
+ +                  &config->ipv6_prefix.s6_addr, 16);
+ +          mp_reach_nlri_partial.next_hop[15] = 1;
+ +      }
+ +
+ +      memcpy(&a.data.s.value, &mp_reach_nlri_partial,
+ +              sizeof(struct bgp_attr_mp_reach_nlri_partial));
+ +      memcpy(&peer->mp_reach_nlri_partial, &a,
+ +              BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE);
+ +
+ +      a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_EXTLEN;
+ +      a.code = BGP_PATH_ATTR_CODE_MP_UNREACH_NLRI;
+ +      a.data.e.len = 0; /* will be set on UPDATE */
+ +
+ +      mp_unreach_nlri_partial.afi = htons(BGP_MP_AFI_IPv6);
+ +      mp_unreach_nlri_partial.safi = BGP_MP_SAFI_UNICAST;
+ +
+ +      memcpy(&a.data.e.value, &mp_unreach_nlri_partial,
+ +              sizeof(struct bgp_attr_mp_unreach_nlri_partial));
+ +      memcpy(&peer->mp_unreach_nlri_partial, &a,
+ +              BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE);
+ +    }
+ +
+ +    peer->mp_handling = HandlingUnknown;
+ +
       LOG(4, 0, 0, "Initiating BGP connection to %s (routing %s)\n",
         name, enable ? "enabled" : "suspended");
   
@@@ -332,8 -267,6 +332,8 @@@ static void bgp_clear(struct bgp_peer *
   
       bgp_free_routes(peer->routes);
       peer->routes = 0;
+ +    bgp_free_routes6(peer->routes6);
+ +    peer->routes6 = 0;
   
       peer->outbuf->packet.header.len = 0;
       peer->outbuf->done = 0;
@@@ -394,26 -327,6 +394,6 @@@ static void bgp_set_retry(struct bgp_pe
         bgp_halt(peer); /* give up */
   }
   
- /* convert ip/mask to CIDR notation */
- static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx)
- {
-     int i;
-     uint32_t b;
- 
-     /* convert to prefix notation */
-     pfx->len = 32;
-     pfx->prefix = ip;
- 
-     if (!mask) /* bogus */
-       mask = 0xffffffff;
- 
-     for (i = 0; i < 32 && ((b = ntohl(1 << i)), !(mask & b)); i++)
-     {
-       pfx->len--;
-       pfx->prefix &= ~b;
-     }
- }
- 
   /* insert route into list; sorted */
   static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
       struct bgp_route_list *new)
@@@ -441,33 -354,6 +421,33 @@@
       return head;
   }
   
+ +/* insert route6 into list; sorted ascending by memcmp() of the whole dest (len byte first, then the prefix bytes); returns the (possibly new) head */
+ +static struct bgp_route6_list *bgp_insert_route6(struct bgp_route6_list *head,
+ +    struct bgp_route6_list *new)
+ +{
+ +    struct bgp_route6_list *p = head;
+ +    struct bgp_route6_list *e = 0; /* last node sorting strictly before new; 0 if new becomes the head */
+ +
+ +    while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0)
+ +    {
+ +      e = p;
+ +      p = p->next;
+ +    }
+ +
+ +    if (e)
+ +    {
+ +      new->next = e->next; /* link new right after e */
+ +      e->next = new;
+ +    }
+ +    else
+ +    {
+ +      new->next = head; /* empty list, or new sorts first */
+ +      head = new;
+ +    }
+ +
+ +    return head;
+ +}
+ +
   /* add route to list for peers */
   /*
    * Note:  this doesn't do route aggregation, nor drop routes if a less
@@@ -475,13 -361,14 +455,14 @@@
    * that if that route is later deleted we don't have to be concerned
    * about adding back the more specific one).
    */
- int bgp_add_route(in_addr_t ip, in_addr_t mask)
+ int bgp_add_route(in_addr_t ip, int prefixlen)
   {
       struct bgp_route_list *r = bgp_routes;
       struct bgp_route_list add;
       int i;
   
-     bgp_cidr(ip, mask, &add.dest);
+     add.dest.prefix = ip;
+     add.dest.len = prefixlen;
       add.next = 0;
   
       /* check for duplicate */
@@@ -520,68 -407,16 +501,69 @@@
       return 1;
   }
   
+ +/* add IPv6 route to the global bgp_routes6 list for peers; returns 1 on success or if already present, 0 if allocation fails */
+ +/*
+ + * Note: same provisions as above
+ + */
+ +int bgp_add_route6(struct in6_addr ip, int prefixlen)
+ +{
+ +    struct bgp_route6_list *r = bgp_routes6;
+ +    struct bgp_route6_list add; /* stack template, copied into the heap node below */
+ +    int i;
+ +    char ipv6addr[INET6_ADDRSTRLEN]; /* scratch buffer for inet_ntop() in log messages */
+ +
+ +    memcpy(&add.dest.prefix, &ip.s6_addr, 16);
+ +    add.dest.len = prefixlen;
+ +    add.next = 0;
+ +
+ +    /* check for duplicate */
+ +    while (r)
+ +    {
+ +      i = memcmp(&r->dest, &add.dest, sizeof(r->dest));
+ +      if (!i)
+ +          return 1; /* already covered */
+ +
+ +      if (i > 0)
+ +          break; /* list is sorted: we are past where the route would be */
+ +
+ +      r = r->next;
+ +    }
+ +
+ +    /* insert into route list; sorted */
+ +    if (!(r = malloc(sizeof(*r))))
+ +    {
+ +      LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
+ +          inet_ntop(AF_INET6, &ip, ipv6addr, INET6_ADDRSTRLEN), add.dest.len,
+ +          strerror(errno));
+ +
+ +      return 0;
+ +    }
+ +
+ +    memcpy(r, &add, sizeof(*r));
+ +    bgp_routes6 = bgp_insert_route6(bgp_routes6, r);
+ +
+ +    /* flag established peers for update */
+ +    for (i = 0; i < BGP_NUM_PEERS; i++)
+ +      if (bgp_peers[i].state == Established
+ +              && bgp_peers[i].mp_handling == HandleIPv6Routes) /* only peers known to accept IPv6 routes */
+ +          bgp_peers[i].update_routes6 = 1;
+ +
+ +    LOG(4, 0, 0, "Registered BGP route %s/%d\n",
+ +      inet_ntop(AF_INET6, &ip, ipv6addr, INET6_ADDRSTRLEN), add.dest.len);
+ +
+ +    return 1;
+ +}
+ +
   /* remove route from list for peers */
- int bgp_del_route(in_addr_t ip, in_addr_t mask)
+ int bgp_del_route(in_addr_t ip, int prefixlen)
   {
       struct bgp_route_list *r = bgp_routes;
       struct bgp_route_list *e = 0;
       struct bgp_route_list del;
       int i;
   
-     bgp_cidr(ip, mask, &del.dest);
+     del.dest.prefix = ip;
+     del.dest.len = prefixlen;
       del.next = 0;
   
       /* find entry in routes list and remove */
@@@ -622,58 -457,6 +604,58 @@@
       return 1;
   }
   
+ +/* remove IPv6 route from the global bgp_routes6 list for peers; always returns 1, also when the route was not found */
+ +int bgp_del_route6(struct in6_addr ip, int prefixlen)
+ +{
+ +    struct bgp_route6_list *r = bgp_routes6;
+ +    struct bgp_route6_list *e = 0; /* node preceding r; 0 while r is the head */
+ +    struct bgp_route6_list del; /* search key only, never linked into a list */
+ +    int i;
+ +    char ipv6addr[INET6_ADDRSTRLEN]; /* scratch buffer for inet_ntop() in the log message */
+ +
+ +    memcpy(&del.dest.prefix, &ip.s6_addr, 16);
+ +    del.dest.len = prefixlen;
+ +    del.next = 0;
+ +
+ +    /* find entry in routes list and remove */
+ +    while (r)
+ +    {
+ +      i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
+ +      if (!i)
+ +      {
+ +          if (e)
+ +              e->next = r->next;
+ +          else
+ +              bgp_routes6 = r->next;
+ +
+ +          free(r); /* NOTE(review): r is not cleared; the !r test below relies on its stale non-null value meaning "found" */
+ +          break;
+ +      }
+ +
+ +      e = r;
+ +
+ +      if (i > 0)
+ +          r = 0; /* stop: list is sorted, the entry cannot be further on */
+ +      else
+ +          r = r->next;
+ +    }
+ +
+ +    /* not found */
+ +    if (!r)
+ +      return 1;
+ +
+ +    /* flag established peers for update */
+ +    for (i = 0; i < BGP_NUM_PEERS; i++)
+ +      if (bgp_peers[i].state == Established
+ +              && bgp_peers[i].mp_handling == HandleIPv6Routes) /* only peers known to accept IPv6 routes */
+ +          bgp_peers[i].update_routes6 = 1;
+ +
+ +    LOG(4, 0, 0, "Removed BGP route %s/%d\n",
+ +      inet_ntop(AF_INET6, &ip, ipv6addr, INET6_ADDRSTRLEN), del.dest.len);
+ +
+ +    return 1;
+ +}
+ +
   /* enable or disable routing */
   void bgp_enable_routing(int enable)
   {
@@@ -818,68 -601,36 +800,68 @@@ int bgp_process(uint32_t events[]
                 continue;
         }
   
- -      /* process timers */
- -      if (peer->state == Established)
+ +      /* process pending IPv6 updates */
+ +      if (peer->update_routes6
+ +          && !peer->outbuf->packet.header.len) /* ditto */
         {
- -          if (time_now > peer->expire_time)
- -          {
- -              LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
- -                  peer->name, peer->hold);
- -
- -              bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
+ +          if (!bgp_send_update6(peer))
                 continue;
- -          }
- -
- -          if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
- -              bgp_send_keepalive(peer);
- -      }
- -      else if (peer->state == Idle)
- -      {
- -          if (time_now > peer->retry_time)
- -              bgp_connect(peer);
         }
- -      else if (time_now > peer->state_time + BGP_STATE_TIME)
+ +
+ +      /* process timers */
+ +      bgp_process_timers(peer);
+ +    }
+ +
+ +    return 1;
+ +}
+ +
+ +/* process bgp timers only: runs bgp_process_timers() on every peer that is not Disabled; does nothing while bgp_configured is unset */
+ +void bgp_process_peers_timers()
+ +{
+ +    int i;
+ +
+ +    if (!bgp_configured)
+ +      return;
+ +
+ +    for (i = 0; i < BGP_NUM_PEERS; i++)
+ +    {
+ +      struct bgp_peer *peer = &bgp_peers[i];
+ +
+ +      if (peer->state == Disabled)
+ +          continue; /* skip peers that are switched off */
+ +
+ +      bgp_process_timers(peer);
+ +    }
+ +}
+ +
+ +static void bgp_process_timers(struct bgp_peer *peer) /* per-peer timers: hold/keepalive when Established, connect retry when Idle, state timeout in any other state */
+ +{
+ +    if (peer->state == Established)
+ +    {
+ +      if (time_now > peer->expire_time) /* nothing heard from the peer within the hold time */
        {
- -          LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
- -              bgp_state_str(peer->state), peer->name);
+ +          LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
+ +              peer->name, peer->hold);
  
- -          bgp_restart(peer);
+ +          bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
+ +          return;
        }
+ +
+ +      if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len) /* only when no message is pending in outbuf */
+ +          bgp_send_keepalive(peer);
      }
+ +    else if (peer->state == Idle)
+ +    {
+ +      if (time_now > peer->retry_time)
+ +          bgp_connect(peer);
+ +    }
+ +    else if (time_now > peer->state_time + BGP_STATE_TIME) /* stuck too long in an intermediate state */
+ +    {
+ +      LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
+ +          bgp_state_str(peer->state), peer->name);
  
- -    return 1;
+ +      bgp_restart(peer);
+ +    }
  }
   
   static void bgp_free_routes(struct bgp_route_list *routes)
@@@ -893,17 -644,6 +875,17 @@@
       }
   }
   
+ +static void bgp_free_routes6(struct bgp_route6_list *routes) /* free() every node of an IPv6 route list; the caller resets its own head pointer */
+ +{
+ +    struct bgp_route6_list *tmp;
+ +
+ +    while ((tmp = routes))
+ +    {
+ +      routes = tmp->next; /* advance before the node is released */
+ +      free(tmp);
+ +    }
+ +}
+ +
   char const *bgp_state_str(enum bgp_state state)
   {
       switch (state)
@@@ -938,7 -678,6 +920,7 @@@ static int bgp_connect(struct bgp_peer 
   {
       static int bgp_port = 0;
       struct sockaddr_in addr;
+ +    struct sockaddr_in source_addr;
       struct epoll_event ev;
   
       if (!bgp_port)
@@@ -970,19 -709,6 +952,19 @@@
       /* set to non-blocking */
       fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
   
+ +    /* set source address */
+ +    memset(&source_addr, 0, sizeof(source_addr));
+ +    source_addr.sin_family = AF_INET;
+ +    source_addr.sin_addr.s_addr = peer->source_addr; /* defaults to INADDR_ANY */
+ +    if (bind(peer->sock, (struct sockaddr *) &source_addr, sizeof(source_addr)) < 0)
+ +    {
+ +      LOG(1, 0, 0, "Can't set source address to %s: %s\n",
+ +          inet_ntoa(source_addr.sin_addr), strerror(errno));
+ +
+ +      bgp_set_retry(peer);
+ +      return 0;
+ +    }
+ +
       /* try connect */
       memset(&addr, 0, sizeof(addr));
       addr.sin_family = AF_INET;
@@@ -1158,12 -884,6 +1140,12 @@@ static int bgp_handle_input(struct bgp_
             struct bgp_data_open data;
             int hold;
             int i;
+ +          off_t param_offset, capability_offset;
+ +          struct bgp_opt_param *param;
+ +          uint8_t capabilities_len;
+ +          char *capabilities = NULL;
+ +          struct bgp_capability *capability;
+ +          struct bgp_mp_cap_param *mp_cap;
   
             for (i = 0; i < sizeof(p->header.marker); i++)
             {
@@@ -1226,93 -946,10 +1208,93 @@@
             if (peer->keepalive * 3 > peer->hold)
                 peer->keepalive = peer->hold / 3;
   
+ +          /* check for optional parameters */
+ +          /* 2 is for the size of type + len (both uint8_t) */
+ +          for (param_offset = 0;
+ +                  param_offset < data.opt_len;
+ +                  param_offset += 2 + param->len)
+ +          {
+ +              param = (struct bgp_opt_param *)((char *)&data.opt_params + param_offset);
+ +
+ +              /* sensible check */
+ +              if (data.opt_len - param_offset < 2
+ +                      || param->len > data.opt_len - param_offset - 2)
+ +              {
+ +                  LOG(1, 0, 0, "Malformed Optional Parameter list from BGP peer %s\n",
+ +                      peer->name);
+ +
+ +                  bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC);
+ +                  return 0;
+ +              }
+ +
+ +              /* we know only one parameter type */
+ +              if (param->type != BGP_PARAM_TYPE_CAPABILITY)
+ +              {
+ +                  LOG(1, 0, 0, "Unsupported Optional Parameter type %d from BGP peer %s\n",
+ +                      param->type, peer->name);
+ +
+ +                  bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_UNSUP_PARAM);
+ +                  return 0;
+ +              }
+ +
+ +              capabilities_len = param->len;
+ +              capabilities = (char *)&param->value;
+ +
+ +              /* look for BGP multiprotocol capability */
+ +              for (capability_offset = 0;
+ +                      capability_offset < capabilities_len;
+ +                      capability_offset += 2 + capability->len)
+ +              {
+ +                  capability = (struct bgp_capability *)(capabilities + capability_offset);
+ +
+ +                  /* sensible check */
+ +                  if (capabilities_len - capability_offset < 2
+ +                          || capability->len > capabilities_len - capability_offset - 2)
+ +                  {
+ +                      LOG(1, 0, 0, "Malformed Capabilities list from BGP peer %s\n",
+ +                          peer->name);
+ +
+ +                      bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC);
+ +                      return 0;
+ +                  }
+ +
+ +                  /* we only know one capability code */
+ +                  if (capability->code != BGP_CAP_CODE_MP
+ +                          && capability->len != sizeof(struct bgp_mp_cap_param))
+ +                  {
+ +                      LOG(4, 0, 0, "Unsupported Capability code %d from BGP peer %s\n",
+ +                          capability->code, peer->name);
+ +
+ +                      /* we don't terminate, still; we just jump to the next one */
+ +                      continue;
+ +                  }
+ +
+ +                  mp_cap = (struct bgp_mp_cap_param *)&capability->value;
+ +                  /* the only <AFI, SAFI> tuple we support */
+ +                  if (ntohs(mp_cap->afi) != BGP_MP_AFI_IPv6 && mp_cap->safi != BGP_MP_SAFI_UNICAST)
+ +                  {
+ +                      LOG(4, 0, 0, "Unsupported multiprotocol AFI %d and SAFI %d from BGP peer %s\n",
+ +                          mp_cap->afi, mp_cap->safi, peer->name);
+ +
+ +                      /* we don't terminate, still; we just jump to the next one */
+ +                      continue;
+ +                  }
+ +
+ +                  /* yes it can! */
+ +                  peer->mp_handling = HandleIPv6Routes;
+ +              }
+ +          }
+ +
+ +          if (peer->mp_handling != HandleIPv6Routes)
+ +          {
+ +              peer->mp_handling = DoesntHandleIPv6Routes;
+ +              if (config->ipv6_prefix.s6_addr[0])
+ +                  LOG(1, 0, 0, "Warning: BGP peer %s doesn't handle IPv6 prefixes updates\n",
+ +                          peer->name);
+ +          }
+ +
             /* next transition requires an exchange of keepalives */
             bgp_send_keepalive(peer);
- -
- -          /* FIXME: may need to check for optional params */
         }
   
         break;
@@@ -1341,30 -978,10 +1323,30 @@@
             if (notification->error_code == BGP_ERR_CEASE)
             {
                 LOG(4, 0, 0, "BGP peer %s sent CEASE\n", peer->name);
- -              bgp_restart(peer);
+ +              bgp_set_retry(peer);
                 return 0;
             }
   
+ +          if (notification->error_code == BGP_ERR_OPEN
+ +                  && notification->error_subcode == BGP_ERR_OPN_UNSUP_PARAM)
+ +          {
+ +              LOG(4, 0, 0, "BGP peer %s doesn't support BGP Capabilities\n", peer->name);
+ +              peer->mp_handling = DoesntHandleIPv6Routes;
+ +              bgp_set_retry(peer);
+ +              return 0;
+ +          }
+ +
+ +          if (notification->error_code == BGP_ERR_OPEN
+ +                  && notification->error_subcode == BGP_ERR_OPN_UNSUP_CAP)
+ +          {
+ +              /* the only capability we advertise is this one, so upon receiving
+ +                 an "unsupported capability" message, we disable IPv6 routes for
+ +                 this peer */
+ +              LOG(4, 0, 0, "BGP peer %s doesn't support IPv6 routes advertisement\n", peer->name);
+ +              peer->mp_handling = DoesntHandleIPv6Routes;
+ +              break;
+ +          }
+ +
             /* FIXME: should handle more notifications */
             LOG(4, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n",
                 peer->name, (int) notification->error_code);
@@@ -1395,9 -1012,6 +1377,9 @@@
   static int bgp_send_open(struct bgp_peer *peer)
   {
       struct bgp_data_open data;
+ +    struct bgp_mp_cap_param mp_ipv6 = { htons(BGP_MP_AFI_IPv6), 0, BGP_MP_SAFI_UNICAST };
+ +    struct bgp_capability cap_mp_ipv6;
+ +    struct bgp_opt_param param_cap_mp_ipv6;
       uint16_t len = sizeof(peer->outbuf->packet.header);
   
       memset(peer->outbuf->packet.header.marker, 0xff,
@@@ -1408,35 -1022,11 +1390,35 @@@
       data.version = BGP_VERSION;
       data.as = htons(our_as);
       data.hold_time = htons(peer->hold);
- -    data.identifier = my_address;
- -    data.opt_len = 0;
+ +    /* use the source IP we use as identifier, if available */
+ +    if (peer->source_addr != INADDR_ANY)
+ +      data.identifier = peer->source_addr;
+ +    else
+ +      data.identifier = my_address;
+ +
+ +    /* if we know peer doesn't support MP (mp_handling == DoesntHandleIPv6Routes)
+ +       then don't add this parameter */
+ +    if (config->ipv6_prefix.s6_addr[0]
+ +          && (peer->mp_handling == HandlingUnknown
+ +              || peer->mp_handling == HandleIPv6Routes))
+ +    {
+ +      /* construct the param and capability */
+ +      cap_mp_ipv6.code = BGP_CAP_CODE_MP;
+ +      cap_mp_ipv6.len = sizeof(mp_ipv6);
+ +      memcpy(&cap_mp_ipv6.value, &mp_ipv6, cap_mp_ipv6.len);
+ +
+ +      param_cap_mp_ipv6.type = BGP_PARAM_TYPE_CAPABILITY;
+ +      param_cap_mp_ipv6.len = 2 + sizeof(mp_ipv6);
+ +      memcpy(&param_cap_mp_ipv6.value, &cap_mp_ipv6, param_cap_mp_ipv6.len);
+ +
+ +      data.opt_len = 2 + param_cap_mp_ipv6.len;
+ +      memcpy(&data.opt_params, &param_cap_mp_ipv6, data.opt_len);
+ +    }
+ +    else
+ +      data.opt_len = 0;
   
- -    memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE);
- -    len += BGP_DATA_OPEN_SIZE;
+ +    memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE + data.opt_len);
+ +    len += BGP_DATA_OPEN_SIZE + data.opt_len;
   
       peer->outbuf->packet.header.len = htons(len);
       peer->outbuf->done = 0;
@@@ -1603,179 -1193,6 +1585,179 @@@ static int bgp_send_update(struct bgp_p
       return bgp_write(peer);
   }
   
+ +/* send/buffer UPDATE message for IPv6 routes: withdraws routes the peer has (peer->routes6) but that are no longer wanted and announces at most one new route per message; returns 1 if nothing changed, 0 if allocation fails, otherwise the result of bgp_write() */
+ +static int bgp_send_update6(struct bgp_peer *peer)
+ +{
+ +    uint16_t attr_len;
+ +    uint16_t unreach_len = 0;
+ +    char *unreach_len_pos;
+ +    uint8_t reach_len;
+ +    uint16_t len = sizeof(peer->outbuf->packet.header);
+ +    struct bgp_route6_list *have = peer->routes6; /* routes already announced to this peer */
+ +    struct bgp_route6_list *want = peer->routing ? bgp_routes6 : 0; /* nothing is wanted when routing is off for the peer */
+ +    struct bgp_route6_list *e = 0;
+ +    struct bgp_route6_list *add = 0; /* the single route to announce in this message, if any */
+ +    int s;
+ +    char ipv6addr[INET6_ADDRSTRLEN];
+ +
+ +    char *data = (char *) &peer->outbuf->packet.data;
+ +
+ +    /* need leave room for attr_len, bgp_path_attrs and one prefix */
+ +    char *max = (char *) &peer->outbuf->packet.data
+ +      + sizeof(peer->outbuf->packet.data)
+ +      - sizeof(attr_len) - peer->path_attr_len_without_nexthop
+ +      - BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE - sizeof(struct bgp_ip6_prefix);
+ +
+ +    memset(peer->outbuf->packet.header.marker, 0xff,
+ +      sizeof(peer->outbuf->packet.header.marker));
+ +
+ +    peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
+ +
+ +    /* insert non-MP unfeasible routes length */
+ +    memcpy(data, &unreach_len, sizeof(unreach_len)); /* still 0 here: IPv6 withdrawals go in the MP_UNREACH_NLRI attribute instead */
+ +    /* skip over it and attr_len too; it will be filled when known */
+ +    data += sizeof(unreach_len) + sizeof(attr_len);
+ +    len += sizeof(unreach_len) + sizeof(attr_len);
+ +
+ +    /* copy usual attributes */
+ +    memcpy(data, peer->path_attrs, peer->path_attr_len_without_nexthop);
+ +    data += peer->path_attr_len_without_nexthop;
+ +    attr_len = peer->path_attr_len_without_nexthop;
+ +
+ +    /* copy MP unreachable NLRI heading */
+ +    memcpy(data, peer->mp_unreach_nlri_partial,
+ +          BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE);
+ +    /* remember where to update this attr len */
+ +    unreach_len_pos = data + 2; /* the length follows the flags and code bytes */
+ +    data += BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE;
+ +    attr_len += BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE;
+ +
+ +    peer->update_routes6 = 0; /* tentatively clear */
+ +
+ +    /* find differences */
+ +    while ((have || want) && data < (max - sizeof(struct bgp_ip6_prefix)))
+ +    {
+ +      if (have)
+ +          s = want
+ +              ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
+ +              : -1;
+ +      else
+ +          s = 1;
+ +
+ +      if (s < 0) /* found one to delete */
+ +      {
+ +          struct bgp_route6_list *tmp = have;
+ +          have = have->next;
+ +
+ +          s = BGP_IP_PREFIX_SIZE(tmp->dest); /* s reused: encoded size of the prefix (len byte + used prefix bytes) */
+ +          memcpy(data, &tmp->dest, s);
+ +          data += s;
+ +          unreach_len += s;
+ +          attr_len += s;
+ +
+ +          LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
+ +              inet_ntop(AF_INET6, &tmp->dest.prefix, ipv6addr, INET6_ADDRSTRLEN),
+ +              tmp->dest.len, peer->name);
+ +
+ +          free(tmp);
+ +
+ +          if (e)
+ +              e->next = have;
+ +          else
+ +              peer->routes6 = have;
+ +      }
+ +      else
+ +      {
+ +          if (!s) /* same */
+ +          {
+ +              e = have; /* stash the last found to relink above */
+ +              have = have->next;
+ +              want = want->next;
+ +          }
+ +          else if (s > 0) /* addition reqd. */
+ +          {
+ +              if (add)
+ +              {
+ +                  peer->update_routes6 = 1; /* only one add per packet */
+ +                  if (!have)
+ +                      break; /* nothing left to withdraw; remaining adds wait for the next message */
+ +              }
+ +              else
+ +                  add = want;
+ +
+ +              if (want)
+ +                  want = want->next;
+ +          }
+ +      }
+ +    }
+ +
+ +    if (have || want)
+ +      peer->update_routes6 = 1; /* more to do */
+ +
+ +    /* anything changed? */
+ +    if (!(unreach_len || add))
+ +      return 1;
+ +
+ +    if (unreach_len)
+ +    {
+ +      /* go back and insert MP unreach_len */
+ +      unreach_len += sizeof(struct bgp_attr_mp_unreach_nlri_partial); /* the attribute length also covers the AFI/SAFI heading */
+ +      unreach_len = htons(unreach_len);
+ +      memcpy(unreach_len_pos, &unreach_len, sizeof(unreach_len));
+ +    }
+ +    else
+ +    {
+ +      /* we can remove this attribute, then */
+ +      data -= BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE;
+ +      attr_len -= BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE;
+ +    }
+ +
+ +    if (add)
+ +    {
+ +      if (!(e = malloc(sizeof(*e)))) /* e reused: new node recording the route in peer->routes6 */
+ +      {
+ +          LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
+ +              inet_ntop(AF_INET6, &add->dest.prefix, ipv6addr, INET6_ADDRSTRLEN),
+ +              add->dest.len, strerror(errno));
+ +
+ +          return 0;
+ +      }
+ +
+ +      memcpy(e, add, sizeof(*e));
+ +      e->next = 0;
+ +      peer->routes6 = bgp_insert_route6(peer->routes6, e);
+ +
+ +      /* copy MP reachable NLRI heading */
+ +      memcpy(data, peer->mp_reach_nlri_partial,
+ +              BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE);
+ +      /* with proper len */
+ +      reach_len = BGP_IP_PREFIX_SIZE(add->dest);
+ +      data[2] = sizeof(struct bgp_attr_mp_reach_nlri_partial) + reach_len; /* one-byte attribute length, right after flags and code */
+ +      data += BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE;
+ +      attr_len += BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE;
+ +
+ +      memcpy(data, &add->dest, reach_len);
+ +      data += reach_len;
+ +      attr_len += reach_len;
+ +
+ +      LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
+ +          inet_ntop(AF_INET6, &add->dest.prefix, ipv6addr, INET6_ADDRSTRLEN),
+ +          add->dest.len, peer->name);
+ +    }
+ +
+ +    /* update len with attributes we added */
+ +    len += attr_len;
+ +
+ +    /* go back and insert attr_len */
+ +    attr_len = htons(attr_len);
+ +    memcpy((char *)&peer->outbuf->packet.data + 2, &attr_len, sizeof(attr_len)); /* + 2 skips the unfeasible routes length written first */
+ +
+ +    peer->outbuf->packet.header.len = htons(len);
+ +    peer->outbuf->done = 0;
+ +
+ +    return bgp_write(peer);
+ +}
+ +
   /* send/buffer NOTIFICATION message */
   static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
       uint8_t subcode)
diff --combined bgp.h

index 44bad0d,f8b52d6..bc5a0e9
--- 1/bgp.h
--- 2/bgp.h
+++ b/bgp.h
@@@ -43,51 -43,11 +43,51 @@@ struct bgp_data_open 
       char opt_params[sizeof(((struct bgp_packet *)0)->data) - BGP_DATA_OPEN_SIZE]; /* variable */
   } __attribute__ ((packed));
   
+ +struct bgp_opt_param { /* one optional parameter of an OPEN message: type, length, value */
+ +    uint8_t type; /* BGP_PARAM_TYPE_* */
+ +    uint8_t len; /* size of value in bytes; the 2 header bytes (type + len) are not counted */
+ +#define BGP_MAX_OPT_PARAM_SIZE        256
+ +    char value[BGP_MAX_OPT_PARAM_SIZE]; /* variable; only len bytes are meaningful */
+ +} __attribute__ ((packed));
+ +
+ +#define BGP_PARAM_TYPE_CAPABILITY     2
+ +struct bgp_capability { /* one capability carried inside a BGP_PARAM_TYPE_CAPABILITY optional parameter */
+ +    uint8_t code; /* BGP_CAP_CODE_* */
+ +    uint8_t len; /* size of value in bytes; the 2 header bytes (code + len) are not counted */
+ +#define BGP_MAX_CAPABILITY_SIZE       256
+ +    char value[BGP_MAX_CAPABILITY_SIZE]; /* variable; only len bytes are meaningful */
+ +} __attribute__ ((packed));
+ +
+ +/* RFC4760 Multiprotocol extension */
+ +#define BGP_CAP_CODE_MP       1
+ +
+ +struct bgp_mp_cap_param { /* value of a BGP_CAP_CODE_MP capability: one <AFI, SAFI> tuple */
+ +    uint16_t afi; /* address family, BGP_MP_AFI_*; stored in network byte order */
+ +    uint8_t reserved; /* SHOULD be 0 */
+ +    uint8_t safi; /* BGP_MP_SAFI_* */
+ +} __attribute__ ((packed));
+ +
+ +/* bgp_mp_cap_param.afi */
+ +#define BGP_MP_AFI_RESERVED   0
+ +#define BGP_MP_AFI_IPv4               1
+ +#define BGP_MP_AFI_IPv6               2
+ +/* bgp_mp_cap_param.safi */
+ +#define BGP_MP_SAFI_UNICAST   1
+ +#define BGP_MP_SAFI_MULTICAST 2
+ +
+ +struct bgp_ip6_prefix { /* IPv6 counterpart of struct bgp_ip_prefix */
+ +    uint8_t len; /* prefix length in bits */
+ +    uint8_t prefix[16]; /* variable: only BGP_IP_PREFIX_SIZE() - 1 bytes are copied into messages */
+ +} __attribute__ ((packed));
+ +
+ +/* end of RFC4760 specific definitions */
+ +
   struct bgp_ip_prefix {
       uint8_t len;
       uint32_t prefix; /* variable */
   } __attribute__ ((packed));
   
+ +/* works for both IPv4 and IPv6 prefixes */
   #define BGP_IP_PREFIX_SIZE(p) (1 + ((p).len / 8) + ((p).len % 8 != 0))
   
   struct bgp_path_attr {
@@@ -105,22 -65,6 +105,22 @@@
       } data; /* variable */
   } __attribute__ ((packed));
   
+ +struct bgp_attr_mp_reach_nlri_partial { /* fixed leading part of an MP_REACH_NLRI attribute value; the announced prefix is appended after it */
+ +    uint16_t afi; /* address family, BGP_MP_AFI_*; stored in network byte order */
+ +    uint8_t safi; /* BGP_MP_SAFI_* */
+ +    uint8_t next_hop_len; /* size of next_hop in bytes */
+ +    uint8_t next_hop[16]; /* IPv6 next hop address */
+ +    uint8_t reserved; /* set to 0 */
+ +} __attribute__ ((packed));
+ +#define BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE (3 + sizeof(struct bgp_attr_mp_reach_nlri_partial)) /* 3 = flags, code and one-byte length of the attribute header */
+ +
+ +struct bgp_attr_mp_unreach_nlri_partial { /* fixed leading part of an MP_UNREACH_NLRI attribute value; the withdrawn prefixes are appended after it */
+ +    uint16_t afi; /* address family, BGP_MP_AFI_*; stored in network byte order */
+ +    uint8_t safi; /* BGP_MP_SAFI_* */
+ +} __attribute__ ((packed));
+ +/* we use it as an extended attribute: 4 = flags, code and two-byte length of the attribute header */
+ +#define BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE (4 + sizeof(struct bgp_attr_mp_unreach_nlri_partial))
+ +
   /* bgp_path_attr.flags (bitfields) */
   #define BGP_PATH_ATTR_FLAG_OPTIONAL   (1 << 7)
   #define BGP_PATH_ATTR_FLAG_TRANS      (1 << 6)
@@@ -141,11 -85,9 +141,11 @@@
   #define BGP_PATH_ATTR_CODE_ATOMIC_AGGREGATE   6       /* well-known, discretionary */
   #define BGP_PATH_ATTR_CODE_AGGREGATOR         7       /* optional, transitive */
   #define BGP_PATH_ATTR_CODE_COMMUNITIES                8       /* optional, transitive (RFC1997) */
+ +#define BGP_PATH_ATTR_CODE_MP_REACH_NLRI      14      /* optional, non-transitive (RFC4760) */
+ +#define BGP_PATH_ATTR_CODE_MP_UNREACH_NLRI    15      /* optional, non-transitive (RFC4760) */
   
   #define BGP_PATH_ATTR_SIZE(p) ((((p).flags & BGP_PATH_ATTR_FLAG_EXTLEN) \
- -    ? ((p).data.e.len + 1) : (p).data.s.len) + 3)
+ +    ? ((p).data.e.len + 4) : (p).data.s.len) + 3)
   
   /* well known COMMUNITIES */
   #define BGP_COMMUNITY_NO_EXPORT                       0xffffff01      /* don't advertise outside confederation */
@@@ -159,7 -101,6 +159,7 @@@ struct bgp_data_notification 
   } __attribute__ ((packed));
   
   /* bgp_data_notification.error_code, .error_subcode */
+ +#define BGP_ERR_UNSPEC                        0
   #define BGP_ERR_HEADER                        1
   #  define BGP_ERR_HDR_NOT_SYNC                  1
   #  define BGP_ERR_HDR_BAD_LEN           2
@@@ -171,7 -112,6 +171,7 @@@
   #  define BGP_ERR_OPN_UNSUP_PARAM       4
   #  define BGP_ERR_OPN_AUTH_FAILURE      5
   #  define BGP_ERR_OPN_HOLD_TIME                 6
+ +#  define BGP_ERR_OPN_UNSUP_CAP                 7
   #define BGP_ERR_UPDATE                        3
   #  define BGP_ERR_UPD_BAD_ATTR_LIST     1
   #  define BGP_ERR_UPD_UNKN_WK_ATTR      2
@@@ -198,11 -138,6 +198,11 @@@ enum bgp_state 
       Established,                      /* established */
   };
   
+ +struct bgp_route6_list {
+ +    struct bgp_ip6_prefix dest;
+ +    struct bgp_route6_list *next;
+ +};
+ +
   struct bgp_route_list {
       struct bgp_ip_prefix dest;
       struct bgp_route_list *next;
@@@ -213,17 -148,10 +213,17 @@@ struct bgp_buf 
       size_t done;                      /* bytes sent/recvd */
   };
   
+ +enum bgp_mp_handling {
+ +    HandleIPv6Routes,
+ +    DoesntHandleIPv6Routes,
+ +    HandlingUnknown,
+ +};
+ +
   /* state */
   struct bgp_peer {
       char name[32];                    /* peer name */
       in_addr_t addr;                   /* peer address */
+ +    in_addr_t source_addr;            /* our source address */
       int as;                           /* AS number */
       int sock;
       enum bgp_state state;             /* FSM state */
@@@ -245,14 -173,8 +245,14 @@@
       int cli_flag;                     /* updates requested from CLI */
       char *path_attrs;                 /* path attrs to send in UPDATE message */
       int path_attr_len;                        /* length of path attrs */
+ +    int path_attr_len_without_nexthop;        /* length of path attrs  without NEXT_HOP */
       uint32_t events;                  /* events to poll */
       struct event_data edata;          /* poll data */
+ +    enum bgp_mp_handling mp_handling; /* how it handles IPv6 routes advertisements */
+ +    int update_routes6;                       /* UPDATE required for IPv6 routes */
+ +    struct bgp_route6_list *routes6;  /* IPv6 routes known by this peer */
+ +    char mp_reach_nlri_partial[BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE];
+ +    char mp_unreach_nlri_partial[BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE];
   };
   
   /* bgp_peer.cli_flag */
@@@ -266,19 -188,16 +266,19 @@@ extern int bgp_configured
   /* actions */
   int bgp_setup(int as);
   int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
- -    int hold, int enable);
+ +    int hold, struct in_addr update_source, int enable);
   
   void bgp_stop(struct bgp_peer *peer);
   void bgp_halt(struct bgp_peer *peer);
   int bgp_restart(struct bgp_peer *peer);
- int bgp_add_route(in_addr_t ip, in_addr_t mask);
+ int bgp_add_route(in_addr_t ip, int prefixlen);
+ +int bgp_add_route6(struct in6_addr ip, int prefixlen);
- int bgp_del_route(in_addr_t ip, in_addr_t mask);
+ int bgp_del_route(in_addr_t ip, int prefixlen);
+ +int bgp_del_route6(struct in6_addr ip, int prefixlen);
   void bgp_enable_routing(int enable);
   int bgp_set_poll(void);
   int bgp_process(uint32_t events[]);
+ +void bgp_process_peers_timers();
   char const *bgp_state_str(enum bgp_state state);
   
   extern char const *cvs_id_bgp;
diff --combined l2tpns.c

index f2a60c9,c34dc63..581198d
--- 1/l2tpns.c
--- 2/l2tpns.c
+++ b/l2tpns.c
@@@ -14,7 -14,6 +14,6 @@@ char const *cvs_id_l2tpns = "$Id: l2tpn
   #define SYSLOG_NAMES
   #include <syslog.h>
   #include <malloc.h>
- #include <math.h>
   #include <net/route.h>
   #include <sys/mman.h>
   #include <netdb.h>
@@@ -39,6 -38,8 +38,8 @@@
   #include <sched.h>
   #include <sys/sysinfo.h>
   #include <libcli.h>
+ #include <linux/netlink.h>
+ #include <linux/rtnetlink.h>
   
   #include "md5.h"
   #include "l2tpns.h"
@@@ -56,6 -57,7 +57,7 @@@
   
   // Globals
   configt *config = NULL;               // all configuration
+ int nlfd = -1;                        // netlink socket
   int tunfd = -1;                       // tun interface file handle. (network device)
   int udpfd = -1;                       // UDP file handle
   int controlfd = -1;           // Control signal handle
@@@ -63,14 -65,14 +65,14 @@@ int clifd = -1;                    // Socket listening f
   int daefd = -1;                       // Socket listening for DAE connections.
   int snoopfd = -1;             // UDP file handle for sending out intercept data
   int *radfds = NULL;           // RADIUS requests file handles
- int ifrfd = -1;                       // File descriptor for routing, etc
- int ifr6fd = -1;              // File descriptor for IPv6 routing, etc
   int rand_fd = -1;             // Random data source
   int cluster_sockfd = -1;      // Intra-cluster communications socket.
   int epollfd = -1;             // event polling
   time_t basetime = 0;          // base clock
- -char hostname[1000] = "";     // us.
+ +char hostname[MAXHOSTNAME] = "";      // us.
   static int tunidx;            // ifr_ifindex of tun device
+ int nlseqnum = 0;             // netlink sequence number
+ int min_initok_nlseqnum = 0;  // minimum seq number for messages after init is ok
   static int syslog_log = 0;    // are we logging to syslog
   static FILE *log_stream = 0;  // file handle for direct logging (i.e. direct into file, not via syslog).
   uint32_t last_id = 0;         // Unique ID for radius accounting
@@@ -157,10 -159,6 +159,10 @@@ config_descriptt config_values[] = 
         CONFIG("cluster_hb_timeout", cluster_hb_timeout, INT),
         CONFIG("cluster_master_min_adv", cluster_master_min_adv, INT),
         CONFIG("ipv6_prefix", ipv6_prefix, IPv6),
+ +      CONFIG("cli_bind_address", cli_bind_address, IPv4),
+ +      CONFIG("hostname", hostname, STRING),
+ +      CONFIG("nexthop_address", nexthop_address, IPv4),
+ +      CONFIG("nexthop6_address", nexthop6_address, IPv6),
         { NULL, 0, 0, 0 },
   };
   
@@@ -168,6 -166,8 +170,6 @@@ static char *plugin_functions[] = 
         NULL,
         "plugin_pre_auth",
         "plugin_post_auth",
- -      "plugin_packet_rx",
- -      "plugin_packet_tx",
         "plugin_timer",
         "plugin_new_session",
         "plugin_kill_session",
@@@ -199,6 -199,8 +201,8 @@@ struct Tstats *_statistics = NULL
   struct Tringbuffer *ringbuffer = NULL;
   #endif
   
+ static ssize_t netlink_send(struct nlmsghdr *nh);
+ static void netlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen);
   static void cache_ipmap(in_addr_t ip, sessionidt s);
   static void uncache_ipmap(in_addr_t ip);
   static void cache_ipv6map(struct in6_addr ip, int prefixlen, sessionidt s);
@@@ -418,43 -420,61 +422,61 @@@ void random_data(uint8_t *buf, int len
   // via BGP if enabled, and stuffs it into the
   // 'sessionbyip' cache.
   //
- // 'ip' and 'mask' must be in _host_ order.
+ // 'ip' must be in _host_ order.
   //
- static void routeset(sessionidt s, in_addr_t ip, in_addr_t mask, in_addr_t gw, int add)
+ static void routeset(sessionidt s, in_addr_t ip, int prefixlen, in_addr_t gw, int add)
   {
-       struct rtentry r;
+       struct {
+               struct nlmsghdr nh;
+               struct rtmsg rt;
+               char buf[32];
+       } req;
         int i;
+       in_addr_t n_ip;
   
-       if (!mask) mask = 0xffffffff;
+       if (!prefixlen) prefixlen = 32;
   
-       ip &= mask;             // Force the ip to be the first one in the route.
+       ip &= 0xffffffff << (32 - prefixlen);;  // Force the ip to be the first one in the route.
   
-       memset(&r, 0, sizeof(r));
-       r.rt_dev = config->tundevice;
-       r.rt_dst.sa_family = AF_INET;
-       *(uint32_t *) & (((struct sockaddr_in *) &r.rt_dst)->sin_addr.s_addr) = htonl(ip);
-       r.rt_gateway.sa_family = AF_INET;
-       *(uint32_t *) & (((struct sockaddr_in *) &r.rt_gateway)->sin_addr.s_addr) = htonl(gw);
-       r.rt_genmask.sa_family = AF_INET;
-       *(uint32_t *) & (((struct sockaddr_in *) &r.rt_genmask)->sin_addr.s_addr) = htonl(mask);
-       r.rt_flags = (RTF_UP | RTF_STATIC);
+       memset(&req, 0, sizeof(req));
+ 
+       if (add)
+       {
+               req.nh.nlmsg_type = RTM_NEWROUTE;
+               req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE;
+       }
+       else
+               req.nh.nlmsg_type = RTM_DELROUTE;
+       req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt));
+ 
+       req.rt.rtm_family = AF_INET;
+       req.rt.rtm_dst_len = prefixlen;
+       req.rt.rtm_table = RT_TABLE_MAIN;
+       req.rt.rtm_protocol = RTPROT_BOOT; // XXX
+       req.rt.rtm_scope = RT_SCOPE_LINK;
+       req.rt.rtm_type = RTN_UNICAST;
+ 
+       netlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int));
+       n_ip = htonl(ip);
+       netlink_addattr(&req.nh, RTA_DST, &n_ip, sizeof(n_ip));
         if (gw)
-               r.rt_flags |= RTF_GATEWAY;
-       else if (mask == 0xffffffff)
-               r.rt_flags |= RTF_HOST;
+       {
+               n_ip = htonl(gw);
+               netlink_addattr(&req.nh, RTA_GATEWAY, &n_ip, sizeof(n_ip));
+       }
   
-       LOG(1, s, 0, "Route %s %s/%s%s%s\n", add ? "add" : "del",
-           fmtaddr(htonl(ip), 0), fmtaddr(htonl(mask), 1),
+       LOG(1, s, 0, "Route %s %s/%d%s%s\n", add ? "add" : "del",
+           fmtaddr(htonl(ip), 0), prefixlen,
             gw ? " via" : "", gw ? fmtaddr(htonl(gw), 2) : "");
   
-       if (ioctl(ifrfd, add ? SIOCADDRT : SIOCDELRT, (void *) &r) < 0)
-               LOG(0, 0, 0, "routeset() error in ioctl: %s\n", strerror(errno));
+       if (netlink_send(&req.nh) < 0)
+               LOG(0, 0, 0, "routeset() error in sending netlink message: %s\n", strerror(errno));
   
   #ifdef BGP
         if (add)
-               bgp_add_route(htonl(ip), htonl(mask));
+               bgp_add_route(htonl(ip), prefixlen);
         else
-               bgp_del_route(htonl(ip), htonl(mask));
+               bgp_del_route(htonl(ip), prefixlen);
   #endif /* BGP */
   
                 // Add/Remove the IPs to the 'sessionbyip' cache.
@@@ -470,45 -490,59 +492,64 @@@
                 if (!add)       // Are we deleting a route?
                         s = 0;  // Caching the session as '0' is the same as uncaching.
   
-               for (i = ip; (i&mask) == (ip&mask) ; ++i)
+               for (i = ip; i < ip+(1<<(32-prefixlen)) ; ++i)
                         cache_ipmap(i, s);
         }
   }
   
   void route6set(sessionidt s, struct in6_addr ip, int prefixlen, int add)
   {
-       struct in6_rtmsg rt;
+       struct {
+               struct nlmsghdr nh;
+               struct rtmsg rt;
+               char buf[64];
+       } req;
+       int metric;
         char ipv6addr[INET6_ADDRSTRLEN];
   
-       if (ifr6fd < 0)
+       if (!config->ipv6_prefix.s6_addr[0])
         {
                 LOG(0, 0, 0, "Asked to set IPv6 route, but IPv6 not setup.\n");
                 return;
         }
   
-       memset(&rt, 0, sizeof(rt));
+       memset(&req, 0, sizeof(req));
   
-       memcpy(&rt.rtmsg_dst, &ip, sizeof(struct in6_addr));
-       rt.rtmsg_dst_len = prefixlen;
-       rt.rtmsg_metric = 1;
-       rt.rtmsg_flags = RTF_UP;
-       rt.rtmsg_ifindex = tunidx;
+       if (add)
+       {
+               req.nh.nlmsg_type = RTM_NEWROUTE;
+               req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE;
+       }
+       else
+               req.nh.nlmsg_type = RTM_DELROUTE;
+       req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt));
+ 
+       req.rt.rtm_family = AF_INET6;
+       req.rt.rtm_dst_len = prefixlen;
+       req.rt.rtm_table = RT_TABLE_MAIN;
+       req.rt.rtm_protocol = RTPROT_BOOT; // XXX
+       req.rt.rtm_scope = RT_SCOPE_LINK;
+       req.rt.rtm_type = RTN_UNICAST;
+ 
+       netlink_addattr(&req.nh, RTA_OIF, &tunidx, sizeof(int));
+       netlink_addattr(&req.nh, RTA_DST, &ip, sizeof(ip));
+       metric = 1;
+       netlink_addattr(&req.nh, RTA_METRICS, &metric, sizeof(metric));
   
         LOG(1, 0, 0, "Route %s %s/%d\n",
             add ? "add" : "del",
             inet_ntop(AF_INET6, &ip, ipv6addr, INET6_ADDRSTRLEN),
             prefixlen);
   
-       if (ioctl(ifr6fd, add ? SIOCADDRT : SIOCDELRT, (void *) &rt) < 0)
-               LOG(0, 0, 0, "route6set() error in ioctl: %s\n",
-                               strerror(errno));
+       if (netlink_send(&req.nh) < 0)
+               LOG(0, 0, 0, "route6set() error in sending netlink message: %s\n", strerror(errno));
   
- -      // FIXME: need to add BGP routing (RFC2858)
+ +#ifdef BGP
+ +      if (add)
+ +              bgp_add_route6(ip, prefixlen);
+ +      else
+ +              bgp_del_route6(ip, prefixlen);
+ +#endif /* BGP */
   
         if (s)
         {
@@@ -521,21 -555,95 +562,95 @@@
         return;
   }
   
- // defined in linux/ipv6.h, but tricky to include from user-space
- // TODO: move routing to use netlink rather than ioctl
- struct in6_ifreq {
-       struct in6_addr ifr6_addr;
-       __u32 ifr6_prefixlen;
-       unsigned int ifr6_ifindex;
+ //
+ // Set up netlink socket
+ static void initnetlink(void)
+ {
+       struct sockaddr_nl nladdr;
+ 
+       nlfd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+       if (nlfd < 0)
+       {
+               LOG(0, 0, 0, "Can't create netlink socket: %s\n", strerror(errno));
+               exit(1);
+       }
+ 
+       memset(&nladdr, 0, sizeof(nladdr));
+       nladdr.nl_family = AF_NETLINK;
+       nladdr.nl_pid = getpid();
+ 
+       if (bind(nlfd, (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0)
+       {
+               LOG(0, 0, 0, "Can't bind netlink socket: %s\n", strerror(errno));
+               exit(1);
+       }
+ }
+ 
+ static ssize_t netlink_send(struct nlmsghdr *nh)
+ {
+       struct sockaddr_nl nladdr;
+       struct iovec iov;
+       struct msghdr msg;
+ 
+       nh->nlmsg_pid = getpid();
+       nh->nlmsg_seq = ++nlseqnum;
+ 
+       // set kernel address
+       memset(&nladdr, 0, sizeof(nladdr));
+       nladdr.nl_family = AF_NETLINK;
+ 
+       iov = (struct iovec){ (void *)nh, nh->nlmsg_len };
+       msg = (struct msghdr){ (void *)&nladdr, sizeof(nladdr), &iov, 1, NULL, 0, 0 };
+ 
+       return sendmsg(nlfd, &msg, 0);
+ }
+ 
+ static ssize_t netlink_recv(void *buf, ssize_t len)
+ {
+       struct sockaddr_nl nladdr;
+       struct iovec iov;
+       struct msghdr msg;
+ 
+       // set kernel address
+       memset(&nladdr, 0, sizeof(nladdr));
+       nladdr.nl_family = AF_NETLINK;
+ 
+       iov = (struct iovec){ buf, len };
+       msg = (struct msghdr){ (void *)&nladdr, sizeof(nladdr), &iov, 1, NULL, 0, 0 };
+ 
+       return recvmsg(nlfd, &msg, 0);
+ }
+ 
+ /* adapted from iproute2 */
+ static void netlink_addattr(struct nlmsghdr *nh, int type, const void *data, int alen)
+ {
+       int len = RTA_LENGTH(alen);
+       struct rtattr *rta;
+ 
+       rta = (struct rtattr *)(((void *)nh) + NLMSG_ALIGN(nh->nlmsg_len));
+       rta->rta_type = type;
+       rta->rta_len = len;
+       memcpy(RTA_DATA(rta), data, alen);
+       nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(len);
+ }
+ 
+ // messages describing each tun init phase, indexed by netlink sequence number
+ static char *tun_nl_phase_msg[] = {
+       "initialized",
+       "getting tun interface index",
+       "setting tun interface parameters",
+       "setting tun IPv4 address",
+       "setting tun LL IPv6 address",
+       "setting tun global IPv6 address",
   };
   
   //
   // Set up TUN interface
   static void inittun(void)
   {
+       struct ifinfomsg ifinfo;
         struct ifreq ifr;
-       struct in6_ifreq ifr6;
-       struct sockaddr_in sin = {0};
+ 
         memset(&ifr, 0, sizeof(ifr));
         ifr.ifr_flags = IFF_TUN;
   
@@@ -554,75 -662,163 +669,163 @@@
                 LOG(0, 0, 0, "Can't set tun interface: %s\n", strerror(errno));
                 exit(1);
         }
-       assert(strlen(ifr.ifr_name) < sizeof(config->tundevice));
-       strncpy(config->tundevice, ifr.ifr_name, sizeof(config->tundevice) - 1);
-       ifrfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
- 
-       sin.sin_family = AF_INET;
-       sin.sin_addr.s_addr = config->bind_address ? config->bind_address : 0x01010101; // 1.1.1.1
-       memcpy(&ifr.ifr_addr, &sin, sizeof(struct sockaddr));
+       assert(strlen(ifr.ifr_name) < sizeof(config->tundevice) - 1);
+       strncpy(config->tundevice, ifr.ifr_name, sizeof(config->tundevice));
   
-       if (ioctl(ifrfd, SIOCSIFADDR, (void *) &ifr) < 0)
         {
-               LOG(0, 0, 0, "Error setting tun address: %s\n", strerror(errno));
-               exit(1);
-       }
-       /* Bump up the qlen to deal with bursts from the network */
-       ifr.ifr_qlen = 1000;
-       if (ioctl(ifrfd, SIOCSIFTXQLEN, (void *) &ifr) < 0)
-       {
-               LOG(0, 0, 0, "Error setting tun queue length: %s\n", strerror(errno));
-               exit(1);
-       }
-       /* set MTU to modem MRU */
-       ifr.ifr_mtu = MRU;
-       if (ioctl(ifrfd, SIOCSIFMTU, (void *) &ifr) < 0)
-       {
-               LOG(0, 0, 0, "Error setting tun MTU: %s\n", strerror(errno));
-               exit(1);
-       }
-       ifr.ifr_flags = IFF_UP;
-       if (ioctl(ifrfd, SIOCSIFFLAGS, (void *) &ifr) < 0)
-       {
-               LOG(0, 0, 0, "Error setting tun flags: %s\n", strerror(errno));
-               exit(1);
-       }
-       if (ioctl(ifrfd, SIOCGIFINDEX, (void *) &ifr) < 0)
-       {
-               LOG(0, 0, 0, "Error getting tun ifindex: %s\n", strerror(errno));
-               exit(1);
-       }
-       tunidx = ifr.ifr_ifindex;
+               // get the interface index
+               struct {
+                       struct nlmsghdr nh;
+                       struct ifinfomsg ifinfo;
+               } req;
+               char buf[4096];
+               ssize_t len;
+               struct nlmsghdr *resp_nh;
+ 
+               req.nh.nlmsg_type = RTM_GETLINK;
+               req.nh.nlmsg_flags = NLM_F_REQUEST;
+               req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifinfo));
   
-       // Only setup IPv6 on the tun device if we have a configured prefix
-       if (config->ipv6_prefix.s6_addr[0]) {
-               ifr6fd = socket(PF_INET6, SOCK_DGRAM, 0);
+               req.ifinfo.ifi_family = AF_UNSPEC; // as the rtnetlink(7) man page says
   
-               // Link local address is FE80::1
-               memset(&ifr6.ifr6_addr, 0, sizeof(ifr6.ifr6_addr));
-               ifr6.ifr6_addr.s6_addr[0] = 0xFE;
-               ifr6.ifr6_addr.s6_addr[1] = 0x80;
-               ifr6.ifr6_addr.s6_addr[15] = 1;
-               ifr6.ifr6_prefixlen = 64;
-               ifr6.ifr6_ifindex = ifr.ifr_ifindex;
-               if (ioctl(ifr6fd, SIOCSIFADDR, (void *) &ifr6) < 0)
+               netlink_addattr(&req.nh, IFLA_IFNAME, config->tundevice, strlen(config->tundevice)+1);
+ 
+               if(netlink_send(&req.nh) < 0 || (len = netlink_recv(buf, sizeof(buf))) < 0)
                 {
-                       LOG(0, 0, 0, "Error setting tun IPv6 link local address:"
-                               " %s\n", strerror(errno));
+                       LOG(0, 0, 0, "Error getting tun ifindex: %s\n", strerror(errno));
+                       exit(1);
                 }
   
-               // Global address is prefix::1
-               memset(&ifr6.ifr6_addr, 0, sizeof(ifr6.ifr6_addr));
-               ifr6.ifr6_addr = config->ipv6_prefix;
-               ifr6.ifr6_addr.s6_addr[15] = 1;
-               ifr6.ifr6_prefixlen = 64;
-               ifr6.ifr6_ifindex = ifr.ifr_ifindex;
-               if (ioctl(ifr6fd, SIOCSIFADDR, (void *) &ifr6) < 0)
+               resp_nh = (struct nlmsghdr *)buf;
+               if (!NLMSG_OK (resp_nh, len))
                 {
-                       LOG(0, 0, 0, "Error setting tun IPv6 global address: %s\n",
-                               strerror(errno));
+                       LOG(0, 0, 0, "Malformed answer getting tun ifindex %ld\n", len);
+                       exit(1);
+               }
+ 
+               memcpy(&ifinfo, NLMSG_DATA(resp_nh), sizeof(ifinfo));
+               // got index
+               tunidx = ifinfo.ifi_index;
+       }
+       {
+               struct {
+                       // interface setting
+                       struct nlmsghdr nh;
+                       union {
+                               struct ifinfomsg ifinfo;
+                               struct ifaddrmsg ifaddr;
+                       } ifmsg;
+                       char rtdata[32]; // 32 should be enough
+               } req;
+               uint32_t txqlen, mtu;
+               in_addr_t ip;
+ 
+               memset(&req, 0, sizeof(req));
+ 
+               req.nh.nlmsg_type = RTM_SETLINK;
+               req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_MULTI;
+               req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifinfo));
+ 
+               req.ifmsg.ifinfo = ifinfo;
+               req.ifmsg.ifinfo.ifi_flags |= IFF_UP; // set interface up
+               req.ifmsg.ifinfo.ifi_change = IFF_UP; // only change this flag
+ 
+               /* Bump up the qlen to deal with bursts from the network */
+               txqlen = 1000;
+               netlink_addattr(&req.nh, IFLA_TXQLEN, &txqlen, sizeof(txqlen));
+               /* set MTU to modem MRU */
+               mtu = MRU;
+               netlink_addattr(&req.nh, IFLA_MTU, &mtu, sizeof(mtu));
+ 
+               if (netlink_send(&req.nh) < 0)
+                       goto senderror;
+ 
+               memset(&req, 0, sizeof(req));
+ 
+               req.nh.nlmsg_type = RTM_NEWADDR;
+               req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI;
+               req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr));
+ 
+               req.ifmsg.ifaddr.ifa_family = AF_INET;
+               req.ifmsg.ifaddr.ifa_prefixlen = 32;
+               req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_UNIVERSE;
+               req.ifmsg.ifaddr.ifa_index = ifinfo.ifi_index;
+ 
+               if (config->bind_address)
+                       ip = config->bind_address;
+               else
+                       ip = 0x01010101; // 1.1.1.1
+               netlink_addattr(&req.nh, IFA_LOCAL, &ip, sizeof(ip));
+ 
+               if (netlink_send(&req.nh) < 0)
+                       goto senderror;
+ 
+               // Only setup IPv6 on the tun device if we have a configured prefix
+               if (config->ipv6_prefix.s6_addr[0]) {
+                       struct in6_addr ip6;
+ 
+                       memset(&req, 0, sizeof(req));
+ 
+                       req.nh.nlmsg_type = RTM_NEWADDR;
+                       req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI;
+                       req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr));
+ 
+                       req.ifmsg.ifaddr.ifa_family = AF_INET6;
+                       req.ifmsg.ifaddr.ifa_prefixlen = 64;
+                       req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_LINK;
+                       req.ifmsg.ifaddr.ifa_index = ifinfo.ifi_index;
+ 
+                       // Link local address is FE80::1
+                       memset(&ip6, 0, sizeof(ip6));
+                       ip6.s6_addr[0] = 0xFE;
+                       ip6.s6_addr[1] = 0x80;
+                       ip6.s6_addr[15] = 1;
+                       netlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6));
+ 
+                       if (netlink_send(&req.nh) < 0)
+                               goto senderror;
+ 
+                       memset(&req, 0, sizeof(req));
+ 
+                       req.nh.nlmsg_type = RTM_NEWADDR;
+                       req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE | NLM_F_MULTI;
+                       req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.ifmsg.ifaddr));
+ 
+                       req.ifmsg.ifaddr.ifa_family = AF_INET6;
+                       req.ifmsg.ifaddr.ifa_prefixlen = 64;
+                       req.ifmsg.ifaddr.ifa_scope = RT_SCOPE_UNIVERSE;
+                       req.ifmsg.ifaddr.ifa_index = ifinfo.ifi_index;
+ 
+                       // Global address is prefix::1
+                       ip6 = config->ipv6_prefix;
+                       ip6.s6_addr[15] = 1;
+                       netlink_addattr(&req.nh, IFA_LOCAL, &ip6, sizeof(ip6));
+ 
+                       if (netlink_send(&req.nh) < 0)
+                               goto senderror;
                 }
+ 
+               memset(&req, 0, sizeof(req));
+ 
+               req.nh.nlmsg_type = NLMSG_DONE;
+               req.nh.nlmsg_len = NLMSG_LENGTH(0);
+ 
+               if (netlink_send(&req.nh) < 0)
+                       goto senderror;
+ 
+               // if we get an error for seqnum < min_initok_nlseqnum,
+               // we must exit as initialization went wrong
+               if (config->ipv6_prefix.s6_addr[0])
+                       min_initok_nlseqnum = 5 + 1; // idx + if + addr + 2*addr6
+               else
+                       min_initok_nlseqnum = 3 + 1; // idx + if + addr
         }
+ 
+       return;
+ 
+ senderror:
+       LOG(0, 0, 0, "Error while setting up tun device: %s\n", strerror(errno));
+       exit(1);
   }
   
   // set up UDP ports
@@@ -1768,11 -1964,11 +1971,11 @@@ void sessionshutdown(sessionidt s, cha
                 int routed = 0;
                 for (r = 0; r < MAXROUTE && session[s].route[r].ip; r++)
                 {
-                       if ((session[s].ip & session[s].route[r].mask) ==
-                           (session[s].route[r].ip & session[s].route[r].mask))
+                       if ((session[s].ip >> (32-session[s].route[r].prefixlen)) ==
+                           (session[s].route[r].ip >> (32-session[s].route[r].prefixlen)))
                                 routed++;
   
-                       if (del_routes) routeset(s, session[s].route[r].ip, session[s].route[r].mask, 0, 0);
+                       if (del_routes) routeset(s, session[s].route[r].ip, session[s].route[r].prefixlen, 0, 0);
                         session[s].route[r].ip = 0;
                 }
   
@@@ -3404,36 -3600,35 +3607,36 @@@ static int still_busy(void
         static clockt last_talked = 0;
         static clockt start_busy_wait = 0;
   
- -      if (!config->cluster_iam_master)
- -      {
   #ifdef BGP
- -              static time_t stopped_bgp = 0;
- -              if (bgp_configured)
+ +      static time_t stopped_bgp = 0;
+ +      if (bgp_configured)
+ +      {
+ +              if (!stopped_bgp)
                 {
- -                      if (!stopped_bgp)
- -                      {
- -                              LOG(1, 0, 0, "Shutting down in %d seconds, stopping BGP...\n", QUIT_DELAY);
+ +                      LOG(1, 0, 0, "Shutting down in %d seconds, stopping BGP...\n", QUIT_DELAY);
   
- -                              for (i = 0; i < BGP_NUM_PEERS; i++)
- -                                      if (bgp_peers[i].state == Established)
- -                                              bgp_stop(&bgp_peers[i]);
+ +                      for (i = 0; i < BGP_NUM_PEERS; i++)
+ +                              if (bgp_peers[i].state == Established)
+ +                                      bgp_stop(&bgp_peers[i]);
   
- -                              stopped_bgp = time_now;
+ +                      stopped_bgp = time_now;
   
+ +                      if (!config->cluster_iam_master)
+ +                      {
                                 // we don't want to become master
                                 cluster_send_ping(0);
   
                                 return 1;
                         }
- -
- -                      if (time_now < (stopped_bgp + QUIT_DELAY))
- -                              return 1;
                 }
+ +
+ +              if (!config->cluster_iam_master && time_now < (stopped_bgp + QUIT_DELAY))
+ +                      return 1;
+ +      }
   #endif /* BGP */
   
+ +      if (!config->cluster_iam_master)
                 return 0;
- -      }
   
         if (main_quit == QUIT_SHUTDOWN)
         {
@@@ -3499,8 -3694,8 +3702,8 @@@
   # include "fake_epoll.h"
   #endif
   
- // the base set of fds polled: cli, cluster, tun, udp, control, dae
- #define BASE_FDS      6
+ // the base set of fds polled: cli, cluster, tun, udp, control, dae, netlink
+ #define BASE_FDS      7
   
   // additional polled fds
   #ifdef BGP
@@@ -3524,8 -3719,8 +3727,8 @@@ static void mainloop(void
                 exit(1);
         }
   
-       LOG(4, 0, 0, "Beginning of main loop.  clifd=%d, cluster_sockfd=%d, tunfd=%d, udpfd=%d, controlfd=%d, daefd=%d\n",
-               clifd, cluster_sockfd, tunfd, udpfd, controlfd, daefd);
+       LOG(4, 0, 0, "Beginning of main loop.  clifd=%d, cluster_sockfd=%d, tunfd=%d, udpfd=%d, controlfd=%d, daefd=%d, nlfd=%d\n",
+               clifd, cluster_sockfd, tunfd, udpfd, controlfd, daefd, nlfd);
   
         /* setup our fds to poll for input */
         {
@@@ -3561,6 -3756,10 +3764,10 @@@
                 d[i].type = FD_TYPE_DAE;
                 e.data.ptr = &d[i++];
                 epoll_ctl(epollfd, EPOLL_CTL_ADD, daefd, &e);
+ 
+               d[i].type = FD_TYPE_NETLINK;
+               e.data.ptr = &d[i++];
+               epoll_ctl(epollfd, EPOLL_CTL_ADD, nlfd, &e);
         }
   
   #ifdef BGP
@@@ -3574,8 -3773,7 +3781,8 @@@
                 if (config->neighbour[i].name[0])
                         bgp_start(&bgp_peers[i], config->neighbour[i].name,
                                 config->neighbour[i].as, config->neighbour[i].keepalive,
- -                              config->neighbour[i].hold, 0); /* 0 = routing disabled */
+ +                              config->neighbour[i].hold, config->neighbour[i].update_source,
+ +                              0); /* 0 = routing disabled */
         }
   #endif /* BGP */
   
@@@ -3698,6 -3896,32 +3905,32 @@@
                                         break;
   #endif /* BGP */
   
+                               case FD_TYPE_NETLINK:
+                               {
+                                       struct nlmsghdr *nh = (struct nlmsghdr *)buf;
+                                       s = netlink_recv(buf, sizeof(buf));
+                                       if (nh->nlmsg_type == NLMSG_ERROR)
+                                       {
+                                               struct nlmsgerr *errmsg = NLMSG_DATA(nh);
+                                               if (errmsg->error)
+                                               {
+                                                       if (errmsg->msg.nlmsg_seq < min_initok_nlseqnum)
+                                                       {
+                                                               LOG(0, 0, 0, "Got a fatal netlink error (while %s): %s\n", tun_nl_phase_msg[nh->nlmsg_seq], strerror(-errmsg->error));
+                                                               exit(1);
+                                                       }
+                                                       else
+ 
+                                                               LOG(0, 0, 0, "Got a netlink error: %s\n", strerror(-errmsg->error));
+                                               }
+                                               // else it's an ack
+                                       }
+                                       else
+                                               LOG(1, 0, 0, "Got a unknown netlink message: type %d seq %d flags %d\n", nh->nlmsg_type, nh->nlmsg_seq, nh->nlmsg_flags);
+                                       n--;
+                                       break;
+                               }
+ 
                                 default:
                                         LOG(0, 0, 0, "Unexpected fd type returned from epoll_wait: %d\n", d->type);
                                 }
@@@ -3769,11 -3993,6 +4002,11 @@@
                                 more++;
                         }
                 }
+ +#ifdef BGP
+ +              else
+ +                      /* no event received, but timers could still have expired */
+ +                      bgp_process_peers_timers();
+ +#endif /* BGP */
   
                 if (time_changed)
                 {
@@@ -4081,14 -4300,9 +4314,14 @@@ static void initdata(int optdebug, cha
   
         if (!*hostname)
         {
- -              // Grab my hostname unless it's been specified
- -              gethostname(hostname, sizeof(hostname));
- -              stripdomain(hostname);
+ +              if (!*config->hostname)
+ +              {
+ +                      // Grab my hostname unless it's been specified
+ +                      gethostname(hostname, sizeof(hostname));
+ +                      stripdomain(hostname);
+ +              }
+ +              else
+ +                      strcpy(hostname, config->hostname);
         }
   
         _statistics->start_time = _statistics->last_reset = time(NULL);
@@@ -4266,18 -4480,18 +4499,18 @@@ static void fix_address_pool(int sid
   //
   // Add a block of addresses to the IP pool to hand out.
   //
- static void add_to_ip_pool(in_addr_t addr, in_addr_t mask)
+ static void add_to_ip_pool(in_addr_t addr, int prefixlen)
   {
         int i;
-       if (mask == 0)
-               mask = 0xffffffff;      // Host route only.
+       if (prefixlen == 0)
+               prefixlen = 32;         // Host route only.
   
-       addr &= mask;
+       addr &= 0xffffffff << (32 - prefixlen);
   
         if (ip_pool_size >= MAXIPPOOL)  // Pool is full!
                 return ;
   
-       for (i = addr ;(i & mask) == addr; ++i)
+       for (i = addr ; i < addr+(1<<(32-prefixlen)); ++i)
         {
                 if ((i & 0xff) == 0 || (i&0xff) == 255)
                         continue;       // Skip 0 and broadcast addresses.
@@@ -4335,7 -4549,7 +4568,7 @@@ static void initippool(
                 {
                         // It's a range
                         int numbits = 0;
-                       in_addr_t start = 0, mask = 0;
+                       in_addr_t start = 0;
   
                         LOG(2, 0, 0, "Adding IP address range %s\n", buf);
                         *p++ = 0;
@@@ -4345,15 -4559,14 +4578,14 @@@
                                 continue;
                         }
                         start = ntohl(inet_addr(pool));
-                       mask = (in_addr_t) (pow(2, numbits) - 1) << (32 - numbits);
   
                         // Add a static route for this pool
-                       LOG(5, 0, 0, "Adding route for address pool %s/%u\n",
-                               fmtaddr(htonl(start), 0), 32 + mask);
+                       LOG(5, 0, 0, "Adding route for address pool %s/%d\n",
+                               fmtaddr(htonl(start), 0), numbits);
   
-                       routeset(0, start, mask, 0, 1);
+                       routeset(0, start, numbits, 0, 1);
   
-                       add_to_ip_pool(start, mask);
+                       add_to_ip_pool(start, numbits);
                 }
                 else
                 {
@@@ -4549,6 -4762,8 +4781,8 @@@ int main(int argc, char *argv[]
                 }
         }
   
+       initnetlink();
+ 
         /* Set up the cluster communications port. */
         if (cluster_init() < 0)
                 exit(1);
@@@ -5000,11 -5215,11 +5234,11 @@@ int sessionsetup(sessionidt s, tunnelid
                 // Add the route for this session.
                 for (r = 0; r < MAXROUTE && session[s].route[r].ip; r++)
                 {
-                       if ((session[s].ip & session[s].route[r].mask) ==
-                           (session[s].route[r].ip & session[s].route[r].mask))
+                       if ((session[s].ip >> (32-session[s].route[r].prefixlen)) ==
+                           (session[s].route[r].ip >> (32-session[s].route[r].prefixlen)))
                                 routed++;
   
-                       routeset(s, session[s].route[r].ip, session[s].route[r].mask, 0, 1);
+                       routeset(s, session[s].route[r].ip, session[s].route[r].prefixlen, 0, 1);
                 }
   
                 // Static IPs need to be routed if not already
@@@ -5075,7 -5290,7 +5309,7 @@@ int load_session(sessionidt s, session
   
         for (i = 0; !newip && i < MAXROUTE && (session[s].route[i].ip || new->route[i].ip); i++)
                 if (new->route[i].ip != session[s].route[i].ip ||
-                   new->route[i].mask != session[s].route[i].mask)
+                   new->route[i].prefixlen != session[s].route[i].prefixlen)
                         newip++;
   
         // needs update
@@@ -5086,11 -5301,11 +5320,11 @@@
                 // remove old routes...
                 for (i = 0; i < MAXROUTE && session[s].route[i].ip; i++)
                 {
-                       if ((session[s].ip & session[s].route[i].mask) ==
-                           (session[s].route[i].ip & session[s].route[i].mask))
+                       if ((session[s].ip >> (32-session[s].route[i].prefixlen)) ==
+                           (session[s].route[i].ip >> (32-session[s].route[i].prefixlen)))
                                 routed++;
   
-                       routeset(s, session[s].route[i].ip, session[s].route[i].mask, 0, 0);
+                       routeset(s, session[s].route[i].ip, session[s].route[i].prefixlen, 0, 0);
                 }
   
                 // ...ip
@@@ -5109,11 -5324,11 +5343,11 @@@
                 // add new routes...
                 for (i = 0; i < MAXROUTE && new->route[i].ip; i++)
                 {
-                       if ((new->ip & new->route[i].mask) ==
-                           (new->route[i].ip & new->route[i].mask))
+                       if ((new->ip >> (32-new->route[i].prefixlen)) ==
+                           (new->route[i].ip >> (32-new->route[i].prefixlen)))
                                 routed++;
   
-                       routeset(s, new->route[i].ip, new->route[i].mask, 0, 1);
+                       routeset(s, new->route[i].ip, new->route[i].prefixlen, 0, 1);
                 }
   
                 // ...ip
diff --combined l2tpns.h

index 4f7fad9,bab2921..5cba32c
--- 1/l2tpns.h
--- 2/l2tpns.h
+++ b/l2tpns.h
@@@ -38,7 -38,6 +38,7 @@@
   #define PPPoE_MRU     1492            // maximum PPPoE MRU (rfc2516: 1500 less PPPoE header (6) and PPP protocol ID (2))
   #define MAXETHER      (MAXMTU+18)     // max packet we try sending to tun
   #define MAXTEL                96              // telephone number
+ +#define MAXHOSTNAME   256             // hostname
   #define MAXUSER               128             // username
   #define MAXPASS               128             // password
   #define MAXPLUGINS    20              // maximum number of plugins to load
@@@ -234,7 -233,7 +234,7 @@@ struct cli_tunnel_actions 
   typedef struct                        // route
   {
         in_addr_t ip;
-       in_addr_t mask;
+       int prefixlen;
   }
   routet;
   
@@@ -617,6 -616,10 +617,10 @@@ struct Tstat
   #define SET_STAT(x, y)
   #endif
   
+ #ifndef IFNAMSIZ
+ # define IFNAMSIZ 16
+ #endif
+ 
   typedef struct
   {
         int             debug;                          // debugging level
@@@ -632,7 -635,7 +636,7 @@@
         int             reload_config;                  // flag to re-read config (set by cli)
         int             multi_read_count;               // amount of packets to read per fd in processing loop
   
-       char            tundevice[10];                  // tun device name
+       char            tundevice[IFNAMSIZ];            // tun device name
         char            log_filename[128];
   
         char            l2tp_secret[64];                // L2TP shared secret
@@@ -713,8 -716,6 +717,8 @@@
   
         int             cluster_master_min_adv;         // Master advertises routes while the number of up to date
                                                         // slaves is less than this value.
+ +      in_addr_t       cli_bind_address;               // bind address for CLI
+ +      char            hostname[MAXHOSTNAME];  // hostname (overridden by -h on command line)
         // Guest change
         char            guest_user[MAXUSER];            // Guest account username
   
@@@ -726,10 -727,7 +730,10 @@@
                 uint16_t as;
                 int keepalive;
                 int hold;
+ +              struct in_addr update_source;
         } neighbour[BGP_NUM_PEERS];
+ +      in_addr_t       nexthop_address;
+ +      struct in6_addr nexthop6_address;
   #endif
   } configt;
   
@@@ -941,6 -939,7 +945,7 @@@ struct event_data 
                 FD_TYPE_DAE,
                 FD_TYPE_RADIUS,
                 FD_TYPE_BGP,
+               FD_TYPE_NETLINK,
         } type;
         int index; // for RADIUS, BGP
   };
author	Benjamin Cama <benoar@dolka.fr>
	Wed, 17 Aug 2011 15:23:07 +0000 (17:23 +0200)
committer	Benjamin Cama <benoar@dolka.fr>
	Wed, 17 Aug 2011 15:23:07 +0000 (17:23 +0200)
		1	2
bgp.c	patch \|	diff1 \|	diff2 \|	blob \| history
bgp.h	patch \|	diff1 \|	diff2 \|	blob \| history
l2tpns.c	patch \|	diff1 \|	diff2 \|	blob \| history
l2tpns.h	patch \|	diff1 \|	diff2 \|	blob \| history