Include endpoint address in accounting dump files.
author Brendan O'Dea <bod@optus.net>
Sat, 4 Jun 2005 15:42:35 +0000 (15:42 +0000)
committer Brendan O'Dea <bod@optus.net>
Sat, 4 Jun 2005 15:42:35 +0000 (15:42 +0000)
Convert mainloop to use epoll rather than select.

12 files changed:
Changes
Makefile
bgp.c
bgp.h
cli.c
cluster.c
fake_epoll.h [new file with mode: 0644]
l2tpns.c
l2tpns.h
l2tpns.spec
radius.c
util.c

diff --git a/Changes b/Changes
index a7e7740..22b2d2a 100644 (file)
--- a/Changes
+++ b/Changes
@@ -1,9 +1,7 @@
-* Thu Jun 2 2005 Brendan O'Dea <bod@optusnet.com.au> 2.1.0
+* Sun Jun 5 2005 Brendan O'Dea <bod@optusnet.com.au> 2.1.0
 - Add IPv6 support from Jonathan McDowell.
 - Add CHAP support from Jordan Hrycaj.
 - Add interim accounting support from Vladislav Bjelic.
-- Add Acct-Output-Gigawords, Acct-Input-Gigawords attributes to RADIUS
-  accounting packets.
 - Negotiate MRU, default 1458 to avoid fragmentation.
 - Sanity check that cluster_send_session is not called from a child
   process.
 - New config option: allow_duplicate_users which determines whether
   or not to kill older sessions with the same username.
 - Fix byte counters in accounting records.
+- Add Acct-Output-Gigawords, Acct-Input-Gigawords attributes to RADIUS
+  accounting packets.
+- Fix icmp host unreachable to use router address.
+- Include endpoint address in accounting dump files.
+- Convert mainloop to use epoll rather than select.
+- Add note about fragmentation in Docs/manual.html, and a sample
+  iptables rule for MSS clamping.
 - Merge 2.0.22:
   + Show session open time in "show session"/"show user" detailed output.
   + Have slaves with BGP configured drop BGP on receipt of a shutdown
diff --git a/Makefile b/Makefile
index 8ac1656..9cd6ff5 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -37,6 +37,7 @@ TESTS = generateload bounce
 DEFINES += -DSTATISTICS
 DEFINES += -DSTAT_CALLS
 DEFINES += -DRINGBUFFER
+DEFINES += -DHAVE_EPOLL
 
 DEFINES += -DBGP
 OBJS += bgp.o
diff --git a/bgp.c b/bgp.c
index e5af65c..96e2868 100644 (file)
--- a/bgp.c
+++ b/bgp.c
@@ -10,7 +10,7 @@
  *   nor RFC2385 (which requires a kernel patch on 2.4 kernels).
  */
 
-char const *cvs_id_bgp = "$Id: bgp.c,v 1.9 2004-12-16 08:49:52 bodea Exp $";
+char const *cvs_id_bgp = "$Id: bgp.c,v 1.10 2005-06-04 15:42:35 bodea Exp $";
 
 #include <stdlib.h>
 #include <unistd.h>
@@ -75,6 +75,10 @@ int bgp_setup(int as)
 
            return 0;
        }
+
+       peer->edata.type = FD_TYPE_BGP;
+       peer->edata.index = i;
+       peer->events = 0;
     }
 
     if (as < 1)
@@ -270,6 +274,7 @@ static void bgp_clear(struct bgp_peer *peer)
     peer->inbuf->done = 0;
 
     peer->cli_flag = 0;
+    peer->events = 0;
 
     if (peer->state != peer->next_state)
     {
@@ -487,130 +492,160 @@ void bgp_enable_routing(int enable)
     LOG(4, 0, 0, "%s BGP routing\n", enable ? "Enabled" : "Suspended");
 }
 
-/* return a bitmask indicating if the socket should be added to the
-   read set (1) and or write set (2) for select */
-int bgp_select_state(struct bgp_peer *peer)
+#ifdef HAVE_EPOLL
+# include <sys/epoll.h>
+#else
+# include "fake_epoll.h"
+#endif
+
+/* update the epoll event mask registered for each active peer's socket; returns 0 if BGP is not configured, else 1 */
+int bgp_set_poll()
 {
-    int flags = 0;
+    int i;
 
     if (!bgp_configured)
        return 0;
 
-    if (peer->state == Disabled || peer->state == Idle)
-       return 0;
+    for (i = 0; i < BGP_NUM_PEERS; i++)
+    {
+       struct bgp_peer *peer = &bgp_peers[i];
+       int events = 0;
+
+       if (peer->state == Disabled || peer->state == Idle)
+           continue;
+
+       if (peer->inbuf->done < BGP_MAX_PACKET_SIZE)
+           events |= EPOLLIN;
+
+       if (peer->state == Connect ||           /* connection in progress */
+           peer->update_routes ||              /* routing updates */
+           peer->outbuf->packet.header.len)    /* pending output */
+           events |= EPOLLOUT;
 
-    if (peer->inbuf->done < BGP_MAX_PACKET_SIZE)
-       flags |= 1;
+       if (peer->events != events)
+       {
+           struct epoll_event ev;
 
-    if (peer->state == Connect ||              /* connection in progress */
-       peer->update_routes ||                  /* routing updates */
-       peer->outbuf->packet.header.len)        /* pending output */
-       flags |= 2;
+           ev.events = peer->events = events;
+           ev.data.ptr = &peer->edata;
+           epoll_ctl(epollfd, EPOLL_CTL_MOD, peer->sock, &ev);
+       }
+    }
 
-    return flags;
+    return 1;
 }
 
-/* process bgp peer */
-int bgp_process(struct bgp_peer *peer, int readable, int writable)
+/* process bgp events/timers */
+int bgp_process(uint32_t events[])
 {
+    int i;
+
     if (!bgp_configured)
        return 0;
 
-    if (*peer->name && peer->cli_flag == BGP_CLI_RESTART)
-       return bgp_restart(peer);
+    for (i = 0; i < BGP_NUM_PEERS; i++)
+    {
+       struct bgp_peer *peer = &bgp_peers[i];
 
-    if (peer->state == Disabled)
-       return 1;
+       if (*peer->name && peer->cli_flag == BGP_CLI_RESTART)
+       {
+           bgp_restart(peer);
+           continue;
+       }
 
-    if (peer->cli_flag)
-    {
-       switch (peer->cli_flag)
+       if (peer->state == Disabled)
+           continue;
+
+       if (peer->cli_flag)
        {
-       case BGP_CLI_SUSPEND:
-           if (peer->routing)
+           switch (peer->cli_flag)
            {
-               peer->routing = 0;
-               if (peer->state == Established)
-                   peer->update_routes = 1;
-           }
+           case BGP_CLI_SUSPEND:
+               if (peer->routing)
+               {
+                   peer->routing = 0;
+                   if (peer->state == Established)
+                       peer->update_routes = 1;
+               }
 
-           break;
+               break;
 
-       case BGP_CLI_ENABLE:
-           if (!peer->routing)
-           {
-               peer->routing = 1;
-               if (peer->state == Established)
-                   peer->update_routes = 1;
+           case BGP_CLI_ENABLE:
+               if (!peer->routing)
+               {
+                   peer->routing = 1;
+                   if (peer->state == Established)
+                       peer->update_routes = 1;
+               }
+
+               break;
            }
 
-           break;
+           peer->cli_flag = 0;
        }
 
-       peer->cli_flag = 0;
-    }
-
-    /* handle empty/fill of buffers */
-    if (writable)
-    {
-       int r = 1;
-       if (peer->state == Connect)
-           r = bgp_handle_connect(peer);
-       else if (peer->outbuf->packet.header.len)
-           r = bgp_write(peer);
+       /* handle empty/fill of buffers */
+       if (events[i] & EPOLLOUT)
+       {
+           int r = 1;
+           if (peer->state == Connect)
+               r = bgp_handle_connect(peer);
+           else if (peer->outbuf->packet.header.len)
+               r = bgp_write(peer);
 
-       if (!r)
-           return 0;
-    }
+           if (!r)
+               continue;
+       }
 
-    if (readable)
-    {
-       if (!bgp_read(peer))
-           return 0;
-    }
+       if (events[i] & (EPOLLIN|EPOLLHUP))
+       {
+           if (!bgp_read(peer))
+               continue;
+       }
 
-    /* process input buffer contents */
-    while (peer->inbuf->done >= sizeof(peer->inbuf->packet.header)
-       && !peer->outbuf->packet.header.len) /* may need to queue a response */
-    {
-       if (bgp_handle_input(peer) < 0)
-           return 0;
-    }
+       /* process input buffer contents */
+       while (peer->inbuf->done >= sizeof(peer->inbuf->packet.header)
+           && !peer->outbuf->packet.header.len) /* may need to queue a response */
+       {
+           if (bgp_handle_input(peer) < 0)
+               continue;
+       }
 
-    /* process pending updates */
-    if (peer->update_routes
-       && !peer->outbuf->packet.header.len) /* ditto */
-    {
-       if (!bgp_send_update(peer))
-           return 0;
-    }
+       /* process pending updates */
+       if (peer->update_routes
+           && !peer->outbuf->packet.header.len) /* ditto */
+       {
+           if (!bgp_send_update(peer))
+               continue;
+       }
 
-    /* process timers */
-    if (peer->state == Established)
-    {
-       if (time_now > peer->expire_time)
+       /* process timers */
+       if (peer->state == Established)
        {
-           LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
-               peer->name, peer->hold);
+           if (time_now > peer->expire_time)
+           {
+               LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
+                   peer->name, peer->hold);
 
-           bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
-           return 0;
-       }
+               bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
+               continue;
+           }
 
-       if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
-           bgp_send_keepalive(peer);
-    }
-    else if (peer->state == Idle)
-    {
-       if (time_now > peer->retry_time)
-           return bgp_connect(peer);
-    }
-    else if (time_now > peer->state_time + BGP_STATE_TIME)
-    {
-       LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
-           bgp_state_str(peer->state), peer->name);
+           if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
+               bgp_send_keepalive(peer);
+       }
+       else if (peer->state == Idle)
+       {
+           if (time_now > peer->retry_time)
+               bgp_connect(peer);
+       }
+       else if (time_now > peer->state_time + BGP_STATE_TIME)
+       {
+           LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
+               bgp_state_str(peer->state), peer->name);
 
-       return bgp_restart(peer);
+           bgp_restart(peer);
+       }
     }
 
     return 1;
@@ -661,6 +696,7 @@ static int bgp_connect(struct bgp_peer *peer)
 {
     static int bgp_port = 0;
     struct sockaddr_in addr;
+    struct epoll_event ev;
 
     if (!bgp_port)
     {
@@ -683,6 +719,11 @@ static int bgp_connect(struct bgp_peer *peer)
        return 0;
     }
 
+    /* add to poll set */
+    ev.events = peer->events = EPOLLOUT;
+    ev.data.ptr = &peer->edata;
+    epoll_ctl(epollfd, EPOLL_CTL_ADD, peer->sock, &ev);
+
     /* set to non-blocking */
     fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
 
diff --git a/bgp.h b/bgp.h
index 4c14c7c..55b0ee3 100644 (file)
--- a/bgp.h
+++ b/bgp.h
@@ -1,5 +1,5 @@
 /* BGPv4 (RFC1771) */
-/* $Id: bgp.h,v 1.4 2004-12-16 08:49:52 bodea Exp $ */
+/* $Id: bgp.h,v 1.5 2005-06-04 15:42:35 bodea Exp $ */
 
 #ifndef __BGP_H__
 #define __BGP_H__
@@ -173,6 +173,8 @@ struct bgp_peer {
     int cli_flag;                      /* updates requested from CLI */
     char *path_attrs;                  /* path attrs to send in UPDATE message */
     int path_attr_len;                 /* length of path attrs */
+    uint32_t events;                   /* events to poll */
+    struct event_data edata;           /* poll data */
 };
 
 /* bgp_peer.cli_flag */
@@ -194,8 +196,8 @@ int bgp_restart(struct bgp_peer *peer);
 int bgp_add_route(in_addr_t ip, in_addr_t mask);
 int bgp_del_route(in_addr_t ip, in_addr_t mask);
 void bgp_enable_routing(int enable);
-int bgp_select_state(struct bgp_peer *peer);
-int bgp_process(struct bgp_peer *peer, int readable, int writable);
+int bgp_set_poll(void);
+int bgp_process(uint32_t events[]);
 char const *bgp_state_str(enum bgp_state state);
 
 extern char const *cvs_id_bgp;
diff --git a/cli.c b/cli.c
index bf0987f..e6eb2c5 100644 (file)
--- a/cli.c
+++ b/cli.c
@@ -2,7 +2,7 @@
 // vim: sw=8 ts=8
 
 char const *cvs_name = "$Name:  $";
-char const *cvs_id_cli = "$Id: cli.c,v 1.60 2005-06-02 11:32:30 bodea Exp $";
+char const *cvs_id_cli = "$Id: cli.c,v 1.61 2005-06-04 15:42:35 bodea Exp $";
 
 #include <stdio.h>
 #include <stdarg.h>
@@ -49,15 +49,6 @@ extern struct cli_tunnel_actions *cli_tunnel_actions;
 extern tbft *filter_list;
 extern ip_filtert *ip_filters;
 
-static char *debug_levels[] = {
-       "CRIT",
-       "ERROR",
-       "WARN",
-       "INFO",
-       "CALL",
-       "DATA",
-};
-
 struct
 {
        char critical;
@@ -68,15 +59,24 @@ struct
        char data;
 } debug_flags;
 
-static int debug_session;
-static int debug_tunnel;
+#ifdef RINGBUFFER
+
 static int debug_rb_tail;
+static char *debug_levels[] = {
+       "CRIT",
+       "ERROR",
+       "WARN",
+       "INFO",
+       "CALL",
+       "DATA",
+};
+
+#endif
 
 static int cmd_show_session(struct cli_def *cli, char *command, char **argv, int argc);
 static int cmd_show_tunnels(struct cli_def *cli, char *command, char **argv, int argc);
 static int cmd_show_users(struct cli_def *cli, char *command, char **argv, int argc);
 static int cmd_show_radius(struct cli_def *cli, char *command, char **argv, int argc);
-static int cmd_show_counters(struct cli_def *cli, char *command, char **argv, int argc);
 static int cmd_show_version(struct cli_def *cli, char *command, char **argv, int argc);
 static int cmd_show_pool(struct cli_def *cli, char *command, char **argv, int argc);
 static int cmd_show_run(struct cli_def *cli, char *command, char **argv, int argc);
@@ -84,7 +84,6 @@ static int cmd_show_banana(struct cli_def *cli, char *command, char **argv, int
 static int cmd_show_plugins(struct cli_def *cli, char *command, char **argv, int argc);
 static int cmd_show_throttle(struct cli_def *cli, char *command, char **argv, int argc);
 static int cmd_write_memory(struct cli_def *cli, char *command, char **argv, int argc);
-static int cmd_clear_counters(struct cli_def *cli, char *command, char **argv, int argc);
 static int cmd_drop_user(struct cli_def *cli, char *command, char **argv, int argc);
 static int cmd_drop_tunnel(struct cli_def *cli, char *command, char **argv, int argc);
 static int cmd_drop_session(struct cli_def *cli, char *command, char **argv, int argc);
@@ -102,6 +101,11 @@ static int cmd_uptime(struct cli_def *cli, char *command, char **argv, int argc)
 static int regular_stuff(struct cli_def *cli);
 static void parsemac(char *string, char mac[6]);
 
+#ifdef STATISTICS
+static int cmd_show_counters(struct cli_def *cli, char *command, char **argv, int argc);
+static int cmd_clear_counters(struct cli_def *cli, char *command, char **argv, int argc);
+#endif /* STATISTICS */
+
 #ifdef BGP
 #define MODE_CONFIG_BGP 8
 static int cmd_router_bgp(struct cli_def *cli, char *command, char **argv, int argc);
@@ -312,8 +316,6 @@ void cli_do(int sockfd)
                cli->users = 0;
        }
 
-       debug_session = 0;
-       debug_tunnel = 0;
 #ifdef RINGBUFFER
        debug_rb_tail = ringbuffer->tail;
 #endif
@@ -646,6 +648,7 @@ static int cmd_show_users(struct cli_def *cli, char *command, char **argv, int a
        return CLI_OK;
 }
 
+#ifdef STATISTICS
 static int cmd_show_counters(struct cli_def *cli, char *command, char **argv, int argc)
 {
        if (CLI_HELP_REQUESTED)
@@ -697,7 +700,7 @@ static int cmd_show_counters(struct cli_def *cli, char *command, char **argv, in
        cli_print(cli, "%-30s%u", "multi_read_exceeded",        GET_STAT(multi_read_exceeded));
 
 
-#ifdef STATISTICS
+#ifdef STAT_CALLS
        cli_print(cli, "\n%-30s%-10s", "Counter", "Value");
        cli_print(cli, "-----------------------------------------");
        cli_print(cli, "%-30s%u", "call_processtun",            GET_STAT(call_processtun));
@@ -731,7 +734,7 @@ static int cmd_show_counters(struct cli_def *cli, char *command, char **argv, in
        cli_print(cli, "%-30s%u", "call_radiussend",            GET_STAT(call_radiussend));
        cli_print(cli, "%-30s%u", "call_radiusretry",           GET_STAT(call_radiusretry));
        cli_print(cli, "%-30s%u", "call_random_data",           GET_STAT(call_random_data));
-#endif
+#endif /* STAT_CALLS */
 
        {
                time_t l = GET_STAT(last_reset);
@@ -746,6 +749,19 @@ static int cmd_show_counters(struct cli_def *cli, char *command, char **argv, in
        return CLI_OK;
 }
 
+static int cmd_clear_counters(struct cli_def *cli, char *command, char **argv, int argc)
+{
+       if (CLI_HELP_REQUESTED)
+               return CLI_HELP_NO_ARGS;
+
+       memset(_statistics, 0, sizeof(struct Tstats));
+       SET_STAT(last_reset, time(NULL));
+
+       cli_print(cli, "Counters cleared");
+       return CLI_OK;
+}
+#endif /* STATISTICS */
+
 static int cmd_show_version(struct cli_def *cli, char *command, char **argv, int argc)
 {
        int tag = 0;
@@ -1134,18 +1150,6 @@ static int cmd_show_banana(struct cli_def *cli, char *command, char **argv, int
        return CLI_OK;
 }
 
-static int cmd_clear_counters(struct cli_def *cli, char *command, char **argv, int argc)
-{
-       if (CLI_HELP_REQUESTED)
-               return CLI_HELP_NO_ARGS;
-
-       memset(_statistics, 0, sizeof(struct Tstats));
-       SET_STAT(last_reset, time(NULL));
-
-       cli_print(cli, "Counters cleared");
-       return CLI_OK;
-}
-
 static int cmd_drop_user(struct cli_def *cli, char *command, char **argv, int argc)
 {
        int i;
@@ -1896,10 +1900,10 @@ static int cmd_set(struct cli_def *cli, char *command, char **argv, int argc)
 
 int regular_stuff(struct cli_def *cli)
 {
+#ifdef RINGBUFFER
        int out = 0;
        int i;
 
-#ifdef RINGBUFFER
        for (i = debug_rb_tail; i != ringbuffer->tail; i = (i + 1) % RINGBUFFER_SIZE)
        {
                char *m = ringbuffer->buffer[i].message;
diff --git a/cluster.c b/cluster.c
index 8444b78..83d9501 100644 (file)
--- a/cluster.c
+++ b/cluster.c
@@ -1,6 +1,6 @@
 // L2TPNS Clustering Stuff
 
-char const *cvs_id_cluster = "$Id: cluster.c,v 1.40 2005-06-02 11:32:30 bodea Exp $";
+char const *cvs_id_cluster = "$Id: cluster.c,v 1.41 2005-06-04 15:42:35 bodea Exp $";
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -38,7 +38,7 @@ char const *cvs_id_cluster = "$Id: cluster.c,v 1.40 2005-06-02 11:32:30 bodea Ex
  */
 
 // Module variables.
-int cluster_sockfd = 0;                        // The filedescriptor for the cluster communications port.
+extern int cluster_sockfd;             // The filedescriptor for the cluster communications port.
 
 in_addr_t my_address = 0;              // The network address of my ethernet port.
 static int walk_session_number = 0;    // The next session to send when doing the slow table walk.
diff --git a/fake_epoll.h b/fake_epoll.h
new file mode 100644 (file)
index 0000000..b44f846
--- /dev/null
+++ b/fake_epoll.h
@@ -0,0 +1,179 @@
+/* kludge up some limited epoll semantics using select for 2.4 kernels */
+/* $Id: fake_epoll.h,v 1.1 2005-06-04 15:42:35 bodea Exp $ */
+
+#ifndef __FAKE_EPOLL_H__
+#define __FAKE_EPOLL_H__
+
+#define EPOLLIN                0x01
+#define EPOLLOUT       0x04
+#define EPOLLERR       0x08
+#define EPOLLHUP       0x10
+
+#define EPOLL_CTL_ADD  1
+#define EPOLL_CTL_DEL  2
+#define EPOLL_CTL_MOD  3
+
+struct epoll_event {
+    uint32_t events;
+    union epoll_data {
+       void *ptr;
+       int fd;
+       uint32_t u32;
+       uint64_t u64;
+    } data;
+};
+
+int epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);
+
+#ifdef FAKE_EPOLL_IMPLEMENTATION
+
+#include <sys/select.h>
+
+static fd_set _epoll_read_set;
+static fd_set _epoll_write_set;
+static int _epoll_fds;
+static struct epoll_event *_epoll_data[128];
+
+static int epoll_create(int size __attribute__ ((unused)))
+{
+    static int once = 0;
+    if (once++)
+    {
+       errno = ENFILE; /* only support one instance */
+       return -1;
+    }
+
+    FD_ZERO(&_epoll_read_set);
+    FD_ZERO(&_epoll_write_set);
+    _epoll_fds = 0;
+
+    memset(_epoll_data, 0, sizeof(_epoll_data));
+
+    return 1; /* "descriptor" */
+}
+
+int epoll_ctl(int epfd __attribute__ ((unused)), int op, int fd,
+    struct epoll_event *event)
+{
+    if (fd > (sizeof(_epoll_data)/sizeof(*_epoll_data)) - 1)
+    {
+       errno = EINVAL;
+       return -1;
+    }
+
+    switch (op)
+    {
+    case EPOLL_CTL_ADD:
+       if (event->events & EPOLLIN)
+           FD_SET(fd, &_epoll_read_set);
+
+       if (event->events & EPOLLOUT)
+           FD_SET(fd, &_epoll_write_set);
+
+       if (fd >= _epoll_fds)
+           _epoll_fds = fd + 1;
+
+       if (_epoll_data[fd])
+           free(_epoll_data[fd]);
+
+       if (!(_epoll_data[fd] = malloc(sizeof(*_epoll_data))))
+       {
+           errno = ENOMEM;
+           return -1;
+       }
+
+       memcpy(_epoll_data[fd], &event->data, sizeof(*_epoll_data));
+       break;
+
+    case EPOLL_CTL_MOD:
+       if (event->events & EPOLLIN)
+           FD_SET(fd, &_epoll_read_set);
+       else
+           FD_CLR(fd, &_epoll_read_set);
+
+       if (event->events & EPOLLOUT)
+           FD_SET(fd, &_epoll_write_set);
+       else
+           FD_CLR(fd, &_epoll_write_set);
+
+       memcpy(_epoll_data[fd], &event->data, sizeof(*_epoll_data));
+       break;
+
+    case EPOLL_CTL_DEL:
+       FD_CLR(fd, &_epoll_read_set);
+       FD_CLR(fd, &_epoll_write_set);
+
+       free(_epoll_data[fd]);
+       _epoll_data[fd] = 0;
+
+       if (fd == _epoll_fds - 1)
+       {
+           _epoll_fds = 0;
+           while (fd-- > 0)
+           {
+               if (FD_ISSET(fd, &_epoll_read_set) ||
+                   FD_ISSET(fd, &_epoll_write_set))
+               {
+                   _epoll_fds = fd + 1;
+                   break;
+               }
+           }
+       }
+
+       break;
+    }
+
+    return 0;
+}
+
+static int epoll_wait(int epfd __attribute__ ((unused)),
+    struct epoll_event *events, int maxevents, int timout)
+{
+    fd_set r;
+    fd_set w;
+    struct timeval t;
+    struct timeval *tp;
+    int n;
+    int e;
+    int i;
+
+    memcpy(&r, &_epoll_read_set, sizeof(r));
+    memcpy(&w, &_epoll_write_set, sizeof(w));
+
+    if (timout >= 0)
+    {
+       t.tv_sec = 0;
+       t.tv_usec = timout * 1000;
+       tp = &t;
+    }
+    else
+       tp = 0;
+
+    n = select(_epoll_fds, &r, &w, 0, tp);
+    if (n > maxevents)
+       n = maxevents;
+
+    for (i = e = 0; n > 0 && i < _epoll_fds; i++)
+    {
+       if (!_epoll_data[i])
+           continue;
+
+       events[e].events = 0;
+       if (FD_ISSET(i, &r))
+           events[e].events |= EPOLLIN;
+
+       if (FD_ISSET(i, &w))
+           events[e].events |= EPOLLOUT;
+
+       if (events[e].events)
+       {
+           memcpy(&events[e++].data, _epoll_data[i], sizeof(events[0].data));
+           n--;
+       }
+    }
+
+    return e;
+}
+
+#endif /* FAKE_EPOLL_IMPLEMENTATION */
+#endif /* __FAKE_EPOLL_H__ */
diff --git a/l2tpns.c b/l2tpns.c
index 9942cf9..b0b51a2 100644 (file)
--- a/l2tpns.c
+++ b/l2tpns.c
@@ -4,7 +4,7 @@
 // Copyright (c) 2002 FireBrick (Andrews & Arnold Ltd / Watchfront Ltd) - GPL licenced
 // vim: sw=8 ts=8
 
-char const *cvs_id_l2tpns = "$Id: l2tpns.c,v 1.107 2005-06-02 11:32:30 bodea Exp $";
+char const *cvs_id_l2tpns = "$Id: l2tpns.c,v 1.108 2005-06-04 15:42:35 bodea Exp $";
 
 #include <arpa/inet.h>
 #include <assert.h>
@@ -52,7 +52,7 @@ char const *cvs_id_l2tpns = "$Id: l2tpns.c,v 1.107 2005-06-02 11:32:30 bodea Exp
 
 #ifdef BGP
 #include "bgp.h"
-#endif /* BGP */
+#endif
 
 // Globals
 configt *config = NULL;                // all configuration
@@ -64,13 +64,14 @@ int snoopfd = -1;           // UDP file handle for sending out intercept data
 int *radfds = NULL;            // RADIUS requests file handles
 int ifrfd = -1;                        // File descriptor for routing, etc
 int ifr6fd = -1;               // File descriptor for IPv6 routing, etc
-static int rand_fd = -1;       // Random data source
+int rand_fd = -1;              // Random data source
+int cluster_sockfd = -1;       // Intra-cluster communications socket.
+int epollfd = -1;              // event polling
 time_t basetime = 0;           // base clock
 char hostname[1000] = "";      // us.
 static int tunidx;             // ifr_ifindex of tun device
 static int syslog_log = 0;     // are we logging to syslog
 static FILE *log_stream = 0;   // file handle for direct logging (i.e. direct into file, not via syslog).
-extern int cluster_sockfd;     // Intra-cluster communications socket.
 uint32_t last_id = 0;          // Unique ID for radius accounting
 
 struct cli_session_actions *cli_session_actions = NULL;        // Pending session changes requested by CLI
@@ -1012,7 +1013,8 @@ static void processipout(uint8_t * buf, int len)
                if (rate++ < config->icmp_rate) // Only send a max of icmp_rate per second.
                {
                        LOG(4, 0, 0, "IP: Sending ICMP host unreachable to %s\n", fmtaddr(*(in_addr_t *)(buf + 12), 0));
-                       host_unreachable(*(in_addr_t *)(buf + 12), *(uint16_t *)(buf + 4), ip, buf, (len < 64) ? 64 : len);
+                       host_unreachable(*(in_addr_t *)(buf + 12), *(uint16_t *)(buf + 4),
+                               config->bind_address ? config->bind_address : my_address, buf, len);
                }
                return;
        }
@@ -2850,13 +2852,13 @@ static void regular_cleanups(double period)
 static int still_busy(void)
 {
        int i;
-       static time_t stopped_bgp = 0;
        static clockt last_talked = 0;
        static clockt start_busy_wait = 0;
 
        if (!config->cluster_iam_master)
        {
 #ifdef BGP
+               static time_t stopped_bgp = 0;
                if (bgp_configured)
                {
                        if (!stopped_bgp)
@@ -2924,41 +2926,89 @@ static int still_busy(void)
        return 0;
 }
 
-static fd_set readset;
-static int readset_n = 0;
+#ifdef HAVE_EPOLL
+# include <sys/epoll.h>
+#else
+# define FAKE_EPOLL_IMPLEMENTATION /* include the functions */
+# include "fake_epoll.h"
+#endif
+
+// the base set of fds polled: control, cli, udp, tun, cluster
+#define BASE_FDS       5
+
+// additional polled fds
+#ifdef BGP
+# define EXTRA_FDS     BGP_NUM_PEERS
+#else
+# define EXTRA_FDS     0
+#endif
 
 // main loop - gets packets on tun or udp and processes them
 static void mainloop(void)
 {
        int i;
        uint8_t buf[65536];
-       struct timeval to;
        clockt next_cluster_ping = 0;   // send initial ping immediately
+       struct epoll_event events[BASE_FDS + RADIUS_FDS + EXTRA_FDS];
+       int maxevent = sizeof(events)/sizeof(*events);
+
+       if ((epollfd = epoll_create(maxevent)) < 0)
+       {
+               LOG(0, 0, 0, "epoll_create failed: %s\n", strerror(errno));
+               exit(1);
+       }
 
        LOG(4, 0, 0, "Beginning of main loop.  udpfd=%d, tunfd=%d, cluster_sockfd=%d, controlfd=%d\n",
                udpfd, tunfd, cluster_sockfd, controlfd);
 
-       FD_ZERO(&readset);
-       FD_SET(udpfd, &readset);
-       FD_SET(tunfd, &readset);
-       FD_SET(controlfd, &readset);
-       FD_SET(clifd, &readset);
-       if (cluster_sockfd) FD_SET(cluster_sockfd, &readset);
-       readset_n = udpfd;
-       if (tunfd > readset_n)          readset_n = tunfd;
-       if (controlfd > readset_n)      readset_n = controlfd;
-       if (clifd > readset_n)          readset_n = clifd;
-       if (cluster_sockfd > readset_n) readset_n = cluster_sockfd;
-
-       while (!main_quit || still_busy())
+       /* setup our fds to poll for input */
        {
-               fd_set r;
-               int n = readset_n;
+               static struct event_data d[BASE_FDS];
+               struct epoll_event e;
+
+               e.events = EPOLLIN;
+               i = 0;
+
+               d[i].type = FD_TYPE_CONTROL;
+               e.data.ptr = &d[i++];
+               epoll_ctl(epollfd, EPOLL_CTL_ADD, controlfd, &e);
+
+               d[i].type = FD_TYPE_CLI;
+               e.data.ptr = &d[i++];
+               epoll_ctl(epollfd, EPOLL_CTL_ADD, clifd, &e);
+
+               d[i].type = FD_TYPE_UDP;
+               e.data.ptr = &d[i++];
+               epoll_ctl(epollfd, EPOLL_CTL_ADD, udpfd, &e);
+
+               d[i].type = FD_TYPE_TUN;
+               e.data.ptr = &d[i++];
+               epoll_ctl(epollfd, EPOLL_CTL_ADD, tunfd, &e);
+
+               d[i].type = FD_TYPE_CLUSTER;
+               e.data.ptr = &d[i++];
+               epoll_ctl(epollfd, EPOLL_CTL_ADD, cluster_sockfd, &e);
+       }
+
 #ifdef BGP
-               fd_set w;
-               int bgp_set[BGP_NUM_PEERS];
+       signal(SIGPIPE, SIG_IGN);
+       bgp_setup(config->as_number);
+       if (config->bind_address)
+               bgp_add_route(config->bind_address, 0xffffffff);
+
+       for (i = 0; i < BGP_NUM_PEERS; i++)
+       {
+               if (config->neighbour[i].name[0])
+                       bgp_start(&bgp_peers[i], config->neighbour[i].name,
+                               config->neighbour[i].as, config->neighbour[i].keepalive,
+                               config->neighbour[i].hold, 0); /* 0 = routing disabled */
+       }
 #endif /* BGP */
+
+       while (!main_quit || still_busy())
+       {
                int more = 0;
+               int n;
 
                if (config->reload_config)
                {
@@ -2966,35 +3016,11 @@ static void mainloop(void)
                        update_config();
                }
 
-               memcpy(&r, &readset, sizeof(fd_set));
-               to.tv_sec = 0;
-               to.tv_usec = 100000; // 1/10th of a second.
-
 #ifdef BGP
-               FD_ZERO(&w);
-               for (i = 0; i < BGP_NUM_PEERS; i++)
-               {
-                       bgp_set[i] = bgp_select_state(&bgp_peers[i]);
-                       if (bgp_set[i] & 1)
-                       {
-                               FD_SET(bgp_peers[i].sock, &r);
-                               if (bgp_peers[i].sock > n)
-                                       n = bgp_peers[i].sock;
-                       }
-
-                       if (bgp_set[i] & 2)
-                       {
-                               FD_SET(bgp_peers[i].sock, &w);
-                               if (bgp_peers[i].sock > n)
-                                       n = bgp_peers[i].sock;
-                       }
-               }
-
-               n = select(n + 1, &r, &w, 0, &to);
-#else /* BGP */
-               n = select(n + 1, &r, 0, 0, &to);
+               bgp_set_poll();
 #endif /* BGP */
 
+               n = epoll_wait(epollfd, events, maxevent, 100); // timeout 100ms (1/10th sec)
                STAT(select_called);
 
                TIME = now(NULL);
@@ -3008,67 +3034,83 @@ static void mainloop(void)
                        main_quit++;
                        break;
                }
-               else if (n)
+
+               if (n)
                {
                        struct sockaddr_in addr;
                        int alen, c, s;
+                       int udp_ready = 0;
+                       int tun_ready = 0;
+                       int cluster_ready = 0;
                        int udp_pkts = 0;
                        int tun_pkts = 0;
                        int cluster_pkts = 0;
+#ifdef BGP
+                       uint32_t bgp_events[BGP_NUM_PEERS];
+                       memset(bgp_events, 0, sizeof(bgp_events));
+#endif /* BGP */
 
-                       // nsctl commands
-                       if (FD_ISSET(controlfd, &r))
+                       for (c = n, i = 0; i < c; i++)
                        {
-                               alen = sizeof(addr);
-                               processcontrol(buf, recvfrom(controlfd, buf, sizeof(buf), MSG_WAITALL, (void *) &addr, &alen), &addr, alen);
-                               n--;
-                       }
+                               struct event_data *d = events[i].data.ptr;
+                               switch (d->type)
+                               {
+                               case FD_TYPE_CONTROL: // nsctl commands
+                                       alen = sizeof(addr);
+                                       processcontrol(buf, recvfrom(controlfd, buf, sizeof(buf), MSG_WAITALL, (void *) &addr, &alen), &addr, alen);
+                                       n--;
+                                       break;
 
-                       // RADIUS responses
-                       if (config->cluster_iam_master)
-                       {
-                               for (i = 0; i < config->num_radfds; i++)
+                               case FD_TYPE_CLI: // CLI connections
                                {
-                                       if (FD_ISSET(radfds[i], &r))
+                                       int cli;
+                                       
+                                       alen = sizeof(addr);
+                                       if ((cli = accept(clifd, (struct sockaddr *)&addr, &alen)) >= 0)
                                        {
-                                               processrad(buf, recv(radfds[i], buf, sizeof(buf), 0), i);
-                                               n--;
+                                               cli_do(cli);
+                                               close(cli);
                                        }
-                               }
-                       }
+                                       else
+                                               LOG(0, 0, 0, "accept error: %s\n", strerror(errno));
 
-                       // CLI connections
-                       if (FD_ISSET(clifd, &r))
-                       {
-                               int cli;
-                               
-                               alen = sizeof(addr);
-                               if ((cli = accept(clifd, (struct sockaddr *)&addr, &alen)) >= 0)
-                               {
-                                       cli_do(cli);
-                                       close(cli);
+                                       n--;
+                                       break;
                                }
-                               else
-                                       LOG(0, 0, 0, "accept error: %s\n", strerror(errno));
 
-                               n--;
-                       }
+                               // these are handled below, with multiple interleaved reads
+                               case FD_TYPE_UDP:       udp_ready++; break;
+                               case FD_TYPE_TUN:       tun_ready++; break;
+                               case FD_TYPE_CLUSTER:   cluster_ready++; break;
+
+                               case FD_TYPE_RADIUS: // RADIUS response
+                                       s = recv(radfds[d->index], buf, sizeof(buf), 0);
+                                       if (s >= 0 && config->cluster_iam_master)
+                                               processrad(buf, s, d->index);
+
+                                       n--;
+                                       break;
 
 #ifdef BGP
-                       for (i = 0; i < BGP_NUM_PEERS; i++)
-                       {
-                               int isr = bgp_set[i] ? FD_ISSET(bgp_peers[i].sock, &r) : 0;
-                               int isw = bgp_set[i] ? FD_ISSET(bgp_peers[i].sock, &w) : 0;
-                               bgp_process(&bgp_peers[i], isr, isw);
-                               if (isr) n--;
-                               if (isw) n--;
+                               case FD_TYPE_BGP:
+                                       bgp_events[d->index] = events[i].events;
+                                       n--;
+                                       break;
+#endif /* BGP */
+
+                               default:
+                                       LOG(0, 0, 0, "Unexpected fd type returned from epoll_wait: %d\n", d->type);
+                               }
                        }
+
+#ifdef BGP
+                       bgp_process(bgp_events);
 #endif /* BGP */
 
                        for (c = 0; n && c < config->multi_read_count; c++)
                        {
                                // L2TP
-                               if (FD_ISSET(udpfd, &r))
+                               if (udp_ready)
                                {
                                        alen = sizeof(addr);
                                        if ((s = recvfrom(udpfd, buf, sizeof(buf), 0, (void *) &addr, &alen)) > 0)
@@ -3078,13 +3120,13 @@ static void mainloop(void)
                                        }
                                        else
                                        {
-                                               FD_CLR(udpfd, &r);
+                                               udp_ready = 0;
                                                n--;
                                        }
                                }
 
                                // incoming IP
-                               if (FD_ISSET(tunfd, &r))
+                               if (tun_ready)
                                {
                                        if ((s = read(tunfd, buf, sizeof(buf))) > 0)
                                        {
@@ -3093,13 +3135,13 @@ static void mainloop(void)
                                        }
                                        else
                                        {
-                                               FD_CLR(tunfd, &r);
+                                               tun_ready = 0;
                                                n--;
                                        }
                                }
 
                                // cluster
-                               if (FD_ISSET(cluster_sockfd, &r))
+                               if (cluster_ready)
                                {
                                        alen = sizeof(addr);
                                        if ((s = recvfrom(cluster_sockfd, buf, sizeof(buf), MSG_WAITALL, (void *) &addr, &alen)) > 0)
@@ -3109,7 +3151,7 @@ static void mainloop(void)
                                        }
                                        else
                                        {
-                                               FD_CLR(cluster_sockfd, &r);
+                                               cluster_ready = 0;
                                                n--;
                                        }
                                }
@@ -3129,7 +3171,7 @@ static void mainloop(void)
                }
 
                        // Runs on every machine (master and slaves).
-               if (cluster_sockfd && next_cluster_ping <= TIME)
+               if (next_cluster_ping <= TIME)
                {
                        // Check to see which of the cluster is still alive..
 
@@ -3707,10 +3749,12 @@ static int dump_session(FILE **f, sessiont *s)
                LOG(3, 0, 0, "Dumping accounting information to %s\n", filename);
                fprintf(*f, "# dslwatch.pl dump file V1.01\n"
                        "# host: %s\n"
+                       "# endpoint: %s\n"
                        "# time: %ld\n"
                        "# uptime: %ld\n"
                        "# format: username ip qos uptxoctets downrxoctets\n",
                        hostname,
+                       fmtaddr(config->bind_address ? config->bind_address : my_address, 0),
                        now,
                        now - basetime);
        }
@@ -3851,19 +3895,6 @@ int main(int argc, char *argv[])
        if (cluster_init() < 0)
                exit(1);
 
-#ifdef BGP
-       signal(SIGPIPE, SIG_IGN);
-       bgp_setup(config->as_number);
-       bgp_add_route(config->bind_address, 0xffffffff);
-       for (i = 0; i < BGP_NUM_PEERS; i++)
-       {
-               if (config->neighbour[i].name[0])
-                       bgp_start(&bgp_peers[i], config->neighbour[i].name,
-                               config->neighbour[i].as, config->neighbour[i].keepalive,
-                               config->neighbour[i].hold, 0); /* 0 = routing disabled */
-       }
-#endif /* BGP */
-
        inittun();
        LOG(1, 0, 0, "Set up on interface %s\n", config->tundevice);
 
@@ -4094,8 +4125,6 @@ static void update_config()
        if (!config->numradiusservers)
                LOG(0, 0, 0, "No RADIUS servers defined!\n");
 
-       config->num_radfds = 1 << RADIUS_SHIFT;
-
        // parse radius_authtypes_s
        config->radius_authtypes = config->radius_authprefer = 0;
        p = config->radius_authtypes_s;
@@ -4853,6 +4882,9 @@ static tunnelidt new_tunnel()
 void become_master(void)
 {
        int s, i;
+       static struct event_data d[RADIUS_FDS];
+       struct epoll_event e;
+
        run_plugins(PLUGIN_BECOME_MASTER, NULL);
 
        // running a bunch of iptables commands is slow and can cause
@@ -4871,11 +4903,14 @@ void become_master(void)
        }
 
        // add radius fds
-       for (i = 0; i < config->num_radfds; i++)
+       e.events = EPOLLIN;
+       for (i = 0; i < RADIUS_FDS; i++)
        {
-               FD_SET(radfds[i], &readset);
-               if (radfds[i] > readset_n)
-                       readset_n = radfds[i];
+               d[i].type = FD_TYPE_RADIUS;
+               d[i].index = i;
+               e.data.ptr = &d[i];
+
+               epoll_ctl(epollfd, EPOLL_CTL_ADD, radfds[i], &e);
        }
 }
 
index 040861c..6d820b5 100644 (file)
--- a/l2tpns.h
+++ b/l2tpns.h
@@ -1,5 +1,5 @@
 // L2TPNS Global Stuff
-// $Id: l2tpns.h,v 1.75 2005-06-02 11:32:31 bodea Exp $
+// $Id: l2tpns.h,v 1.76 2005-06-04 15:42:36 bodea Exp $
 
 #ifndef __L2TPNS_H__
 #define __L2TPNS_H__
@@ -23,6 +23,7 @@
 #define MAXTBFS                6000            // Maximum token bucket filters. Might need up to 2 * session.
 
 #define RADIUS_SHIFT   6
+#define        RADIUS_FDS      (1 << RADIUS_SHIFT)
 #define RADIUS_MASK    ((1 << RADIUS_SHIFT) - 1)
 #define MAXRADIUS      (1 << (8 + RADIUS_SHIFT))
 
@@ -455,7 +456,6 @@ typedef struct
        in_addr_t       radiusserver[MAXRADSERVER];     // radius servers
        uint16_t        radiusport[MAXRADSERVER];       // radius base ports
        uint8_t         numradiusservers;               // radius server count
-       short           num_radfds;                     // Number of radius filehandles allocated
 
        char            radius_authtypes_s[32];         // list of valid authentication types (chap, pap) in order of preference
        int             radius_authtypes;
@@ -692,7 +692,20 @@ extern struct Tstats *_statistics;
 extern in_addr_t my_address;
 extern int tun_write(uint8_t *data, int size);
 extern int clifd;
-
+extern int epollfd;
+
+struct event_data {
+       enum {
+               FD_TYPE_CONTROL,
+               FD_TYPE_CLI,
+               FD_TYPE_UDP,
+               FD_TYPE_TUN,
+               FD_TYPE_CLUSTER,
+               FD_TYPE_RADIUS,
+               FD_TYPE_BGP,
+       } type;
+       int index; // for RADIUS, BGP
+};
 
 #define TIME (config->current_time)
 
index 2503604..4265858 100644 (file)
@@ -43,5 +43,5 @@ rm -rf %{buildroot}
 %attr(644,root,root) /usr/share/man/man[58]/*
 
 %changelog
-* Thu Jun 2 2005 Brendan O'Dea <bod@optusnet.com.au> 2.1.0-1
+* Sun Jun 5 2005 Brendan O'Dea <bod@optusnet.com.au> 2.1.0-1
 - 2.1.0 release, see /usr/share/doc/l2tpns-2.1.0/Changes
index 0569c20..f0500b8 100644 (file)
--- a/radius.c
+++ b/radius.c
@@ -1,6 +1,6 @@
 // L2TPNS Radius Stuff
 
-char const *cvs_id_radius = "$Id: radius.c,v 1.32 2005-06-02 11:32:32 bodea Exp $";
+char const *cvs_id_radius = "$Id: radius.c,v 1.33 2005-06-04 15:42:36 bodea Exp $";
 
 #include <time.h>
 #include <stdio.h>
@@ -29,9 +29,9 @@ extern ip_filtert *ip_filters;
 void initrad(void)
 {
        int i;
-       LOG(3, 0, 0, "Creating %d sockets for RADIUS queries\n", config->num_radfds);
-       radfds = calloc(sizeof(int), config->num_radfds);
-       for (i = 0; i < config->num_radfds; i++)
+       LOG(3, 0, 0, "Creating %d sockets for RADIUS queries\n", RADIUS_FDS);
+       radfds = calloc(sizeof(int), RADIUS_FDS);
+       for (i = 0; i < RADIUS_FDS; i++)
        {
                int flags;
                radfds[i] = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
diff --git a/util.c b/util.c
index 205fe03..0ba92ec 100644 (file)
--- a/util.c
+++ b/util.c
@@ -1,6 +1,6 @@
 /* Misc util functions */
 
-char const *cvs_id_util = "$Id: util.c,v 1.10 2005-01-25 04:19:07 bodea Exp $";
+char const *cvs_id_util = "$Id: util.c,v 1.11 2005-06-04 15:42:36 bodea Exp $";
 
 #include <unistd.h>
 #include <errno.h>
@@ -40,7 +40,7 @@ void *shared_malloc(unsigned int size)
 }
 
 extern int forked;
-extern int udpfd, controlfd, tunfd, snoopfd, ifrfd, ifr6fd, cluster_sockfd;
+extern int udpfd, controlfd, tunfd, snoopfd, ifrfd, ifr6fd, rand_fd, cluster_sockfd;
 extern int *radfds;
 
 pid_t fork_and_close()
@@ -65,7 +65,7 @@ pid_t fork_and_close()
 
        signal(SIGPIPE, SIG_DFL);
        signal(SIGCHLD, SIG_DFL);
-       signal(SIGHUP, SIG_DFL);
+       signal(SIGHUP,  SIG_DFL);
        signal(SIGUSR1, SIG_DFL);
        signal(SIGQUIT, SIG_DFL);
        signal(SIGKILL, SIG_DFL);
@@ -79,10 +79,12 @@ pid_t fork_and_close()
        if (snoopfd != -1)        close(snoopfd);
        if (ifrfd != -1)          close(ifrfd);
        if (ifr6fd != -1)         close(ifr6fd);
+       if (rand_fd != -1)        close(rand_fd);
        if (cluster_sockfd != -1) close(cluster_sockfd);
        if (clifd != -1)          close(clifd);
+       if (epollfd != -1)        close(epollfd);
 
-       for (i = 0; radfds && i < config->num_radfds; i++)
+       for (i = 0; radfds && i < RADIUS_FDS; i++)
                close(radfds[i]);
 #ifdef BGP
        for (i = 0; i < BGP_NUM_PEERS; i++)