more changes to regular_cleanups(), process a slice of each of the tunnel,
[l2tpns.git] / l2tpns.c
index 341cf74..eb95d57 100644 (file)
--- a/l2tpns.c
+++ b/l2tpns.c
@@ -1,10 +1,10 @@
 // L2TP Network Server
 // Adrian Kennard 2002
-// Copyright (c) 2003, 2004 Optus Internet Engineering
+// Copyright (c) 2003, 2004, 2005 Optus Internet Engineering
 // Copyright (c) 2002 FireBrick (Andrews & Arnold Ltd / Watchfront Ltd) - GPL licenced
 // vim: sw=8 ts=8
 
-char const *cvs_id_l2tpns = "$Id: l2tpns.c,v 1.72 2004/12/16 23:40:31 bodea Exp $";
+char const *cvs_id_l2tpns = "$Id: l2tpns.c,v 1.73.2.12 2005/05/30 06:35:19 bodea Exp $";
 
 #include <arpa/inet.h>
 #include <assert.h>
@@ -83,7 +83,7 @@ uint32_t eth_tx = 0;
 static uint32_t ip_pool_size = 1;      // Size of the pool of addresses used for dynamic address allocation.
 time_t time_now = 0;                   // Current time in seconds since epoch.
 static char time_now_string[64] = {0}; // Current time as a string.
-static char main_quit = 0;             // True if we're in the process of exiting.
+char main_quit = 0;                    // True if we're in the process of exiting.
 linked_list *loaded_plugins;
 linked_list *plugins[MAX_PLUGIN_TYPES];
 
@@ -112,15 +112,16 @@ config_descriptt config_values[] = {
        CONFIG("accounting_dir", accounting_dir, STRING),
        CONFIG("setuid", target_uid, INT),
        CONFIG("dump_speed", dump_speed, BOOL),
-       CONFIG("cleanup_interval", cleanup_interval, INT),
        CONFIG("multi_read_count", multi_read_count, INT),
        CONFIG("scheduler_fifo", scheduler_fifo, BOOL),
        CONFIG("lock_pages", lock_pages, BOOL),
        CONFIG("icmp_rate", icmp_rate, INT),
+       CONFIG("packet_limit", max_packets, INT),
        CONFIG("cluster_address", cluster_address, IP),
        CONFIG("cluster_interface", cluster_interface, STRING),
        CONFIG("cluster_hb_interval", cluster_hb_interval, INT),
        CONFIG("cluster_hb_timeout", cluster_hb_timeout, INT),
+       CONFIG("cluster_master_min_adv", cluster_master_min_adv, INT),
        { NULL, 0, 0, 0 },
 };
 
@@ -147,7 +148,7 @@ static sessionidt shut_acct_n = 0;
 
 tunnelt *tunnel = NULL;                        // Array of tunnel structures.
 sessiont *session = NULL;              // Array of session structures.
-sessioncountt *sess_count = NULL;      // Array of partial per-session traffic counters.
+sessionlocalt *sess_local = NULL;      // Array of local per-session counters.
 radiust *radius = NULL;                        // Array of radius structures.
 ippoolt *ip_address_pool = NULL;       // Array of dynamic IP addresses.
 ip_filtert *ip_filters = NULL; // Array of named filters.
@@ -179,6 +180,9 @@ static void processcontrol(uint8_t *buf, int len, struct sockaddr_in *addr, int
 static tunnelidt new_tunnel(void);
 static int unhide_avp(uint8_t *avp, tunnelidt t, sessionidt s, uint16_t length);
 
+// on slaves, allow BGP to withdraw cleanly before exiting
+#define QUIT_DELAY     5
+
 // return internal time (10ths since process startup)
 static clockt now(void)
 {
@@ -478,7 +482,7 @@ sessionidt sessionbyip(in_addr_t ip)
        int s = lookup_ipmap(ip);
        CSTAT(call_sessionbyip);
 
-       if (s > 0 && s < MAXSESSION && session[s].tunnel)
+       if (s > 0 && s < MAXSESSION && session[s].opened)
                return (sessionidt) s;
 
        return 0;
@@ -578,8 +582,11 @@ sessionidt sessionbyuser(char *username)
        int s;
        CSTAT(call_sessionbyuser);
 
-       for (s = 1; s < MAXSESSION ; ++s)
+       for (s = 1; s <= config->cluster_highest_sessionid ; ++s)
        {
+               if (!session[s].opened)
+                       continue;
+
                if (session[s].walled_garden)
                        continue;               // Skip walled garden users.
 
@@ -621,17 +628,16 @@ void send_garp(in_addr_t ip)
        sendarp(ifr.ifr_ifindex, mac, ip);
 }
 
-// Find session by username, 0 for not found
 static sessiont *sessiontbysessionidt(sessionidt s)
 {
-       if (!s || s > MAXSESSION) return NULL;
+       if (!s || s >= MAXSESSION) return NULL;
        return &session[s];
 }
 
 static sessionidt sessionidtbysessiont(sessiont *s)
 {
        sessionidt val = s-session;
-       if (s < session || val > MAXSESSION) return 0;
+       if (s < session || val >= MAXSESSION) return 0;
        return val;
 }
 
@@ -711,7 +717,7 @@ static void processipout(uint8_t * buf, int len)
        tunnelidt t;
        in_addr_t ip;
 
-       char * data = buf;      // Keep a copy of the originals.
+       char *data = buf;       // Keep a copy of the originals.
        int size = len;
 
        uint8_t b[MAXETHER + 20];
@@ -721,13 +727,13 @@ static void processipout(uint8_t * buf, int len)
        if (len < MIN_IP_SIZE)
        {
                LOG(1, 0, 0, "Short IP, %d bytes\n", len);
-               STAT(tunnel_tx_errors);
+               STAT(tun_rx_errors);
                return;
        }
        if (len >= MAXETHER)
        {
                LOG(1, 0, 0, "Oversize IP packet %d bytes\n", len);
-               STAT(tunnel_tx_errors);
+               STAT(tun_rx_errors);
                return;
        }
 
@@ -765,6 +771,45 @@ static void processipout(uint8_t * buf, int len)
        t = session[s].tunnel;
        sp = &session[s];
 
+       // DoS prevention: enforce a maximum number of packets per 0.1s for a session
+       if (config->max_packets > 0)
+       {
+               if (sess_local[s].last_packet_out == TIME)
+               {
+                       int max = config->max_packets;
+
+                       // All packets for throttled sessions are handled by the
+                       // master, so further limit by using the throttle rate.
+                       // A bit of a kludge, since throttle rate is in kbps,
+                       // but should still be generous given our average DSL
+                       // packet size is 200 bytes: a limit of 28kbps equates
+                       // to around 180 packets per second.
+                       if (!config->cluster_iam_master && sp->throttle_out && sp->throttle_out < max)
+                               max = sp->throttle_out;
+
+                       if (++sess_local[s].packets_out > max)
+                       {
+                               sess_local[s].packets_dropped++;
+                               return;
+                       }
+               }
+               else
+               {
+                       if (sess_local[s].packets_dropped)
+                       {
+                               INC_STAT(tun_rx_dropped, sess_local[s].packets_dropped);
+                               LOG(3, s, t, "Dropped %u/%u packets to %s for %suser %s\n",
+                                       sess_local[s].packets_dropped, sess_local[s].packets_out,
+                                       fmtaddr(ip, 0), sp->throttle_out ? "throttled " : "",
+                                       sp->user);
+                       }
+
+                       sess_local[s].last_packet_out = TIME;
+                       sess_local[s].packets_out = 1;
+                       sess_local[s].packets_dropped = 0;
+               }
+       }
+
        // run access-list if any
        if (session[s].filter_out && !ip_filter(buf, len, session[s].filter_out - 1))
                return;
@@ -802,7 +847,7 @@ static void processipout(uint8_t * buf, int len)
        sp->total_cout += len; // byte count
        sp->pout++;
        udp_tx += len;
-       sess_count[s].cout += len;      // To send to master..
+       sess_local[s].cout += len;      // To send to master..
 }
 
 //
@@ -852,7 +897,7 @@ static void send_ipout(sessionidt s, uint8_t *buf, int len)
        sp->total_cout += len; // byte count
        sp->pout++;
        udp_tx += len;
-       sess_count[s].cout += len;      // To send to master..
+       sess_local[s].cout += len;      // To send to master..
 }
 
 // add an AVP (16 bit)
@@ -972,7 +1017,7 @@ static void controladd(controlt * c, tunnelidt t, sessionidt s)
 //
 void throttle_session(sessionidt s, int rate_in, int rate_out)
 {
-       if (!session[s].tunnel)
+       if (!session[s].opened)
                return; // No-one home.
 
        if (!*session[s].user)
@@ -1010,7 +1055,7 @@ void throttle_session(sessionidt s, int rate_in, int rate_out)
 // add/remove filters from session (-1 = no change)
 void filter_session(sessionidt s, int filter_in, int filter_out)
 {
-       if (!session[s].tunnel)
+       if (!session[s].opened)
                return; // No-one home.
 
        if (!*session[s].user)
@@ -1053,9 +1098,9 @@ void sessionshutdown(sessionidt s, char *reason)
 
        CSTAT(call_sessionshutdown);
 
-       if (!session[s].tunnel)
+       if (!session[s].opened)
        {
-               LOG(3, s, session[s].tunnel, "Called sessionshutdown on a session with no tunnel.\n");
+               LOG(3, s, session[s].tunnel, "Called sessionshutdown on an unopened session.\n");
                return;                   // not a live session
        }
 
@@ -1066,7 +1111,7 @@ void sessionshutdown(sessionidt s, char *reason)
                run_plugins(PLUGIN_KILL_SESSION, &data);
        }
 
-       if (session[s].opened && !walled_garden && !session[s].die)
+       if (session[s].ip && !walled_garden && !session[s].die)
        {
                // RADIUS Stop message
                uint16_t r = session[s].radius;
@@ -1075,7 +1120,6 @@ void sessionshutdown(sessionidt s, char *reason)
                        if (!(r = radiusnew(s)))
                        {
                                LOG(1, s, session[s].tunnel, "No free RADIUS sessions for Stop message\n");
-                               STAT(radius_overflow);
                        }
                        else
                        {
@@ -1127,7 +1171,7 @@ void sessionshutdown(sessionidt s, char *reason)
        }
 
        if (!session[s].die)
-               session[s].die = now() + 150; // Clean up in 15 seconds
+               session[s].die = TIME + 150; // Clean up in 15 seconds
 
        // update filter refcounts
        if (session[s].filter_in) ip_filters[session[s].filter_in - 1].used--;
@@ -1147,6 +1191,12 @@ void sendipcp(tunnelidt t, sessionidt s)
        if (!r)
                r = radiusnew(s);
 
+       if (!r)
+       {
+               sessionshutdown(s, "No free RADIUS sessions for IPCP");
+               return;
+       }
+
        if (radius[r].state != RADIUSIPCP)
        {
                radius[r].state = RADIUSIPCP;
@@ -1165,7 +1215,7 @@ void sendipcp(tunnelidt t, sessionidt s)
        if (!q) return;
 
        *q = ConfigReq;
-       q[1] = r << RADIUS_SHIFT;                    // ID, dont care, we only send one type of request
+       q[1] = r >> RADIUS_SHIFT;                    // ID, dont care, we only send one type of request
        *(uint16_t *) (q + 2) = htons(10);
        q[4] = 3;
        q[5] = 6;
@@ -1177,24 +1227,39 @@ void sendipcp(tunnelidt t, sessionidt s)
        session[s].flags &= ~SF_IPCP_ACKED;     // Clear flag.
 }
 
+static void sessionclear(sessionidt s)
+{
+       memset(&session[s], 0, sizeof(session[s]));
+       memset(&sess_local[s], 0, sizeof(sess_local[s]));
+       memset(&cli_session_actions[s], 0, sizeof(cli_session_actions[s]));
+
+       session[s].tunnel = T_FREE;     // Mark it as free.
+       session[s].next = sessionfree;
+       sessionfree = s;
+}
+
 // kill a session now
-static void sessionkill(sessionidt s, char *reason)
+void sessionkill(sessionidt s, char *reason)
 {
 
        CSTAT(call_sessionkill);
 
-       session[s].die = now();
+       if (!session[s].opened) // not alive
+               return;
+
+       if (session[s].next)
+       {
+               LOG(0, s, session[s].tunnel, "Tried to kill a session with next pointer set (%d)\n", session[s].next);
+               return;
+       }
+
+       session[s].die = TIME;
        sessionshutdown(s, reason);  // close radius/routes, etc.
        if (session[s].radius)
                radiusclear(session[s].radius, s); // cant send clean accounting data, session is killed
 
        LOG(2, s, session[s].tunnel, "Kill session %d (%s): %s\n", s, session[s].user, reason);
-
-       memset(&session[s], 0, sizeof(session[s]));
-       session[s].tunnel = T_FREE;     // Mark it as free.
-       session[s].next = sessionfree;
-       sessionfree = s;
-       cli_session_actions[s].action = 0;
+       sessionclear(s);
        cluster_send_session(s);
 }
 
@@ -1225,7 +1290,7 @@ static void tunnelkill(tunnelidt t, char *reason)
                controlfree = c;
        }
        // kill sessions
-       for (s = 1; s < MAXSESSION; s++)
+       for (s = 1; s <= config->cluster_highest_sessionid ; ++s)
                if (session[s].tunnel == t)
                        sessionkill(s, reason);
 
@@ -1252,12 +1317,12 @@ static void tunnelshutdown(tunnelidt t, char *reason)
        LOG(1, 0, t, "Shutting down tunnel %d (%s)\n", t, reason);
 
        // close session
-       for (s = 1; s < MAXSESSION; s++)
+       for (s = 1; s <= config->cluster_highest_sessionid ; ++s)
                if (session[s].tunnel == t)
                        sessionshutdown(s, reason);
 
        tunnel[t].state = TUNNELDIE;
-       tunnel[t].die = now() + 700; // Clean up in 70 seconds
+       tunnel[t].die = TIME + 700; // Clean up in 70 seconds
        cluster_send_tunnel(t);
        // TBA - should we wait for sessions to stop?
        {                            // Send StopCCN
@@ -1796,7 +1861,8 @@ void processudp(uint8_t * buf, int len, struct sockaddr_in *addr)
                                        if (!sessionfree)
                                        {
                                                STAT(session_overflow);
-                                               tunnelshutdown(t, "No free sessions");
+                                               LOG(1, 0, t, "No free sessions\n");
+                                               return;
                                        }
                                        else
                                        {
@@ -1814,13 +1880,13 @@ void processudp(uint8_t * buf, int len, struct sockaddr_in *addr)
                                                if (!(r = radiusnew(s)))
                                                {
                                                        LOG(1, s, t, "No free RADIUS sessions for ICRQ\n");
-                                                       sessionkill(s, "no free RADIUS sesions");
+                                                       sessionclear(s);
                                                        return;
                                                }
 
                                                c = controlnew(11); // sending ICRP
                                                session[s].id = sessionid++;
-                                               session[s].opened = time(NULL);
+                                               session[s].opened = time_now;
                                                session[s].tunnel = t;
                                                session[s].far = asession;
                                                session[s].last_packet = time_now;
@@ -1903,7 +1969,7 @@ void processudp(uint8_t * buf, int len, struct sockaddr_in *addr)
                        l -= 2;
                }
 
-               if (s && !session[s].tunnel)    // Is something wrong??
+               if (s && !session[s].opened)    // Is something wrong??
                {
                        if (!config->cluster_iam_master)
                        {
@@ -1912,10 +1978,7 @@ void processudp(uint8_t * buf, int len, struct sockaddr_in *addr)
                                return;
                        }
 
-
-                       LOG(1, s, t, "UDP packet contains session %d but no session[%d].tunnel "
-                                    "exists (LAC said tunnel = %d).  Dropping packet.\n", s, s, t);
-
+                       LOG(1, s, t, "UDP packet contains session which is not opened.  Dropping packet.\n");
                        STAT(tunnel_rx_errors);
                        return;
                }
@@ -1998,42 +2061,64 @@ static void processtun(uint8_t * buf, int len)
        // Else discard.
 }
 
-//
-// Maximum number of actions to complete.
-// This is to avoid sending out too many packets
-// at once.
-#define MAX_ACTIONS 500
-
-static int regular_cleanups(void)
+// Handle retries, timeouts.  Runs every 1/10th sec, want to ensure
+// that we look at the whole of the tunnel, radius and session tables
+// every second
+static void regular_cleanups(int period)
 {
-       static sessionidt s = 0;        // Next session to check for actions on.
-       tunnelidt t;
-       int count=0,i;
-       uint16_t r;
-       static clockt next_acct = 0;
-       static clockt next_shut_acct = 0;
+       // Next tunnel, radius and session to check for actions on.
+       static tunnelidt t = 0;
+       static int r = 0;
+       static sessionidt s = 0;
+
+       int t_actions = 0;
+       int r_actions = 0;
+       int s_actions = 0;
+
+       int t_slice;
+       int r_slice;
+       int s_slice;
+
+       int i;
        int a;
 
-       LOG(3, 0, 0, "Begin regular cleanup\n");
+       // paranoia
+       if (period < 1) period = 1;
+
+       // divide up tables into period * 1/10th sec slices
+       t_slice = config->cluster_highest_tunnelid  / 10.0 * period + 0.5;
+       r_slice = (MAXRADIUS - 1)                   / 10.0 * period + 0.5;
+       s_slice = config->cluster_highest_sessionid / 10.0 * period + 0.5;
 
-       for (r = 1; r < MAXRADIUS; r++)
-       {
-               if (!radius[r].state)
-                       continue;
-               if (radius[r].retry)
-               {
-                       if (radius[r].retry <= TIME)
-                               radiusretry(r);
-               } else
-                       radius[r].retry = backoff(radius[r].try+1);     // Is this really needed? --mo
-       }
-       for (t = 1; t <= config->cluster_highest_tunnelid; t++)
+       if (t_slice < 1)
+           t_slice = 1;
+       else if (t_slice > config->cluster_highest_tunnelid)
+           t_slice = config->cluster_highest_tunnelid;
+
+       if (r_slice < 1)
+           r_slice = 1;
+       else if (r_slice > (MAXRADIUS - 1))
+           r_slice = MAXRADIUS - 1;
+
+       if (s_slice < 1)
+           s_slice = 1;
+       else if (s_slice > config->cluster_highest_sessionid)
+           s_slice = config->cluster_highest_sessionid;
+
+       LOG(4, 0, 0, "Begin regular cleanup (last %d/10s ago)\n", period);
+
+       for (i = 0; i < t_slice; i++)
        {
+               t++;
+               if (t > config->cluster_highest_tunnelid)
+                       t = 1;
+
                // check for expired tunnels
                if (tunnel[t].die && tunnel[t].die <= TIME)
                {
                        STAT(tunnel_timeout);
                        tunnelkill(t, "Expired");
+                       t_actions++;
                        continue;
                }
                // check for message resend
@@ -2053,6 +2138,8 @@ static int regular_cleanups(void)
                                                tunnelsend(c->buf, c->length, t);
                                                c = c->next;
                                        }
+
+                               t_actions++;
                        }
                }
                // Send hello
@@ -2061,6 +2148,7 @@ static int regular_cleanups(void)
                        controlt *c = controlnew(6); // sending HELLO
                        controladd(c, t, 0); // send the message
                        LOG(3, 0, t, "Sending HELLO message\n");
+                       t_actions++;
                }
 
                // Check for tunnel changes requested from the CLI
@@ -2071,19 +2159,34 @@ static int regular_cleanups(void)
                        {
                                LOG(2, 0, t, "Dropping tunnel by CLI\n");
                                tunnelshutdown(t, "Requested by administrator");
+                               t_actions++;
                        }
                }
+       }
 
+       for (i = 0; i < r_slice; i++)
+       {
+               r++;
+               if (r >= MAXRADIUS)
+                       r = 1;
+
+               if (!radius[r].state)
+                       continue;
+
+               if (radius[r].retry <= TIME)
+               {
+                       radiusretry(r);
+                       r_actions++;
+               }
        }
 
-       count = 0;
-       for (i = 1; i <= config->cluster_highest_sessionid; i++)
+       for (i = 0; i < s_slice; i++)
        {
                s++;
                if (s > config->cluster_highest_sessionid)
                        s = 1;
 
-               if (!session[s].tunnel) // Session isn't in use
+               if (!session[s].opened) // Session isn't in use
                        continue;
 
                if (!session[s].die && session[s].ip && !(session[s].flags & SF_IPCP_ACKED))
@@ -2091,13 +2194,14 @@ static int regular_cleanups(void)
                        // IPCP has not completed yet. Resend
                        LOG(3, s, session[s].tunnel, "No ACK for initial IPCP ConfigReq... resending\n");
                        sendipcp(session[s].tunnel, s);
+                       s_actions++;
                }
 
                // check for expired sessions
                if (session[s].die && session[s].die <= TIME)
                {
                        sessionkill(s, "Expired");
-                       if (++count >= MAX_ACTIONS) break;
+                       s_actions++;
                        continue;
                }
 
@@ -2106,7 +2210,7 @@ static int regular_cleanups(void)
                {
                        sessionshutdown(s, "No response to LCP ECHO requests");
                        STAT(session_timeout);
-                       if (++count >= MAX_ACTIONS) break;
+                       s_actions++;
                        continue;
                }
 
@@ -2126,7 +2230,7 @@ static int regular_cleanups(void)
                        LOG(4, s, session[s].tunnel, "No data in %d seconds, sending LCP ECHO\n",
                                        (int)(time_now - session[s].last_packet));
                        tunnelsend(b, 24, session[s].tunnel); // send it
-                       if (++count >= MAX_ACTIONS) break;
+                       s_actions++;
                }
 
                // Check for actions requested from the CLI
@@ -2140,6 +2244,7 @@ static int regular_cleanups(void)
                                LOG(2, s, session[s].tunnel, "Dropping session by CLI\n");
                                sessionshutdown(s, "Requested by administrator");
                                a = 0; // dead, no need to check for other actions
+                               s_actions++;
                        }
 
                        if (a & CLI_SESS_NOSNOOP)
@@ -2147,6 +2252,7 @@ static int regular_cleanups(void)
                                LOG(2, s, session[s].tunnel, "Unsnooping session by CLI\n");
                                session[s].snoop_ip = 0;
                                session[s].snoop_port = 0;
+                               s_actions++;
                                send++;
                        }
                        else if (a & CLI_SESS_SNOOP)
@@ -2157,6 +2263,7 @@ static int regular_cleanups(void)
 
                                session[s].snoop_ip = cli_session_actions[s].snoop_ip;
                                session[s].snoop_port = cli_session_actions[s].snoop_port;
+                               s_actions++;
                                send++;
                        }
 
@@ -2164,6 +2271,7 @@ static int regular_cleanups(void)
                        {
                                LOG(2, s, session[s].tunnel, "Un-throttling session by CLI\n");
                                throttle_session(s, 0, 0);
+                               s_actions++;
                                send++;
                        }
                        else if (a & CLI_SESS_THROTTLE)
@@ -2173,6 +2281,7 @@ static int regular_cleanups(void)
                                    cli_session_actions[s].throttle_out);
 
                                throttle_session(s, cli_session_actions[s].throttle_in, cli_session_actions[s].throttle_out);
+                               s_actions++;
                                send++;
                        }
 
@@ -2180,6 +2289,7 @@ static int regular_cleanups(void)
                        {
                                LOG(2, s, session[s].tunnel, "Un-filtering session by CLI\n");
                                filter_session(s, 0, 0);
+                               s_actions++;
                                send++;
                        }
                        else if (a & CLI_SESS_FILTER)
@@ -2189,39 +2299,17 @@ static int regular_cleanups(void)
                                    cli_session_actions[s].filter_out);
 
                                filter_session(s, cli_session_actions[s].filter_in, cli_session_actions[s].filter_out);
+                               s_actions++;
                                send++;
                        }
 
                        if (send)
                                cluster_send_session(s);
-
-                       if (++count >= MAX_ACTIONS) break;
                }
        }
 
-       if (*config->accounting_dir)
-       {
-               if (next_acct <= TIME)
-               {
-                       // Dump accounting data
-                       next_acct = TIME + ACCT_TIME;
-                       next_shut_acct = TIME + ACCT_SHUT_TIME;
-                       dump_acct_info(1);
-               }
-               else if (next_shut_acct <= TIME)
-               {
-                       // Dump accounting data for shutdown sessions
-                       next_shut_acct = TIME + ACCT_SHUT_TIME;
-                       if (shut_acct_n)
-                               dump_acct_info(0);
-               }
-       }
-
-       if (count >= MAX_ACTIONS)
-               return 1;       // Didn't finish!
-
-       LOG(3, 0, 0, "End regular cleanup (%d actions), next in %d seconds\n", count, config->cleanup_interval);
-       return 0;
+       LOG(3, 0, 0, "End regular cleanup: scanned %d/%d/%d tunnels/radius/sessions, %d/%d/%d actions\n",
+               t_slice, r_slice, s_slice, t_actions, r_actions, s_actions);
 }
 
 
@@ -2232,8 +2320,39 @@ static int regular_cleanups(void)
 static int still_busy(void)
 {
        int i;
+       static time_t stopped_bgp = 0;
        static clockt last_talked = 0;
        static clockt start_busy_wait = 0;
+
+       if (!config->cluster_iam_master)
+       {
+#ifdef BGP
+               if (bgp_configured)
+               {
+                       if (!stopped_bgp)
+                       {
+                               LOG(1, 0, 0, "Shutting down in %d seconds, stopping BGP...\n", QUIT_DELAY);
+
+                               for (i = 0; i < BGP_NUM_PEERS; i++)
+                                       if (bgp_peers[i].state == Established)
+                                               bgp_stop(&bgp_peers[i]);
+
+                               stopped_bgp = time_now;
+
+                               // we don't want to become master
+                               cluster_send_ping(0);
+
+                               return 1;
+                       }
+
+                       if (time_now < (stopped_bgp + QUIT_DELAY))
+                               return 1;
+               }
+#endif /* BGP */
+
+               return 0;
+       }
+
        if (start_busy_wait == 0)
                start_busy_wait = TIME;
 
@@ -2285,7 +2404,6 @@ static void mainloop(void)
        uint8_t buf[65536];
        struct timeval to;
        clockt next_cluster_ping = 0;   // send initial ping immediately
-       time_t next_clean = time_now + config->cleanup_interval;
 
        LOG(4, 0, 0, "Beginning of main loop.  udpfd=%d, tunfd=%d, cluster_sockfd=%d, controlfd=%d\n",
                udpfd, tunfd, cluster_sockfd, controlfd);
@@ -2367,14 +2485,11 @@ static void mainloop(void)
                        int tun_pkts = 0;
                        int cluster_pkts = 0;
 
-                       INC_STAT(select_ready, n);
-
                        // nsctl commands
                        if (FD_ISSET(controlfd, &r))
                        {
                                alen = sizeof(addr);
                                processcontrol(buf, recvfrom(controlfd, buf, sizeof(buf), MSG_WAITALL, (void *) &addr, &alen), &addr, alen);
-                               STAT(select_processed);
                                n--;
                        }
 
@@ -2386,7 +2501,6 @@ static void mainloop(void)
                                        if (FD_ISSET(radfds[i], &r))
                                        {
                                                processrad(buf, recv(radfds[i], buf, sizeof(buf), 0), i);
-                                               STAT(select_processed);
                                                n--;
                                        }
                                }
@@ -2406,21 +2520,17 @@ static void mainloop(void)
                                else
                                        LOG(0, 0, 0, "accept error: %s\n", strerror(errno));
 
-                               STAT(select_processed);
                                n--;
                        }
 
 #ifdef BGP
                        for (i = 0; i < BGP_NUM_PEERS; i++)
                        {
-                               int isr = bgp_set[i] ? !!FD_ISSET(bgp_peers[i].sock, &r) : 0;
-                               int isw = bgp_set[i] ? !!FD_ISSET(bgp_peers[i].sock, &w) : 0;
+                               int isr = bgp_set[i] ? FD_ISSET(bgp_peers[i].sock, &r) : 0;
+                               int isw = bgp_set[i] ? FD_ISSET(bgp_peers[i].sock, &w) : 0;
                                bgp_process(&bgp_peers[i], isr, isw);
-                               if (isr || isw)
-                               {
-                                       INC_STAT(select_processed, isr + isw);
-                                       n -= (isr + isw);
-                               }
+                               if (isr) n--;
+                               if (isw) n--;
                        }
 #endif /* BGP */
 
@@ -2433,7 +2543,6 @@ static void mainloop(void)
                                        if ((s = recvfrom(udpfd, buf, sizeof(buf), 0, (void *) &addr, &alen)) > 0)
                                        {
                                                processudp(buf, s, &addr);
-                                               STAT(select_processed);
                                                udp_pkts++;
                                        }
                                        else
@@ -2449,7 +2558,6 @@ static void mainloop(void)
                                        if ((s = read(tunfd, buf, sizeof(buf))) > 0)
                                        {
                                                processtun(buf, s);
-                                               STAT(select_processed);
                                                tun_pkts++;
                                        }
                                        else
@@ -2466,7 +2574,6 @@ static void mainloop(void)
                                        if ((s = recvfrom(cluster_sockfd, buf, sizeof(buf), MSG_WAITALL, (void *) &addr, &alen)) > 0)
                                        {
                                                processcluster(buf, s, addr.sin_addr.s_addr);
-                                               STAT(select_processed);
                                                cluster_pkts++;
                                        }
                                        else
@@ -2477,9 +2584,18 @@ static void mainloop(void)
                                }
                        }
 
+                       if (udp_pkts > 1 || tun_pkts > 1 || cluster_pkts > 1)
+                               STAT(multi_read_used);
+
                        if (c >= config->multi_read_count)
+                       {
                                LOG(3, 0, 0, "Reached multi_read_count (%d); processed %d udp, %d tun and %d cluster packets\n",
                                        config->multi_read_count, udp_pkts, tun_pkts, cluster_pkts);
+
+                               STAT(multi_read_exceeded);
+                       }
+
+                       TIME = now();
                }
 
                        // Runs on every machine (master and slaves).
@@ -2495,38 +2611,60 @@ static void mainloop(void)
 
                        master_update_counts();         // If we're a slave, send our byte counters to our master.
 
+                       TIME = now();
                        if (config->cluster_iam_master && !config->cluster_iam_uptodate)
                                next_cluster_ping = TIME + 1; // out-of-date slaves, do fast updates
                        else
                                next_cluster_ping = TIME + config->cluster_hb_interval;
                }
 
+               if (!config->cluster_iam_master)
+                       continue;
+
                        // Run token bucket filtering queue..
                        // Only run it every 1/10th of a second.
-                       // Runs on all machines both master and slave.
                {
                        static clockt last_run = 0;
                        if (last_run != TIME)
                        {
-                               last_run = TIME;
                                tbf_run_timer();
+                               last_run = TIME;
+                               TIME = now();
                        }
                }
 
-               /* Handle timeouts. Make sure that this gets run anyway, even if there was
-                * something to read, else under load this will never actually run....
-                *
-                */
-               if (config->cluster_iam_master && next_clean <= time_now)
+                       // Handle timeouts, retries etc.  Passes the time since the previous run.
                 {
-                       if (regular_cleanups())
+                       static clockt last_clean = 0; // static: must persist across loop passes (0 = never run)
+                       if (last_clean != TIME)
                         {
-                               // Did it finish?
-                               next_clean = time_now + 1 ;     // Didn't finish. Check quickly.
+                               if (!last_clean)
+                                       last_clean = TIME - 10; // first run only: report an elapsed interval of 10
+
+                               regular_cleanups(TIME - last_clean);
+                               last_clean = TIME;
+                               TIME = now();
                         }
-                       else
+               }
+
+               if (*config->accounting_dir)
+               {
+                       static clockt next_acct = 0;
+                       static clockt next_shut_acct = 0;
+
+                       if (next_acct <= TIME)
                        {
-                               next_clean = time_now + config->cleanup_interval; // Did. Move to next interval.
+                               // Dump accounting data
+                               next_acct = TIME + ACCT_TIME;
+                               next_shut_acct = TIME + ACCT_SHUT_TIME;
+                               dump_acct_info(1);
+                       }
+                       else if (next_shut_acct <= TIME)
+                       {
+                               // Dump accounting data for shutdown sessions
+                               next_shut_acct = TIME + ACCT_SHUT_TIME;
+                               if (shut_acct_n)
+                                       dump_acct_info(0);
                        }
                }
        }
@@ -2541,6 +2679,7 @@ static void mainloop(void)
 
        //
        // Important!!! We MUST not process any packets past this point!
+       LOG(1, 0, 0, "Clean shutdown complete\n");
 }
 
 static void stripdomain(char *host)
@@ -2629,6 +2768,7 @@ static void initdata(int optdebug, char *optconfig)
        config->debug = optdebug;
        config->num_tbfs = MAXTBFS;
        config->rl_rate = 28; // 28kbps
+       config->cluster_master_min_adv = 1;
 
        if (!(tunnel = shared_malloc(sizeof(tunnelt) * MAXTUNNEL)))
        {
@@ -2641,9 +2781,9 @@ static void initdata(int optdebug, char *optconfig)
                exit(1);
        }
 
-       if (!(sess_count = shared_malloc(sizeof(sessioncountt) * MAXSESSION)))
+       if (!(sess_local = shared_malloc(sizeof(sessionlocalt) * MAXSESSION)))
        {
-               LOG(0, 0, 0, "Error doing malloc for sessions_count: %s\n", strerror(errno));
+               LOG(0, 0, 0, "Error doing malloc for sess_local: %s\n", strerror(errno));
                exit(1);
        }
 
@@ -2695,7 +2835,7 @@ memset(ip_filters, 0, sizeof(ip_filtert) * MAXFILTER);
        memset(ip_address_pool, 0, sizeof(ippoolt) * MAXIPPOOL);
 
                // Put all the sessions on the free list marked as undefined.
-       for (i = 1; i < MAXSESSION - 1; i++)
+       for (i = 1; i < MAXSESSION; i++)
        {
                session[i].next = i + 1;
                session[i].tunnel = T_UNDEF;    // mark it as not filled in.
@@ -2704,7 +2844,7 @@ memset(ip_filters, 0, sizeof(ip_filtert) * MAXFILTER);
        sessionfree = 1;
 
                // Mark all the tunnels as undefined (waiting to be filled in by a download).
-       for (i = 1; i < MAXTUNNEL- 1; i++)
+       for (i = 1; i < MAXTUNNEL; i++)
                tunnel[i].state = TUNNELUNDEF;  // mark it as not filled in.
 
        if (!*hostname)
@@ -2830,7 +2970,7 @@ void rebuild_address_pool(void)
        for (i = 0; i < MAXSESSION; ++i)
        {
                int ipid;
-               if (!session[i].ip || !session[i].tunnel)
+               if (!(session[i].opened && session[i].ip))
                        continue;
                ipid = - lookup_ipmap(htonl(session[i].ip));
 
@@ -3132,7 +3272,7 @@ int main(int argc, char *argv[])
        init_tbf(config->num_tbfs);
 
        LOG(0, 0, 0, "L2TPNS version " VERSION "\n");
-       LOG(0, 0, 0, "Copyright (c) 2003, 2004 Optus Internet Engineering\n");
+       LOG(0, 0, 0, "Copyright (c) 2003, 2004, 2005 Optus Internet Engineering\n");
        LOG(0, 0, 0, "Copyright (c) 2002 FireBrick (Andrews & Arnold Ltd / Watchfront Ltd) - GPL licenced\n");
        {
                struct rlimit rlim;
@@ -3220,14 +3360,6 @@ int main(int argc, char *argv[])
 
        mainloop();
 
-#ifdef BGP
-       /* try to shut BGP down cleanly; with luck the sockets will be
-          writable since we're out of the select */
-       for (i = 0; i < BGP_NUM_PEERS; i++)
-               if (bgp_peers[i].state == Established)
-                       bgp_stop(&bgp_peers[i]);
-#endif /* BGP */
-
        /* remove plugins (so cleanup code gets run) */
        plugins_done();
 
@@ -3602,7 +3734,7 @@ static void update_config()
        if (!config->numradiusservers)
                LOG(0, 0, 0, "No RADIUS servers defined!\n");
 
-       config->num_radfds = 2 << RADIUS_SHIFT;
+       config->num_radfds = 1 << RADIUS_SHIFT;
 
        // Update plugins
        for (i = 0; i < MAXPLUGINS; i++)
@@ -3622,7 +3754,6 @@ static void update_config()
                }
        }
        memcpy(config->old_plugins, config->plugins, sizeof(config->plugins));
-       if (!config->cleanup_interval) config->cleanup_interval = 10;
        if (!config->multi_read_count) config->multi_read_count = 10;
        if (!config->cluster_address) config->cluster_address = inet_addr(DEFAULT_MCAST_ADDR);
        if (!*config->cluster_interface)
@@ -3703,7 +3834,7 @@ int sessionsetup(tunnelidt t, sessionidt s)
 
        LOG(3, s, t, "Doing session setup for session\n");
 
-       if (!session[s].ip || session[s].ip == 0xFFFFFFFE)
+       if (!session[s].ip)
        {
                assign_ip_address(s);
                if (!session[s].ip)
@@ -4301,7 +4432,7 @@ void become_master(void)
        {
                for (s = 1; s <= config->cluster_highest_sessionid ; ++s)
                {
-                       if (!session[s].tunnel) // Not an in-use session.
+                       if (!session[s].opened) // Not an in-use session.
                                continue;
 
                        run_plugins(PLUGIN_NEW_SESSION_MASTER, &session[s]);
@@ -4333,7 +4464,7 @@ int cmd_show_hist_idle(struct cli_def *cli, char *command, char **argv, int argc
        for (s = 1; s <= config->cluster_highest_sessionid ; ++s)
        {
                int idle;
-               if (!session[s].tunnel)
+               if (!session[s].opened)
                        continue;
 
                idle = time_now - session[s].last_packet;
@@ -4371,7 +4502,7 @@ int cmd_show_hist_open(struct cli_def *cli, char *command, char **argv, int argc
        for (s = 1; s <= config->cluster_highest_sessionid ; ++s)
        {
                int open = 0, d;
-               if (!session[s].tunnel)
+               if (!session[s].opened)
                        continue;
 
                d = time_now - session[s].opened;