Merge from Master
[l2tpns.git] / grpsess.c
index 68ec968..5528453 100644 (file)
--- a/grpsess.c
+++ b/grpsess.c
@@ -9,9 +9,12 @@
 #include <string.h>
 #include <sys/socket.h>
 #include <linux/rtnetlink.h>
+#include <netinet/ip6.h>
 
+#include "dhcp6.h"
 #include "l2tpns.h"
 #include "util.h"
+#include "cluster.h"
 
 #ifdef BGP
 #include "bgp.h"
@@ -22,7 +25,15 @@ union grp_iphash {
        union grp_iphash *idx;
 } grp_ip_hash[256];                    // Mapping from IP address to group structures.
 
-static groupidt gnextgrpid = 0;
+groupidt gnextgrpid = 0;
+
+typedef struct
+{
+       sessionidt sid_loaddist[0x10000];
+}
+local_group;
+
+local_group *grp_local = NULL;         // Array of local_group structures.
 
 // Find gruop by IP, < 1 for not found
 //
@@ -84,7 +95,7 @@ groupidt grp_groupbyip(in_addr_t ip)
 //
 // This adds it to the routing table, advertises it
 // via BGP if enabled, and stuffs it into the
-// 'sessionbyip' cache.
+// 'groupbyip' cache.
 //
 // 'ip' must be in _host_ order.
 //
@@ -128,7 +139,7 @@ static void grp_routeset(groupidt g, in_addr_t ip, int prefixlen, int add)
        n_ip = htonl(ip);
        netlink_addattr(&req.nh, RTA_DST, &n_ip, sizeof(n_ip));
 
-       LOG(1, 0, 0, "Route (Group) %s %s/%d\n", add ? "add" : "del", fmtaddr(htonl(ip), 0), prefixlen);
+       LOG(3, 0, 0, "Route (Group) %s %s/%d\n", add ? "add" : "del", fmtaddr(htonl(ip), 0), prefixlen);
 
        if (netlink_send(&req.nh) < 0)
                LOG(0, 0, 0, "grp_routeset() error in sending netlink message: %s\n", strerror(errno));
@@ -153,7 +164,10 @@ static void grp_routeset(groupidt g, in_addr_t ip, int prefixlen, int add)
                        g = 0;  // Caching the session as '0' is the same as uncaching.
 
                for (i = ip; i < ip+(1<<(32-prefixlen)) ; ++i)
+               {
                        grp_cache_ipmap(i, g);
+                       if (!g) cache_ipmap(i, 0);
+               }
        }
 }
 
@@ -212,7 +226,7 @@ void grp_removesession(groupidt g, sessionidt s)
 
                                if (gnextgrpid == g)
                                {
-                                       gnextgrpid = 0;
+                                       gnextgrpid = grpsession[g].prev;
                                }
                                else
                                {
@@ -228,6 +242,7 @@ void grp_removesession(groupidt g, sessionidt s)
                                }
 
                                memset(&grpsession[g], 0, sizeof(grpsession[0]));
+                               grpsession[g].state = GROUPEFREE;
                        }
                        else
                        {
@@ -236,6 +251,8 @@ void grp_removesession(groupidt g, sessionidt s)
                                                &grpsession[g].sesslist[i+1],
                                                (grpsession[g].nbsession - i) * sizeof(grpsession[g].sesslist[i]));
                        }
+
+                       cluster_send_groupe(g);
                        return;
                }
        }
@@ -270,6 +287,7 @@ static int grp_addsession(groupidt g, sessionidt s, uint8_t weight)
                        // it's the first session of the group, set to next group
                        grpsession[g].prev = gnextgrpid;
                        gnextgrpid = g;
+                       grpsession[g].state = GROUPEOPEN;
                }
                grpsession[g].sesslist[i].sid = s;
                grpsession[g].sesslist[i].weight = weight;
@@ -356,6 +374,9 @@ void grp_processvendorspecific(sessionidt s, uint8_t *pvs)
                        return;
                }
 
+               if (grpid > config->cluster_highest_groupeid)
+                       config->cluster_highest_groupeid = grpid;
+
                n++;
        }
 
@@ -420,6 +441,8 @@ void grp_processvendorspecific(sessionidt s, uint8_t *pvs)
 // Init data structures
 void grp_initdata()
 {
+       int i;
+
        // Set default value (10s)
        config->grp_txrate_average_time = 10;
 
@@ -430,6 +453,18 @@ void grp_initdata()
        }
 
        memset(grpsession, 0, sizeof(grpsession[0]) * MAXGROUPE);
+       for (i = 1; i < MAXGROUPE; i++)
+       {
+               grpsession[i].state = GROUPEUNDEF;
+       }
+
+       if (!(grp_local = shared_malloc(sizeof(local_group) * MAXGROUPE)))
+       {
+               LOG(0, 0, 0, "Error doing malloc for grp_local: %s\n", strerror(errno));
+               exit(1);
+       }
+       memset(grp_local, 0, sizeof(grp_local[0]) * MAXGROUPE);
+
 }
 
 // Update time_changed of the group
@@ -443,69 +478,161 @@ void grp_time_changed()
        }
 }
 
+// Uncache all IP of a session
+static void grp_uncache_ipsession(groupidt g, sessionidt s)
+{
+       int i;
+       uint8_t *a;
+       in_addr_t ip;
+       in_addr_t n_ip, j;
+       int prefixlen;
+       union iphash *h;
+
+       for (i = 0; i < grpsession[g].nbroutesgrp; i++)
+       {
+               if (grpsession[g].route[i].ip != 0)
+               {
+                       prefixlen = grpsession[g].route[i].prefixlen;
+                       ip = grpsession[g].route[i].ip & (0xffffffff << (32 - prefixlen));      // Force the ip to be the first one in the route.
+
+                       for (j = ip; j < ip+(1<<(32-prefixlen)) ; ++j)
+                       {
+                               n_ip = htonl(j); // To network order
+                               a = (uint8_t *) &n_ip;
+                               h = ip_hash;
+
+                               if (!(h = h[*a++].idx)) continue;
+                               if (!(h = h[*a++].idx)) continue;
+                               if (!(h = h[*a++].idx)) continue;
+
+                               if (s == h[*a].sess)
+                               {
+                                       h[*a].sess = 0;
+                                       //LOG(3, s, session[s].tunnel, "UnCaching ip address %s\n", fmtaddr(n_ip, 0));
+                               }
+                       }
+               }
+       }
+}
+
+uint16_t guint16_index_loadlist;
 // return the next session can be used on the group
-sessionidt grp_getnextsession(groupidt g, in_addr_t ip)
+sessionidt grp_getnextsession(groupidt g, in_addr_t ip, in_addr_t ip_src)
 {
        sessionidt s = 0, s2 = 0, s3 = 0;
        int i;
-       uint32_t ltime_changed = 0;
-       uint32_t mintxrate = 0xFFFFFFFF;
+       uint32_t ltime_changed = 0, mintxrate = 0xFFFFFFFF, maxtxrate = 0;
+       uint32_t txrate = 0;
 
        if (g >= MAXGROUPE)
                return 0;
 
-       if ((s = sessionbyip(ip)))
+       if (grpsession[g].time_changed >= config->grp_txrate_average_time)
        {
-               if ( (session[s].ppp.phase > Establish) &&
-                        (time_now - session[s].last_packet <= (config->echo_timeout + 2)) )
+               // recalculation txrate
+               ltime_changed = grpsession[g].time_changed;
+               grpsession[g].time_changed = 0;
+               for (i = 0; i < grpsession[g].nbsession; i++)
                {
-                       int recaltxrate = 0;
-
-                       for (i = 0; i < grpsession[g].nbsession; i++)
+                       if ((s2 = grpsession[g].sesslist[i].sid))
                        {
-                               if (s == grpsession[g].sesslist[i].sid)
+                               uint32_t coutgrp_delta = 0;
+
+                               if (session[s2].cout >= grpsession[g].sesslist[i].prev_coutgrp)
+                                       coutgrp_delta = session[s2].cout - grpsession[g].sesslist[i].prev_coutgrp;
+                               grpsession[g].sesslist[i].prev_coutgrp = session[s2].cout;
+
+                               txrate = (txrate + (coutgrp_delta/ltime_changed)) >> 1;
+                               grpsession[g].sesslist[i].tx_rate = txrate;
+
+                               txrate = grpsession[g].sesslist[i].tx_rate/grpsession[g].sesslist[i].weight;
+                               if (txrate < mintxrate)
                                {
-                                       if ((time_now - grpsession[g].sesslist[i].mark_time) > config->grp_txrate_average_time)
+                                       if ( session[s2].ppp.phase > Establish &&
+                                               (time_now - session[s2].last_packet <= (config->echo_timeout + 1)) )
                                        {
-                                               grpsession[g].sesslist[i].mark_time = time_now;
-                                               recaltxrate = 1;
-                                               break;
+                                               grpsession[g].smin = s2;
+                                               mintxrate = txrate;
                                        }
                                }
-                       }
 
-                       if (!recaltxrate)
-                               return s;
+                               if (txrate > maxtxrate)
+                               {
+                                       if ( session[s2].ppp.phase > Establish &&
+                                       (time_now - session[s2].last_packet <= (config->echo_timeout + 1)) )
+                                       {
+                                               grpsession[g].smax = s2;
+                                               maxtxrate = txrate;
+                                       }
+                               }
+                       }
                }
        }
 
-       //if (grpsession[g].time_changed > config->grp_txrate_average_time)
-       if (grpsession[g].time_changed > 1)
+       if ((s = sessionbyip(ip)))
        {
-               ltime_changed = grpsession[g].time_changed;
-               grpsession[g].time_changed = 1;
+               uint8_t *as = (uint8_t *) &ip_src;
+               uint8_t *ad = (uint8_t *) &ip;
+               uint16_t ai = ad[3];
+               ai <<= 8;
+               ai |= as[3];
+
+               s = grp_local[g].sid_loaddist[ai];
+               if (!s)
+               {
+                       s = grpsession[g].smin;
+                       grp_local[g].sid_loaddist[ai] = s;
+               }
+
+               if (g != grp_groupbysession(s))
+               {
+                       // This session does not belong to this group
+                       LOG(3, s, session[s].tunnel, "Warning, the session does not belong to group %d\n", g);
+                       s = 0;
+                       grp_local[g].sid_loaddist[ai] = 0;
+               }
+               else if ( (session[s].ppp.phase > Establish) &&
+                        (time_now - session[s].last_packet <= (config->echo_timeout + 1)) )
+               {
+                       grp_local[g].sid_loaddist[guint16_index_loadlist++] = 0;
+                       return s;
+               }
+               else
+               {
+                       s = 0;
+                       grp_local[g].sid_loaddist[ai] = 0;
+               }
        }
 
-       for (i = 0; i < grpsession[g].nbsession; i++)
+       if (!s)
        {
-               if ((s2 = grpsession[g].sesslist[i].sid))
-               {
-                       s3 = s2;
-                       if (ltime_changed)
-                       {
-                               grpsession[g].sesslist[i].tx_rate = session[s2].coutgrp_delta/ltime_changed;
-                               session[s2].coutgrp_delta = grpsession[g].sesslist[i].tx_rate;
-                               //LOG(3, s2, session[s2].tunnel, "TX Rate: %d session weight: %d\n", grpsession[g].sesslist[i].tx_rate, grpsession[g].sesslist[i].weight);
-                       }
+               // random between 0 and nbsession-1
+               uint indexsess = (rand() % grpsession[g].nbsession);
+
+               if (indexsess >= grpsession[g].nbsession)
+                       indexsess = 0; //Sanity checks.
 
-                       if ( session[s2].ppp.phase > Establish &&
-                               (time_now - session[s2].last_packet <= (config->echo_timeout + 2)) )
+               s2 = grpsession[g].sesslist[indexsess].sid;
+               if (s2 &&
+                       (session[s2].ppp.phase > Establish) &&
+                       (time_now - session[s2].last_packet <= (config->echo_timeout + 1)))
+               {
+                       s = s2;
+               }
+               else
+               {
+                       for (i = 0; i < grpsession[g].nbsession; i++)
                        {
-                               uint32_t txrate = grpsession[g].sesslist[i].tx_rate/grpsession[g].sesslist[i].weight;
-                               if (txrate < mintxrate)
+                               if ((s2 = grpsession[g].sesslist[i].sid))
                                {
-                                       s = s2;
-                                       mintxrate = txrate;
+                                       s3 = s2;
+
+                                       if ( session[s2].ppp.phase > Establish &&
+                                               (time_now - session[s2].last_packet <= (config->echo_timeout + 1)) )
+                                       {
+                                               s = s2;
+                                               break;
+                                       }
                                }
                        }
                }
@@ -519,3 +646,66 @@ sessionidt grp_getnextsession(groupidt g, in_addr_t ip)
 
        return s;
 }
+
+// load a groupe receive from master
+int grp_cluster_load_groupe(groupidt g, groupsesst *new)
+{
+       int i;
+       int updategroup = 0;
+
+       if (g >= MAXGROUPE)
+       {
+               LOG(0, 0, 0, "ERROR: Received a group id > MAXGROUPE!\n");
+               return 0;
+       }
+
+       if ((grpsession[g].nbroutesgrp != new->nbroutesgrp) ||
+               (grpsession[g].nbsession != new->nbsession))
+       {
+               updategroup = 1;
+       }
+
+       if (!updategroup)
+       {
+               // Check session list
+               for (i = 0; i < grpsession[g].nbsession; i++)
+               {
+                       if (grpsession[g].sesslist[i].sid != new->sesslist[i].sid)
+                       {
+                               updategroup = 1;
+                               break;
+                       }
+               }
+       }
+
+       if (!updategroup)
+       {
+               // Check routes list
+               for (i = 0; i < grpsession[g].nbroutesgrp; i++)
+               {
+                       if (grpsession[g].route[i].ip != new->route[i].ip)
+                       {
+                               updategroup = 1;
+                               break;
+                       }
+               }
+       }
+
+       // needs update
+       if (updategroup)
+       {
+               // Del all routes
+               grp_setgrouproute(g, 0);
+       }
+
+       memcpy(&grpsession[g], new, sizeof(grpsession[g]));     // Copy over..
+
+       // needs update
+       if (updategroup)
+       {
+               // Add all routes
+               grp_setgrouproute(g, 1);
+       }
+
+       return 1;
+}