X-Git-Url: http://git.sameswireless.fr/l2tpns.git/blobdiff_plain/ca4801db5521d0dc9f7c7f854f62f27c55420558..af5792cb6a1ee10e11eb7439242834b967e98db8:/cluster.c diff --git a/cluster.c b/cluster.c index c529fea..4014e81 100644 --- a/cluster.c +++ b/cluster.c @@ -1,6 +1,6 @@ // L2TPNS Clustering Stuff -char const *cvs_id_cluster = "$Id: cluster.c,v 1.39 2005/05/26 12:17:30 bodea Exp $"; +char const *cvs_id_cluster = "$Id: cluster.c,v 1.49 2005/12/05 14:10:42 bodea Exp $"; #include #include @@ -38,7 +38,7 @@ char const *cvs_id_cluster = "$Id: cluster.c,v 1.39 2005/05/26 12:17:30 bodea Ex */ // Module variables. -int cluster_sockfd = 0; // The filedescriptor for the cluster communications port. +extern int cluster_sockfd; // The filedescriptor for the cluster communications port. in_addr_t my_address = 0; // The network address of my ethernet port. static int walk_session_number = 0; // The next session to send when doing the slow table walk. @@ -56,7 +56,7 @@ static struct { static struct { int seq; int size; - char data[MAX_HEART_SIZE]; + uint8_t data[MAX_HEART_SIZE]; } past_hearts[HB_HISTORY_SIZE]; // Ring buffer of heartbeats that we've recently sent out. Needed so // we can re-transmit if needed. @@ -127,6 +127,15 @@ int cluster_init() opt = 0; // Turn off multicast loopback. setsockopt(cluster_sockfd, IPPROTO_IP, IP_MULTICAST_LOOP, &opt, sizeof(opt)); + if (config->cluster_mcast_ttl != 1) + { + uint8_t ttl = 0; + if (config->cluster_mcast_ttl > 0) + ttl = config->cluster_mcast_ttl < 256 ? config->cluster_mcast_ttl : 255; + + setsockopt(cluster_sockfd, IPPROTO_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); + } + if (setsockopt(cluster_sockfd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)) < 0) { LOG(0, 0, 0, "Failed to setsockopt (join mcast group): %s\n", strerror(errno)); @@ -178,7 +187,7 @@ static int cluster_send_data(void *data, int datalen) // Maintains the format. Assumes that the caller // has passed in a big enough buffer! // -static void add_type(char **p, int type, int more, char *data, int size) +static void add_type(uint8_t **p, int type, int more, uint8_t *data, int size) { *((uint32_t *) (*p)) = type; *p += sizeof(uint32_t); @@ -231,7 +240,7 @@ static void cluster_uptodate(void) // Send a unicast UDP packet to a peer with 'data' as the // contents. // -static int peer_send_data(in_addr_t peer, char *data, int size) +static int peer_send_data(in_addr_t peer, uint8_t *data, int size) { struct sockaddr_in addr = {0}; @@ -259,10 +268,10 @@ static int peer_send_data(in_addr_t peer, char *data, int size) // // Send a structured message to a peer with a single element of type 'type'. // -static int peer_send_message(in_addr_t peer, int type, int more, char *data, int size) +static int peer_send_message(in_addr_t peer, int type, int more, uint8_t *data, int size) { - char buf[65536]; // Vast overkill. - char *p = buf; + uint8_t buf[65536]; // Vast overkill. + uint8_t *p = buf; LOG(4, 0, 0, "Sending message to peer (type %d, more %d, size %d)\n", type, more, size); add_type(&p, type, more, data, size); @@ -270,16 +279,11 @@ static int peer_send_message(in_addr_t peer, int type, int more, char *data, int return peer_send_data(peer, buf, (p-buf) ); } -// -// Forward a state changing packet to the master. -// -// The master just processes the payload as if it had -// received it off the tun device. -// -int master_forward_packet(char *data, int size, in_addr_t addr, int port) +// send a packet to the master +static int _forward_packet(uint8_t *data, int size, in_addr_t addr, int port, int type) { - char buf[65536]; // Vast overkill. - char *p = buf; + uint8_t buf[65536]; // Vast overkill. + uint8_t *p = buf; if (!config->cluster_master_address) // No election has been held yet. Just skip it. return -1; @@ -287,13 +291,30 @@ int master_forward_packet(char *data, int size, in_addr_t addr, int port) LOG(4, 0, 0, "Forwarding packet from %s to master (size %d)\n", fmtaddr(addr, 0), size); STAT(c_forwarded); - add_type(&p, C_FORWARD, addr, (char *) &port, sizeof(port)); + add_type(&p, type, addr, (uint8_t *) &port, sizeof(port)); // ick. should be uint16_t memcpy(p, data, size); p += size; return peer_send_data(config->cluster_master_address, buf, (p - buf)); } +// +// Forward a state changing packet to the master. +// +// The master just processes the payload as if it had +// received it off the tun device. +// +int master_forward_packet(uint8_t *data, int size, in_addr_t addr, int port) +{ + return _forward_packet(data, size, addr, port, C_FORWARD); +} + +// Forward a DAE RADIUS packet to the master. +int master_forward_dae_packet(uint8_t *data, int size, in_addr_t addr, int port) +{ + return _forward_packet(data, size, addr, port, C_FORWARD_DAE); +} + // // Forward a throttled packet to the master for handling. // @@ -301,10 +322,10 @@ int master_forward_packet(char *data, int size, in_addr_t addr, int port) // token bucket queue, and lets normal processing take care // of it. // -int master_throttle_packet(int tbfid, char *data, int size) +int master_throttle_packet(int tbfid, uint8_t *data, int size) { - char buf[65536]; // Vast overkill. - char *p = buf; + uint8_t buf[65536]; // Vast overkill. + uint8_t *p = buf; if (!config->cluster_master_address) // No election has been held yet. Just skip it. return -1; @@ -326,10 +347,10 @@ int master_throttle_packet(int tbfid, char *data, int size) // // (Note that this must be called with the tun header // as the start of the data). -int master_garden_packet(sessionidt s, char *data, int size) +int master_garden_packet(sessionidt s, uint8_t *data, int size) { - char buf[65536]; // Vast overkill. - char *p = buf; + uint8_t buf[65536]; // Vast overkill. + uint8_t *p = buf; if (!config->cluster_master_address) // No election has been held yet. Just skip it. return -1; @@ -346,7 +367,7 @@ int master_garden_packet(sessionidt s, char *data, int size) // Send a chunk of data as a heartbeat.. // We save it in the history buffer as we do so. // -static void send_heartbeat(int seq, char *data, int size) +static void send_heartbeat(int seq, uint8_t *data, int size) { int i; @@ -368,8 +389,8 @@ static void send_heartbeat(int seq, char *data, int size) // void cluster_send_ping(time_t basetime) { - char buff[100 + sizeof(pingt)]; - char *p = buff; + uint8_t buff[100 + sizeof(pingt)]; + uint8_t *p = buff; pingt x; if (config->cluster_iam_master && basetime) // We're heartbeating so no need to ping. @@ -382,7 +403,7 @@ void cluster_send_ping(time_t basetime) x.undef = config->cluster_undefined_sessions + config->cluster_undefined_tunnels; x.basetime = basetime; - add_type(&p, C_PING, basetime, (char *) &x, sizeof(x)); + add_type(&p, C_PING, basetime, (uint8_t *) &x, sizeof(x)); cluster_send_data(buff, (p-buff) ); } @@ -402,10 +423,16 @@ void master_update_counts(void) if (config->cluster_iam_master) // Only happens on the slaves. return; - if (!config->cluster_master_address) // If we don't have a master, skip it for a while. + if (!config->cluster_master_address) // If we don't have a master, skip it for a while. + return; + + // C_BYTES format changed in 2.1.0 (cluster version 5) + // during upgrade from previous versions, hang onto our counters + // for a bit until the new master comes up + if (config->cluster_last_hb_ver < 5) return; - i = MAX_B_RECS * 5; // Examine max 2000 sessions; + i = MAX_B_RECS * 5; // Examine max 3000 sessions; if (config->cluster_highest_sessionid > i) i = config->cluster_highest_sessionid; @@ -416,17 +443,20 @@ void master_update_counts(void) walk_session_number = 1; if (!sess_local[walk_session_number].cin && !sess_local[walk_session_number].cout) - continue; // Unused. Skip it. + continue; // Unchanged. Skip it. b[c].sid = walk_session_number; - b[c].in = sess_local[walk_session_number].cin; - b[c].out = sess_local[walk_session_number].cout; - - if (++c > MAX_B_RECS) // Send a max of 400 elements in a packet. - break; + b[c].pin = sess_local[walk_session_number].pin; + b[c].pout = sess_local[walk_session_number].pout; + b[c].cin = sess_local[walk_session_number].cin; + b[c].cout = sess_local[walk_session_number].cout; // Reset counters. + sess_local[walk_session_number].pin = sess_local[walk_session_number].pout = 0; sess_local[walk_session_number].cin = sess_local[walk_session_number].cout = 0; + + if (++c > MAX_B_RECS) // Send a max of 600 elements in a packet. + break; } if (!c) // Didn't find any that changes. Get out of here! @@ -435,7 +465,7 @@ void master_update_counts(void) // Forward the data to the master. LOG(4, 0, 0, "Sending byte counters to master (%d elements)\n", c); - peer_send_message(config->cluster_master_address, C_BYTES, c, (char *) &b, sizeof(b[0]) * c); + peer_send_message(config->cluster_master_address, C_BYTES, c, (uint8_t *) &b, sizeof(b[0]) * c); return; } @@ -524,9 +554,14 @@ void cluster_check_master(void) return; // Everything's ok! config->cluster_last_hb = TIME + 1; // Just the one election thanks. + config->cluster_master_address = 0; LOG(0, 0, 0, "Master timed out! Holding election...\n"); + // In the process of shutting down, can't be master + if (main_quit) + return; + for (i = have_peers = 0; i < num_peers; i++) { if ((peers[i].timestamp + config->cluster_hb_timeout) < t) @@ -555,7 +590,6 @@ void cluster_check_master(void) // to become a master!!! config->cluster_iam_master = 1; - config->cluster_master_address = 0; LOG(0, 0, 0, "I am declaring myself the master!\n"); @@ -610,13 +644,17 @@ void cluster_check_master(void) // Reset die relative to our uptime rather than the old master's if (session[i].die) session[i].die = TIME; - // Accumulate un-sent byte counters. - session[i].cin += sess_local[i].cin; - session[i].cout += sess_local[i].cout; - session[i].total_cin += sess_local[i].cin; - session[i].total_cout += sess_local[i].cout; + // Accumulate un-sent byte/packet counters. + increment_counter(&session[i].cin, &session[i].cin_wrap, sess_local[i].cin); + increment_counter(&session[i].cout, &session[i].cout_wrap, sess_local[i].cout); + session[i].cin_delta += sess_local[i].cin; + session[i].cout_delta += sess_local[i].cout; + + session[i].pin += sess_local[i].pin; + session[i].pout += sess_local[i].pout; sess_local[i].cin = sess_local[i].cout = 0; + sess_local[i].pin = sess_local[i].pout = 0; sess_local[i].radius = 0; // Reset authentication as the radius blocks aren't up to date. @@ -709,7 +747,7 @@ static void cluster_check_sessions(int highsession, int freesession_ptr, int hig cluster_uptodate(); } -static int hb_add_type(char **p, int type, int id) +static int hb_add_type(uint8_t **p, int type, int id) { switch (type) { case C_CSESSION: { // Compressed C_SESSION. @@ -723,13 +761,13 @@ static int hb_add_type(char **p, int type, int id) // Did we compress the full structure, and is the size actually // reduced?? if ( (d - orig) == sizeof(sessiont) && size < sizeof(sessiont) ) { - add_type(p, C_CSESSION, id, (char *) c, size); + add_type(p, C_CSESSION, id, c, size); break; } // Failed to compress : Fall through. } - case C_SESSION: add_type(p, C_SESSION, id, - (char *) &session[id], sizeof(sessiont)); + case C_SESSION: + add_type(p, C_SESSION, id, (uint8_t *) &session[id], sizeof(sessiont)); break; case C_CTUNNEL: { // Compressed C_TUNNEL @@ -748,8 +786,8 @@ static int hb_add_type(char **p, int type, int id) } // Failed to compress : Fall through. } - case C_TUNNEL: add_type(p, C_TUNNEL, id, - (char *) &tunnel[id], sizeof(tunnelt)); + case C_TUNNEL: + add_type(p, C_TUNNEL, id, (uint8_t *) &tunnel[id], sizeof(tunnelt)); break; default: LOG(0, 0, 0, "Found an invalid type in heart queue! (%d)\n", type); @@ -765,9 +803,9 @@ static int hb_add_type(char **p, int type, int id) void cluster_heartbeat() { int i, count = 0, tcount = 0; - char buff[MAX_HEART_SIZE + sizeof(heartt) + sizeof(int) ]; + uint8_t buff[MAX_HEART_SIZE + sizeof(heartt) + sizeof(int) ]; heartt h; - char *p = buff; + uint8_t *p = buff; if (!config->cluster_iam_master) // Only the master does this. return; @@ -791,7 +829,7 @@ void cluster_heartbeat() h.timeout = config->cluster_hb_timeout; h.table_version = config->cluster_table_version; - add_type(&p, C_HEARTBEAT, HB_VERSION, (char *) &h, sizeof(h)); + add_type(&p, C_HEARTBEAT, HB_VERSION, (uint8_t *) &h, sizeof(h)); for (i = 0; i < config->cluster_num_changes; ++i) { hb_add_type(&p, cluster_changes[i].type, cluster_changes[i].id); @@ -1005,7 +1043,6 @@ static int cluster_add_peer(in_addr_t peer, time_t basetime, pingt *pp, int size config->cluster_master_address = 0; config->cluster_last_hb = 0; // Force an election. cluster_check_master(); - return 0; } if (i >= num_peers) @@ -1052,6 +1089,16 @@ static int cluster_set_master(in_addr_t peer, in_addr_t master) fmtaddr(master, 1)); config->cluster_master_address = master; + if (master) + { + // catchup with new master + peer_send_message(master, C_LASTSEEN, config->cluster_seq_number, NULL, 0); + + // delay next election + config->cluster_last_hb = TIME; + } + + // run election (or reset "probed" if master was set) cluster_check_master(); return 0; } @@ -1061,7 +1108,7 @@ static int cluster_set_master(in_addr_t peer, in_addr_t master) // Note that we don't mark the session as dirty; We rely on // the slow table walk to propogate this back out to the slaves. // -static int cluster_handle_bytes(char *data, int size) +static int cluster_handle_bytes(uint8_t *data, int size) { bytest *b; @@ -1078,13 +1125,16 @@ static int cluster_handle_bytes(char *data, int size) return -1; /* Abort processing */ } - session[b->sid].total_cin += b->in; - session[b->sid].total_cout += b->out; + session[b->sid].pin += b->pin; + session[b->sid].pout += b->pout; - session[b->sid].cin += b->in; - session[b->sid].cout += b->out; + increment_counter(&session[b->sid].cin, &session[b->sid].cin_wrap, b->cin); + increment_counter(&session[b->sid].cout, &session[b->sid].cout_wrap, b->cout); - if (b->in) + session[b->sid].cin_delta += b->cin; + session[b->sid].cout_delta += b->cout; + + if (b->cin) session[b->sid].last_packet = time_now; // Reset idle timer! size -= sizeof(*b); @@ -1197,6 +1247,9 @@ struct oldsession { uint32_t tx_connect_speed; uint32_t rx_connect_speed; uint32_t flags; +#define SF_IPCP_ACKED 1 // Has this session seen an IPCP Ack? +#define SF_LCP_ACKED 2 // LCP negotiated +#define SF_CCP_ACKED 4 // CCP negotiated in_addr_t snoop_ip; uint16_t snoop_port; uint16_t sid; @@ -1215,20 +1268,17 @@ static uint8_t *convert_session(struct oldsession *old) new.next = old->next; new.far = old->far; new.tunnel = old->tunnel; - new.l2tp_flags = old->l2tp_flags; - new.flags = old->flags; + new.flags = old->l2tp_flags; new.ip = old->ip; new.ip_pool_index = old->ip_pool_index; new.unique_id = old->unique_id; - new.nr = old->nr; - new.ns = old->ns; new.magic = old->magic; - new.cin = old->cin; - new.cout = old->cout; new.pin = old->pin; new.pout = old->pout; - new.total_cin = old->total_cin; - new.total_cout = old->total_cout; + new.cin = old->total_cin; + new.cout = old->total_cout; + new.cin_delta = old->cin; + new.cout_delta = old->cout; new.throttle_in = old->throttle_in; new.throttle_out = old->throttle_out; new.filter_in = old->filter_in; @@ -1256,6 +1306,21 @@ static uint8_t *convert_session(struct oldsession *old) for (i = 0; i < MAXROUTE; i++) memcpy(&new.route[i], &old->route[i], sizeof(new.route[i])); + if (new.opened) + { + new.ppp.phase = Establish; + if (old->flags & (SF_IPCP_ACKED|SF_LCP_ACKED)) + { + new.ppp.phase = Network; + new.ppp.lcp = Opened; + new.ppp.ipcp = (old->flags & SF_IPCP_ACKED) ? Opened : Starting; + new.ppp.ccp = (old->flags & SF_CCP_ACKED) ? Opened : Stopped; + } + + // no PPPv6 in old session + new.ppp.ipv6cp = Stopped; + } + return (uint8_t *) &new; } @@ -1348,8 +1413,7 @@ static int cluster_process_heartbeat(uint8_t *data, int size, int more, uint8_t // Note that after a clean failover, the cluster_master_address // is cleared, so this doesn't run. // - if (config->cluster_master_address && addr != config->cluster_master_address - && (config->cluster_last_hb + config->cluster_hb_timeout - 11) > TIME) { + if (config->cluster_master_address && addr != config->cluster_master_address) { LOG(0, 0, 0, "Ignoring stray heartbeat from %s, current master %s has not yet timed out (last heartbeat %.1f seconds ago).\n", fmtaddr(addr, 0), fmtaddr(config->cluster_master_address, 1), 0.1 * (TIME - config->cluster_last_hb)); @@ -1360,6 +1424,7 @@ static int cluster_process_heartbeat(uint8_t *data, int size, int more, uint8_t config->cluster_seq_number = h->seq; config->cluster_last_hb = TIME; // Reset to ensure that we don't become master!! + config->cluster_last_hb_ver = hb_ver; // remember what cluster version the master is using if (config->cluster_seq_number != h->seq) { // Out of sequence heartbeat! static int lastseen_seq = 0; @@ -1486,7 +1551,7 @@ static int cluster_process_heartbeat(uint8_t *data, int size, int more, uint8_t s -= (p - orig_p); if (size != sizeof(tunnelt) ) { // Ouch! Very very bad! - LOG(0, 0, 0, "DANGER: Received a CSESSION that didn't decompress correctly!\n"); + LOG(0, 0, 0, "DANGER: Received a CTUNNEL that didn't decompress correctly!\n"); // Now what? Should exit! No-longer up to date! break; } @@ -1531,10 +1596,10 @@ shortpacket: // We got a packet on the cluster port! // Handle pings, lastseens, and heartbeats! // -int processcluster(char *data, int size, in_addr_t addr) +int processcluster(uint8_t *data, int size, in_addr_t addr) { int type, more; - char *p = data; + uint8_t *p = data; int s = size; if (addr == my_address) @@ -1556,7 +1621,8 @@ int processcluster(char *data, int size, in_addr_t addr) p += sizeof(uint32_t); s -= sizeof(uint32_t); - switch (type) { + switch (type) + { case C_PING: // Update the peers table. return cluster_add_peer(addr, more, (pingt *) p, s); @@ -1566,24 +1632,36 @@ int processcluster(char *data, int size, in_addr_t addr) case C_LASTSEEN: // Catch up a slave (slave missed a packet). return cluster_catchup_slave(more, addr); - case C_FORWARD: { // Forwarded control packet. pass off to processudp. - struct sockaddr_in a; - a.sin_addr.s_addr = more; - - a.sin_port = *(int *) p; - s -= sizeof(int); - p += sizeof(int); + case C_FORWARD: // Forwarded control packet. pass off to processudp. + case C_FORWARD_DAE: // Forwarded DAE packet. pass off to processdae. + if (!config->cluster_iam_master) + { + LOG(0, 0, 0, "I'm not the master, but I got a C_FORWARD_%s from %s?\n", + type == C_FORWARD_DAE ? "_DAE" : "", fmtaddr(addr, 0)); - if (!config->cluster_iam_master) { // huh? - LOG(0, 0, 0, "I'm not the master, but I got a C_FORWARD from %s?\n", fmtaddr(addr, 0)); return -1; } + else + { + struct sockaddr_in a; + a.sin_addr.s_addr = more; + + a.sin_port = *(int *) p; + s -= sizeof(int); + p += sizeof(int); + + LOG(4, 0, 0, "Got a forwarded %spacket... (%s:%d)\n", + type == C_FORWARD_DAE ? "DAE " : "", fmtaddr(more, 0), a.sin_port); + + STAT(recv_forward); + if (type == C_FORWARD_DAE) + processdae(p, s, &a, sizeof(a)); + else + processudp(p, s, &a); + + return 0; + } - LOG(4, 0, 0, "Got a forwarded packet... (%s:%d)\n", fmtaddr(more, 0), a.sin_port); - STAT(recv_forward); - processudp(p, s, &a); - return 0; - } case C_THROTTLE: { // Receive a forwarded packet from a slave. if (!config->cluster_iam_master) { LOG(0, 0, 0, "I'm not the master, but I got a C_THROTTLE from %s?\n", fmtaddr(addr, 0)); @@ -1760,7 +1838,7 @@ static int rle_decompress(uint8_t **src_p, int ssize, uint8_t *dst, int dsize) { int count; int orig_dsize = dsize; - char *src = *src_p; + uint8_t *src = *src_p; while (ssize >0 && dsize > 0) { // While there's more to decompress, and there's room in the decompress buffer... count = *src++; --ssize; // get the count byte from the source.