X-Git-Url: http://git.sameswireless.fr/l2tpns.git/blobdiff_plain/7b9fe631a04e8a92b0c8f949709dc1a88a1063bc..aa77d4f89aa291e851c9be8a64ecfdb1434e83c9:/cluster.c diff --git a/cluster.c b/cluster.c index 554b4a8..8f9c01e 100644 --- a/cluster.c +++ b/cluster.c @@ -1,6 +1,6 @@ // L2TPNS Clustering Stuff -char const *cvs_id_cluster = "$Id: cluster.c,v 1.26.2.7 2005/05/22 04:15:32 bodea Exp $"; +char const *cvs_id_cluster = "$Id: cluster.c,v 1.26.2.12 2005/07/03 02:58:06 bodea Exp $"; #include #include @@ -192,7 +192,7 @@ static void add_type(char **p, int type, int more, char *data, int size) } // advertise our presence via BGP or gratuitous ARP -static void advertise(void) +static void advertise_routes(void) { #ifdef BGP if (bgp_configured) @@ -203,6 +203,15 @@ static void advertise(void) send_garp(config->bind_address); // Start taking traffic. } +// withdraw our routes (BGP only) +static void withdraw_routes(void) +{ +#ifdef BGP + if (bgp_configured) + bgp_enable_routing(0); +#endif /* BGP */ +} + static void cluster_uptodate(void) { if (config->cluster_iam_uptodate) @@ -214,7 +223,7 @@ static void cluster_uptodate(void) config->cluster_iam_uptodate = 1; LOG(0, 0, 0, "Now uptodate with master.\n"); - advertise(); + advertise_routes(); } // @@ -354,9 +363,9 @@ static void send_heartbeat(int seq, char *data, int size) } // -// Send an 'i am alive' message to every machine in the cluster, or to a single peer +// Send an 'i am alive' message to every machine in the cluster. // -static void send_ping(time_t basetime, in_addr_t peer) +void cluster_send_ping(time_t basetime) { char buff[100 + sizeof(pingt)]; char *p = buff; @@ -365,29 +374,15 @@ static void send_ping(time_t basetime, in_addr_t peer) if (config->cluster_iam_master && basetime) // We're heartbeating so no need to ping. return; + LOG(5, 0, 0, "Sending cluster ping...\n"); + x.ver = 1; x.addr = config->bind_address; x.undef = config->cluster_undefined_sessions + config->cluster_undefined_tunnels; x.basetime = basetime; add_type(&p, C_PING, basetime, (char *) &x, sizeof(x)); - - if (peer) - peer_send_data(peer, buff, (p-buff)); - else - cluster_send_data(buff, (p-buff) ); -} - -void cluster_send_ping(time_t basetime) -{ - LOG(5, 0, 0, "Sending cluster ping...\n"); - send_ping(0, basetime); -} - -void peer_send_ping(in_addr_t peer, time_t basetime) -{ - LOG(5, 0, 0, "Sending unicast ping to %s...\n", fmtaddr(peer, 0)); - send_ping(peer, basetime); + cluster_send_data(buff, (p-buff) ); } // @@ -470,17 +465,22 @@ void cluster_check_slaves(void) continue; // Shutdown peer! Skip them. if (peers[i].uptodate) - have_peers = 1; - - if (!peers[i].uptodate) + have_peers++; + else config->cluster_iam_uptodate = 0; // Start fast heartbeats } -#ifdef BGP - // in a cluster, withdraw/add routes when we get a peer/lose all peers - if (bgp_configured && have_peers != had_peers) - bgp_enable_routing(!have_peers); -#endif /* BGP */ + // in a cluster, withdraw/add routes when we get a peer/lose peers + if (have_peers != had_peers) + { + if (had_peers < config->cluster_master_min_adv && + have_peers >= config->cluster_master_min_adv) + withdraw_routes(); + + else if (had_peers >= config->cluster_master_min_adv && + have_peers < config->cluster_master_min_adv) + advertise_routes(); + } } // @@ -493,6 +493,7 @@ void cluster_check_master(void) int last_free = 0; clockt t = TIME; static int probed = 0; + int have_peers; if (config->cluster_iam_master) return; // Only runs on the slaves... @@ -522,10 +523,15 @@ void cluster_check_master(void) return; // Everything's ok! config->cluster_last_hb = TIME + 1; // Just the one election thanks. + config->cluster_master_address = 0; LOG(0, 0, 0, "Master timed out! Holding election...\n"); - for (i = 0; i < num_peers; i++) + // In the process of shutting down, can't be master + if (main_quit) + return; + + for (i = have_peers = 0; i < num_peers; i++) { if ((peers[i].timestamp + config->cluster_hb_timeout) < t) continue; // Stale peer! Skip them. @@ -543,6 +549,9 @@ void cluster_check_master(void) LOG(1, 0, 0, "Expecting %s to become master\n", fmtaddr(peers[i].peer, 0)); return; // They'll win the election. Wait for them to come up. } + + if (peers[i].uptodate) + have_peers++; } // Wow. it's been ages since I last heard a heartbeat @@ -550,10 +559,14 @@ void cluster_check_master(void) // to become a master!!! config->cluster_iam_master = 1; - config->cluster_master_address = 0; LOG(0, 0, 0, "I am declaring myself the master!\n"); + if (have_peers < config->cluster_master_min_adv) + advertise_routes(); + else + withdraw_routes(); + if (config->cluster_seq_number == -1) config->cluster_seq_number = 0; @@ -634,14 +647,6 @@ void cluster_check_master(void) config->cluster_undefined_tunnels = 0; config->cluster_iam_uptodate = 1; // assume all peers are up-to-date - if (!num_peers) // lone master - advertise(); -#ifdef BGP - else if (bgp_configured) - bgp_enable_routing(0); -#endif /* BGP */ - - // FIXME. We need to fix up the tunnel control message // queue here! There's a number of other variables we // should also update. @@ -917,9 +922,10 @@ static int cluster_catchup_slave(int seq, in_addr_t slave) LOG(1, 0, 0, "Slave %s sent LASTSEEN with seq %d\n", fmtaddr(slave, 0), seq); if (!config->cluster_iam_master) { - LOG(1, 0, 0, "Got LASTSEEN but I'm not a master! Sending a PING.\n"); - // Send a ping to the slave so they know we're no longer a master - peer_send_ping(slave, basetime); + LOG(1, 0, 0, "Got LASTSEEN but I'm not a master! Redirecting it to %s.\n", + fmtaddr(config->cluster_master_address, 0)); + + peer_send_message(slave, C_MASTER, config->cluster_master_address, NULL, 0); return 0; } @@ -1034,6 +1040,20 @@ static int cluster_add_peer(in_addr_t peer, time_t basetime, pingt *pp, int size return 1; } +// A slave responds with C_MASTER when it gets a message which should have gone to a master. +static int cluster_set_master(in_addr_t peer, in_addr_t master) +{ + if (config->cluster_iam_master) // Sanity... + return 0; + + LOG(3, 0, 0, "Peer %s set the master to %s...\n", fmtaddr(peer, 0), + fmtaddr(master, 1)); + + config->cluster_master_address = master; + cluster_check_master(); + return 0; +} + /* Handle the slave updating the byte counters for the master. */ // // Note that we don't mark the session as dirty; We rely on @@ -1223,8 +1243,7 @@ static int cluster_process_heartbeat(uint8_t *data, int size, int more, uint8_t // Note that after a clean failover, the cluster_master_address // is cleared, so this doesn't run. // - if (config->cluster_master_address && addr != config->cluster_master_address - && (config->cluster_last_hb + config->cluster_hb_timeout - 11) > TIME) { + if (config->cluster_master_address && addr != config->cluster_master_address) { LOG(0, 0, 0, "Ignoring stray heartbeat from %s, current master %s has not yet timed out (last heartbeat %.1f seconds ago).\n", fmtaddr(addr, 0), fmtaddr(config->cluster_master_address, 1), 0.1 * (TIME - config->cluster_last_hb)); @@ -1408,10 +1427,13 @@ int processcluster(char *data, int size, in_addr_t addr) s -= sizeof(uint32_t); switch (type) { - case C_PING: // Update the peers table. + case C_PING: // Update the peers table. return cluster_add_peer(addr, more, (pingt *) p, s); - case C_LASTSEEN: // Catch up a slave (slave missed a packet). + case C_MASTER: // Our master is wrong + return cluster_set_master(addr, more); + + case C_LASTSEEN: // Catch up a slave (slave missed a packet). return cluster_catchup_slave(more, addr); case C_FORWARD: { // Forwarded control packet. pass off to processudp. @@ -1452,6 +1474,11 @@ int processcluster(char *data, int size, in_addr_t addr) return 0; case C_BYTES: + if (!config->cluster_iam_master) { + LOG(0, 0, 0, "I'm not the master, but I got a C_BYTES from %s?\n", fmtaddr(addr, 0)); + return -1; + } + return cluster_handle_bytes(p, s); case C_KILL: // The master asked us to die!? (usually because we're too out of date).