Don't assume BGP peer sends back negotiated hold time, pick smallest
[l2tpns.git] / bgp.c
1 /*
2 * BGPv4
3 * Used to advertise routes for upstream (l2tp port, rather than gratuitous
4 * arp) and downstream--allowing routers to load-balance both.
5 *
6 * Implementation limitations:
7 * - We never listen for incoming connections (session always initiated by us).
8 * - Any routes advertised by the peer are accepted, but ignored.
9 * - No password support; neither RFC1771 (which no-one seems to do anyway)
10 * nor RFC2385 (which requires a kernel patch on 2.4 kernels).
11 */
12
/* CVS "$Id$" keyword string identifying the revision of this file */
const char *cvs_id_bgp = "$Id: bgp.c,v 1.7 2004/11/15 06:49:56 bodea Exp $";
14
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <string.h>
18 #include <time.h>
19 #include <errno.h>
20 #include <sys/socket.h>
21 #include <netinet/in.h>
22 #include <arpa/inet.h>
23 #include <netdb.h>
24 #include <fcntl.h>
25
26 #include "l2tpns.h"
27 #include "bgp.h"
28 #include "util.h"
29
30 static void bgp_clear(struct bgp_peer *peer);
31 static void bgp_set_retry(struct bgp_peer *peer);
32 static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx);
33 static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
34 struct bgp_route_list *new);
35
36 static void bgp_free_routes(struct bgp_route_list *routes);
37 static char const *bgp_msg_type_str(u8 type);
38 static int bgp_connect(struct bgp_peer *peer);
39 static int bgp_handle_connect(struct bgp_peer *peer);
40 static int bgp_write(struct bgp_peer *peer);
41 static int bgp_read(struct bgp_peer *peer);
42 static int bgp_handle_input(struct bgp_peer *peer);
43 static int bgp_send_open(struct bgp_peer *peer);
44 static int bgp_send_keepalive(struct bgp_peer *peer);
45 static int bgp_send_update(struct bgp_peer *peer);
46 static int bgp_send_notification(struct bgp_peer *peer, u8 code, u8 subcode);
47
48 static u16 our_as;
49 static struct bgp_route_list *bgp_routes = 0;
50
51 int bgp_configured = 0;
52 struct bgp_peer *bgp_peers = 0;
53
54 /* prepare peer structure, globals */
55 int bgp_setup(int as)
56 {
57 int i;
58 struct bgp_peer *peer;
59
60 for (i = 0; i < BGP_NUM_PEERS; i++)
61 {
62 peer = &bgp_peers[i];
63 memset(peer, 0, sizeof(*peer));
64
65 peer->addr = INADDR_NONE;
66 peer->sock = -1;
67 peer->state = peer->next_state = Disabled;
68
69 if (!((peer->outbuf = malloc(sizeof(*peer->outbuf)))
70 && (peer->inbuf = malloc(sizeof(*peer->inbuf)))))
71 {
72 LOG(0, 0, 0, 0, "Can't allocate buffers for bgp peer (%s)\n",
73 strerror(errno));
74
75 return 0;
76 }
77 }
78
79 if (as < 1)
80 as = 0;
81
82 if ((our_as = as))
83 return 0;
84
85 bgp_routes = 0;
86 bgp_configured = 0; /* set by bgp_start */
87
88 return 1;
89 }
90
91 /* start connection with a peer */
92 int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive, int hold, int enable)
93 {
94 struct hostent *h;
95 int ibgp;
96 int i;
97 struct bgp_path_attr a;
98 char path_attrs[64];
99 char *p = path_attrs;
100 in_addr_t ip;
101 u32 metric = htonl(BGP_METRIC);
102 u32 no_export = htonl(BGP_COMMUNITY_NO_EXPORT);
103
104 if (!our_as)
105 return 0;
106
107 if (peer->state != Disabled)
108 bgp_halt(peer);
109
110 snprintf(peer->name, sizeof(peer->name), "%s", name);
111
112 if (!(h = gethostbyname(name)) || h->h_addrtype != AF_INET)
113 {
114 LOG(0, 0, 0, 0, "Can't get address for BGP peer %s (%s)\n",
115 name, h ? "no address" : hstrerror(h_errno));
116
117 return 0;
118 }
119
120 memcpy(&peer->addr, h->h_addr, sizeof(peer->addr));
121 peer->as = as > 0 ? as : our_as;
122 ibgp = peer->as == our_as;
123
124 /* set initial timer values */
125 peer->init_keepalive = keepalive == -1 ? BGP_KEEPALIVE_TIME : keepalive;
126 peer->init_hold = hold == -1 ? BGP_HOLD_TIME : hold;
127
128 if (peer->init_hold < 3)
129 peer->init_hold = 3;
130
131 if (peer->init_keepalive * 3 > peer->init_hold)
132 peer->init_keepalive = peer->init_hold / 3;
133
134 /* clear buffers, go to Idle state */
135 peer->next_state = Idle;
136 bgp_clear(peer);
137
138 /* set initial routing state */
139 peer->routing = enable;
140
141 /* all our routes use the same attributes, so prepare it in advance */
142 if (peer->path_attrs)
143 free(peer->path_attrs);
144
145 peer->path_attr_len = 0;
146
147 /* ORIGIN */
148 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
149 a.code = BGP_PATH_ATTR_CODE_ORIGIN;
150 a.data.s.len = 1;
151 a.data.s.value[0] = BGP_PATH_ATTR_CODE_ORIGIN_IGP;
152
153 #define ADD_ATTRIBUTE() do { \
154 i = BGP_PATH_ATTR_SIZE(a); \
155 memcpy(p, &a, i); \
156 p += i; \
157 peer->path_attr_len += i; } while (0)
158
159 ADD_ATTRIBUTE();
160
161 /* AS_PATH */
162 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
163 a.code = BGP_PATH_ATTR_CODE_AS_PATH;
164 if (ibgp)
165 {
166 /* empty path */
167 a.data.s.len = 0;
168 }
169 else
170 {
171 /* just our AS */
172 struct {
173 u8 type;
174 u8 len;
175 u16 value;
176 } as_path = {
177 BGP_PATH_ATTR_CODE_AS_PATH_AS_SEQUENCE,
178 1,
179 htons(our_as),
180 };
181
182 a.data.s.len = sizeof(as_path);
183 memcpy(&a.data.s.value, &as_path, sizeof(as_path));
184 }
185
186 ADD_ATTRIBUTE();
187
188 /* NEXT_HOP */
189 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
190 a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
191 ip = my_address; /* we're it */
192 a.data.s.len = sizeof(ip);
193 memcpy(a.data.s.value, &ip, sizeof(ip));
194
195 ADD_ATTRIBUTE();
196
197 /* MULTI_EXIT_DISC */
198 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
199 a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC;
200 a.data.s.len = sizeof(metric);
201 memcpy(a.data.s.value, &metric, sizeof(metric));
202
203 ADD_ATTRIBUTE();
204
205 if (ibgp)
206 {
207 u32 local_pref = htonl(BGP_LOCAL_PREF);
208
209 /* LOCAL_PREF */
210 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
211 a.code = BGP_PATH_ATTR_CODE_LOCAL_PREF;
212 a.data.s.len = sizeof(local_pref);
213 memcpy(a.data.s.value, &local_pref, sizeof(local_pref));
214
215 ADD_ATTRIBUTE();
216 }
217
218 /* COMMUNITIES */
219 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_TRANS;
220 a.code = BGP_PATH_ATTR_CODE_COMMUNITIES;
221 a.data.s.len = sizeof(no_export);
222 memcpy(a.data.s.value, &no_export, sizeof(no_export));
223
224 ADD_ATTRIBUTE();
225
226 if (!(peer->path_attrs = malloc(peer->path_attr_len)))
227 {
228 LOG(0, 0, 0, 0, "Can't allocate path_attrs for %s (%s)\n",
229 name, strerror(errno));
230
231 return 0;
232 }
233
234 memcpy(peer->path_attrs, path_attrs, peer->path_attr_len);
235
236 LOG(4, 0, 0, 0, "Initiating BGP connection to %s (routing %s)\n",
237 name, enable ? "enabled" : "suspended");
238
239 /* we have at least one peer configured */
240 bgp_configured = 1;
241
242 /* connect */
243 return bgp_connect(peer);
244 }
245
246 /* clear counters, timers, routes and buffers; close socket; move to
247 next_state, which may be Disabled or Idle */
248 static void bgp_clear(struct bgp_peer *peer)
249 {
250 if (peer->sock != -1)
251 {
252 close(peer->sock);
253 peer->sock = -1;
254 }
255
256 peer->keepalive_time = 0;
257 peer->expire_time = 0;
258
259 peer->keepalive = peer->init_keepalive;
260 peer->hold = peer->init_hold;
261
262 bgp_free_routes(peer->routes);
263 peer->routes = 0;
264
265 peer->outbuf->packet.header.len = 0;
266 peer->outbuf->done = 0;
267 peer->inbuf->packet.header.len = 0;
268 peer->inbuf->done = 0;
269
270 peer->cli_flag = 0;
271
272 if (peer->state != peer->next_state)
273 {
274 peer->state = peer->next_state;
275 peer->state_time = time_now;
276
277 LOG(4, 0, 0, 0, "BGP peer %s: state %s\n", peer->name,
278 bgp_state_str(peer->next_state));
279 }
280 }
281
282 /* initiate a clean shutdown */
283 void bgp_stop(struct bgp_peer *peer)
284 {
285 LOG(4, 0, 0, 0, "Terminating BGP connection to %s\n", peer->name);
286 bgp_send_notification(peer, BGP_ERR_CEASE, 0);
287 }
288
289 /* drop connection (if any) and set state to Disabled */
290 void bgp_halt(struct bgp_peer *peer)
291 {
292 LOG(4, 0, 0, 0, "Aborting BGP connection to %s\n", peer->name);
293 peer->next_state = Disabled;
294 bgp_clear(peer);
295 }
296
297 /* drop connection (if any) and set to Idle for connection retry */
298 int bgp_restart(struct bgp_peer *peer)
299 {
300 peer->next_state = Idle;
301 bgp_clear(peer);
302
303 /* restart now */
304 peer->retry_time = time_now;
305 peer->retry_count = 0;
306
307 /* connect */
308 return bgp_connect(peer);
309 }
310
311 static void bgp_set_retry(struct bgp_peer *peer)
312 {
313 if (peer->retry_count++ < BGP_MAX_RETRY)
314 {
315 peer->retry_time = time_now + (BGP_RETRY_BACKOFF * peer->retry_count);
316 peer->next_state = Idle;
317 bgp_clear(peer);
318 }
319 else
320 bgp_halt(peer); /* give up */
321 }
322
323 /* convert ip/mask to CIDR notation */
324 static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx)
325 {
326 int i;
327 u32 b;
328
329 /* convert to prefix notation */
330 pfx->len = 32;
331 pfx->prefix = ip;
332
333 if (!mask) /* bogus */
334 mask = 0xffffffff;
335
336 for (i = 0; i < 32 && ((b = ntohl(1 << i)), !(mask & b)); i++)
337 {
338 pfx->len--;
339 pfx->prefix &= ~b;
340 }
341 }
342
343 /* insert route into list; sorted */
344 static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
345 struct bgp_route_list *new)
346 {
347 struct bgp_route_list *p = head;
348 struct bgp_route_list *e = 0;
349
350 while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0)
351 {
352 e = p;
353 p = p->next;
354 }
355
356 if (e)
357 {
358 new->next = e->next;
359 e->next = new;
360 }
361 else
362 {
363 new->next = head;
364 head = new;
365 }
366
367 return head;
368 }
369
370 /* add route to list for peers */
371 /*
372 * Note: this doesn't do route aggregation, nor drop routes if a less
373 * specific match already exists (partly because I'm lazy, but also so
374 * that if that route is later deleted we don't have to be concerned
375 * about adding back the more specific one).
376 */
377 int bgp_add_route(in_addr_t ip, in_addr_t mask)
378 {
379 struct bgp_route_list *r = bgp_routes;
380 struct bgp_route_list add;
381 int i;
382
383 bgp_cidr(ip, mask, &add.dest);
384 add.next = 0;
385
386 /* check for duplicate */
387 while (r)
388 {
389 i = memcmp(&r->dest, &add.dest, sizeof(r->dest));
390 if (!i)
391 return 1; /* already covered */
392
393 if (i > 0)
394 break;
395
396 r = r->next;
397 }
398
399 /* insert into route list; sorted */
400 if (!(r = malloc(sizeof(*r))))
401 {
402 LOG(0, 0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
403 inet_toa(add.dest.prefix), add.dest.len, strerror(errno));
404
405 return 0;
406 }
407
408 memcpy(r, &add, sizeof(*r));
409 bgp_routes = bgp_insert_route(bgp_routes, r);
410
411 /* flag established peers for update */
412 for (i = 0; i < BGP_NUM_PEERS; i++)
413 if (bgp_peers[i].state == Established)
414 bgp_peers[i].update_routes = 1;
415
416 LOG(4, 0, 0, 0, "Registered BGP route %s/%d\n", inet_toa(add.dest.prefix),
417 add.dest.len);
418
419 return 1;
420 }
421
422 /* remove route from list for peers */
423 int bgp_del_route(in_addr_t ip, in_addr_t mask)
424 {
425 struct bgp_route_list *r = bgp_routes;
426 struct bgp_route_list *e = 0;
427 struct bgp_route_list del;
428 int i;
429
430 bgp_cidr(ip, mask, &del.dest);
431 del.next = 0;
432
433 /* find entry in routes list and remove */
434 while (r)
435 {
436 i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
437 if (!i)
438 {
439 if (e)
440 e->next = r->next;
441 else
442 bgp_routes = r->next;
443
444 free(r);
445 break;
446 }
447
448 e = r;
449
450 if (i > 0)
451 r = 0; /* stop */
452 else
453 r = r->next;
454 }
455
456 /* not found */
457 if (!r)
458 return 1;
459
460 /* flag established peers for update */
461 for (i = 0; i < BGP_NUM_PEERS; i++)
462 if (bgp_peers[i].state == Established)
463 bgp_peers[i].update_routes = 1;
464
465 LOG(4, 0, 0, 0, "Removed BGP route %s/%d\n", inet_toa(del.dest.prefix),
466 del.dest.len);
467
468 return 1;
469 }
470
471 /* enable or disable routing */
472 void bgp_enable_routing(int enable)
473 {
474 int i;
475
476 for (i = 0; i < BGP_NUM_PEERS; i++)
477 {
478 bgp_peers[i].routing = enable;
479
480 /* flag established peers for update */
481 if (bgp_peers[i].state == Established)
482 bgp_peers[i].update_routes = 1;
483 }
484
485 LOG(4, 0, 0, 0, "%s BGP routing\n", enable ? "Enabled" : "Suspended");
486 }
487
488 /* return a bitmask indicating if the socket should be added to the
489 read set (1) and or write set (2) for select */
490 int bgp_select_state(struct bgp_peer *peer)
491 {
492 int flags = 0;
493
494 if (!bgp_configured)
495 return 0;
496
497 if (peer->state == Disabled || peer->state == Idle)
498 return 0;
499
500 if (peer->inbuf->done < BGP_MAX_PACKET_SIZE)
501 flags |= 1;
502
503 if (peer->state == Connect || /* connection in progress */
504 peer->update_routes || /* routing updates */
505 peer->outbuf->packet.header.len) /* pending output */
506 flags |= 2;
507
508 return flags;
509 }
510
511 /* process bgp peer */
512 int bgp_process(struct bgp_peer *peer, int readable, int writable)
513 {
514 if (!bgp_configured)
515 return 0;
516
517 if (*peer->name && peer->cli_flag == BGP_CLI_RESTART)
518 return bgp_restart(peer);
519
520 if (peer->state == Disabled)
521 return 1;
522
523 if (peer->cli_flag)
524 {
525 switch (peer->cli_flag)
526 {
527 case BGP_CLI_SUSPEND:
528 if (peer->routing)
529 {
530 peer->routing = 0;
531 if (peer->state == Established)
532 peer->update_routes = 1;
533 }
534
535 break;
536
537 case BGP_CLI_ENABLE:
538 if (!peer->routing)
539 {
540 peer->routing = 1;
541 if (peer->state == Established)
542 peer->update_routes = 1;
543 }
544
545 break;
546 }
547
548 peer->cli_flag = 0;
549 }
550
551 /* handle empty/fill of buffers */
552 if (writable)
553 {
554 int r = 1;
555 if (peer->state == Connect)
556 r = bgp_handle_connect(peer);
557 else if (peer->outbuf->packet.header.len)
558 r = bgp_write(peer);
559
560 if (!r)
561 return 0;
562 }
563
564 if (readable)
565 {
566 if (!bgp_read(peer))
567 return 0;
568 }
569
570 /* process input buffer contents */
571 while (peer->inbuf->done >= sizeof(peer->inbuf->packet.header)
572 && !peer->outbuf->packet.header.len) /* may need to queue a response */
573 {
574 if (bgp_handle_input(peer) < 0)
575 return 0;
576 }
577
578 /* process pending updates */
579 if (peer->update_routes
580 && !peer->outbuf->packet.header.len) /* ditto */
581 {
582 if (!bgp_send_update(peer))
583 return 0;
584 }
585
586 /* process timers */
587 if (peer->state == Established)
588 {
589 if (time_now > peer->expire_time)
590 {
591 LOG(1, 0, 0, 0, "No message from BGP peer %s in %ds\n",
592 peer->name, peer->hold);
593
594 bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
595 return 0;
596 }
597
598 if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
599 bgp_send_keepalive(peer);
600 }
601 else if (peer->state == Idle)
602 {
603 if (time_now > peer->retry_time)
604 return bgp_connect(peer);
605 }
606 else if (time_now > peer->state_time + BGP_STATE_TIME)
607 {
608 LOG(1, 0, 0, 0, "%s timer expired for BGP peer %s\n",
609 bgp_state_str(peer->state), peer->name);
610
611 return bgp_restart(peer);
612 }
613
614 return 1;
615 }
616
617 static void bgp_free_routes(struct bgp_route_list *routes)
618 {
619 struct bgp_route_list *tmp;
620
621 while ((tmp = routes))
622 {
623 routes = tmp->next;
624 free(tmp);
625 }
626 }
627
628 char const *bgp_state_str(enum bgp_state state)
629 {
630 switch (state)
631 {
632 case Disabled: return "Disabled";
633 case Idle: return "Idle";
634 case Connect: return "Connect";
635 case Active: return "Active";
636 case OpenSent: return "OpenSent";
637 case OpenConfirm: return "OpenConfirm";
638 case Established: return "Established";
639 }
640
641 return "?";
642 }
643
644 static char const *bgp_msg_type_str(u8 type)
645 {
646 switch (type)
647 {
648 case BGP_MSG_OPEN: return "OPEN";
649 case BGP_MSG_UPDATE: return "UPDATE";
650 case BGP_MSG_NOTIFICATION: return "NOTIFICATION";
651 case BGP_MSG_KEEPALIVE: return "KEEPALIVE";
652 }
653
654 return "?";
655 }
656
657 /* attempt to connect to peer */
658 static int bgp_connect(struct bgp_peer *peer)
659 {
660 static int bgp_port = 0;
661 struct sockaddr_in addr;
662
663 if (!bgp_port)
664 {
665 struct servent *serv;
666 if (!(serv = getservbyname("bgp", "tcp")))
667 {
668 LOG(0, 0, 0, 0, "Can't get bgp service (%s)\n", strerror(errno));
669 return 0;
670 }
671
672 bgp_port = serv->s_port;
673 }
674
675 if ((peer->sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
676 {
677 LOG(0, 0, 0, 0, "Can't create a socket for BGP peer %s (%s)\n",
678 peer->name, strerror(errno));
679
680 peer->state = peer->next_state = Disabled;
681 return 0;
682 }
683
684 /* set to non-blocking */
685 fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
686
687 /* try connect */
688 memset(&addr, 0, sizeof(addr));
689 addr.sin_family = AF_INET;
690 addr.sin_port = bgp_port;
691 addr.sin_addr.s_addr = peer->addr;
692
693 while (connect(peer->sock, (struct sockaddr *) &addr, sizeof(addr)) == -1)
694 {
695 if (errno == EINTR) /* SIGALARM handler */
696 continue;
697
698 if (errno != EINPROGRESS)
699 {
700 LOG(1, 0, 0, 0, "Can't connect to BGP peer %s (%s)\n",
701 inet_ntoa(addr.sin_addr), strerror(errno));
702
703 bgp_set_retry(peer);
704 return 0;
705 }
706
707 peer->state = Connect;
708 peer->state_time = time_now;
709
710 LOG(4, 0, 0, 0, "BGP peer %s: state Connect\n", peer->name);
711 return 1;
712 }
713
714 peer->state = Active;
715 peer->state_time = time_now;
716 peer->retry_time = peer->retry_count = 0;
717
718 LOG(4, 0, 0, 0, "BGP peer %s: state Active\n", inet_ntoa(addr.sin_addr));
719
720 return bgp_send_open(peer);
721 }
722
723 /* complete partial connection (state = Connect) */
724 static int bgp_handle_connect(struct bgp_peer *peer)
725 {
726 int err = 0;
727 int len = sizeof(int);
728 getsockopt(peer->sock, SOL_SOCKET, SO_ERROR, &err, &len);
729 if (err)
730 {
731 LOG(1, 0, 0, 0, "Can't connect to BGP peer %s (%s)\n", peer->name,
732 strerror(err));
733
734 bgp_set_retry(peer);
735 return 0;
736 }
737
738 peer->state = Active;
739 peer->state_time = time_now;
740
741 LOG(4, 0, 0, 0, "BGP peer %s: state Active\n", peer->name);
742
743 return bgp_send_open(peer);
744 }
745
746 /* initiate a write */
747 static int bgp_write(struct bgp_peer *peer)
748 {
749 int len = htons(peer->outbuf->packet.header.len);
750 int r;
751
752 while ((r = write(peer->sock, &peer->outbuf->packet + peer->outbuf->done,
753 len - peer->outbuf->done)) == -1)
754 {
755 if (errno == EINTR)
756 continue;
757
758 if (errno == EAGAIN)
759 return 1;
760
761 if (errno == EPIPE)
762 LOG(1, 0, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
763 else
764 LOG(1, 0, 0, 0, "Can't write to BGP peer %s (%s)\n", peer->name,
765 strerror(errno));
766
767 bgp_set_retry(peer);
768 return 0;
769 }
770
771 if (r < len)
772 {
773 peer->outbuf->done += r;
774 return 1;
775 }
776
777 LOG(4, 0, 0, 0, "Sent %s to BGP peer %s\n",
778 bgp_msg_type_str(peer->outbuf->packet.header.type), peer->name);
779
780 peer->outbuf->packet.header.len = 0;
781 peer->outbuf->done = 0;
782
783 if (peer->state == Established)
784 peer->keepalive_time = time_now + peer->keepalive;
785
786 if (peer->state != peer->next_state)
787 {
788 if (peer->next_state == Disabled || peer->next_state == Idle)
789 {
790 bgp_clear(peer);
791 return 0;
792 }
793
794 peer->state = peer->next_state;
795 peer->state_time = time_now;
796
797 LOG(4, 0, 0, 0, "BGP peer %s: state %s\n", peer->name,
798 bgp_state_str(peer->state));
799 }
800
801 return 1;
802 }
803
804 /* initiate a read */
805 static int bgp_read(struct bgp_peer *peer)
806 {
807 int r;
808
809 while ((r = read(peer->sock, &peer->inbuf->packet + peer->inbuf->done,
810 BGP_MAX_PACKET_SIZE - peer->inbuf->done)) < 1)
811 {
812 if (!r)
813 {
814 LOG(1, 0, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
815 }
816 else
817 {
818 if (errno == EINTR)
819 continue;
820
821 if (errno == EAGAIN)
822 return 1;
823
824 LOG(1, 0, 0, 0, "Can't read from BGP peer %s (%s)\n", peer->name,
825 strerror(errno));
826 }
827
828 bgp_set_retry(peer);
829 return 0;
830 }
831
832 peer->inbuf->done += r;
833 return 1;
834 }
835
836 /* process buffered packets */
837 static int bgp_handle_input(struct bgp_peer *peer)
838 {
839 struct bgp_packet *p = &peer->inbuf->packet;
840 int len = ntohs(p->header.len);
841
842 if (len > BGP_MAX_PACKET_SIZE)
843 {
844 LOG(1, 0, 0, 0, "Bad header length from BGP %s\n", peer->name);
845 bgp_send_notification(peer, BGP_ERR_HEADER, BGP_ERR_HDR_BAD_LEN);
846 return 0;
847 }
848
849 if (peer->inbuf->done < len)
850 return 0;
851
852 LOG(4, 0, 0, 0, "Received %s from BGP peer %s\n",
853 bgp_msg_type_str(p->header.type), peer->name);
854
855 switch (p->header.type)
856 {
857 case BGP_MSG_OPEN:
858 {
859 struct bgp_data_open data;
860 int hold;
861 int i;
862
863 for (i = 0; i < sizeof(p->header.marker); i++)
864 {
865 if ((unsigned char) p->header.marker[i] != 0xff)
866 {
867 LOG(1, 0, 0, 0, "Invalid marker from BGP peer %s\n",
868 peer->name);
869
870 bgp_send_notification(peer, BGP_ERR_HEADER,
871 BGP_ERR_HDR_NOT_SYNC);
872
873 return 0;
874 }
875 }
876
877 if (peer->state != OpenSent)
878 {
879 LOG(1, 0, 0, 0, "OPEN from BGP peer %s in %s state\n",
880 peer->name, bgp_state_str(peer->state));
881
882 bgp_send_notification(peer, BGP_ERR_FSM, 0);
883 return 0;
884 }
885
886 memcpy(&data, p->data, len - sizeof(p->header));
887
888 if (data.version != BGP_VERSION)
889 {
890 LOG(1, 0, 0, 0, "Bad version (%d) sent by BGP peer %s\n",
891 (int) data.version, peer->name);
892
893 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_VERSION);
894 return 0;
895 }
896
897 if (ntohs(data.as) != peer->as)
898 {
899 LOG(1, 0, 0, 0, "Bad AS sent by BGP peer %s (got %d, "
900 "expected %d)\n", peer->name, (int) htons(data.as),
901 (int) peer->as);
902
903 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_BAD_AS);
904 return 0;
905 }
906
907 if ((hold = ntohs(data.hold_time)) < 3)
908 {
909 LOG(1, 0, 0, 0, "Bad hold time (%d) from BGP peer %s\n",
910 hold, peer->name);
911
912 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_HOLD_TIME);
913 return 0;
914 }
915
916 /* pick lowest hold time */
917 if (hold < peer->hold)
918 peer->hold = hold;
919
920 /* adjust our keepalive based on negotiated hold value */
921 if (peer->keepalive * 3 > peer->hold)
922 peer->keepalive = peer->hold / 3;
923
924 /* next transition requires an exchange of keepalives */
925 bgp_send_keepalive(peer);
926
927 /* FIXME: may need to check for optional params */
928 }
929
930 break;
931
932 case BGP_MSG_KEEPALIVE:
933 if (peer->state == OpenConfirm)
934 {
935 peer->state = peer->next_state = Established;
936 peer->state_time = time_now;
937 peer->keepalive_time = time_now + peer->keepalive;
938 peer->update_routes = 1;
939 peer->retry_count = 0;
940 peer->retry_time = 0;
941
942 LOG(4, 0, 0, 0, "BGP peer %s: state Established\n", peer->name);
943 }
944
945 break;
946
947 case BGP_MSG_NOTIFICATION:
948 if (len > sizeof(p->header))
949 {
950 struct bgp_data_notification *notification =
951 (struct bgp_data_notification *) p->data;
952
953 if (notification->error_code == BGP_ERR_CEASE)
954 {
955 LOG(4, 0, 0, 0, "BGP peer %s sent CEASE\n", peer->name);
956 bgp_halt(peer);
957 return 0;
958 }
959
960 /* FIXME: should handle more notifications */
961 LOG(4, 0, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n",
962 peer->name, (int) notification->error_code);
963 }
964
965 break;
966 }
967
968 /* reset timer */
969 peer->expire_time = time_now + peer->hold;
970
971 /* see if there's another message in the same packet/buffer */
972 if (peer->inbuf->done > len)
973 {
974 peer->inbuf->done -= len;
975 memmove(p, (char *) p + len, peer->inbuf->done);
976 }
977 else
978 {
979 peer->inbuf->packet.header.len = 0;
980 peer->inbuf->done = 0;
981 }
982
983 return peer->inbuf->done;
984 }
985
986 /* send/buffer OPEN message */
987 static int bgp_send_open(struct bgp_peer *peer)
988 {
989 struct bgp_data_open data;
990 u16 len = sizeof(peer->outbuf->packet.header);
991
992 memset(peer->outbuf->packet.header.marker, 0xff,
993 sizeof(peer->outbuf->packet.header.marker));
994
995 peer->outbuf->packet.header.type = BGP_MSG_OPEN;
996
997 data.version = BGP_VERSION;
998 data.as = htons(our_as);
999 data.hold_time = htons(peer->hold);
1000 data.identifier = my_address;
1001 data.opt_len = 0;
1002
1003 memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE);
1004 len += BGP_DATA_OPEN_SIZE;
1005
1006 peer->outbuf->packet.header.len = htons(len);
1007 peer->outbuf->done = 0;
1008 peer->next_state = OpenSent;
1009
1010 return bgp_write(peer);
1011 }
1012
1013 /* send/buffer KEEPALIVE message */
1014 static int bgp_send_keepalive(struct bgp_peer *peer)
1015 {
1016 memset(peer->outbuf->packet.header.marker, 0xff,
1017 sizeof(peer->outbuf->packet.header.marker));
1018
1019 peer->outbuf->packet.header.type = BGP_MSG_KEEPALIVE;
1020 peer->outbuf->packet.header.len =
1021 htons(sizeof(peer->outbuf->packet.header));
1022
1023 peer->outbuf->done = 0;
1024 peer->next_state = (peer->state == OpenSent) ? OpenConfirm : peer->state;
1025
1026 return bgp_write(peer);
1027 }
1028
1029 /* send/buffer UPDATE message */
1030 static int bgp_send_update(struct bgp_peer *peer)
1031 {
1032 u16 unf_len = 0;
1033 u16 attr_len;
1034 u16 len = sizeof(peer->outbuf->packet.header);
1035 struct bgp_route_list *have = peer->routes;
1036 struct bgp_route_list *want = peer->routing ? bgp_routes : 0;
1037 struct bgp_route_list *e = 0;
1038 struct bgp_route_list *add = 0;
1039 int s;
1040
1041 char *data = (char *) &peer->outbuf->packet.data;
1042
1043 /* need leave room for attr_len, bgp_path_attrs and one prefix */
1044 char *max = (char *) &peer->outbuf->packet.data
1045 + sizeof(peer->outbuf->packet.data)
1046 - sizeof(attr_len) - peer->path_attr_len - sizeof(struct bgp_ip_prefix);
1047
1048 /* skip over unf_len */
1049 data += sizeof(unf_len);
1050 len += sizeof(unf_len);
1051
1052 memset(peer->outbuf->packet.header.marker, 0xff,
1053 sizeof(peer->outbuf->packet.header.marker));
1054
1055 peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
1056
1057 peer->update_routes = 0; /* tentatively clear */
1058
1059 /* find differences */
1060 while ((have || want) && data < (max - sizeof(struct bgp_ip_prefix)))
1061 {
1062 if (have)
1063 s = want
1064 ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
1065 : -1;
1066 else
1067 s = 1;
1068
1069 if (s < 0) /* found one to delete */
1070 {
1071 struct bgp_route_list *tmp = have;
1072 have = have->next;
1073
1074 s = BGP_IP_PREFIX_SIZE(tmp->dest);
1075 memcpy(data, &tmp->dest, s);
1076 data += s;
1077 unf_len += s;
1078 len += s;
1079
1080 LOG(5, 0, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
1081 inet_toa(tmp->dest.prefix), tmp->dest.len, peer->name);
1082
1083 free(tmp);
1084
1085 if (e)
1086 e->next = have;
1087 else
1088 peer->routes = have;
1089 }
1090 else
1091 {
1092 if (!s) /* same */
1093 {
1094 e = have; /* stash the last found to relink above */
1095 have = have->next;
1096 want = want->next;
1097 }
1098 else if (s > 0) /* addition reqd. */
1099 {
1100 if (add)
1101 {
1102 peer->update_routes = 1; /* only one add per packet */
1103 if (!have)
1104 break;
1105 }
1106 else
1107 add = want;
1108
1109 if (want)
1110 want = want->next;
1111 }
1112 }
1113 }
1114
1115 if (have || want)
1116 peer->update_routes = 1; /* more to do */
1117
1118 /* anything changed? */
1119 if (!(unf_len || add))
1120 return 1;
1121
1122 /* go back and insert unf_len */
1123 unf_len = htons(unf_len);
1124 memcpy(&peer->outbuf->packet.data, &unf_len, sizeof(unf_len));
1125
1126 if (add)
1127 {
1128 if (!(e = malloc(sizeof(*e))))
1129 {
1130 LOG(0, 0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
1131 inet_toa(add->dest.prefix), add->dest.len, strerror(errno));
1132
1133 return 0;
1134 }
1135
1136 memcpy(e, add, sizeof(*e));
1137 e->next = 0;
1138 peer->routes = bgp_insert_route(peer->routes, e);
1139
1140 attr_len = htons(peer->path_attr_len);
1141 memcpy(data, &attr_len, sizeof(attr_len));
1142 data += sizeof(attr_len);
1143 len += sizeof(attr_len);
1144
1145 memcpy(data, peer->path_attrs, peer->path_attr_len);
1146 data += peer->path_attr_len;
1147 len += peer->path_attr_len;
1148
1149 s = BGP_IP_PREFIX_SIZE(add->dest);
1150 memcpy(data, &add->dest, s);
1151 data += s;
1152 len += s;
1153
1154 LOG(5, 0, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
1155 inet_toa(add->dest.prefix), add->dest.len, peer->name);
1156 }
1157 else
1158 {
1159 attr_len = 0;
1160 memcpy(data, &attr_len, sizeof(attr_len));
1161 data += sizeof(attr_len);
1162 len += sizeof(attr_len);
1163 }
1164
1165 peer->outbuf->packet.header.len = htons(len);
1166 peer->outbuf->done = 0;
1167
1168 return bgp_write(peer);
1169 }
1170
1171 /* send/buffer NOTIFICATION message */
1172 static int bgp_send_notification(struct bgp_peer *peer, u8 code, u8 subcode)
1173 {
1174 struct bgp_data_notification data;
1175 u16 len = 0;
1176
1177 data.error_code = code;
1178 len += sizeof(data.error_code);
1179
1180 data.error_subcode = subcode;
1181 len += sizeof(data.error_code);
1182
1183 memset(peer->outbuf->packet.header.marker, 0xff,
1184 sizeof(peer->outbuf->packet.header.marker));
1185
1186 peer->outbuf->packet.header.type = BGP_MSG_NOTIFICATION;
1187 peer->outbuf->packet.header.len =
1188 htons(sizeof(peer->outbuf->packet.header) + len);
1189
1190 memcpy(peer->outbuf->packet.data, &data, len);
1191
1192 peer->outbuf->done = 0;
1193 peer->next_state = code == BGP_ERR_CEASE ? Disabled : Idle;
1194
1195 /* we're dying; ignore any pending input */
1196 peer->inbuf->packet.header.len = 0;
1197 peer->inbuf->done = 0;
1198
1199 return bgp_write(peer);
1200 }