restart BGP on receipt of CEASE
[l2tpns.git] / bgp.c
1 /*
2 * BGPv4
3 * Used to advertise routes for upstream (l2tp port, rather than gratuitous
4 * arp) and downstream--allowing routers to load-balance both.
5 *
6 * Implementation limitations:
7 * - We never listen for incoming connections (session always initiated by us).
8 * - Any routes advertised by the peer are accepted, but ignored.
9 * - No password support; neither RFC1771 (which no-one seems to do anyway)
10 * nor RFC2385 (which requires a kernel patch on 2.4 kernels).
11 */
12
/* CVS $Id$ keyword string identifying this revision of bgp.c */
const char *cvs_id_bgp = "$Id: bgp.c,v 1.12 2005/09/02 23:39:36 bodea Exp $";
14
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <string.h>
18 #include <time.h>
19 #include <errno.h>
20 #include <sys/socket.h>
21 #include <netinet/in.h>
22 #include <arpa/inet.h>
23 #include <netdb.h>
24 #include <fcntl.h>
25
26 #include "l2tpns.h"
27 #include "bgp.h"
28 #include "util.h"
29
/* peer state management */
static void bgp_clear(struct bgp_peer *peer);
static void bgp_set_retry(struct bgp_peer *peer);

/* route list helpers */
static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx);
static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
    struct bgp_route_list *new);
static void bgp_free_routes(struct bgp_route_list *routes);

/* diagnostics */
static char const *bgp_msg_type_str(uint8_t type);

/* connection setup and raw socket I/O */
static int bgp_connect(struct bgp_peer *peer);
static int bgp_handle_connect(struct bgp_peer *peer);
static int bgp_write(struct bgp_peer *peer);
static int bgp_read(struct bgp_peer *peer);

/* message parsing and construction */
static int bgp_handle_input(struct bgp_peer *peer);
static int bgp_send_open(struct bgp_peer *peer);
static int bgp_send_keepalive(struct bgp_peer *peer);
static int bgp_send_update(struct bgp_peer *peer);
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode);
48
/* our AS number; stays zero until bgp_setup() is handed a positive value */
static uint16_t our_as;

/* sorted list of the routes registered through bgp_add_route() */
static struct bgp_route_list *bgp_routes = 0;

/* becomes 1 once bgp_start() has set up at least one peer */
int bgp_configured = 0;

/* peer table, indexed 0 .. BGP_NUM_PEERS - 1; not assigned anywhere in
   the code visible here, so presumably pointed at storage elsewhere */
struct bgp_peer *bgp_peers = 0;
54
55 /* prepare peer structure, globals */
56 int bgp_setup(int as)
57 {
58 int i;
59 struct bgp_peer *peer;
60
61 for (i = 0; i < BGP_NUM_PEERS; i++)
62 {
63 peer = &bgp_peers[i];
64 memset(peer, 0, sizeof(*peer));
65
66 peer->addr = INADDR_NONE;
67 peer->sock = -1;
68 peer->state = peer->next_state = Disabled;
69
70 if (!((peer->outbuf = malloc(sizeof(*peer->outbuf)))
71 && (peer->inbuf = malloc(sizeof(*peer->inbuf)))))
72 {
73 LOG(0, 0, 0, "Can't allocate buffers for bgp peer (%s)\n",
74 strerror(errno));
75
76 return 0;
77 }
78
79 peer->edata.type = FD_TYPE_BGP;
80 peer->edata.index = i;
81 peer->events = 0;
82 }
83
84 if (as < 1)
85 as = 0;
86
87 if ((our_as = as))
88 return 0;
89
90 bgp_routes = 0;
91 bgp_configured = 0; /* set by bgp_start */
92
93 return 1;
94 }
95
96 /* start connection with a peer */
97 int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
98 int hold, int enable)
99 {
100 struct hostent *h;
101 int ibgp;
102 int i;
103 struct bgp_path_attr a;
104 char path_attrs[64];
105 char *p = path_attrs;
106 in_addr_t ip;
107 uint32_t metric = htonl(BGP_METRIC);
108 uint32_t no_export = htonl(BGP_COMMUNITY_NO_EXPORT);
109
110 if (!our_as)
111 return 0;
112
113 if (peer->state != Disabled)
114 bgp_halt(peer);
115
116 snprintf(peer->name, sizeof(peer->name), "%s", name);
117
118 if (!(h = gethostbyname(name)) || h->h_addrtype != AF_INET)
119 {
120 LOG(0, 0, 0, "Can't get address for BGP peer %s (%s)\n",
121 name, h ? "no address" : hstrerror(h_errno));
122
123 return 0;
124 }
125
126 memcpy(&peer->addr, h->h_addr, sizeof(peer->addr));
127 peer->as = as > 0 ? as : our_as;
128 ibgp = peer->as == our_as;
129
130 /* set initial timer values */
131 peer->init_keepalive = keepalive == -1 ? BGP_KEEPALIVE_TIME : keepalive;
132 peer->init_hold = hold == -1 ? BGP_HOLD_TIME : hold;
133
134 if (peer->init_hold < 3)
135 peer->init_hold = 3;
136
137 if (peer->init_keepalive * 3 > peer->init_hold)
138 peer->init_keepalive = peer->init_hold / 3;
139
140 /* clear buffers, go to Idle state */
141 peer->next_state = Idle;
142 bgp_clear(peer);
143
144 /* set initial routing state */
145 peer->routing = enable;
146
147 /* all our routes use the same attributes, so prepare it in advance */
148 if (peer->path_attrs)
149 free(peer->path_attrs);
150
151 peer->path_attr_len = 0;
152
153 /* ORIGIN */
154 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
155 a.code = BGP_PATH_ATTR_CODE_ORIGIN;
156 a.data.s.len = 1;
157 a.data.s.value[0] = BGP_PATH_ATTR_CODE_ORIGIN_IGP;
158
159 #define ADD_ATTRIBUTE() do { \
160 i = BGP_PATH_ATTR_SIZE(a); \
161 memcpy(p, &a, i); \
162 p += i; \
163 peer->path_attr_len += i; } while (0)
164
165 ADD_ATTRIBUTE();
166
167 /* AS_PATH */
168 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
169 a.code = BGP_PATH_ATTR_CODE_AS_PATH;
170 if (ibgp)
171 {
172 /* empty path */
173 a.data.s.len = 0;
174 }
175 else
176 {
177 /* just our AS */
178 struct {
179 uint8_t type;
180 uint8_t len;
181 uint16_t value;
182 } as_path = {
183 BGP_PATH_ATTR_CODE_AS_PATH_AS_SEQUENCE,
184 1,
185 htons(our_as),
186 };
187
188 a.data.s.len = sizeof(as_path);
189 memcpy(&a.data.s.value, &as_path, sizeof(as_path));
190 }
191
192 ADD_ATTRIBUTE();
193
194 /* NEXT_HOP */
195 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
196 a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
197 ip = my_address; /* we're it */
198 a.data.s.len = sizeof(ip);
199 memcpy(a.data.s.value, &ip, sizeof(ip));
200
201 ADD_ATTRIBUTE();
202
203 /* MULTI_EXIT_DISC */
204 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
205 a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC;
206 a.data.s.len = sizeof(metric);
207 memcpy(a.data.s.value, &metric, sizeof(metric));
208
209 ADD_ATTRIBUTE();
210
211 if (ibgp)
212 {
213 uint32_t local_pref = htonl(BGP_LOCAL_PREF);
214
215 /* LOCAL_PREF */
216 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
217 a.code = BGP_PATH_ATTR_CODE_LOCAL_PREF;
218 a.data.s.len = sizeof(local_pref);
219 memcpy(a.data.s.value, &local_pref, sizeof(local_pref));
220
221 ADD_ATTRIBUTE();
222 }
223
224 /* COMMUNITIES */
225 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_TRANS;
226 a.code = BGP_PATH_ATTR_CODE_COMMUNITIES;
227 a.data.s.len = sizeof(no_export);
228 memcpy(a.data.s.value, &no_export, sizeof(no_export));
229
230 ADD_ATTRIBUTE();
231
232 if (!(peer->path_attrs = malloc(peer->path_attr_len)))
233 {
234 LOG(0, 0, 0, "Can't allocate path_attrs for %s (%s)\n",
235 name, strerror(errno));
236
237 return 0;
238 }
239
240 memcpy(peer->path_attrs, path_attrs, peer->path_attr_len);
241
242 LOG(4, 0, 0, "Initiating BGP connection to %s (routing %s)\n",
243 name, enable ? "enabled" : "suspended");
244
245 /* we have at least one peer configured */
246 bgp_configured = 1;
247
248 /* connect */
249 return bgp_connect(peer);
250 }
251
252 /* clear counters, timers, routes and buffers; close socket; move to
253 next_state, which may be Disabled or Idle */
254 static void bgp_clear(struct bgp_peer *peer)
255 {
256 if (peer->sock != -1)
257 {
258 close(peer->sock);
259 peer->sock = -1;
260 }
261
262 peer->keepalive_time = 0;
263 peer->expire_time = 0;
264
265 peer->keepalive = peer->init_keepalive;
266 peer->hold = peer->init_hold;
267
268 bgp_free_routes(peer->routes);
269 peer->routes = 0;
270
271 peer->outbuf->packet.header.len = 0;
272 peer->outbuf->done = 0;
273 peer->inbuf->packet.header.len = 0;
274 peer->inbuf->done = 0;
275
276 peer->cli_flag = 0;
277 peer->events = 0;
278
279 if (peer->state != peer->next_state)
280 {
281 peer->state = peer->next_state;
282 peer->state_time = time_now;
283
284 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
285 bgp_state_str(peer->next_state));
286 }
287 }
288
289 /* initiate a clean shutdown */
290 void bgp_stop(struct bgp_peer *peer)
291 {
292 LOG(4, 0, 0, "Terminating BGP connection to %s\n", peer->name);
293 bgp_send_notification(peer, BGP_ERR_CEASE, 0);
294 }
295
296 /* drop connection (if any) and set state to Disabled */
297 void bgp_halt(struct bgp_peer *peer)
298 {
299 LOG(4, 0, 0, "Aborting BGP connection to %s\n", peer->name);
300 peer->next_state = Disabled;
301 bgp_clear(peer);
302 }
303
304 /* drop connection (if any) and set to Idle for connection retry */
305 int bgp_restart(struct bgp_peer *peer)
306 {
307 peer->next_state = Idle;
308 bgp_clear(peer);
309
310 /* restart now */
311 peer->retry_time = time_now;
312 peer->retry_count = 0;
313
314 /* connect */
315 return bgp_connect(peer);
316 }
317
318 static void bgp_set_retry(struct bgp_peer *peer)
319 {
320 if (peer->retry_count++ < BGP_MAX_RETRY)
321 {
322 peer->retry_time = time_now + (BGP_RETRY_BACKOFF * peer->retry_count);
323 peer->next_state = Idle;
324 bgp_clear(peer);
325 }
326 else
327 bgp_halt(peer); /* give up */
328 }
329
330 /* convert ip/mask to CIDR notation */
331 static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx)
332 {
333 int i;
334 uint32_t b;
335
336 /* convert to prefix notation */
337 pfx->len = 32;
338 pfx->prefix = ip;
339
340 if (!mask) /* bogus */
341 mask = 0xffffffff;
342
343 for (i = 0; i < 32 && ((b = ntohl(1 << i)), !(mask & b)); i++)
344 {
345 pfx->len--;
346 pfx->prefix &= ~b;
347 }
348 }
349
350 /* insert route into list; sorted */
351 static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
352 struct bgp_route_list *new)
353 {
354 struct bgp_route_list *p = head;
355 struct bgp_route_list *e = 0;
356
357 while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0)
358 {
359 e = p;
360 p = p->next;
361 }
362
363 if (e)
364 {
365 new->next = e->next;
366 e->next = new;
367 }
368 else
369 {
370 new->next = head;
371 head = new;
372 }
373
374 return head;
375 }
376
377 /* add route to list for peers */
378 /*
379 * Note: this doesn't do route aggregation, nor drop routes if a less
380 * specific match already exists (partly because I'm lazy, but also so
381 * that if that route is later deleted we don't have to be concerned
382 * about adding back the more specific one).
383 */
384 int bgp_add_route(in_addr_t ip, in_addr_t mask)
385 {
386 struct bgp_route_list *r = bgp_routes;
387 struct bgp_route_list add;
388 int i;
389
390 bgp_cidr(ip, mask, &add.dest);
391 add.next = 0;
392
393 /* check for duplicate */
394 while (r)
395 {
396 i = memcmp(&r->dest, &add.dest, sizeof(r->dest));
397 if (!i)
398 return 1; /* already covered */
399
400 if (i > 0)
401 break;
402
403 r = r->next;
404 }
405
406 /* insert into route list; sorted */
407 if (!(r = malloc(sizeof(*r))))
408 {
409 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
410 fmtaddr(add.dest.prefix, 0), add.dest.len, strerror(errno));
411
412 return 0;
413 }
414
415 memcpy(r, &add, sizeof(*r));
416 bgp_routes = bgp_insert_route(bgp_routes, r);
417
418 /* flag established peers for update */
419 for (i = 0; i < BGP_NUM_PEERS; i++)
420 if (bgp_peers[i].state == Established)
421 bgp_peers[i].update_routes = 1;
422
423 LOG(4, 0, 0, "Registered BGP route %s/%d\n",
424 fmtaddr(add.dest.prefix, 0), add.dest.len);
425
426 return 1;
427 }
428
429 /* remove route from list for peers */
430 int bgp_del_route(in_addr_t ip, in_addr_t mask)
431 {
432 struct bgp_route_list *r = bgp_routes;
433 struct bgp_route_list *e = 0;
434 struct bgp_route_list del;
435 int i;
436
437 bgp_cidr(ip, mask, &del.dest);
438 del.next = 0;
439
440 /* find entry in routes list and remove */
441 while (r)
442 {
443 i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
444 if (!i)
445 {
446 if (e)
447 e->next = r->next;
448 else
449 bgp_routes = r->next;
450
451 free(r);
452 break;
453 }
454
455 e = r;
456
457 if (i > 0)
458 r = 0; /* stop */
459 else
460 r = r->next;
461 }
462
463 /* not found */
464 if (!r)
465 return 1;
466
467 /* flag established peers for update */
468 for (i = 0; i < BGP_NUM_PEERS; i++)
469 if (bgp_peers[i].state == Established)
470 bgp_peers[i].update_routes = 1;
471
472 LOG(4, 0, 0, "Removed BGP route %s/%d\n",
473 fmtaddr(del.dest.prefix, 0), del.dest.len);
474
475 return 1;
476 }
477
478 /* enable or disable routing */
479 void bgp_enable_routing(int enable)
480 {
481 int i;
482
483 for (i = 0; i < BGP_NUM_PEERS; i++)
484 {
485 bgp_peers[i].routing = enable;
486
487 /* flag established peers for update */
488 if (bgp_peers[i].state == Established)
489 bgp_peers[i].update_routes = 1;
490 }
491
492 LOG(4, 0, 0, "%s BGP routing\n", enable ? "Enabled" : "Suspended");
493 }
494
495 #ifdef HAVE_EPOLL
496 # include <sys/epoll.h>
497 #else
498 # include "fake_epoll.h"
499 #endif
500
501 /* return a bitmask of the events required to poll this peer's fd */
502 int bgp_set_poll()
503 {
504 int i;
505
506 if (!bgp_configured)
507 return 0;
508
509 for (i = 0; i < BGP_NUM_PEERS; i++)
510 {
511 struct bgp_peer *peer = &bgp_peers[i];
512 int events = 0;
513
514 if (peer->state == Disabled || peer->state == Idle)
515 continue;
516
517 if (peer->inbuf->done < BGP_MAX_PACKET_SIZE)
518 events |= EPOLLIN;
519
520 if (peer->state == Connect || /* connection in progress */
521 peer->update_routes || /* routing updates */
522 peer->outbuf->packet.header.len) /* pending output */
523 events |= EPOLLOUT;
524
525 if (peer->events != events)
526 {
527 struct epoll_event ev;
528
529 ev.events = peer->events = events;
530 ev.data.ptr = &peer->edata;
531 epoll_ctl(epollfd, EPOLL_CTL_MOD, peer->sock, &ev);
532 }
533 }
534
535 return 1;
536 }
537
538 /* process bgp events/timers */
539 int bgp_process(uint32_t events[])
540 {
541 int i;
542
543 if (!bgp_configured)
544 return 0;
545
546 for (i = 0; i < BGP_NUM_PEERS; i++)
547 {
548 struct bgp_peer *peer = &bgp_peers[i];
549
550 if (*peer->name && peer->cli_flag == BGP_CLI_RESTART)
551 {
552 bgp_restart(peer);
553 continue;
554 }
555
556 if (peer->state == Disabled)
557 continue;
558
559 if (peer->cli_flag)
560 {
561 switch (peer->cli_flag)
562 {
563 case BGP_CLI_SUSPEND:
564 if (peer->routing)
565 {
566 peer->routing = 0;
567 if (peer->state == Established)
568 peer->update_routes = 1;
569 }
570
571 break;
572
573 case BGP_CLI_ENABLE:
574 if (!peer->routing)
575 {
576 peer->routing = 1;
577 if (peer->state == Established)
578 peer->update_routes = 1;
579 }
580
581 break;
582 }
583
584 peer->cli_flag = 0;
585 }
586
587 /* handle empty/fill of buffers */
588 if (events[i] & EPOLLOUT)
589 {
590 int r = 1;
591 if (peer->state == Connect)
592 r = bgp_handle_connect(peer);
593 else if (peer->outbuf->packet.header.len)
594 r = bgp_write(peer);
595
596 if (!r)
597 continue;
598 }
599
600 if (events[i] & (EPOLLIN|EPOLLHUP))
601 {
602 if (!bgp_read(peer))
603 continue;
604 }
605
606 /* process input buffer contents */
607 while (peer->inbuf->done >= sizeof(peer->inbuf->packet.header)
608 && !peer->outbuf->packet.header.len) /* may need to queue a response */
609 {
610 if (bgp_handle_input(peer) < 0)
611 continue;
612 }
613
614 /* process pending updates */
615 if (peer->update_routes
616 && !peer->outbuf->packet.header.len) /* ditto */
617 {
618 if (!bgp_send_update(peer))
619 continue;
620 }
621
622 /* process timers */
623 if (peer->state == Established)
624 {
625 if (time_now > peer->expire_time)
626 {
627 LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
628 peer->name, peer->hold);
629
630 bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
631 continue;
632 }
633
634 if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
635 bgp_send_keepalive(peer);
636 }
637 else if (peer->state == Idle)
638 {
639 if (time_now > peer->retry_time)
640 bgp_connect(peer);
641 }
642 else if (time_now > peer->state_time + BGP_STATE_TIME)
643 {
644 LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
645 bgp_state_str(peer->state), peer->name);
646
647 bgp_restart(peer);
648 }
649 }
650
651 return 1;
652 }
653
654 static void bgp_free_routes(struct bgp_route_list *routes)
655 {
656 struct bgp_route_list *tmp;
657
658 while ((tmp = routes))
659 {
660 routes = tmp->next;
661 free(tmp);
662 }
663 }
664
665 char const *bgp_state_str(enum bgp_state state)
666 {
667 switch (state)
668 {
669 case Disabled: return "Disabled";
670 case Idle: return "Idle";
671 case Connect: return "Connect";
672 case Active: return "Active";
673 case OpenSent: return "OpenSent";
674 case OpenConfirm: return "OpenConfirm";
675 case Established: return "Established";
676 }
677
678 return "?";
679 }
680
681 static char const *bgp_msg_type_str(uint8_t type)
682 {
683 switch (type)
684 {
685 case BGP_MSG_OPEN: return "OPEN";
686 case BGP_MSG_UPDATE: return "UPDATE";
687 case BGP_MSG_NOTIFICATION: return "NOTIFICATION";
688 case BGP_MSG_KEEPALIVE: return "KEEPALIVE";
689 }
690
691 return "?";
692 }
693
694 /* attempt to connect to peer */
695 static int bgp_connect(struct bgp_peer *peer)
696 {
697 static int bgp_port = 0;
698 struct sockaddr_in addr;
699 struct epoll_event ev;
700
701 if (!bgp_port)
702 {
703 struct servent *serv;
704 if (!(serv = getservbyname("bgp", "tcp")))
705 {
706 LOG(0, 0, 0, "Can't get bgp service (%s)\n", strerror(errno));
707 return 0;
708 }
709
710 bgp_port = serv->s_port;
711 }
712
713 if ((peer->sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
714 {
715 LOG(0, 0, 0, "Can't create a socket for BGP peer %s (%s)\n",
716 peer->name, strerror(errno));
717
718 peer->state = peer->next_state = Disabled;
719 return 0;
720 }
721
722 /* add to poll set */
723 ev.events = peer->events = EPOLLOUT;
724 ev.data.ptr = &peer->edata;
725 epoll_ctl(epollfd, EPOLL_CTL_ADD, peer->sock, &ev);
726
727 /* set to non-blocking */
728 fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
729
730 /* try connect */
731 memset(&addr, 0, sizeof(addr));
732 addr.sin_family = AF_INET;
733 addr.sin_port = bgp_port;
734 addr.sin_addr.s_addr = peer->addr;
735
736 while (connect(peer->sock, (struct sockaddr *) &addr, sizeof(addr)) == -1)
737 {
738 if (errno == EINTR) /* SIGALARM handler */
739 continue;
740
741 if (errno != EINPROGRESS)
742 {
743 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n",
744 inet_ntoa(addr.sin_addr), strerror(errno));
745
746 bgp_set_retry(peer);
747 return 0;
748 }
749
750 peer->state = Connect;
751 peer->state_time = time_now;
752
753 LOG(4, 0, 0, "BGP peer %s: state Connect\n", peer->name);
754 return 1;
755 }
756
757 peer->state = Active;
758 peer->state_time = time_now;
759 peer->retry_time = peer->retry_count = 0;
760
761 LOG(4, 0, 0, "BGP peer %s: state Active\n", inet_ntoa(addr.sin_addr));
762
763 return bgp_send_open(peer);
764 }
765
766 /* complete partial connection (state = Connect) */
767 static int bgp_handle_connect(struct bgp_peer *peer)
768 {
769 int err = 0;
770 socklen_t len = sizeof(int);
771 getsockopt(peer->sock, SOL_SOCKET, SO_ERROR, &err, &len);
772 if (err)
773 {
774 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n", peer->name,
775 strerror(err));
776
777 bgp_set_retry(peer);
778 return 0;
779 }
780
781 peer->state = Active;
782 peer->state_time = time_now;
783
784 LOG(4, 0, 0, "BGP peer %s: state Active\n", peer->name);
785
786 return bgp_send_open(peer);
787 }
788
789 /* initiate a write */
790 static int bgp_write(struct bgp_peer *peer)
791 {
792 int len = htons(peer->outbuf->packet.header.len);
793 int r;
794
795 while ((r = write(peer->sock, &peer->outbuf->packet + peer->outbuf->done,
796 len - peer->outbuf->done)) == -1)
797 {
798 if (errno == EINTR)
799 continue;
800
801 if (errno == EAGAIN)
802 return 1;
803
804 if (errno == EPIPE)
805 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
806 else
807 LOG(1, 0, 0, "Can't write to BGP peer %s (%s)\n", peer->name,
808 strerror(errno));
809
810 bgp_set_retry(peer);
811 return 0;
812 }
813
814 if (r < len)
815 {
816 peer->outbuf->done += r;
817 return 1;
818 }
819
820 LOG(4, 0, 0, "Sent %s to BGP peer %s\n",
821 bgp_msg_type_str(peer->outbuf->packet.header.type), peer->name);
822
823 peer->outbuf->packet.header.len = 0;
824 peer->outbuf->done = 0;
825
826 if (peer->state == Established)
827 peer->keepalive_time = time_now + peer->keepalive;
828
829 if (peer->state != peer->next_state)
830 {
831 if (peer->next_state == Disabled || peer->next_state == Idle)
832 {
833 bgp_clear(peer);
834 return 0;
835 }
836
837 peer->state = peer->next_state;
838 peer->state_time = time_now;
839
840 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
841 bgp_state_str(peer->state));
842 }
843
844 return 1;
845 }
846
847 /* initiate a read */
848 static int bgp_read(struct bgp_peer *peer)
849 {
850 int r;
851
852 while ((r = read(peer->sock, &peer->inbuf->packet + peer->inbuf->done,
853 BGP_MAX_PACKET_SIZE - peer->inbuf->done)) < 1)
854 {
855 if (!r)
856 {
857 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
858 }
859 else
860 {
861 if (errno == EINTR)
862 continue;
863
864 if (errno == EAGAIN)
865 return 1;
866
867 LOG(1, 0, 0, "Can't read from BGP peer %s (%s)\n", peer->name,
868 strerror(errno));
869 }
870
871 bgp_set_retry(peer);
872 return 0;
873 }
874
875 peer->inbuf->done += r;
876 return 1;
877 }
878
879 /* process buffered packets */
880 static int bgp_handle_input(struct bgp_peer *peer)
881 {
882 struct bgp_packet *p = &peer->inbuf->packet;
883 int len = ntohs(p->header.len);
884
885 if (len > BGP_MAX_PACKET_SIZE)
886 {
887 LOG(1, 0, 0, "Bad header length from BGP %s\n", peer->name);
888 bgp_send_notification(peer, BGP_ERR_HEADER, BGP_ERR_HDR_BAD_LEN);
889 return 0;
890 }
891
892 if (peer->inbuf->done < len)
893 return 0;
894
895 LOG(4, 0, 0, "Received %s from BGP peer %s\n",
896 bgp_msg_type_str(p->header.type), peer->name);
897
898 switch (p->header.type)
899 {
900 case BGP_MSG_OPEN:
901 {
902 struct bgp_data_open data;
903 int hold;
904 int i;
905
906 for (i = 0; i < sizeof(p->header.marker); i++)
907 {
908 if ((unsigned char) p->header.marker[i] != 0xff)
909 {
910 LOG(1, 0, 0, "Invalid marker from BGP peer %s\n",
911 peer->name);
912
913 bgp_send_notification(peer, BGP_ERR_HEADER,
914 BGP_ERR_HDR_NOT_SYNC);
915
916 return 0;
917 }
918 }
919
920 if (peer->state != OpenSent)
921 {
922 LOG(1, 0, 0, "OPEN from BGP peer %s in %s state\n",
923 peer->name, bgp_state_str(peer->state));
924
925 bgp_send_notification(peer, BGP_ERR_FSM, 0);
926 return 0;
927 }
928
929 memcpy(&data, p->data, len - sizeof(p->header));
930
931 if (data.version != BGP_VERSION)
932 {
933 LOG(1, 0, 0, "Bad version (%d) sent by BGP peer %s\n",
934 (int) data.version, peer->name);
935
936 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_VERSION);
937 return 0;
938 }
939
940 if (ntohs(data.as) != peer->as)
941 {
942 LOG(1, 0, 0, "Bad AS sent by BGP peer %s (got %d, "
943 "expected %d)\n", peer->name, (int) htons(data.as),
944 (int) peer->as);
945
946 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_BAD_AS);
947 return 0;
948 }
949
950 if ((hold = ntohs(data.hold_time)) < 3)
951 {
952 LOG(1, 0, 0, "Bad hold time (%d) from BGP peer %s\n",
953 hold, peer->name);
954
955 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_HOLD_TIME);
956 return 0;
957 }
958
959 /* pick lowest hold time */
960 if (hold < peer->hold)
961 peer->hold = hold;
962
963 /* adjust our keepalive based on negotiated hold value */
964 if (peer->keepalive * 3 > peer->hold)
965 peer->keepalive = peer->hold / 3;
966
967 /* next transition requires an exchange of keepalives */
968 bgp_send_keepalive(peer);
969
970 /* FIXME: may need to check for optional params */
971 }
972
973 break;
974
975 case BGP_MSG_KEEPALIVE:
976 if (peer->state == OpenConfirm)
977 {
978 peer->state = peer->next_state = Established;
979 peer->state_time = time_now;
980 peer->keepalive_time = time_now + peer->keepalive;
981 peer->update_routes = 1;
982 peer->retry_count = 0;
983 peer->retry_time = 0;
984
985 LOG(4, 0, 0, "BGP peer %s: state Established\n", peer->name);
986 }
987
988 break;
989
990 case BGP_MSG_NOTIFICATION:
991 if (len > sizeof(p->header))
992 {
993 struct bgp_data_notification *notification =
994 (struct bgp_data_notification *) p->data;
995
996 if (notification->error_code == BGP_ERR_CEASE)
997 {
998 LOG(4, 0, 0, "BGP peer %s sent CEASE\n", peer->name);
999 bgp_restart(peer);
1000 return 0;
1001 }
1002
1003 /* FIXME: should handle more notifications */
1004 LOG(4, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n",
1005 peer->name, (int) notification->error_code);
1006 }
1007
1008 break;
1009 }
1010
1011 /* reset timer */
1012 peer->expire_time = time_now + peer->hold;
1013
1014 /* see if there's another message in the same packet/buffer */
1015 if (peer->inbuf->done > len)
1016 {
1017 peer->inbuf->done -= len;
1018 memmove(p, (char *) p + len, peer->inbuf->done);
1019 }
1020 else
1021 {
1022 peer->inbuf->packet.header.len = 0;
1023 peer->inbuf->done = 0;
1024 }
1025
1026 return peer->inbuf->done;
1027 }
1028
1029 /* send/buffer OPEN message */
1030 static int bgp_send_open(struct bgp_peer *peer)
1031 {
1032 struct bgp_data_open data;
1033 uint16_t len = sizeof(peer->outbuf->packet.header);
1034
1035 memset(peer->outbuf->packet.header.marker, 0xff,
1036 sizeof(peer->outbuf->packet.header.marker));
1037
1038 peer->outbuf->packet.header.type = BGP_MSG_OPEN;
1039
1040 data.version = BGP_VERSION;
1041 data.as = htons(our_as);
1042 data.hold_time = htons(peer->hold);
1043 data.identifier = my_address;
1044 data.opt_len = 0;
1045
1046 memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE);
1047 len += BGP_DATA_OPEN_SIZE;
1048
1049 peer->outbuf->packet.header.len = htons(len);
1050 peer->outbuf->done = 0;
1051 peer->next_state = OpenSent;
1052
1053 return bgp_write(peer);
1054 }
1055
1056 /* send/buffer KEEPALIVE message */
1057 static int bgp_send_keepalive(struct bgp_peer *peer)
1058 {
1059 memset(peer->outbuf->packet.header.marker, 0xff,
1060 sizeof(peer->outbuf->packet.header.marker));
1061
1062 peer->outbuf->packet.header.type = BGP_MSG_KEEPALIVE;
1063 peer->outbuf->packet.header.len =
1064 htons(sizeof(peer->outbuf->packet.header));
1065
1066 peer->outbuf->done = 0;
1067 peer->next_state = (peer->state == OpenSent) ? OpenConfirm : peer->state;
1068
1069 return bgp_write(peer);
1070 }
1071
1072 /* send/buffer UPDATE message */
1073 static int bgp_send_update(struct bgp_peer *peer)
1074 {
1075 uint16_t unf_len = 0;
1076 uint16_t attr_len;
1077 uint16_t len = sizeof(peer->outbuf->packet.header);
1078 struct bgp_route_list *have = peer->routes;
1079 struct bgp_route_list *want = peer->routing ? bgp_routes : 0;
1080 struct bgp_route_list *e = 0;
1081 struct bgp_route_list *add = 0;
1082 int s;
1083
1084 char *data = (char *) &peer->outbuf->packet.data;
1085
1086 /* need leave room for attr_len, bgp_path_attrs and one prefix */
1087 char *max = (char *) &peer->outbuf->packet.data
1088 + sizeof(peer->outbuf->packet.data)
1089 - sizeof(attr_len) - peer->path_attr_len - sizeof(struct bgp_ip_prefix);
1090
1091 /* skip over unf_len */
1092 data += sizeof(unf_len);
1093 len += sizeof(unf_len);
1094
1095 memset(peer->outbuf->packet.header.marker, 0xff,
1096 sizeof(peer->outbuf->packet.header.marker));
1097
1098 peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
1099
1100 peer->update_routes = 0; /* tentatively clear */
1101
1102 /* find differences */
1103 while ((have || want) && data < (max - sizeof(struct bgp_ip_prefix)))
1104 {
1105 if (have)
1106 s = want
1107 ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
1108 : -1;
1109 else
1110 s = 1;
1111
1112 if (s < 0) /* found one to delete */
1113 {
1114 struct bgp_route_list *tmp = have;
1115 have = have->next;
1116
1117 s = BGP_IP_PREFIX_SIZE(tmp->dest);
1118 memcpy(data, &tmp->dest, s);
1119 data += s;
1120 unf_len += s;
1121 len += s;
1122
1123 LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
1124 fmtaddr(tmp->dest.prefix, 0), tmp->dest.len, peer->name);
1125
1126 free(tmp);
1127
1128 if (e)
1129 e->next = have;
1130 else
1131 peer->routes = have;
1132 }
1133 else
1134 {
1135 if (!s) /* same */
1136 {
1137 e = have; /* stash the last found to relink above */
1138 have = have->next;
1139 want = want->next;
1140 }
1141 else if (s > 0) /* addition reqd. */
1142 {
1143 if (add)
1144 {
1145 peer->update_routes = 1; /* only one add per packet */
1146 if (!have)
1147 break;
1148 }
1149 else
1150 add = want;
1151
1152 if (want)
1153 want = want->next;
1154 }
1155 }
1156 }
1157
1158 if (have || want)
1159 peer->update_routes = 1; /* more to do */
1160
1161 /* anything changed? */
1162 if (!(unf_len || add))
1163 return 1;
1164
1165 /* go back and insert unf_len */
1166 unf_len = htons(unf_len);
1167 memcpy(&peer->outbuf->packet.data, &unf_len, sizeof(unf_len));
1168
1169 if (add)
1170 {
1171 if (!(e = malloc(sizeof(*e))))
1172 {
1173 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
1174 fmtaddr(add->dest.prefix, 0), add->dest.len, strerror(errno));
1175
1176 return 0;
1177 }
1178
1179 memcpy(e, add, sizeof(*e));
1180 e->next = 0;
1181 peer->routes = bgp_insert_route(peer->routes, e);
1182
1183 attr_len = htons(peer->path_attr_len);
1184 memcpy(data, &attr_len, sizeof(attr_len));
1185 data += sizeof(attr_len);
1186 len += sizeof(attr_len);
1187
1188 memcpy(data, peer->path_attrs, peer->path_attr_len);
1189 data += peer->path_attr_len;
1190 len += peer->path_attr_len;
1191
1192 s = BGP_IP_PREFIX_SIZE(add->dest);
1193 memcpy(data, &add->dest, s);
1194 data += s;
1195 len += s;
1196
1197 LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
1198 fmtaddr(add->dest.prefix, 0), add->dest.len, peer->name);
1199 }
1200 else
1201 {
1202 attr_len = 0;
1203 memcpy(data, &attr_len, sizeof(attr_len));
1204 data += sizeof(attr_len);
1205 len += sizeof(attr_len);
1206 }
1207
1208 peer->outbuf->packet.header.len = htons(len);
1209 peer->outbuf->done = 0;
1210
1211 return bgp_write(peer);
1212 }
1213
1214 /* send/buffer NOTIFICATION message */
1215 static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
1216 uint8_t subcode)
1217 {
1218 struct bgp_data_notification data;
1219 uint16_t len = 0;
1220
1221 data.error_code = code;
1222 len += sizeof(data.error_code);
1223
1224 data.error_subcode = subcode;
1225 len += sizeof(data.error_code);
1226
1227 memset(peer->outbuf->packet.header.marker, 0xff,
1228 sizeof(peer->outbuf->packet.header.marker));
1229
1230 peer->outbuf->packet.header.type = BGP_MSG_NOTIFICATION;
1231 peer->outbuf->packet.header.len =
1232 htons(sizeof(peer->outbuf->packet.header) + len);
1233
1234 memcpy(peer->outbuf->packet.data, &data, len);
1235
1236 peer->outbuf->done = 0;
1237 peer->next_state = code == BGP_ERR_CEASE ? Disabled : Idle;
1238
1239 /* we're dying; ignore any pending input */
1240 peer->inbuf->packet.header.len = 0;
1241 peer->inbuf->done = 0;
1242
1243 return bgp_write(peer);
1244 }