Add a config option to set the advertised BGP next hop.
[l2tpns.git] / bgp.c
1 /*
2 * BGPv4
3 * Used to advertise routes for upstream (l2tp port, rather than gratuitous
4 * arp) and downstream--allowing routers to load-balance both.
5 *
6 * Implementation limitations:
7 * - We never listen for incoming connections (session always initiated by us).
8 * - Any routes advertised by the peer are accepted, but ignored.
9 * - No password support; neither RFC1771 (which no-one seems to do anyway)
10 * nor RFC2385 (which requires a kernel patch on 2.4 kernels).
11 */
12
13 char const *cvs_id_bgp = "$Id: bgp.c,v 1.12 2005/09/02 23:39:36 bodea Exp $";
14
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <string.h>
18 #include <time.h>
19 #include <errno.h>
20 #include <sys/socket.h>
21 #include <netinet/in.h>
22 #include <arpa/inet.h>
23 #include <netdb.h>
24 #include <fcntl.h>
25
26 #include "l2tpns.h"
27 #include "bgp.h"
28 #include "util.h"
29
30 static void bgp_clear(struct bgp_peer *peer);
31 static void bgp_set_retry(struct bgp_peer *peer);
32 static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx);
33 static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
34 struct bgp_route_list *new);
35
36 static void bgp_free_routes(struct bgp_route_list *routes);
37 static char const *bgp_msg_type_str(uint8_t type);
38 static int bgp_connect(struct bgp_peer *peer);
39 static int bgp_handle_connect(struct bgp_peer *peer);
40 static int bgp_write(struct bgp_peer *peer);
41 static int bgp_read(struct bgp_peer *peer);
42 static int bgp_handle_input(struct bgp_peer *peer);
43 static int bgp_send_open(struct bgp_peer *peer);
44 static int bgp_send_keepalive(struct bgp_peer *peer);
45 static int bgp_send_update(struct bgp_peer *peer);
46 static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
47 uint8_t subcode);
48
/* local AS number as stored by bgp_setup(); zero makes bgp_start() refuse */
static uint16_t our_as;

/* sorted list of the routes we want to advertise to peers */
static struct bgp_route_list *bgp_routes = NULL;

/* peer table, indexed 0..BGP_NUM_PEERS-1 (not allocated in this part of the file) */
struct bgp_peer *bgp_peers = NULL;

/* set to 1 by bgp_start() once a peer has been started */
int bgp_configured = 0;
54
55 /* prepare peer structure, globals */
56 int bgp_setup(int as)
57 {
58 int i;
59 struct bgp_peer *peer;
60
61 for (i = 0; i < BGP_NUM_PEERS; i++)
62 {
63 peer = &bgp_peers[i];
64 memset(peer, 0, sizeof(*peer));
65
66 peer->addr = INADDR_NONE;
67 peer->sock = -1;
68 peer->state = peer->next_state = Disabled;
69
70 if (!((peer->outbuf = malloc(sizeof(*peer->outbuf)))
71 && (peer->inbuf = malloc(sizeof(*peer->inbuf)))))
72 {
73 LOG(0, 0, 0, "Can't allocate buffers for bgp peer (%s)\n",
74 strerror(errno));
75
76 return 0;
77 }
78
79 peer->edata.type = FD_TYPE_BGP;
80 peer->edata.index = i;
81 peer->events = 0;
82 }
83
84 if (as < 1)
85 as = 0;
86
87 if ((our_as = as))
88 return 0;
89
90 bgp_routes = 0;
91 bgp_configured = 0; /* set by bgp_start */
92
93 return 1;
94 }
95
96 /* start connection with a peer */
97 int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
98 int hold, struct in_addr update_source, int enable)
99 {
100 struct hostent *h;
101 int ibgp;
102 int i;
103 struct bgp_path_attr a;
104 char path_attrs[64];
105 char *p = path_attrs;
106 in_addr_t ip;
107 uint32_t metric = htonl(BGP_METRIC);
108 uint32_t no_export = htonl(BGP_COMMUNITY_NO_EXPORT);
109
110 if (!our_as)
111 return 0;
112
113 if (peer->state != Disabled)
114 bgp_halt(peer);
115
116 snprintf(peer->name, sizeof(peer->name), "%s", name);
117
118 if (!(h = gethostbyname(name)) || h->h_addrtype != AF_INET)
119 {
120 LOG(0, 0, 0, "Can't get address for BGP peer %s (%s)\n",
121 name, h ? "no address" : hstrerror(h_errno));
122
123 return 0;
124 }
125
126 memcpy(&peer->addr, h->h_addr, sizeof(peer->addr));
127 peer->source_addr = update_source.s_addr;
128 peer->as = as > 0 ? as : our_as;
129 ibgp = peer->as == our_as;
130
131 /* set initial timer values */
132 peer->init_keepalive = keepalive == -1 ? BGP_KEEPALIVE_TIME : keepalive;
133 peer->init_hold = hold == -1 ? BGP_HOLD_TIME : hold;
134
135 if (peer->init_hold < 3)
136 peer->init_hold = 3;
137
138 if (peer->init_keepalive * 3 > peer->init_hold)
139 peer->init_keepalive = peer->init_hold / 3;
140
141 /* clear buffers, go to Idle state */
142 peer->next_state = Idle;
143 bgp_clear(peer);
144
145 /* set initial routing state */
146 peer->routing = enable;
147
148 /* all our routes use the same attributes, so prepare it in advance */
149 if (peer->path_attrs)
150 free(peer->path_attrs);
151
152 peer->path_attr_len = 0;
153
154 /* ORIGIN */
155 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
156 a.code = BGP_PATH_ATTR_CODE_ORIGIN;
157 a.data.s.len = 1;
158 a.data.s.value[0] = BGP_PATH_ATTR_CODE_ORIGIN_IGP;
159
160 #define ADD_ATTRIBUTE() do { \
161 i = BGP_PATH_ATTR_SIZE(a); \
162 memcpy(p, &a, i); \
163 p += i; \
164 peer->path_attr_len += i; } while (0)
165
166 ADD_ATTRIBUTE();
167
168 /* AS_PATH */
169 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
170 a.code = BGP_PATH_ATTR_CODE_AS_PATH;
171 if (ibgp)
172 {
173 /* empty path */
174 a.data.s.len = 0;
175 }
176 else
177 {
178 /* just our AS */
179 struct {
180 uint8_t type;
181 uint8_t len;
182 uint16_t value;
183 } as_path = {
184 BGP_PATH_ATTR_CODE_AS_PATH_AS_SEQUENCE,
185 1,
186 htons(our_as),
187 };
188
189 a.data.s.len = sizeof(as_path);
190 memcpy(&a.data.s.value, &as_path, sizeof(as_path));
191 }
192
193 ADD_ATTRIBUTE();
194
195 /* NEXT_HOP */
196 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
197 a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
198 if (config->nexthop_address)
199 {
200 ip = config->nexthop_address;
201 }
202 else
203 {
204 ip = my_address; /* we're it */
205 }
206 a.data.s.len = sizeof(ip);
207 memcpy(a.data.s.value, &ip, sizeof(ip));
208
209 ADD_ATTRIBUTE();
210
211 /* MULTI_EXIT_DISC */
212 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
213 a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC;
214 a.data.s.len = sizeof(metric);
215 memcpy(a.data.s.value, &metric, sizeof(metric));
216
217 ADD_ATTRIBUTE();
218
219 if (ibgp)
220 {
221 uint32_t local_pref = htonl(BGP_LOCAL_PREF);
222
223 /* LOCAL_PREF */
224 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
225 a.code = BGP_PATH_ATTR_CODE_LOCAL_PREF;
226 a.data.s.len = sizeof(local_pref);
227 memcpy(a.data.s.value, &local_pref, sizeof(local_pref));
228
229 ADD_ATTRIBUTE();
230 }
231
232 /* COMMUNITIES */
233 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_TRANS;
234 a.code = BGP_PATH_ATTR_CODE_COMMUNITIES;
235 a.data.s.len = sizeof(no_export);
236 memcpy(a.data.s.value, &no_export, sizeof(no_export));
237
238 ADD_ATTRIBUTE();
239
240 if (!(peer->path_attrs = malloc(peer->path_attr_len)))
241 {
242 LOG(0, 0, 0, "Can't allocate path_attrs for %s (%s)\n",
243 name, strerror(errno));
244
245 return 0;
246 }
247
248 memcpy(peer->path_attrs, path_attrs, peer->path_attr_len);
249
250 LOG(4, 0, 0, "Initiating BGP connection to %s (routing %s)\n",
251 name, enable ? "enabled" : "suspended");
252
253 /* we have at least one peer configured */
254 bgp_configured = 1;
255
256 /* connect */
257 return bgp_connect(peer);
258 }
259
260 /* clear counters, timers, routes and buffers; close socket; move to
261 next_state, which may be Disabled or Idle */
262 static void bgp_clear(struct bgp_peer *peer)
263 {
264 if (peer->sock != -1)
265 {
266 close(peer->sock);
267 peer->sock = -1;
268 }
269
270 peer->keepalive_time = 0;
271 peer->expire_time = 0;
272
273 peer->keepalive = peer->init_keepalive;
274 peer->hold = peer->init_hold;
275
276 bgp_free_routes(peer->routes);
277 peer->routes = 0;
278
279 peer->outbuf->packet.header.len = 0;
280 peer->outbuf->done = 0;
281 peer->inbuf->packet.header.len = 0;
282 peer->inbuf->done = 0;
283
284 peer->cli_flag = 0;
285 peer->events = 0;
286
287 if (peer->state != peer->next_state)
288 {
289 peer->state = peer->next_state;
290 peer->state_time = time_now;
291
292 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
293 bgp_state_str(peer->next_state));
294 }
295 }
296
297 /* initiate a clean shutdown */
298 void bgp_stop(struct bgp_peer *peer)
299 {
300 LOG(4, 0, 0, "Terminating BGP connection to %s\n", peer->name);
301 bgp_send_notification(peer, BGP_ERR_CEASE, 0);
302 }
303
304 /* drop connection (if any) and set state to Disabled */
305 void bgp_halt(struct bgp_peer *peer)
306 {
307 LOG(4, 0, 0, "Aborting BGP connection to %s\n", peer->name);
308 peer->next_state = Disabled;
309 bgp_clear(peer);
310 }
311
312 /* drop connection (if any) and set to Idle for connection retry */
313 int bgp_restart(struct bgp_peer *peer)
314 {
315 peer->next_state = Idle;
316 bgp_clear(peer);
317
318 /* restart now */
319 peer->retry_time = time_now;
320 peer->retry_count = 0;
321
322 /* connect */
323 return bgp_connect(peer);
324 }
325
326 static void bgp_set_retry(struct bgp_peer *peer)
327 {
328 if (peer->retry_count++ < BGP_MAX_RETRY)
329 {
330 peer->retry_time = time_now + (BGP_RETRY_BACKOFF * peer->retry_count);
331 peer->next_state = Idle;
332 bgp_clear(peer);
333 }
334 else
335 bgp_halt(peer); /* give up */
336 }
337
338 /* convert ip/mask to CIDR notation */
339 static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx)
340 {
341 int i;
342 uint32_t b;
343
344 /* convert to prefix notation */
345 pfx->len = 32;
346 pfx->prefix = ip;
347
348 if (!mask) /* bogus */
349 mask = 0xffffffff;
350
351 for (i = 0; i < 32 && ((b = ntohl(1 << i)), !(mask & b)); i++)
352 {
353 pfx->len--;
354 pfx->prefix &= ~b;
355 }
356 }
357
358 /* insert route into list; sorted */
359 static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
360 struct bgp_route_list *new)
361 {
362 struct bgp_route_list *p = head;
363 struct bgp_route_list *e = 0;
364
365 while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0)
366 {
367 e = p;
368 p = p->next;
369 }
370
371 if (e)
372 {
373 new->next = e->next;
374 e->next = new;
375 }
376 else
377 {
378 new->next = head;
379 head = new;
380 }
381
382 return head;
383 }
384
385 /* add route to list for peers */
386 /*
387 * Note: this doesn't do route aggregation, nor drop routes if a less
388 * specific match already exists (partly because I'm lazy, but also so
389 * that if that route is later deleted we don't have to be concerned
390 * about adding back the more specific one).
391 */
392 int bgp_add_route(in_addr_t ip, in_addr_t mask)
393 {
394 struct bgp_route_list *r = bgp_routes;
395 struct bgp_route_list add;
396 int i;
397
398 bgp_cidr(ip, mask, &add.dest);
399 add.next = 0;
400
401 /* check for duplicate */
402 while (r)
403 {
404 i = memcmp(&r->dest, &add.dest, sizeof(r->dest));
405 if (!i)
406 return 1; /* already covered */
407
408 if (i > 0)
409 break;
410
411 r = r->next;
412 }
413
414 /* insert into route list; sorted */
415 if (!(r = malloc(sizeof(*r))))
416 {
417 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
418 fmtaddr(add.dest.prefix, 0), add.dest.len, strerror(errno));
419
420 return 0;
421 }
422
423 memcpy(r, &add, sizeof(*r));
424 bgp_routes = bgp_insert_route(bgp_routes, r);
425
426 /* flag established peers for update */
427 for (i = 0; i < BGP_NUM_PEERS; i++)
428 if (bgp_peers[i].state == Established)
429 bgp_peers[i].update_routes = 1;
430
431 LOG(4, 0, 0, "Registered BGP route %s/%d\n",
432 fmtaddr(add.dest.prefix, 0), add.dest.len);
433
434 return 1;
435 }
436
437 /* remove route from list for peers */
438 int bgp_del_route(in_addr_t ip, in_addr_t mask)
439 {
440 struct bgp_route_list *r = bgp_routes;
441 struct bgp_route_list *e = 0;
442 struct bgp_route_list del;
443 int i;
444
445 bgp_cidr(ip, mask, &del.dest);
446 del.next = 0;
447
448 /* find entry in routes list and remove */
449 while (r)
450 {
451 i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
452 if (!i)
453 {
454 if (e)
455 e->next = r->next;
456 else
457 bgp_routes = r->next;
458
459 free(r);
460 break;
461 }
462
463 e = r;
464
465 if (i > 0)
466 r = 0; /* stop */
467 else
468 r = r->next;
469 }
470
471 /* not found */
472 if (!r)
473 return 1;
474
475 /* flag established peers for update */
476 for (i = 0; i < BGP_NUM_PEERS; i++)
477 if (bgp_peers[i].state == Established)
478 bgp_peers[i].update_routes = 1;
479
480 LOG(4, 0, 0, "Removed BGP route %s/%d\n",
481 fmtaddr(del.dest.prefix, 0), del.dest.len);
482
483 return 1;
484 }
485
486 /* enable or disable routing */
487 void bgp_enable_routing(int enable)
488 {
489 int i;
490
491 for (i = 0; i < BGP_NUM_PEERS; i++)
492 {
493 bgp_peers[i].routing = enable;
494
495 /* flag established peers for update */
496 if (bgp_peers[i].state == Established)
497 bgp_peers[i].update_routes = 1;
498 }
499
500 LOG(4, 0, 0, "%s BGP routing\n", enable ? "Enabled" : "Suspended");
501 }
502
503 #ifdef HAVE_EPOLL
504 # include <sys/epoll.h>
505 #else
506 # include "fake_epoll.h"
507 #endif
508
509 /* return a bitmask of the events required to poll this peer's fd */
510 int bgp_set_poll()
511 {
512 int i;
513
514 if (!bgp_configured)
515 return 0;
516
517 for (i = 0; i < BGP_NUM_PEERS; i++)
518 {
519 struct bgp_peer *peer = &bgp_peers[i];
520 int events = 0;
521
522 if (peer->state == Disabled || peer->state == Idle)
523 continue;
524
525 if (peer->inbuf->done < BGP_MAX_PACKET_SIZE)
526 events |= EPOLLIN;
527
528 if (peer->state == Connect || /* connection in progress */
529 peer->update_routes || /* routing updates */
530 peer->outbuf->packet.header.len) /* pending output */
531 events |= EPOLLOUT;
532
533 if (peer->events != events)
534 {
535 struct epoll_event ev;
536
537 ev.events = peer->events = events;
538 ev.data.ptr = &peer->edata;
539 epoll_ctl(epollfd, EPOLL_CTL_MOD, peer->sock, &ev);
540 }
541 }
542
543 return 1;
544 }
545
546 /* process bgp events/timers */
547 int bgp_process(uint32_t events[])
548 {
549 int i;
550
551 if (!bgp_configured)
552 return 0;
553
554 for (i = 0; i < BGP_NUM_PEERS; i++)
555 {
556 struct bgp_peer *peer = &bgp_peers[i];
557
558 if (*peer->name && peer->cli_flag == BGP_CLI_RESTART)
559 {
560 bgp_restart(peer);
561 continue;
562 }
563
564 if (peer->state == Disabled)
565 continue;
566
567 if (peer->cli_flag)
568 {
569 switch (peer->cli_flag)
570 {
571 case BGP_CLI_SUSPEND:
572 if (peer->routing)
573 {
574 peer->routing = 0;
575 if (peer->state == Established)
576 peer->update_routes = 1;
577 }
578
579 break;
580
581 case BGP_CLI_ENABLE:
582 if (!peer->routing)
583 {
584 peer->routing = 1;
585 if (peer->state == Established)
586 peer->update_routes = 1;
587 }
588
589 break;
590 }
591
592 peer->cli_flag = 0;
593 }
594
595 /* handle empty/fill of buffers */
596 if (events[i] & EPOLLOUT)
597 {
598 int r = 1;
599 if (peer->state == Connect)
600 r = bgp_handle_connect(peer);
601 else if (peer->outbuf->packet.header.len)
602 r = bgp_write(peer);
603
604 if (!r)
605 continue;
606 }
607
608 if (events[i] & (EPOLLIN|EPOLLHUP))
609 {
610 if (!bgp_read(peer))
611 continue;
612 }
613
614 /* process input buffer contents */
615 while (peer->inbuf->done >= sizeof(peer->inbuf->packet.header)
616 && !peer->outbuf->packet.header.len) /* may need to queue a response */
617 {
618 if (bgp_handle_input(peer) < 0)
619 continue;
620 }
621
622 /* process pending updates */
623 if (peer->update_routes
624 && !peer->outbuf->packet.header.len) /* ditto */
625 {
626 if (!bgp_send_update(peer))
627 continue;
628 }
629
630 /* process timers */
631 if (peer->state == Established)
632 {
633 if (time_now > peer->expire_time)
634 {
635 LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
636 peer->name, peer->hold);
637
638 bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
639 continue;
640 }
641
642 if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
643 bgp_send_keepalive(peer);
644 }
645 else if (peer->state == Idle)
646 {
647 if (time_now > peer->retry_time)
648 bgp_connect(peer);
649 }
650 else if (time_now > peer->state_time + BGP_STATE_TIME)
651 {
652 LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
653 bgp_state_str(peer->state), peer->name);
654
655 bgp_restart(peer);
656 }
657 }
658
659 return 1;
660 }
661
662 static void bgp_free_routes(struct bgp_route_list *routes)
663 {
664 struct bgp_route_list *tmp;
665
666 while ((tmp = routes))
667 {
668 routes = tmp->next;
669 free(tmp);
670 }
671 }
672
673 char const *bgp_state_str(enum bgp_state state)
674 {
675 switch (state)
676 {
677 case Disabled: return "Disabled";
678 case Idle: return "Idle";
679 case Connect: return "Connect";
680 case Active: return "Active";
681 case OpenSent: return "OpenSent";
682 case OpenConfirm: return "OpenConfirm";
683 case Established: return "Established";
684 }
685
686 return "?";
687 }
688
689 static char const *bgp_msg_type_str(uint8_t type)
690 {
691 switch (type)
692 {
693 case BGP_MSG_OPEN: return "OPEN";
694 case BGP_MSG_UPDATE: return "UPDATE";
695 case BGP_MSG_NOTIFICATION: return "NOTIFICATION";
696 case BGP_MSG_KEEPALIVE: return "KEEPALIVE";
697 }
698
699 return "?";
700 }
701
702 /* attempt to connect to peer */
703 static int bgp_connect(struct bgp_peer *peer)
704 {
705 static int bgp_port = 0;
706 struct sockaddr_in addr;
707 struct sockaddr_in source_addr;
708 struct epoll_event ev;
709
710 if (!bgp_port)
711 {
712 struct servent *serv;
713 if (!(serv = getservbyname("bgp", "tcp")))
714 {
715 LOG(0, 0, 0, "Can't get bgp service (%s)\n", strerror(errno));
716 return 0;
717 }
718
719 bgp_port = serv->s_port;
720 }
721
722 if ((peer->sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
723 {
724 LOG(0, 0, 0, "Can't create a socket for BGP peer %s (%s)\n",
725 peer->name, strerror(errno));
726
727 peer->state = peer->next_state = Disabled;
728 return 0;
729 }
730
731 /* add to poll set */
732 ev.events = peer->events = EPOLLOUT;
733 ev.data.ptr = &peer->edata;
734 epoll_ctl(epollfd, EPOLL_CTL_ADD, peer->sock, &ev);
735
736 /* set to non-blocking */
737 fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
738
739 /* set source address */
740 memset(&source_addr, 0, sizeof(source_addr));
741 source_addr.sin_family = AF_INET;
742 source_addr.sin_addr.s_addr = peer->source_addr; /* defaults to INADDR_ANY */
743 if (bind(peer->sock, (struct sockaddr *) &source_addr, sizeof(source_addr)) < 0)
744 {
745 LOG(1, 0, 0, "Can't set source address to %s: %s\n",
746 inet_ntoa(source_addr.sin_addr), strerror(errno));
747
748 bgp_set_retry(peer);
749 return 0;
750 }
751
752 /* try connect */
753 memset(&addr, 0, sizeof(addr));
754 addr.sin_family = AF_INET;
755 addr.sin_port = bgp_port;
756 addr.sin_addr.s_addr = peer->addr;
757
758 while (connect(peer->sock, (struct sockaddr *) &addr, sizeof(addr)) == -1)
759 {
760 if (errno == EINTR) /* SIGALARM handler */
761 continue;
762
763 if (errno != EINPROGRESS)
764 {
765 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n",
766 inet_ntoa(addr.sin_addr), strerror(errno));
767
768 bgp_set_retry(peer);
769 return 0;
770 }
771
772 peer->state = Connect;
773 peer->state_time = time_now;
774
775 LOG(4, 0, 0, "BGP peer %s: state Connect\n", peer->name);
776 return 1;
777 }
778
779 peer->state = Active;
780 peer->state_time = time_now;
781 peer->retry_time = peer->retry_count = 0;
782
783 LOG(4, 0, 0, "BGP peer %s: state Active\n", inet_ntoa(addr.sin_addr));
784
785 return bgp_send_open(peer);
786 }
787
788 /* complete partial connection (state = Connect) */
789 static int bgp_handle_connect(struct bgp_peer *peer)
790 {
791 int err = 0;
792 socklen_t len = sizeof(int);
793 getsockopt(peer->sock, SOL_SOCKET, SO_ERROR, &err, &len);
794 if (err)
795 {
796 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n", peer->name,
797 strerror(err));
798
799 bgp_set_retry(peer);
800 return 0;
801 }
802
803 peer->state = Active;
804 peer->state_time = time_now;
805
806 LOG(4, 0, 0, "BGP peer %s: state Active\n", peer->name);
807
808 return bgp_send_open(peer);
809 }
810
811 /* initiate a write */
812 static int bgp_write(struct bgp_peer *peer)
813 {
814 int len = htons(peer->outbuf->packet.header.len);
815 int r;
816
817 while ((r = write(peer->sock, &peer->outbuf->packet + peer->outbuf->done,
818 len - peer->outbuf->done)) == -1)
819 {
820 if (errno == EINTR)
821 continue;
822
823 if (errno == EAGAIN)
824 return 1;
825
826 if (errno == EPIPE)
827 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
828 else
829 LOG(1, 0, 0, "Can't write to BGP peer %s (%s)\n", peer->name,
830 strerror(errno));
831
832 bgp_set_retry(peer);
833 return 0;
834 }
835
836 if (r < len)
837 {
838 peer->outbuf->done += r;
839 return 1;
840 }
841
842 LOG(4, 0, 0, "Sent %s to BGP peer %s\n",
843 bgp_msg_type_str(peer->outbuf->packet.header.type), peer->name);
844
845 peer->outbuf->packet.header.len = 0;
846 peer->outbuf->done = 0;
847
848 if (peer->state == Established)
849 peer->keepalive_time = time_now + peer->keepalive;
850
851 if (peer->state != peer->next_state)
852 {
853 if (peer->next_state == Disabled || peer->next_state == Idle)
854 {
855 bgp_clear(peer);
856 return 0;
857 }
858
859 peer->state = peer->next_state;
860 peer->state_time = time_now;
861
862 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
863 bgp_state_str(peer->state));
864 }
865
866 return 1;
867 }
868
869 /* initiate a read */
870 static int bgp_read(struct bgp_peer *peer)
871 {
872 int r;
873
874 while ((r = read(peer->sock, &peer->inbuf->packet + peer->inbuf->done,
875 BGP_MAX_PACKET_SIZE - peer->inbuf->done)) < 1)
876 {
877 if (!r)
878 {
879 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
880 }
881 else
882 {
883 if (errno == EINTR)
884 continue;
885
886 if (errno == EAGAIN)
887 return 1;
888
889 LOG(1, 0, 0, "Can't read from BGP peer %s (%s)\n", peer->name,
890 strerror(errno));
891 }
892
893 bgp_set_retry(peer);
894 return 0;
895 }
896
897 peer->inbuf->done += r;
898 return 1;
899 }
900
901 /* process buffered packets */
902 static int bgp_handle_input(struct bgp_peer *peer)
903 {
904 struct bgp_packet *p = &peer->inbuf->packet;
905 int len = ntohs(p->header.len);
906
907 if (len > BGP_MAX_PACKET_SIZE)
908 {
909 LOG(1, 0, 0, "Bad header length from BGP %s\n", peer->name);
910 bgp_send_notification(peer, BGP_ERR_HEADER, BGP_ERR_HDR_BAD_LEN);
911 return 0;
912 }
913
914 if (peer->inbuf->done < len)
915 return 0;
916
917 LOG(4, 0, 0, "Received %s from BGP peer %s\n",
918 bgp_msg_type_str(p->header.type), peer->name);
919
920 switch (p->header.type)
921 {
922 case BGP_MSG_OPEN:
923 {
924 struct bgp_data_open data;
925 int hold;
926 int i;
927
928 for (i = 0; i < sizeof(p->header.marker); i++)
929 {
930 if ((unsigned char) p->header.marker[i] != 0xff)
931 {
932 LOG(1, 0, 0, "Invalid marker from BGP peer %s\n",
933 peer->name);
934
935 bgp_send_notification(peer, BGP_ERR_HEADER,
936 BGP_ERR_HDR_NOT_SYNC);
937
938 return 0;
939 }
940 }
941
942 if (peer->state != OpenSent)
943 {
944 LOG(1, 0, 0, "OPEN from BGP peer %s in %s state\n",
945 peer->name, bgp_state_str(peer->state));
946
947 bgp_send_notification(peer, BGP_ERR_FSM, 0);
948 return 0;
949 }
950
951 memcpy(&data, p->data, len - sizeof(p->header));
952
953 if (data.version != BGP_VERSION)
954 {
955 LOG(1, 0, 0, "Bad version (%d) sent by BGP peer %s\n",
956 (int) data.version, peer->name);
957
958 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_VERSION);
959 return 0;
960 }
961
962 if (ntohs(data.as) != peer->as)
963 {
964 LOG(1, 0, 0, "Bad AS sent by BGP peer %s (got %d, "
965 "expected %d)\n", peer->name, (int) htons(data.as),
966 (int) peer->as);
967
968 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_BAD_AS);
969 return 0;
970 }
971
972 if ((hold = ntohs(data.hold_time)) < 3)
973 {
974 LOG(1, 0, 0, "Bad hold time (%d) from BGP peer %s\n",
975 hold, peer->name);
976
977 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_HOLD_TIME);
978 return 0;
979 }
980
981 /* pick lowest hold time */
982 if (hold < peer->hold)
983 peer->hold = hold;
984
985 /* adjust our keepalive based on negotiated hold value */
986 if (peer->keepalive * 3 > peer->hold)
987 peer->keepalive = peer->hold / 3;
988
989 /* next transition requires an exchange of keepalives */
990 bgp_send_keepalive(peer);
991
992 /* FIXME: may need to check for optional params */
993 }
994
995 break;
996
997 case BGP_MSG_KEEPALIVE:
998 if (peer->state == OpenConfirm)
999 {
1000 peer->state = peer->next_state = Established;
1001 peer->state_time = time_now;
1002 peer->keepalive_time = time_now + peer->keepalive;
1003 peer->update_routes = 1;
1004 peer->retry_count = 0;
1005 peer->retry_time = 0;
1006
1007 LOG(4, 0, 0, "BGP peer %s: state Established\n", peer->name);
1008 }
1009
1010 break;
1011
1012 case BGP_MSG_NOTIFICATION:
1013 if (len > sizeof(p->header))
1014 {
1015 struct bgp_data_notification *notification =
1016 (struct bgp_data_notification *) p->data;
1017
1018 if (notification->error_code == BGP_ERR_CEASE)
1019 {
1020 LOG(4, 0, 0, "BGP peer %s sent CEASE\n", peer->name);
1021 bgp_set_retry(peer);
1022 return 0;
1023 }
1024
1025 /* FIXME: should handle more notifications */
1026 LOG(4, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n",
1027 peer->name, (int) notification->error_code);
1028 }
1029
1030 break;
1031 }
1032
1033 /* reset timer */
1034 peer->expire_time = time_now + peer->hold;
1035
1036 /* see if there's another message in the same packet/buffer */
1037 if (peer->inbuf->done > len)
1038 {
1039 peer->inbuf->done -= len;
1040 memmove(p, (char *) p + len, peer->inbuf->done);
1041 }
1042 else
1043 {
1044 peer->inbuf->packet.header.len = 0;
1045 peer->inbuf->done = 0;
1046 }
1047
1048 return peer->inbuf->done;
1049 }
1050
1051 /* send/buffer OPEN message */
1052 static int bgp_send_open(struct bgp_peer *peer)
1053 {
1054 struct bgp_data_open data;
1055 uint16_t len = sizeof(peer->outbuf->packet.header);
1056
1057 memset(peer->outbuf->packet.header.marker, 0xff,
1058 sizeof(peer->outbuf->packet.header.marker));
1059
1060 peer->outbuf->packet.header.type = BGP_MSG_OPEN;
1061
1062 data.version = BGP_VERSION;
1063 data.as = htons(our_as);
1064 data.hold_time = htons(peer->hold);
1065 data.identifier = my_address;
1066 data.opt_len = 0;
1067
1068 memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE);
1069 len += BGP_DATA_OPEN_SIZE;
1070
1071 peer->outbuf->packet.header.len = htons(len);
1072 peer->outbuf->done = 0;
1073 peer->next_state = OpenSent;
1074
1075 return bgp_write(peer);
1076 }
1077
1078 /* send/buffer KEEPALIVE message */
1079 static int bgp_send_keepalive(struct bgp_peer *peer)
1080 {
1081 memset(peer->outbuf->packet.header.marker, 0xff,
1082 sizeof(peer->outbuf->packet.header.marker));
1083
1084 peer->outbuf->packet.header.type = BGP_MSG_KEEPALIVE;
1085 peer->outbuf->packet.header.len =
1086 htons(sizeof(peer->outbuf->packet.header));
1087
1088 peer->outbuf->done = 0;
1089 peer->next_state = (peer->state == OpenSent) ? OpenConfirm : peer->state;
1090
1091 return bgp_write(peer);
1092 }
1093
1094 /* send/buffer UPDATE message */
1095 static int bgp_send_update(struct bgp_peer *peer)
1096 {
1097 uint16_t unf_len = 0;
1098 uint16_t attr_len;
1099 uint16_t len = sizeof(peer->outbuf->packet.header);
1100 struct bgp_route_list *have = peer->routes;
1101 struct bgp_route_list *want = peer->routing ? bgp_routes : 0;
1102 struct bgp_route_list *e = 0;
1103 struct bgp_route_list *add = 0;
1104 int s;
1105
1106 char *data = (char *) &peer->outbuf->packet.data;
1107
1108 /* need leave room for attr_len, bgp_path_attrs and one prefix */
1109 char *max = (char *) &peer->outbuf->packet.data
1110 + sizeof(peer->outbuf->packet.data)
1111 - sizeof(attr_len) - peer->path_attr_len - sizeof(struct bgp_ip_prefix);
1112
1113 /* skip over unf_len */
1114 data += sizeof(unf_len);
1115 len += sizeof(unf_len);
1116
1117 memset(peer->outbuf->packet.header.marker, 0xff,
1118 sizeof(peer->outbuf->packet.header.marker));
1119
1120 peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
1121
1122 peer->update_routes = 0; /* tentatively clear */
1123
1124 /* find differences */
1125 while ((have || want) && data < (max - sizeof(struct bgp_ip_prefix)))
1126 {
1127 if (have)
1128 s = want
1129 ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
1130 : -1;
1131 else
1132 s = 1;
1133
1134 if (s < 0) /* found one to delete */
1135 {
1136 struct bgp_route_list *tmp = have;
1137 have = have->next;
1138
1139 s = BGP_IP_PREFIX_SIZE(tmp->dest);
1140 memcpy(data, &tmp->dest, s);
1141 data += s;
1142 unf_len += s;
1143 len += s;
1144
1145 LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
1146 fmtaddr(tmp->dest.prefix, 0), tmp->dest.len, peer->name);
1147
1148 free(tmp);
1149
1150 if (e)
1151 e->next = have;
1152 else
1153 peer->routes = have;
1154 }
1155 else
1156 {
1157 if (!s) /* same */
1158 {
1159 e = have; /* stash the last found to relink above */
1160 have = have->next;
1161 want = want->next;
1162 }
1163 else if (s > 0) /* addition reqd. */
1164 {
1165 if (add)
1166 {
1167 peer->update_routes = 1; /* only one add per packet */
1168 if (!have)
1169 break;
1170 }
1171 else
1172 add = want;
1173
1174 if (want)
1175 want = want->next;
1176 }
1177 }
1178 }
1179
1180 if (have || want)
1181 peer->update_routes = 1; /* more to do */
1182
1183 /* anything changed? */
1184 if (!(unf_len || add))
1185 return 1;
1186
1187 /* go back and insert unf_len */
1188 unf_len = htons(unf_len);
1189 memcpy(&peer->outbuf->packet.data, &unf_len, sizeof(unf_len));
1190
1191 if (add)
1192 {
1193 if (!(e = malloc(sizeof(*e))))
1194 {
1195 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
1196 fmtaddr(add->dest.prefix, 0), add->dest.len, strerror(errno));
1197
1198 return 0;
1199 }
1200
1201 memcpy(e, add, sizeof(*e));
1202 e->next = 0;
1203 peer->routes = bgp_insert_route(peer->routes, e);
1204
1205 attr_len = htons(peer->path_attr_len);
1206 memcpy(data, &attr_len, sizeof(attr_len));
1207 data += sizeof(attr_len);
1208 len += sizeof(attr_len);
1209
1210 memcpy(data, peer->path_attrs, peer->path_attr_len);
1211 data += peer->path_attr_len;
1212 len += peer->path_attr_len;
1213
1214 s = BGP_IP_PREFIX_SIZE(add->dest);
1215 memcpy(data, &add->dest, s);
1216 data += s;
1217 len += s;
1218
1219 LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
1220 fmtaddr(add->dest.prefix, 0), add->dest.len, peer->name);
1221 }
1222 else
1223 {
1224 attr_len = 0;
1225 memcpy(data, &attr_len, sizeof(attr_len));
1226 data += sizeof(attr_len);
1227 len += sizeof(attr_len);
1228 }
1229
1230 peer->outbuf->packet.header.len = htons(len);
1231 peer->outbuf->done = 0;
1232
1233 return bgp_write(peer);
1234 }
1235
1236 /* send/buffer NOTIFICATION message */
1237 static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
1238 uint8_t subcode)
1239 {
1240 struct bgp_data_notification data;
1241 uint16_t len = 0;
1242
1243 data.error_code = code;
1244 len += sizeof(data.error_code);
1245
1246 data.error_subcode = subcode;
1247 len += sizeof(data.error_code);
1248
1249 memset(peer->outbuf->packet.header.marker, 0xff,
1250 sizeof(peer->outbuf->packet.header.marker));
1251
1252 peer->outbuf->packet.header.type = BGP_MSG_NOTIFICATION;
1253 peer->outbuf->packet.header.len =
1254 htons(sizeof(peer->outbuf->packet.header) + len);
1255
1256 memcpy(peer->outbuf->packet.data, &data, len);
1257
1258 peer->outbuf->done = 0;
1259 peer->next_state = code == BGP_ERR_CEASE ? Disabled : Idle;
1260
1261 /* we're dying; ignore any pending input */
1262 peer->inbuf->packet.header.len = 0;
1263 peer->inbuf->done = 0;
1264
1265 return bgp_write(peer);
1266 }