2deeeb3af4c5c09939a375887c579ee908b68300
[l2tpns.git] / bgp.c
1 /*
2 * BGPv4
3 * Used to advertise routes for upstream (l2tp port, rather than gratuitous
4 * arp) and downstream--allowing routers to load-balance both.
5 *
6 * Implementation limitations:
7 * - We never listen for incoming connections (session always initiated by us).
8 * - Any routes advertised by the peer are accepted, but ignored.
9 * - No password support; neither RFC1771 (which no-one seems to do anyway)
10 * nor RFC2385 (which requires a kernel patch on 2.4 kernels).
11 */
12
13 char const *cvs_id_bgp = "$Id: bgp.c,v 1.12 2005/09/02 23:39:36 bodea Exp $";
14
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <string.h>
18 #include <time.h>
19 #include <errno.h>
20 #include <sys/socket.h>
21 #include <netinet/in.h>
22 #include <arpa/inet.h>
23 #include <netdb.h>
24 #include <fcntl.h>
25
26 #include "l2tpns.h"
27 #include "bgp.h"
28 #include "util.h"
29
/* Forward declarations for the file-local helpers defined further down. */
static void bgp_clear(struct bgp_peer *peer);
static void bgp_set_retry(struct bgp_peer *peer);
static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx);
static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
    struct bgp_route_list *new);

static void bgp_free_routes(struct bgp_route_list *routes);
static char const *bgp_msg_type_str(uint8_t type);
static int bgp_connect(struct bgp_peer *peer);
static int bgp_handle_connect(struct bgp_peer *peer);
static int bgp_write(struct bgp_peer *peer);
static int bgp_read(struct bgp_peer *peer);
static int bgp_handle_input(struct bgp_peer *peer);
static int bgp_send_open(struct bgp_peer *peer);
static int bgp_send_keepalive(struct bgp_peer *peer);
static int bgp_send_update(struct bgp_peer *peer);
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode);
static int bgp_send_notification_full(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode, char *notification_data, uint16_t data_len);

/* our own AS number, stored by bgp_setup(); bgp_start() refuses to run while it is zero */
static uint16_t our_as;
/* sorted list of the routes we want advertised; edited by bgp_add_route()/bgp_del_route() */
static struct bgp_route_list *bgp_routes = 0;

/* set to 1 by bgp_start() once a peer has been configured */
int bgp_configured = 0;
/* peer table, indexed 0..BGP_NUM_PEERS-1; the storage is not allocated in this file */
struct bgp_peer *bgp_peers = 0;
56
57 /* prepare peer structure, globals */
58 int bgp_setup(int as)
59 {
60 int i;
61 struct bgp_peer *peer;
62
63 for (i = 0; i < BGP_NUM_PEERS; i++)
64 {
65 peer = &bgp_peers[i];
66 memset(peer, 0, sizeof(*peer));
67
68 peer->addr = INADDR_NONE;
69 peer->sock = -1;
70 peer->state = peer->next_state = Disabled;
71
72 if (!((peer->outbuf = malloc(sizeof(*peer->outbuf)))
73 && (peer->inbuf = malloc(sizeof(*peer->inbuf)))))
74 {
75 LOG(0, 0, 0, "Can't allocate buffers for bgp peer (%s)\n",
76 strerror(errno));
77
78 return 0;
79 }
80
81 peer->edata.type = FD_TYPE_BGP;
82 peer->edata.index = i;
83 peer->events = 0;
84 }
85
86 if (as < 1)
87 as = 0;
88
89 if ((our_as = as))
90 return 0;
91
92 bgp_routes = 0;
93 bgp_configured = 0; /* set by bgp_start */
94
95 return 1;
96 }
97
98 /* start connection with a peer */
99 int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
100 int hold, int enable)
101 {
102 struct hostent *h;
103 int ibgp;
104 int i;
105 struct bgp_path_attr a;
106 char path_attrs[64];
107 char *p = path_attrs;
108 in_addr_t ip;
109 uint32_t metric = htonl(BGP_METRIC);
110 uint32_t no_export = htonl(BGP_COMMUNITY_NO_EXPORT);
111
112 if (!our_as)
113 return 0;
114
115 if (peer->state != Disabled)
116 bgp_halt(peer);
117
118 snprintf(peer->name, sizeof(peer->name), "%s", name);
119
120 if (!(h = gethostbyname(name)) || h->h_addrtype != AF_INET)
121 {
122 LOG(0, 0, 0, "Can't get address for BGP peer %s (%s)\n",
123 name, h ? "no address" : hstrerror(h_errno));
124
125 return 0;
126 }
127
128 memcpy(&peer->addr, h->h_addr, sizeof(peer->addr));
129 peer->as = as > 0 ? as : our_as;
130 ibgp = peer->as == our_as;
131
132 /* set initial timer values */
133 peer->init_keepalive = keepalive == -1 ? BGP_KEEPALIVE_TIME : keepalive;
134 peer->init_hold = hold == -1 ? BGP_HOLD_TIME : hold;
135
136 if (peer->init_hold < 3)
137 peer->init_hold = 3;
138
139 if (peer->init_keepalive * 3 > peer->init_hold)
140 peer->init_keepalive = peer->init_hold / 3;
141
142 /* clear buffers, go to Idle state */
143 peer->next_state = Idle;
144 bgp_clear(peer);
145
146 /* set initial routing state */
147 peer->routing = enable;
148
149 /* all our routes use the same attributes, so prepare it in advance */
150 if (peer->path_attrs)
151 free(peer->path_attrs);
152
153 peer->path_attr_len = 0;
154
155 /* ORIGIN */
156 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
157 a.code = BGP_PATH_ATTR_CODE_ORIGIN;
158 a.data.s.len = 1;
159 a.data.s.value[0] = BGP_PATH_ATTR_CODE_ORIGIN_IGP;
160
161 #define ADD_ATTRIBUTE() do { \
162 i = BGP_PATH_ATTR_SIZE(a); \
163 memcpy(p, &a, i); \
164 p += i; \
165 peer->path_attr_len += i; } while (0)
166
167 ADD_ATTRIBUTE();
168
169 /* AS_PATH */
170 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
171 a.code = BGP_PATH_ATTR_CODE_AS_PATH;
172 if (ibgp)
173 {
174 /* empty path */
175 a.data.s.len = 0;
176 }
177 else
178 {
179 /* just our AS */
180 struct {
181 uint8_t type;
182 uint8_t len;
183 uint16_t value;
184 } as_path = {
185 BGP_PATH_ATTR_CODE_AS_PATH_AS_SEQUENCE,
186 1,
187 htons(our_as),
188 };
189
190 a.data.s.len = sizeof(as_path);
191 memcpy(&a.data.s.value, &as_path, sizeof(as_path));
192 }
193
194 ADD_ATTRIBUTE();
195
196 /* NEXT_HOP */
197 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
198 a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
199 ip = my_address; /* we're it */
200 a.data.s.len = sizeof(ip);
201 memcpy(a.data.s.value, &ip, sizeof(ip));
202
203 ADD_ATTRIBUTE();
204
205 /* MULTI_EXIT_DISC */
206 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
207 a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC;
208 a.data.s.len = sizeof(metric);
209 memcpy(a.data.s.value, &metric, sizeof(metric));
210
211 ADD_ATTRIBUTE();
212
213 if (ibgp)
214 {
215 uint32_t local_pref = htonl(BGP_LOCAL_PREF);
216
217 /* LOCAL_PREF */
218 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
219 a.code = BGP_PATH_ATTR_CODE_LOCAL_PREF;
220 a.data.s.len = sizeof(local_pref);
221 memcpy(a.data.s.value, &local_pref, sizeof(local_pref));
222
223 ADD_ATTRIBUTE();
224 }
225
226 /* COMMUNITIES */
227 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_TRANS;
228 a.code = BGP_PATH_ATTR_CODE_COMMUNITIES;
229 a.data.s.len = sizeof(no_export);
230 memcpy(a.data.s.value, &no_export, sizeof(no_export));
231
232 ADD_ATTRIBUTE();
233
234 if (!(peer->path_attrs = malloc(peer->path_attr_len)))
235 {
236 LOG(0, 0, 0, "Can't allocate path_attrs for %s (%s)\n",
237 name, strerror(errno));
238
239 return 0;
240 }
241
242 memcpy(peer->path_attrs, path_attrs, peer->path_attr_len);
243
244 LOG(4, 0, 0, "Initiating BGP connection to %s (routing %s)\n",
245 name, enable ? "enabled" : "suspended");
246
247 /* we have at least one peer configured */
248 bgp_configured = 1;
249
250 /* connect */
251 return bgp_connect(peer);
252 }
253
254 /* clear counters, timers, routes and buffers; close socket; move to
255 next_state, which may be Disabled or Idle */
256 static void bgp_clear(struct bgp_peer *peer)
257 {
258 if (peer->sock != -1)
259 {
260 close(peer->sock);
261 peer->sock = -1;
262 }
263
264 peer->keepalive_time = 0;
265 peer->expire_time = 0;
266
267 peer->keepalive = peer->init_keepalive;
268 peer->hold = peer->init_hold;
269
270 bgp_free_routes(peer->routes);
271 peer->routes = 0;
272
273 peer->outbuf->packet.header.len = 0;
274 peer->outbuf->done = 0;
275 peer->inbuf->packet.header.len = 0;
276 peer->inbuf->done = 0;
277
278 peer->cli_flag = 0;
279 peer->events = 0;
280
281 if (peer->state != peer->next_state)
282 {
283 peer->state = peer->next_state;
284 peer->state_time = time_now;
285
286 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
287 bgp_state_str(peer->next_state));
288 }
289 }
290
291 /* initiate a clean shutdown */
292 void bgp_stop(struct bgp_peer *peer)
293 {
294 LOG(4, 0, 0, "Terminating BGP connection to %s\n", peer->name);
295 bgp_send_notification(peer, BGP_ERR_CEASE, 0);
296 }
297
298 /* drop connection (if any) and set state to Disabled */
299 void bgp_halt(struct bgp_peer *peer)
300 {
301 LOG(4, 0, 0, "Aborting BGP connection to %s\n", peer->name);
302 peer->next_state = Disabled;
303 bgp_clear(peer);
304 }
305
306 /* drop connection (if any) and set to Idle for connection retry */
307 int bgp_restart(struct bgp_peer *peer)
308 {
309 peer->next_state = Idle;
310 bgp_clear(peer);
311
312 /* restart now */
313 peer->retry_time = time_now;
314 peer->retry_count = 0;
315
316 /* connect */
317 return bgp_connect(peer);
318 }
319
320 static void bgp_set_retry(struct bgp_peer *peer)
321 {
322 if (peer->retry_count++ < BGP_MAX_RETRY)
323 {
324 peer->retry_time = time_now + (BGP_RETRY_BACKOFF * peer->retry_count);
325 peer->next_state = Idle;
326 bgp_clear(peer);
327 }
328 else
329 bgp_halt(peer); /* give up */
330 }
331
332 /* convert ip/mask to CIDR notation */
333 static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx)
334 {
335 int i;
336 uint32_t b;
337
338 /* convert to prefix notation */
339 pfx->len = 32;
340 pfx->prefix = ip;
341
342 if (!mask) /* bogus */
343 mask = 0xffffffff;
344
345 for (i = 0; i < 32 && ((b = ntohl(1 << i)), !(mask & b)); i++)
346 {
347 pfx->len--;
348 pfx->prefix &= ~b;
349 }
350 }
351
352 /* insert route into list; sorted */
353 static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
354 struct bgp_route_list *new)
355 {
356 struct bgp_route_list *p = head;
357 struct bgp_route_list *e = 0;
358
359 while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0)
360 {
361 e = p;
362 p = p->next;
363 }
364
365 if (e)
366 {
367 new->next = e->next;
368 e->next = new;
369 }
370 else
371 {
372 new->next = head;
373 head = new;
374 }
375
376 return head;
377 }
378
379 /* add route to list for peers */
380 /*
381 * Note: this doesn't do route aggregation, nor drop routes if a less
382 * specific match already exists (partly because I'm lazy, but also so
383 * that if that route is later deleted we don't have to be concerned
384 * about adding back the more specific one).
385 */
386 int bgp_add_route(in_addr_t ip, in_addr_t mask)
387 {
388 struct bgp_route_list *r = bgp_routes;
389 struct bgp_route_list add;
390 int i;
391
392 bgp_cidr(ip, mask, &add.dest);
393 add.next = 0;
394
395 /* check for duplicate */
396 while (r)
397 {
398 i = memcmp(&r->dest, &add.dest, sizeof(r->dest));
399 if (!i)
400 return 1; /* already covered */
401
402 if (i > 0)
403 break;
404
405 r = r->next;
406 }
407
408 /* insert into route list; sorted */
409 if (!(r = malloc(sizeof(*r))))
410 {
411 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
412 fmtaddr(add.dest.prefix, 0), add.dest.len, strerror(errno));
413
414 return 0;
415 }
416
417 memcpy(r, &add, sizeof(*r));
418 bgp_routes = bgp_insert_route(bgp_routes, r);
419
420 /* flag established peers for update */
421 for (i = 0; i < BGP_NUM_PEERS; i++)
422 if (bgp_peers[i].state == Established)
423 bgp_peers[i].update_routes = 1;
424
425 LOG(4, 0, 0, "Registered BGP route %s/%d\n",
426 fmtaddr(add.dest.prefix, 0), add.dest.len);
427
428 return 1;
429 }
430
431 /* remove route from list for peers */
432 int bgp_del_route(in_addr_t ip, in_addr_t mask)
433 {
434 struct bgp_route_list *r = bgp_routes;
435 struct bgp_route_list *e = 0;
436 struct bgp_route_list del;
437 int i;
438
439 bgp_cidr(ip, mask, &del.dest);
440 del.next = 0;
441
442 /* find entry in routes list and remove */
443 while (r)
444 {
445 i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
446 if (!i)
447 {
448 if (e)
449 e->next = r->next;
450 else
451 bgp_routes = r->next;
452
453 free(r);
454 break;
455 }
456
457 e = r;
458
459 if (i > 0)
460 r = 0; /* stop */
461 else
462 r = r->next;
463 }
464
465 /* not found */
466 if (!r)
467 return 1;
468
469 /* flag established peers for update */
470 for (i = 0; i < BGP_NUM_PEERS; i++)
471 if (bgp_peers[i].state == Established)
472 bgp_peers[i].update_routes = 1;
473
474 LOG(4, 0, 0, "Removed BGP route %s/%d\n",
475 fmtaddr(del.dest.prefix, 0), del.dest.len);
476
477 return 1;
478 }
479
480 /* enable or disable routing */
481 void bgp_enable_routing(int enable)
482 {
483 int i;
484
485 for (i = 0; i < BGP_NUM_PEERS; i++)
486 {
487 bgp_peers[i].routing = enable;
488
489 /* flag established peers for update */
490 if (bgp_peers[i].state == Established)
491 bgp_peers[i].update_routes = 1;
492 }
493
494 LOG(4, 0, 0, "%s BGP routing\n", enable ? "Enabled" : "Suspended");
495 }
496
497 #ifdef HAVE_EPOLL
498 # include <sys/epoll.h>
499 #else
500 # include "fake_epoll.h"
501 #endif
502
503 /* return a bitmask of the events required to poll this peer's fd */
504 int bgp_set_poll()
505 {
506 int i;
507
508 if (!bgp_configured)
509 return 0;
510
511 for (i = 0; i < BGP_NUM_PEERS; i++)
512 {
513 struct bgp_peer *peer = &bgp_peers[i];
514 int events = 0;
515
516 if (peer->state == Disabled || peer->state == Idle)
517 continue;
518
519 if (peer->inbuf->done < BGP_MAX_PACKET_SIZE)
520 events |= EPOLLIN;
521
522 if (peer->state == Connect || /* connection in progress */
523 peer->update_routes || /* routing updates */
524 peer->outbuf->packet.header.len) /* pending output */
525 events |= EPOLLOUT;
526
527 if (peer->events != events)
528 {
529 struct epoll_event ev;
530
531 ev.events = peer->events = events;
532 ev.data.ptr = &peer->edata;
533 epoll_ctl(epollfd, EPOLL_CTL_MOD, peer->sock, &ev);
534 }
535 }
536
537 return 1;
538 }
539
540 /* process bgp events/timers */
541 int bgp_process(uint32_t events[])
542 {
543 int i;
544
545 if (!bgp_configured)
546 return 0;
547
548 for (i = 0; i < BGP_NUM_PEERS; i++)
549 {
550 struct bgp_peer *peer = &bgp_peers[i];
551
552 if (*peer->name && peer->cli_flag == BGP_CLI_RESTART)
553 {
554 bgp_restart(peer);
555 continue;
556 }
557
558 if (peer->state == Disabled)
559 continue;
560
561 if (peer->cli_flag)
562 {
563 switch (peer->cli_flag)
564 {
565 case BGP_CLI_SUSPEND:
566 if (peer->routing)
567 {
568 peer->routing = 0;
569 if (peer->state == Established)
570 peer->update_routes = 1;
571 }
572
573 break;
574
575 case BGP_CLI_ENABLE:
576 if (!peer->routing)
577 {
578 peer->routing = 1;
579 if (peer->state == Established)
580 peer->update_routes = 1;
581 }
582
583 break;
584 }
585
586 peer->cli_flag = 0;
587 }
588
589 /* handle empty/fill of buffers */
590 if (events[i] & EPOLLOUT)
591 {
592 int r = 1;
593 if (peer->state == Connect)
594 r = bgp_handle_connect(peer);
595 else if (peer->outbuf->packet.header.len)
596 r = bgp_write(peer);
597
598 if (!r)
599 continue;
600 }
601
602 if (events[i] & (EPOLLIN|EPOLLHUP))
603 {
604 if (!bgp_read(peer))
605 continue;
606 }
607
608 /* process input buffer contents */
609 while (peer->inbuf->done >= sizeof(peer->inbuf->packet.header)
610 && !peer->outbuf->packet.header.len) /* may need to queue a response */
611 {
612 if (bgp_handle_input(peer) < 0)
613 continue;
614 }
615
616 /* process pending updates */
617 if (peer->update_routes
618 && !peer->outbuf->packet.header.len) /* ditto */
619 {
620 if (!bgp_send_update(peer))
621 continue;
622 }
623
624 /* process timers */
625 if (peer->state == Established)
626 {
627 if (time_now > peer->expire_time)
628 {
629 LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
630 peer->name, peer->hold);
631
632 bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
633 continue;
634 }
635
636 if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
637 bgp_send_keepalive(peer);
638 }
639 else if (peer->state == Idle)
640 {
641 if (time_now > peer->retry_time)
642 bgp_connect(peer);
643 }
644 else if (time_now > peer->state_time + BGP_STATE_TIME)
645 {
646 LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
647 bgp_state_str(peer->state), peer->name);
648
649 bgp_restart(peer);
650 }
651 }
652
653 return 1;
654 }
655
656 static void bgp_free_routes(struct bgp_route_list *routes)
657 {
658 struct bgp_route_list *tmp;
659
660 while ((tmp = routes))
661 {
662 routes = tmp->next;
663 free(tmp);
664 }
665 }
666
667 char const *bgp_state_str(enum bgp_state state)
668 {
669 switch (state)
670 {
671 case Disabled: return "Disabled";
672 case Idle: return "Idle";
673 case Connect: return "Connect";
674 case Active: return "Active";
675 case OpenSent: return "OpenSent";
676 case OpenConfirm: return "OpenConfirm";
677 case Established: return "Established";
678 }
679
680 return "?";
681 }
682
683 static char const *bgp_msg_type_str(uint8_t type)
684 {
685 switch (type)
686 {
687 case BGP_MSG_OPEN: return "OPEN";
688 case BGP_MSG_UPDATE: return "UPDATE";
689 case BGP_MSG_NOTIFICATION: return "NOTIFICATION";
690 case BGP_MSG_KEEPALIVE: return "KEEPALIVE";
691 }
692
693 return "?";
694 }
695
696 /* attempt to connect to peer */
697 static int bgp_connect(struct bgp_peer *peer)
698 {
699 static int bgp_port = 0;
700 struct sockaddr_in addr;
701 struct epoll_event ev;
702
703 if (!bgp_port)
704 {
705 struct servent *serv;
706 if (!(serv = getservbyname("bgp", "tcp")))
707 {
708 LOG(0, 0, 0, "Can't get bgp service (%s)\n", strerror(errno));
709 return 0;
710 }
711
712 bgp_port = serv->s_port;
713 }
714
715 if ((peer->sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
716 {
717 LOG(0, 0, 0, "Can't create a socket for BGP peer %s (%s)\n",
718 peer->name, strerror(errno));
719
720 peer->state = peer->next_state = Disabled;
721 return 0;
722 }
723
724 /* add to poll set */
725 ev.events = peer->events = EPOLLOUT;
726 ev.data.ptr = &peer->edata;
727 epoll_ctl(epollfd, EPOLL_CTL_ADD, peer->sock, &ev);
728
729 /* set to non-blocking */
730 fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
731
732 /* try connect */
733 memset(&addr, 0, sizeof(addr));
734 addr.sin_family = AF_INET;
735 addr.sin_port = bgp_port;
736 addr.sin_addr.s_addr = peer->addr;
737
738 while (connect(peer->sock, (struct sockaddr *) &addr, sizeof(addr)) == -1)
739 {
740 if (errno == EINTR) /* SIGALARM handler */
741 continue;
742
743 if (errno != EINPROGRESS)
744 {
745 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n",
746 inet_ntoa(addr.sin_addr), strerror(errno));
747
748 bgp_set_retry(peer);
749 return 0;
750 }
751
752 peer->state = Connect;
753 peer->state_time = time_now;
754
755 LOG(4, 0, 0, "BGP peer %s: state Connect\n", peer->name);
756 return 1;
757 }
758
759 peer->state = Active;
760 peer->state_time = time_now;
761 peer->retry_time = peer->retry_count = 0;
762
763 LOG(4, 0, 0, "BGP peer %s: state Active\n", inet_ntoa(addr.sin_addr));
764
765 peer->handle_ipv6_routes = 0;
766
767 return bgp_send_open(peer);
768 }
769
770 /* complete partial connection (state = Connect) */
771 static int bgp_handle_connect(struct bgp_peer *peer)
772 {
773 int err = 0;
774 socklen_t len = sizeof(int);
775 getsockopt(peer->sock, SOL_SOCKET, SO_ERROR, &err, &len);
776 if (err)
777 {
778 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n", peer->name,
779 strerror(err));
780
781 bgp_set_retry(peer);
782 return 0;
783 }
784
785 peer->state = Active;
786 peer->state_time = time_now;
787
788 LOG(4, 0, 0, "BGP peer %s: state Active\n", peer->name);
789
790 peer->handle_ipv6_routes = 0;
791
792 return bgp_send_open(peer);
793 }
794
795 /* initiate a write */
796 static int bgp_write(struct bgp_peer *peer)
797 {
798 int len = htons(peer->outbuf->packet.header.len);
799 int r;
800
801 while ((r = write(peer->sock, &peer->outbuf->packet + peer->outbuf->done,
802 len - peer->outbuf->done)) == -1)
803 {
804 if (errno == EINTR)
805 continue;
806
807 if (errno == EAGAIN)
808 return 1;
809
810 if (errno == EPIPE)
811 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
812 else
813 LOG(1, 0, 0, "Can't write to BGP peer %s (%s)\n", peer->name,
814 strerror(errno));
815
816 bgp_set_retry(peer);
817 return 0;
818 }
819
820 if (r < len)
821 {
822 peer->outbuf->done += r;
823 return 1;
824 }
825
826 LOG(4, 0, 0, "Sent %s to BGP peer %s\n",
827 bgp_msg_type_str(peer->outbuf->packet.header.type), peer->name);
828
829 peer->outbuf->packet.header.len = 0;
830 peer->outbuf->done = 0;
831
832 if (peer->state == Established)
833 peer->keepalive_time = time_now + peer->keepalive;
834
835 if (peer->state != peer->next_state)
836 {
837 if (peer->next_state == Disabled || peer->next_state == Idle)
838 {
839 bgp_clear(peer);
840 return 0;
841 }
842
843 peer->state = peer->next_state;
844 peer->state_time = time_now;
845
846 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
847 bgp_state_str(peer->state));
848 }
849
850 return 1;
851 }
852
853 /* initiate a read */
854 static int bgp_read(struct bgp_peer *peer)
855 {
856 int r;
857
858 while ((r = read(peer->sock, &peer->inbuf->packet + peer->inbuf->done,
859 BGP_MAX_PACKET_SIZE - peer->inbuf->done)) < 1)
860 {
861 if (!r)
862 {
863 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
864 }
865 else
866 {
867 if (errno == EINTR)
868 continue;
869
870 if (errno == EAGAIN)
871 return 1;
872
873 LOG(1, 0, 0, "Can't read from BGP peer %s (%s)\n", peer->name,
874 strerror(errno));
875 }
876
877 bgp_set_retry(peer);
878 return 0;
879 }
880
881 peer->inbuf->done += r;
882 return 1;
883 }
884
885 /* process buffered packets */
886 static int bgp_handle_input(struct bgp_peer *peer)
887 {
888 struct bgp_packet *p = &peer->inbuf->packet;
889 int len = ntohs(p->header.len);
890
891 if (len > BGP_MAX_PACKET_SIZE)
892 {
893 LOG(1, 0, 0, "Bad header length from BGP %s\n", peer->name);
894 bgp_send_notification(peer, BGP_ERR_HEADER, BGP_ERR_HDR_BAD_LEN);
895 return 0;
896 }
897
898 if (peer->inbuf->done < len)
899 return 0;
900
901 LOG(4, 0, 0, "Received %s from BGP peer %s\n",
902 bgp_msg_type_str(p->header.type), peer->name);
903
904 switch (p->header.type)
905 {
906 case BGP_MSG_OPEN:
907 {
908 struct bgp_data_open data;
909 int hold;
910 int i;
911 off_t param_offset, capability_offset;
912 struct bgp_opt_param *param;
913 uint8_t capabilities_len;
914 char *capabilities = NULL;
915 struct bgp_capability *capability;
916 struct bgp_mp_cap_param *mp_cap;
917
918 for (i = 0; i < sizeof(p->header.marker); i++)
919 {
920 if ((unsigned char) p->header.marker[i] != 0xff)
921 {
922 LOG(1, 0, 0, "Invalid marker from BGP peer %s\n",
923 peer->name);
924
925 bgp_send_notification(peer, BGP_ERR_HEADER,
926 BGP_ERR_HDR_NOT_SYNC);
927
928 return 0;
929 }
930 }
931
932 if (peer->state != OpenSent)
933 {
934 LOG(1, 0, 0, "OPEN from BGP peer %s in %s state\n",
935 peer->name, bgp_state_str(peer->state));
936
937 bgp_send_notification(peer, BGP_ERR_FSM, 0);
938 return 0;
939 }
940
941 memcpy(&data, p->data, len - sizeof(p->header));
942
943 if (data.version != BGP_VERSION)
944 {
945 LOG(1, 0, 0, "Bad version (%d) sent by BGP peer %s\n",
946 (int) data.version, peer->name);
947
948 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_VERSION);
949 return 0;
950 }
951
952 if (ntohs(data.as) != peer->as)
953 {
954 LOG(1, 0, 0, "Bad AS sent by BGP peer %s (got %d, "
955 "expected %d)\n", peer->name, (int) htons(data.as),
956 (int) peer->as);
957
958 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_BAD_AS);
959 return 0;
960 }
961
962 if ((hold = ntohs(data.hold_time)) < 3)
963 {
964 LOG(1, 0, 0, "Bad hold time (%d) from BGP peer %s\n",
965 hold, peer->name);
966
967 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_HOLD_TIME);
968 return 0;
969 }
970
971 /* pick lowest hold time */
972 if (hold < peer->hold)
973 peer->hold = hold;
974
975 /* adjust our keepalive based on negotiated hold value */
976 if (peer->keepalive * 3 > peer->hold)
977 peer->keepalive = peer->hold / 3;
978
979 /* check for optional parameters */
980 /* 2 is for the size of type + len (both uint8_t) */
981 for (param_offset = 0;
982 param_offset < data.opt_len;
983 param_offset += 2 + param->len)
984 {
985 param = (struct bgp_opt_param *)(&data.opt_params + param_offset);
986
987 /* sensible check */
988 if (data.opt_len - param_offset < 2
989 || param->len > data.opt_len - param_offset - 2)
990 {
991 LOG(1, 0, 0, "Malformed Optional Parameter list from BGP peer %s\n",
992 peer->name);
993
994 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC);
995 return 0;
996 }
997
998 /* we know only one parameter type */
999 if (param->type != BGP_PARAM_TYPE_CAPABILITY)
1000 {
1001 LOG(1, 0, 0, "Unsupported Optional Parameter type %d from BGP peer %s\n",
1002 param->type, peer->name);
1003
1004 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_UNSUP_PARAM);
1005 return 0;
1006 }
1007
1008 capabilities_len = param->len;
1009 capabilities = (char *)&param->value;
1010 }
1011
1012 /* look for BGP multiprotocol capability */
1013 if (capabilities)
1014 {
1015 for (capability_offset = 0;
1016 capability_offset < capabilities_len;
1017 capability_offset += 2 + capability->len)
1018 {
1019 capability = (struct bgp_capability *)(capabilities + capability_offset);
1020
1021 /* sensible check */
1022 if (capabilities_len - capability_offset < 2
1023 || capability->len > capabilities_len - capability_offset - 2)
1024 {
1025 LOG(1, 0, 0, "Malformed Capabilities list from BGP peer %s\n",
1026 peer->name);
1027
1028 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC);
1029 return 0;
1030 }
1031
1032 /* we only know one capability code */
1033 if (capability->code != BGP_CAP_CODE_MP
1034 && capability->len != sizeof(struct bgp_mp_cap_param))
1035 {
1036 LOG(4, 0, 0, "Unsupported Capability code %d from BGP peer %s\n",
1037 capability->code, peer->name);
1038
1039 bgp_send_notification_full(peer, BGP_ERR_OPEN, BGP_ERR_OPN_UNSUP_CAP,
1040 (char *)capability, 2 + capability->len);
1041 /* we don't terminate, still; we just jump to the next one */
1042 continue;
1043 }
1044
1045 mp_cap = (struct bgp_mp_cap_param *)&capability->value;
1046 /* the only <AFI, SAFI> tuple we support */
1047 if (ntohs(mp_cap->afi) != AF_INET6 && mp_cap->safi != BGP_MP_SAFI_UNICAST)
1048 {
1049 LOG(4, 0, 0, "Unsupported multiprotocol AFI %d and SAFI %d from BGP peer %s\n",
1050 mp_cap->afi, mp_cap->safi, peer->name);
1051
1052 bgp_send_notification_full(peer, BGP_ERR_OPEN, BGP_ERR_OPN_UNSUP_CAP,
1053 (char *)capability, 2 + capability->len);
1054 /* we don't terminate, still; we just jump to the next one */
1055 continue;
1056 }
1057
1058 peer->handle_ipv6_routes = 1;
1059 }
1060 }
1061
1062 /* next transition requires an exchange of keepalives */
1063 bgp_send_keepalive(peer);
1064 }
1065
1066 break;
1067
1068 case BGP_MSG_KEEPALIVE:
1069 if (peer->state == OpenConfirm)
1070 {
1071 peer->state = peer->next_state = Established;
1072 peer->state_time = time_now;
1073 peer->keepalive_time = time_now + peer->keepalive;
1074 peer->update_routes = 1;
1075 peer->retry_count = 0;
1076 peer->retry_time = 0;
1077
1078 LOG(4, 0, 0, "BGP peer %s: state Established\n", peer->name);
1079 }
1080
1081 break;
1082
1083 case BGP_MSG_NOTIFICATION:
1084 if (len > sizeof(p->header))
1085 {
1086 struct bgp_data_notification *notification =
1087 (struct bgp_data_notification *) p->data;
1088
1089 if (notification->error_code == BGP_ERR_CEASE)
1090 {
1091 LOG(4, 0, 0, "BGP peer %s sent CEASE\n", peer->name);
1092 bgp_restart(peer);
1093 return 0;
1094 }
1095
1096 /* FIXME: should handle more notifications */
1097 LOG(4, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n",
1098 peer->name, (int) notification->error_code);
1099 }
1100
1101 break;
1102 }
1103
1104 /* reset timer */
1105 peer->expire_time = time_now + peer->hold;
1106
1107 /* see if there's another message in the same packet/buffer */
1108 if (peer->inbuf->done > len)
1109 {
1110 peer->inbuf->done -= len;
1111 memmove(p, (char *) p + len, peer->inbuf->done);
1112 }
1113 else
1114 {
1115 peer->inbuf->packet.header.len = 0;
1116 peer->inbuf->done = 0;
1117 }
1118
1119 return peer->inbuf->done;
1120 }
1121
1122 /* send/buffer OPEN message */
1123 static int bgp_send_open(struct bgp_peer *peer)
1124 {
1125 struct bgp_data_open data;
1126 struct bgp_mp_cap_param mp_ipv6 = { htons(AF_INET6), 0, BGP_MP_SAFI_UNICAST };
1127 struct bgp_capability cap_mp_ipv6;
1128 struct bgp_opt_param param_cap_mp_ipv6;
1129 uint16_t len = sizeof(peer->outbuf->packet.header);
1130
1131 memset(peer->outbuf->packet.header.marker, 0xff,
1132 sizeof(peer->outbuf->packet.header.marker));
1133
1134 peer->outbuf->packet.header.type = BGP_MSG_OPEN;
1135
1136 data.version = BGP_VERSION;
1137 data.as = htons(our_as);
1138 data.hold_time = htons(peer->hold);
1139 data.identifier = my_address;
1140
1141 /* construct the param and capability */
1142 cap_mp_ipv6.code = BGP_CAP_CODE_MP;
1143 cap_mp_ipv6.len = sizeof(mp_ipv6);
1144 memcpy(&cap_mp_ipv6.value, &mp_ipv6, cap_mp_ipv6.len);
1145
1146 param_cap_mp_ipv6.type = BGP_PARAM_TYPE_CAPABILITY;
1147 param_cap_mp_ipv6.len = 2 + sizeof(mp_ipv6);
1148 memcpy(&param_cap_mp_ipv6.value, &cap_mp_ipv6, param_cap_mp_ipv6.len);
1149
1150 data.opt_len = 2 + param_cap_mp_ipv6.len;
1151 memcpy(&data.opt_params, &param_cap_mp_ipv6, data.opt_len);
1152
1153 memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE);
1154 len += BGP_DATA_OPEN_SIZE;
1155
1156 peer->outbuf->packet.header.len = htons(len);
1157 peer->outbuf->done = 0;
1158 peer->next_state = OpenSent;
1159
1160 return bgp_write(peer);
1161 }
1162
1163 /* send/buffer KEEPALIVE message */
1164 static int bgp_send_keepalive(struct bgp_peer *peer)
1165 {
1166 memset(peer->outbuf->packet.header.marker, 0xff,
1167 sizeof(peer->outbuf->packet.header.marker));
1168
1169 peer->outbuf->packet.header.type = BGP_MSG_KEEPALIVE;
1170 peer->outbuf->packet.header.len =
1171 htons(sizeof(peer->outbuf->packet.header));
1172
1173 peer->outbuf->done = 0;
1174 peer->next_state = (peer->state == OpenSent) ? OpenConfirm : peer->state;
1175
1176 return bgp_write(peer);
1177 }
1178
1179 /* send/buffer UPDATE message */
1180 static int bgp_send_update(struct bgp_peer *peer)
1181 {
1182 uint16_t unf_len = 0;
1183 uint16_t attr_len;
1184 uint16_t len = sizeof(peer->outbuf->packet.header);
1185 struct bgp_route_list *have = peer->routes;
1186 struct bgp_route_list *want = peer->routing ? bgp_routes : 0;
1187 struct bgp_route_list *e = 0;
1188 struct bgp_route_list *add = 0;
1189 int s;
1190
1191 char *data = (char *) &peer->outbuf->packet.data;
1192
1193 /* need leave room for attr_len, bgp_path_attrs and one prefix */
1194 char *max = (char *) &peer->outbuf->packet.data
1195 + sizeof(peer->outbuf->packet.data)
1196 - sizeof(attr_len) - peer->path_attr_len - sizeof(struct bgp_ip_prefix);
1197
1198 /* skip over unf_len */
1199 data += sizeof(unf_len);
1200 len += sizeof(unf_len);
1201
1202 memset(peer->outbuf->packet.header.marker, 0xff,
1203 sizeof(peer->outbuf->packet.header.marker));
1204
1205 peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
1206
1207 peer->update_routes = 0; /* tentatively clear */
1208
1209 /* find differences */
1210 while ((have || want) && data < (max - sizeof(struct bgp_ip_prefix)))
1211 {
1212 if (have)
1213 s = want
1214 ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
1215 : -1;
1216 else
1217 s = 1;
1218
1219 if (s < 0) /* found one to delete */
1220 {
1221 struct bgp_route_list *tmp = have;
1222 have = have->next;
1223
1224 s = BGP_IP_PREFIX_SIZE(tmp->dest);
1225 memcpy(data, &tmp->dest, s);
1226 data += s;
1227 unf_len += s;
1228 len += s;
1229
1230 LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
1231 fmtaddr(tmp->dest.prefix, 0), tmp->dest.len, peer->name);
1232
1233 free(tmp);
1234
1235 if (e)
1236 e->next = have;
1237 else
1238 peer->routes = have;
1239 }
1240 else
1241 {
1242 if (!s) /* same */
1243 {
1244 e = have; /* stash the last found to relink above */
1245 have = have->next;
1246 want = want->next;
1247 }
1248 else if (s > 0) /* addition reqd. */
1249 {
1250 if (add)
1251 {
1252 peer->update_routes = 1; /* only one add per packet */
1253 if (!have)
1254 break;
1255 }
1256 else
1257 add = want;
1258
1259 if (want)
1260 want = want->next;
1261 }
1262 }
1263 }
1264
1265 if (have || want)
1266 peer->update_routes = 1; /* more to do */
1267
1268 /* anything changed? */
1269 if (!(unf_len || add))
1270 return 1;
1271
1272 /* go back and insert unf_len */
1273 unf_len = htons(unf_len);
1274 memcpy(&peer->outbuf->packet.data, &unf_len, sizeof(unf_len));
1275
1276 if (add)
1277 {
1278 if (!(e = malloc(sizeof(*e))))
1279 {
1280 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
1281 fmtaddr(add->dest.prefix, 0), add->dest.len, strerror(errno));
1282
1283 return 0;
1284 }
1285
1286 memcpy(e, add, sizeof(*e));
1287 e->next = 0;
1288 peer->routes = bgp_insert_route(peer->routes, e);
1289
1290 attr_len = htons(peer->path_attr_len);
1291 memcpy(data, &attr_len, sizeof(attr_len));
1292 data += sizeof(attr_len);
1293 len += sizeof(attr_len);
1294
1295 memcpy(data, peer->path_attrs, peer->path_attr_len);
1296 data += peer->path_attr_len;
1297 len += peer->path_attr_len;
1298
1299 s = BGP_IP_PREFIX_SIZE(add->dest);
1300 memcpy(data, &add->dest, s);
1301 data += s;
1302 len += s;
1303
1304 LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
1305 fmtaddr(add->dest.prefix, 0), add->dest.len, peer->name);
1306 }
1307 else
1308 {
1309 attr_len = 0;
1310 memcpy(data, &attr_len, sizeof(attr_len));
1311 data += sizeof(attr_len);
1312 len += sizeof(attr_len);
1313 }
1314
1315 peer->outbuf->packet.header.len = htons(len);
1316 peer->outbuf->done = 0;
1317
1318 return bgp_write(peer);
1319 }
1320
/*
 * Queue and send a NOTIFICATION that carries no data beyond the error
 * code and subcode.  Returns the result of bgp_send_notification_full().
 */
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode)
{
	char *no_data = NULL;

	return bgp_send_notification_full(peer, code, subcode, no_data, 0);
}
1327
1328 static int bgp_send_notification_full(struct bgp_peer *peer, uint8_t code,
1329 uint8_t subcode, char *notification_data, uint16_t data_len)
1330 {
1331 struct bgp_data_notification data;
1332 uint16_t len = 0;
1333
1334 data.error_code = code;
1335 len += sizeof(data.error_code);
1336
1337 data.error_subcode = subcode;
1338 len += sizeof(data.error_code);
1339
1340 memcpy(data.data, notification_data, data_len);
1341 len += data_len;
1342
1343 memset(peer->outbuf->packet.header.marker, 0xff,
1344 sizeof(peer->outbuf->packet.header.marker));
1345
1346 peer->outbuf->packet.header.type = BGP_MSG_NOTIFICATION;
1347 peer->outbuf->packet.header.len =
1348 htons(sizeof(peer->outbuf->packet.header) + len);
1349
1350 memcpy(peer->outbuf->packet.data, &data, len);
1351
1352 peer->outbuf->done = 0;
1353 peer->next_state = code == BGP_ERR_CEASE ? Disabled : Idle;
1354
1355 /* we're dying; ignore any pending input */
1356 peer->inbuf->packet.header.len = 0;
1357 peer->inbuf->done = 0;
1358
1359 return bgp_write(peer);
1360 }