Move NEXT_HOP attribute so that we can exclude it.
[l2tpns.git] / bgp.c
1 /*
2 * BGPv4
3 * Used to advertise routes for upstream (l2tp port, rather than gratuitous
4 * arp) and downstream--allowing routers to load-balance both.
5 *
6 * Implementation limitations:
7 * - We never listen for incoming connections (session always initiated by us).
8 * - Any routes advertised by the peer are accepted, but ignored.
9 * - No password support; neither RFC1771 (which no-one seems to do anyway)
10 * nor RFC2385 (which requires a kernel patch on 2.4 kernels).
11 */
12
/* CVS keyword-expanded identifier for this file; non-static, so it is
   visible to other translation units */
const char *cvs_id_bgp = "$Id: bgp.c,v 1.12 2005/09/02 23:39:36 bodea Exp $";
14
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <string.h>
18 #include <time.h>
19 #include <errno.h>
20 #include <sys/socket.h>
21 #include <netinet/in.h>
22 #include <arpa/inet.h>
23 #include <netdb.h>
24 #include <fcntl.h>
25
26 #include "l2tpns.h"
27 #include "bgp.h"
28 #include "util.h"
29
/* forward declarations: peer life-cycle helpers */
static void bgp_clear(struct bgp_peer *peer);
static void bgp_set_retry(struct bgp_peer *peer);

/* forward declarations: route list helpers */
static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx);
static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
    struct bgp_route_list *new);

static void bgp_free_routes(struct bgp_route_list *routes);

/* forward declarations: connection and message handling */
static char const *bgp_msg_type_str(uint8_t type);
static int bgp_connect(struct bgp_peer *peer);
static int bgp_handle_connect(struct bgp_peer *peer);
static int bgp_write(struct bgp_peer *peer);
static int bgp_read(struct bgp_peer *peer);
static int bgp_handle_input(struct bgp_peer *peer);
static int bgp_send_open(struct bgp_peer *peer);
static int bgp_send_keepalive(struct bgp_peer *peer);
static int bgp_send_update(struct bgp_peer *peer);
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode);
static int bgp_send_notification_full(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode, char *notification_data, uint16_t data_len);

/* our AS number; assigned by bgp_setup(), zero means BGP is not usable */
static uint16_t our_as;

/* master list of routes to advertise; kept sorted by bgp_insert_route() */
static struct bgp_route_list *bgp_routes = NULL;

/* set to 1 by bgp_start() once at least one peer has been started */
int bgp_configured = 0;

/* peer table indexed 0..BGP_NUM_PEERS-1; it is never allocated in this
   file, so presumably the caller points it at storage before bgp_setup() */
struct bgp_peer *bgp_peers = NULL;
56
57 /* prepare peer structure, globals */
58 int bgp_setup(int as)
59 {
60 int i;
61 struct bgp_peer *peer;
62
63 for (i = 0; i < BGP_NUM_PEERS; i++)
64 {
65 peer = &bgp_peers[i];
66 memset(peer, 0, sizeof(*peer));
67
68 peer->addr = INADDR_NONE;
69 peer->sock = -1;
70 peer->state = peer->next_state = Disabled;
71
72 if (!((peer->outbuf = malloc(sizeof(*peer->outbuf)))
73 && (peer->inbuf = malloc(sizeof(*peer->inbuf)))))
74 {
75 LOG(0, 0, 0, "Can't allocate buffers for bgp peer (%s)\n",
76 strerror(errno));
77
78 return 0;
79 }
80
81 peer->edata.type = FD_TYPE_BGP;
82 peer->edata.index = i;
83 peer->events = 0;
84 }
85
86 if (as < 1)
87 as = 0;
88
89 if ((our_as = as))
90 return 0;
91
92 bgp_routes = 0;
93 bgp_configured = 0; /* set by bgp_start */
94
95 return 1;
96 }
97
98 /* start connection with a peer */
99 int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
100 int hold, int enable)
101 {
102 struct hostent *h;
103 int ibgp;
104 int i;
105 struct bgp_path_attr a;
106 char path_attrs[64];
107 char *p = path_attrs;
108 in_addr_t ip;
109 uint32_t metric = htonl(BGP_METRIC);
110 uint32_t no_export = htonl(BGP_COMMUNITY_NO_EXPORT);
111
112 if (!our_as)
113 return 0;
114
115 if (peer->state != Disabled)
116 bgp_halt(peer);
117
118 snprintf(peer->name, sizeof(peer->name), "%s", name);
119
120 if (!(h = gethostbyname(name)) || h->h_addrtype != AF_INET)
121 {
122 LOG(0, 0, 0, "Can't get address for BGP peer %s (%s)\n",
123 name, h ? "no address" : hstrerror(h_errno));
124
125 return 0;
126 }
127
128 memcpy(&peer->addr, h->h_addr, sizeof(peer->addr));
129 peer->as = as > 0 ? as : our_as;
130 ibgp = peer->as == our_as;
131
132 /* set initial timer values */
133 peer->init_keepalive = keepalive == -1 ? BGP_KEEPALIVE_TIME : keepalive;
134 peer->init_hold = hold == -1 ? BGP_HOLD_TIME : hold;
135
136 if (peer->init_hold < 3)
137 peer->init_hold = 3;
138
139 if (peer->init_keepalive * 3 > peer->init_hold)
140 peer->init_keepalive = peer->init_hold / 3;
141
142 /* clear buffers, go to Idle state */
143 peer->next_state = Idle;
144 bgp_clear(peer);
145
146 /* set initial routing state */
147 peer->routing = enable;
148
149 /* all our routes use the same attributes, so prepare it in advance */
150 if (peer->path_attrs)
151 free(peer->path_attrs);
152
153 peer->path_attr_len = 0;
154
155 /* ORIGIN */
156 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
157 a.code = BGP_PATH_ATTR_CODE_ORIGIN;
158 a.data.s.len = 1;
159 a.data.s.value[0] = BGP_PATH_ATTR_CODE_ORIGIN_IGP;
160
161 #define ADD_ATTRIBUTE() do { \
162 i = BGP_PATH_ATTR_SIZE(a); \
163 memcpy(p, &a, i); \
164 p += i; \
165 peer->path_attr_len += i; } while (0)
166
167 ADD_ATTRIBUTE();
168
169 /* AS_PATH */
170 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
171 a.code = BGP_PATH_ATTR_CODE_AS_PATH;
172 if (ibgp)
173 {
174 /* empty path */
175 a.data.s.len = 0;
176 }
177 else
178 {
179 /* just our AS */
180 struct {
181 uint8_t type;
182 uint8_t len;
183 uint16_t value;
184 } as_path = {
185 BGP_PATH_ATTR_CODE_AS_PATH_AS_SEQUENCE,
186 1,
187 htons(our_as),
188 };
189
190 a.data.s.len = sizeof(as_path);
191 memcpy(&a.data.s.value, &as_path, sizeof(as_path));
192 }
193
194 ADD_ATTRIBUTE();
195
196 /* MULTI_EXIT_DISC */
197 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
198 a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC;
199 a.data.s.len = sizeof(metric);
200 memcpy(a.data.s.value, &metric, sizeof(metric));
201
202 ADD_ATTRIBUTE();
203
204 if (ibgp)
205 {
206 uint32_t local_pref = htonl(BGP_LOCAL_PREF);
207
208 /* LOCAL_PREF */
209 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
210 a.code = BGP_PATH_ATTR_CODE_LOCAL_PREF;
211 a.data.s.len = sizeof(local_pref);
212 memcpy(a.data.s.value, &local_pref, sizeof(local_pref));
213
214 ADD_ATTRIBUTE();
215 }
216
217 /* COMMUNITIES */
218 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_TRANS;
219 a.code = BGP_PATH_ATTR_CODE_COMMUNITIES;
220 a.data.s.len = sizeof(no_export);
221 memcpy(a.data.s.value, &no_export, sizeof(no_export));
222
223 ADD_ATTRIBUTE();
224
225 /* remember the len before adding NEXT_HOP */
226 peer->path_attr_len_without_nexthop = peer->path_attr_len;
227
228 /* NEXT_HOP */
229 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
230 a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
231 ip = my_address; /* we're it */
232 a.data.s.len = sizeof(ip);
233 memcpy(a.data.s.value, &ip, sizeof(ip));
234
235 ADD_ATTRIBUTE();
236
237 if (!(peer->path_attrs = malloc(peer->path_attr_len)))
238 {
239 LOG(0, 0, 0, "Can't allocate path_attrs for %s (%s)\n",
240 name, strerror(errno));
241
242 return 0;
243 }
244
245 memcpy(peer->path_attrs, path_attrs, peer->path_attr_len);
246
247 LOG(4, 0, 0, "Initiating BGP connection to %s (routing %s)\n",
248 name, enable ? "enabled" : "suspended");
249
250 /* we have at least one peer configured */
251 bgp_configured = 1;
252
253 /* connect */
254 return bgp_connect(peer);
255 }
256
257 /* clear counters, timers, routes and buffers; close socket; move to
258 next_state, which may be Disabled or Idle */
259 static void bgp_clear(struct bgp_peer *peer)
260 {
261 if (peer->sock != -1)
262 {
263 close(peer->sock);
264 peer->sock = -1;
265 }
266
267 peer->keepalive_time = 0;
268 peer->expire_time = 0;
269
270 peer->keepalive = peer->init_keepalive;
271 peer->hold = peer->init_hold;
272
273 bgp_free_routes(peer->routes);
274 peer->routes = 0;
275
276 peer->outbuf->packet.header.len = 0;
277 peer->outbuf->done = 0;
278 peer->inbuf->packet.header.len = 0;
279 peer->inbuf->done = 0;
280
281 peer->cli_flag = 0;
282 peer->events = 0;
283
284 if (peer->state != peer->next_state)
285 {
286 peer->state = peer->next_state;
287 peer->state_time = time_now;
288
289 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
290 bgp_state_str(peer->next_state));
291 }
292 }
293
294 /* initiate a clean shutdown */
295 void bgp_stop(struct bgp_peer *peer)
296 {
297 LOG(4, 0, 0, "Terminating BGP connection to %s\n", peer->name);
298 bgp_send_notification(peer, BGP_ERR_CEASE, 0);
299 }
300
301 /* drop connection (if any) and set state to Disabled */
302 void bgp_halt(struct bgp_peer *peer)
303 {
304 LOG(4, 0, 0, "Aborting BGP connection to %s\n", peer->name);
305 peer->next_state = Disabled;
306 bgp_clear(peer);
307 }
308
309 /* drop connection (if any) and set to Idle for connection retry */
310 int bgp_restart(struct bgp_peer *peer)
311 {
312 peer->next_state = Idle;
313 bgp_clear(peer);
314
315 /* restart now */
316 peer->retry_time = time_now;
317 peer->retry_count = 0;
318
319 /* connect */
320 return bgp_connect(peer);
321 }
322
323 static void bgp_set_retry(struct bgp_peer *peer)
324 {
325 if (peer->retry_count++ < BGP_MAX_RETRY)
326 {
327 peer->retry_time = time_now + (BGP_RETRY_BACKOFF * peer->retry_count);
328 peer->next_state = Idle;
329 bgp_clear(peer);
330 }
331 else
332 bgp_halt(peer); /* give up */
333 }
334
335 /* convert ip/mask to CIDR notation */
336 static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx)
337 {
338 int i;
339 uint32_t b;
340
341 /* convert to prefix notation */
342 pfx->len = 32;
343 pfx->prefix = ip;
344
345 if (!mask) /* bogus */
346 mask = 0xffffffff;
347
348 for (i = 0; i < 32 && ((b = ntohl(1 << i)), !(mask & b)); i++)
349 {
350 pfx->len--;
351 pfx->prefix &= ~b;
352 }
353 }
354
355 /* insert route into list; sorted */
356 static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
357 struct bgp_route_list *new)
358 {
359 struct bgp_route_list *p = head;
360 struct bgp_route_list *e = 0;
361
362 while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0)
363 {
364 e = p;
365 p = p->next;
366 }
367
368 if (e)
369 {
370 new->next = e->next;
371 e->next = new;
372 }
373 else
374 {
375 new->next = head;
376 head = new;
377 }
378
379 return head;
380 }
381
382 /* add route to list for peers */
383 /*
384 * Note: this doesn't do route aggregation, nor drop routes if a less
385 * specific match already exists (partly because I'm lazy, but also so
386 * that if that route is later deleted we don't have to be concerned
387 * about adding back the more specific one).
388 */
389 int bgp_add_route(in_addr_t ip, in_addr_t mask)
390 {
391 struct bgp_route_list *r = bgp_routes;
392 struct bgp_route_list add;
393 int i;
394
395 bgp_cidr(ip, mask, &add.dest);
396 add.next = 0;
397
398 /* check for duplicate */
399 while (r)
400 {
401 i = memcmp(&r->dest, &add.dest, sizeof(r->dest));
402 if (!i)
403 return 1; /* already covered */
404
405 if (i > 0)
406 break;
407
408 r = r->next;
409 }
410
411 /* insert into route list; sorted */
412 if (!(r = malloc(sizeof(*r))))
413 {
414 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
415 fmtaddr(add.dest.prefix, 0), add.dest.len, strerror(errno));
416
417 return 0;
418 }
419
420 memcpy(r, &add, sizeof(*r));
421 bgp_routes = bgp_insert_route(bgp_routes, r);
422
423 /* flag established peers for update */
424 for (i = 0; i < BGP_NUM_PEERS; i++)
425 if (bgp_peers[i].state == Established)
426 bgp_peers[i].update_routes = 1;
427
428 LOG(4, 0, 0, "Registered BGP route %s/%d\n",
429 fmtaddr(add.dest.prefix, 0), add.dest.len);
430
431 return 1;
432 }
433
434 /* remove route from list for peers */
435 int bgp_del_route(in_addr_t ip, in_addr_t mask)
436 {
437 struct bgp_route_list *r = bgp_routes;
438 struct bgp_route_list *e = 0;
439 struct bgp_route_list del;
440 int i;
441
442 bgp_cidr(ip, mask, &del.dest);
443 del.next = 0;
444
445 /* find entry in routes list and remove */
446 while (r)
447 {
448 i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
449 if (!i)
450 {
451 if (e)
452 e->next = r->next;
453 else
454 bgp_routes = r->next;
455
456 free(r);
457 break;
458 }
459
460 e = r;
461
462 if (i > 0)
463 r = 0; /* stop */
464 else
465 r = r->next;
466 }
467
468 /* not found */
469 if (!r)
470 return 1;
471
472 /* flag established peers for update */
473 for (i = 0; i < BGP_NUM_PEERS; i++)
474 if (bgp_peers[i].state == Established)
475 bgp_peers[i].update_routes = 1;
476
477 LOG(4, 0, 0, "Removed BGP route %s/%d\n",
478 fmtaddr(del.dest.prefix, 0), del.dest.len);
479
480 return 1;
481 }
482
483 /* enable or disable routing */
484 void bgp_enable_routing(int enable)
485 {
486 int i;
487
488 for (i = 0; i < BGP_NUM_PEERS; i++)
489 {
490 bgp_peers[i].routing = enable;
491
492 /* flag established peers for update */
493 if (bgp_peers[i].state == Established)
494 bgp_peers[i].update_routes = 1;
495 }
496
497 LOG(4, 0, 0, "%s BGP routing\n", enable ? "Enabled" : "Suspended");
498 }
499
500 #ifdef HAVE_EPOLL
501 # include <sys/epoll.h>
502 #else
503 # include "fake_epoll.h"
504 #endif
505
506 /* return a bitmask of the events required to poll this peer's fd */
507 int bgp_set_poll()
508 {
509 int i;
510
511 if (!bgp_configured)
512 return 0;
513
514 for (i = 0; i < BGP_NUM_PEERS; i++)
515 {
516 struct bgp_peer *peer = &bgp_peers[i];
517 int events = 0;
518
519 if (peer->state == Disabled || peer->state == Idle)
520 continue;
521
522 if (peer->inbuf->done < BGP_MAX_PACKET_SIZE)
523 events |= EPOLLIN;
524
525 if (peer->state == Connect || /* connection in progress */
526 peer->update_routes || /* routing updates */
527 peer->outbuf->packet.header.len) /* pending output */
528 events |= EPOLLOUT;
529
530 if (peer->events != events)
531 {
532 struct epoll_event ev;
533
534 ev.events = peer->events = events;
535 ev.data.ptr = &peer->edata;
536 epoll_ctl(epollfd, EPOLL_CTL_MOD, peer->sock, &ev);
537 }
538 }
539
540 return 1;
541 }
542
543 /* process bgp events/timers */
544 int bgp_process(uint32_t events[])
545 {
546 int i;
547
548 if (!bgp_configured)
549 return 0;
550
551 for (i = 0; i < BGP_NUM_PEERS; i++)
552 {
553 struct bgp_peer *peer = &bgp_peers[i];
554
555 if (*peer->name && peer->cli_flag == BGP_CLI_RESTART)
556 {
557 bgp_restart(peer);
558 continue;
559 }
560
561 if (peer->state == Disabled)
562 continue;
563
564 if (peer->cli_flag)
565 {
566 switch (peer->cli_flag)
567 {
568 case BGP_CLI_SUSPEND:
569 if (peer->routing)
570 {
571 peer->routing = 0;
572 if (peer->state == Established)
573 peer->update_routes = 1;
574 }
575
576 break;
577
578 case BGP_CLI_ENABLE:
579 if (!peer->routing)
580 {
581 peer->routing = 1;
582 if (peer->state == Established)
583 peer->update_routes = 1;
584 }
585
586 break;
587 }
588
589 peer->cli_flag = 0;
590 }
591
592 /* handle empty/fill of buffers */
593 if (events[i] & EPOLLOUT)
594 {
595 int r = 1;
596 if (peer->state == Connect)
597 r = bgp_handle_connect(peer);
598 else if (peer->outbuf->packet.header.len)
599 r = bgp_write(peer);
600
601 if (!r)
602 continue;
603 }
604
605 if (events[i] & (EPOLLIN|EPOLLHUP))
606 {
607 if (!bgp_read(peer))
608 continue;
609 }
610
611 /* process input buffer contents */
612 while (peer->inbuf->done >= sizeof(peer->inbuf->packet.header)
613 && !peer->outbuf->packet.header.len) /* may need to queue a response */
614 {
615 if (bgp_handle_input(peer) < 0)
616 continue;
617 }
618
619 /* process pending updates */
620 if (peer->update_routes
621 && !peer->outbuf->packet.header.len) /* ditto */
622 {
623 if (!bgp_send_update(peer))
624 continue;
625 }
626
627 /* process timers */
628 if (peer->state == Established)
629 {
630 if (time_now > peer->expire_time)
631 {
632 LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
633 peer->name, peer->hold);
634
635 bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
636 continue;
637 }
638
639 if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
640 bgp_send_keepalive(peer);
641 }
642 else if (peer->state == Idle)
643 {
644 if (time_now > peer->retry_time)
645 bgp_connect(peer);
646 }
647 else if (time_now > peer->state_time + BGP_STATE_TIME)
648 {
649 LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
650 bgp_state_str(peer->state), peer->name);
651
652 bgp_restart(peer);
653 }
654 }
655
656 return 1;
657 }
658
659 static void bgp_free_routes(struct bgp_route_list *routes)
660 {
661 struct bgp_route_list *tmp;
662
663 while ((tmp = routes))
664 {
665 routes = tmp->next;
666 free(tmp);
667 }
668 }
669
670 char const *bgp_state_str(enum bgp_state state)
671 {
672 switch (state)
673 {
674 case Disabled: return "Disabled";
675 case Idle: return "Idle";
676 case Connect: return "Connect";
677 case Active: return "Active";
678 case OpenSent: return "OpenSent";
679 case OpenConfirm: return "OpenConfirm";
680 case Established: return "Established";
681 }
682
683 return "?";
684 }
685
686 static char const *bgp_msg_type_str(uint8_t type)
687 {
688 switch (type)
689 {
690 case BGP_MSG_OPEN: return "OPEN";
691 case BGP_MSG_UPDATE: return "UPDATE";
692 case BGP_MSG_NOTIFICATION: return "NOTIFICATION";
693 case BGP_MSG_KEEPALIVE: return "KEEPALIVE";
694 }
695
696 return "?";
697 }
698
699 /* attempt to connect to peer */
700 static int bgp_connect(struct bgp_peer *peer)
701 {
702 static int bgp_port = 0;
703 struct sockaddr_in addr;
704 struct epoll_event ev;
705
706 if (!bgp_port)
707 {
708 struct servent *serv;
709 if (!(serv = getservbyname("bgp", "tcp")))
710 {
711 LOG(0, 0, 0, "Can't get bgp service (%s)\n", strerror(errno));
712 return 0;
713 }
714
715 bgp_port = serv->s_port;
716 }
717
718 if ((peer->sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
719 {
720 LOG(0, 0, 0, "Can't create a socket for BGP peer %s (%s)\n",
721 peer->name, strerror(errno));
722
723 peer->state = peer->next_state = Disabled;
724 return 0;
725 }
726
727 /* add to poll set */
728 ev.events = peer->events = EPOLLOUT;
729 ev.data.ptr = &peer->edata;
730 epoll_ctl(epollfd, EPOLL_CTL_ADD, peer->sock, &ev);
731
732 /* set to non-blocking */
733 fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
734
735 /* try connect */
736 memset(&addr, 0, sizeof(addr));
737 addr.sin_family = AF_INET;
738 addr.sin_port = bgp_port;
739 addr.sin_addr.s_addr = peer->addr;
740
741 while (connect(peer->sock, (struct sockaddr *) &addr, sizeof(addr)) == -1)
742 {
743 if (errno == EINTR) /* SIGALARM handler */
744 continue;
745
746 if (errno != EINPROGRESS)
747 {
748 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n",
749 inet_ntoa(addr.sin_addr), strerror(errno));
750
751 bgp_set_retry(peer);
752 return 0;
753 }
754
755 peer->state = Connect;
756 peer->state_time = time_now;
757
758 LOG(4, 0, 0, "BGP peer %s: state Connect\n", peer->name);
759 return 1;
760 }
761
762 peer->state = Active;
763 peer->state_time = time_now;
764 peer->retry_time = peer->retry_count = 0;
765
766 LOG(4, 0, 0, "BGP peer %s: state Active\n", inet_ntoa(addr.sin_addr));
767
768 peer->handle_ipv6_routes = 0;
769
770 return bgp_send_open(peer);
771 }
772
773 /* complete partial connection (state = Connect) */
774 static int bgp_handle_connect(struct bgp_peer *peer)
775 {
776 int err = 0;
777 socklen_t len = sizeof(int);
778 getsockopt(peer->sock, SOL_SOCKET, SO_ERROR, &err, &len);
779 if (err)
780 {
781 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n", peer->name,
782 strerror(err));
783
784 bgp_set_retry(peer);
785 return 0;
786 }
787
788 peer->state = Active;
789 peer->state_time = time_now;
790
791 LOG(4, 0, 0, "BGP peer %s: state Active\n", peer->name);
792
793 peer->handle_ipv6_routes = 0;
794
795 return bgp_send_open(peer);
796 }
797
798 /* initiate a write */
799 static int bgp_write(struct bgp_peer *peer)
800 {
801 int len = htons(peer->outbuf->packet.header.len);
802 int r;
803
804 while ((r = write(peer->sock, &peer->outbuf->packet + peer->outbuf->done,
805 len - peer->outbuf->done)) == -1)
806 {
807 if (errno == EINTR)
808 continue;
809
810 if (errno == EAGAIN)
811 return 1;
812
813 if (errno == EPIPE)
814 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
815 else
816 LOG(1, 0, 0, "Can't write to BGP peer %s (%s)\n", peer->name,
817 strerror(errno));
818
819 bgp_set_retry(peer);
820 return 0;
821 }
822
823 if (r < len)
824 {
825 peer->outbuf->done += r;
826 return 1;
827 }
828
829 LOG(4, 0, 0, "Sent %s to BGP peer %s\n",
830 bgp_msg_type_str(peer->outbuf->packet.header.type), peer->name);
831
832 peer->outbuf->packet.header.len = 0;
833 peer->outbuf->done = 0;
834
835 if (peer->state == Established)
836 peer->keepalive_time = time_now + peer->keepalive;
837
838 if (peer->state != peer->next_state)
839 {
840 if (peer->next_state == Disabled || peer->next_state == Idle)
841 {
842 bgp_clear(peer);
843 return 0;
844 }
845
846 peer->state = peer->next_state;
847 peer->state_time = time_now;
848
849 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
850 bgp_state_str(peer->state));
851 }
852
853 return 1;
854 }
855
856 /* initiate a read */
857 static int bgp_read(struct bgp_peer *peer)
858 {
859 int r;
860
861 while ((r = read(peer->sock, &peer->inbuf->packet + peer->inbuf->done,
862 BGP_MAX_PACKET_SIZE - peer->inbuf->done)) < 1)
863 {
864 if (!r)
865 {
866 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
867 }
868 else
869 {
870 if (errno == EINTR)
871 continue;
872
873 if (errno == EAGAIN)
874 return 1;
875
876 LOG(1, 0, 0, "Can't read from BGP peer %s (%s)\n", peer->name,
877 strerror(errno));
878 }
879
880 bgp_set_retry(peer);
881 return 0;
882 }
883
884 peer->inbuf->done += r;
885 return 1;
886 }
887
888 /* process buffered packets */
889 static int bgp_handle_input(struct bgp_peer *peer)
890 {
891 struct bgp_packet *p = &peer->inbuf->packet;
892 int len = ntohs(p->header.len);
893
894 if (len > BGP_MAX_PACKET_SIZE)
895 {
896 LOG(1, 0, 0, "Bad header length from BGP %s\n", peer->name);
897 bgp_send_notification(peer, BGP_ERR_HEADER, BGP_ERR_HDR_BAD_LEN);
898 return 0;
899 }
900
901 if (peer->inbuf->done < len)
902 return 0;
903
904 LOG(4, 0, 0, "Received %s from BGP peer %s\n",
905 bgp_msg_type_str(p->header.type), peer->name);
906
907 switch (p->header.type)
908 {
909 case BGP_MSG_OPEN:
910 {
911 struct bgp_data_open data;
912 int hold;
913 int i;
914 off_t param_offset, capability_offset;
915 struct bgp_opt_param *param;
916 uint8_t capabilities_len;
917 char *capabilities = NULL;
918 struct bgp_capability *capability;
919 struct bgp_mp_cap_param *mp_cap;
920
921 for (i = 0; i < sizeof(p->header.marker); i++)
922 {
923 if ((unsigned char) p->header.marker[i] != 0xff)
924 {
925 LOG(1, 0, 0, "Invalid marker from BGP peer %s\n",
926 peer->name);
927
928 bgp_send_notification(peer, BGP_ERR_HEADER,
929 BGP_ERR_HDR_NOT_SYNC);
930
931 return 0;
932 }
933 }
934
935 if (peer->state != OpenSent)
936 {
937 LOG(1, 0, 0, "OPEN from BGP peer %s in %s state\n",
938 peer->name, bgp_state_str(peer->state));
939
940 bgp_send_notification(peer, BGP_ERR_FSM, 0);
941 return 0;
942 }
943
944 memcpy(&data, p->data, len - sizeof(p->header));
945
946 if (data.version != BGP_VERSION)
947 {
948 LOG(1, 0, 0, "Bad version (%d) sent by BGP peer %s\n",
949 (int) data.version, peer->name);
950
951 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_VERSION);
952 return 0;
953 }
954
955 if (ntohs(data.as) != peer->as)
956 {
957 LOG(1, 0, 0, "Bad AS sent by BGP peer %s (got %d, "
958 "expected %d)\n", peer->name, (int) htons(data.as),
959 (int) peer->as);
960
961 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_BAD_AS);
962 return 0;
963 }
964
965 if ((hold = ntohs(data.hold_time)) < 3)
966 {
967 LOG(1, 0, 0, "Bad hold time (%d) from BGP peer %s\n",
968 hold, peer->name);
969
970 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_HOLD_TIME);
971 return 0;
972 }
973
974 /* pick lowest hold time */
975 if (hold < peer->hold)
976 peer->hold = hold;
977
978 /* adjust our keepalive based on negotiated hold value */
979 if (peer->keepalive * 3 > peer->hold)
980 peer->keepalive = peer->hold / 3;
981
982 /* check for optional parameters */
983 /* 2 is for the size of type + len (both uint8_t) */
984 for (param_offset = 0;
985 param_offset < data.opt_len;
986 param_offset += 2 + param->len)
987 {
988 param = (struct bgp_opt_param *)(&data.opt_params + param_offset);
989
990 /* sensible check */
991 if (data.opt_len - param_offset < 2
992 || param->len > data.opt_len - param_offset - 2)
993 {
994 LOG(1, 0, 0, "Malformed Optional Parameter list from BGP peer %s\n",
995 peer->name);
996
997 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC);
998 return 0;
999 }
1000
1001 /* we know only one parameter type */
1002 if (param->type != BGP_PARAM_TYPE_CAPABILITY)
1003 {
1004 LOG(1, 0, 0, "Unsupported Optional Parameter type %d from BGP peer %s\n",
1005 param->type, peer->name);
1006
1007 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_UNSUP_PARAM);
1008 return 0;
1009 }
1010
1011 capabilities_len = param->len;
1012 capabilities = (char *)&param->value;
1013 }
1014
1015 /* look for BGP multiprotocol capability */
1016 if (capabilities)
1017 {
1018 for (capability_offset = 0;
1019 capability_offset < capabilities_len;
1020 capability_offset += 2 + capability->len)
1021 {
1022 capability = (struct bgp_capability *)(capabilities + capability_offset);
1023
1024 /* sensible check */
1025 if (capabilities_len - capability_offset < 2
1026 || capability->len > capabilities_len - capability_offset - 2)
1027 {
1028 LOG(1, 0, 0, "Malformed Capabilities list from BGP peer %s\n",
1029 peer->name);
1030
1031 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC);
1032 return 0;
1033 }
1034
1035 /* we only know one capability code */
1036 if (capability->code != BGP_CAP_CODE_MP
1037 && capability->len != sizeof(struct bgp_mp_cap_param))
1038 {
1039 LOG(4, 0, 0, "Unsupported Capability code %d from BGP peer %s\n",
1040 capability->code, peer->name);
1041
1042 bgp_send_notification_full(peer, BGP_ERR_OPEN, BGP_ERR_OPN_UNSUP_CAP,
1043 (char *)capability, 2 + capability->len);
1044 /* we don't terminate, still; we just jump to the next one */
1045 continue;
1046 }
1047
1048 mp_cap = (struct bgp_mp_cap_param *)&capability->value;
1049 /* the only <AFI, SAFI> tuple we support */
1050 if (ntohs(mp_cap->afi) != AF_INET6 && mp_cap->safi != BGP_MP_SAFI_UNICAST)
1051 {
1052 LOG(4, 0, 0, "Unsupported multiprotocol AFI %d and SAFI %d from BGP peer %s\n",
1053 mp_cap->afi, mp_cap->safi, peer->name);
1054
1055 bgp_send_notification_full(peer, BGP_ERR_OPEN, BGP_ERR_OPN_UNSUP_CAP,
1056 (char *)capability, 2 + capability->len);
1057 /* we don't terminate, still; we just jump to the next one */
1058 continue;
1059 }
1060
1061 peer->handle_ipv6_routes = 1;
1062 }
1063 }
1064
1065 /* next transition requires an exchange of keepalives */
1066 bgp_send_keepalive(peer);
1067 }
1068
1069 break;
1070
1071 case BGP_MSG_KEEPALIVE:
1072 if (peer->state == OpenConfirm)
1073 {
1074 peer->state = peer->next_state = Established;
1075 peer->state_time = time_now;
1076 peer->keepalive_time = time_now + peer->keepalive;
1077 peer->update_routes = 1;
1078 peer->retry_count = 0;
1079 peer->retry_time = 0;
1080
1081 LOG(4, 0, 0, "BGP peer %s: state Established\n", peer->name);
1082 }
1083
1084 break;
1085
1086 case BGP_MSG_NOTIFICATION:
1087 if (len > sizeof(p->header))
1088 {
1089 struct bgp_data_notification *notification =
1090 (struct bgp_data_notification *) p->data;
1091
1092 if (notification->error_code == BGP_ERR_CEASE)
1093 {
1094 LOG(4, 0, 0, "BGP peer %s sent CEASE\n", peer->name);
1095 bgp_restart(peer);
1096 return 0;
1097 }
1098
1099 if (notification->error_code == BGP_ERR_OPEN
1100 && notification->error_subcode == BGP_ERR_OPN_UNSUP_CAP)
1101 {
1102 /* the only capability we advertise is this one, so upon receiving
1103 an "unsupported capability" message, we disable IPv6 routes for
1104 this peer */
1105 LOG(4, 0, 0, "BGP peer %s doesn't support IPv6 routes advertisement\n", peer->name);
1106 peer->handle_ipv6_routes = 0;
1107 break;
1108 }
1109
1110 /* FIXME: should handle more notifications */
1111 LOG(4, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n",
1112 peer->name, (int) notification->error_code);
1113 }
1114
1115 break;
1116 }
1117
1118 /* reset timer */
1119 peer->expire_time = time_now + peer->hold;
1120
1121 /* see if there's another message in the same packet/buffer */
1122 if (peer->inbuf->done > len)
1123 {
1124 peer->inbuf->done -= len;
1125 memmove(p, (char *) p + len, peer->inbuf->done);
1126 }
1127 else
1128 {
1129 peer->inbuf->packet.header.len = 0;
1130 peer->inbuf->done = 0;
1131 }
1132
1133 return peer->inbuf->done;
1134 }
1135
1136 /* send/buffer OPEN message */
1137 static int bgp_send_open(struct bgp_peer *peer)
1138 {
1139 struct bgp_data_open data;
1140 struct bgp_mp_cap_param mp_ipv6 = { htons(AF_INET6), 0, BGP_MP_SAFI_UNICAST };
1141 struct bgp_capability cap_mp_ipv6;
1142 struct bgp_opt_param param_cap_mp_ipv6;
1143 uint16_t len = sizeof(peer->outbuf->packet.header);
1144
1145 memset(peer->outbuf->packet.header.marker, 0xff,
1146 sizeof(peer->outbuf->packet.header.marker));
1147
1148 peer->outbuf->packet.header.type = BGP_MSG_OPEN;
1149
1150 data.version = BGP_VERSION;
1151 data.as = htons(our_as);
1152 data.hold_time = htons(peer->hold);
1153 data.identifier = my_address;
1154
1155 /* construct the param and capability */
1156 cap_mp_ipv6.code = BGP_CAP_CODE_MP;
1157 cap_mp_ipv6.len = sizeof(mp_ipv6);
1158 memcpy(&cap_mp_ipv6.value, &mp_ipv6, cap_mp_ipv6.len);
1159
1160 param_cap_mp_ipv6.type = BGP_PARAM_TYPE_CAPABILITY;
1161 param_cap_mp_ipv6.len = 2 + sizeof(mp_ipv6);
1162 memcpy(&param_cap_mp_ipv6.value, &cap_mp_ipv6, param_cap_mp_ipv6.len);
1163
1164 data.opt_len = 2 + param_cap_mp_ipv6.len;
1165 memcpy(&data.opt_params, &param_cap_mp_ipv6, data.opt_len);
1166
1167 memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE);
1168 len += BGP_DATA_OPEN_SIZE;
1169
1170 peer->outbuf->packet.header.len = htons(len);
1171 peer->outbuf->done = 0;
1172 peer->next_state = OpenSent;
1173
1174 return bgp_write(peer);
1175 }
1176
1177 /* send/buffer KEEPALIVE message */
1178 static int bgp_send_keepalive(struct bgp_peer *peer)
1179 {
1180 memset(peer->outbuf->packet.header.marker, 0xff,
1181 sizeof(peer->outbuf->packet.header.marker));
1182
1183 peer->outbuf->packet.header.type = BGP_MSG_KEEPALIVE;
1184 peer->outbuf->packet.header.len =
1185 htons(sizeof(peer->outbuf->packet.header));
1186
1187 peer->outbuf->done = 0;
1188 peer->next_state = (peer->state == OpenSent) ? OpenConfirm : peer->state;
1189
1190 return bgp_write(peer);
1191 }
1192
1193 /* send/buffer UPDATE message */
1194 static int bgp_send_update(struct bgp_peer *peer)
1195 {
1196 uint16_t unf_len = 0;
1197 uint16_t attr_len;
1198 uint16_t len = sizeof(peer->outbuf->packet.header);
1199 struct bgp_route_list *have = peer->routes;
1200 struct bgp_route_list *want = peer->routing ? bgp_routes : 0;
1201 struct bgp_route_list *e = 0;
1202 struct bgp_route_list *add = 0;
1203 int s;
1204
1205 char *data = (char *) &peer->outbuf->packet.data;
1206
1207 /* need leave room for attr_len, bgp_path_attrs and one prefix */
1208 char *max = (char *) &peer->outbuf->packet.data
1209 + sizeof(peer->outbuf->packet.data)
1210 - sizeof(attr_len) - peer->path_attr_len - sizeof(struct bgp_ip_prefix);
1211
1212 /* skip over unf_len */
1213 data += sizeof(unf_len);
1214 len += sizeof(unf_len);
1215
1216 memset(peer->outbuf->packet.header.marker, 0xff,
1217 sizeof(peer->outbuf->packet.header.marker));
1218
1219 peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
1220
1221 peer->update_routes = 0; /* tentatively clear */
1222
1223 /* find differences */
1224 while ((have || want) && data < (max - sizeof(struct bgp_ip_prefix)))
1225 {
1226 if (have)
1227 s = want
1228 ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
1229 : -1;
1230 else
1231 s = 1;
1232
1233 if (s < 0) /* found one to delete */
1234 {
1235 struct bgp_route_list *tmp = have;
1236 have = have->next;
1237
1238 s = BGP_IP_PREFIX_SIZE(tmp->dest);
1239 memcpy(data, &tmp->dest, s);
1240 data += s;
1241 unf_len += s;
1242 len += s;
1243
1244 LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
1245 fmtaddr(tmp->dest.prefix, 0), tmp->dest.len, peer->name);
1246
1247 free(tmp);
1248
1249 if (e)
1250 e->next = have;
1251 else
1252 peer->routes = have;
1253 }
1254 else
1255 {
1256 if (!s) /* same */
1257 {
1258 e = have; /* stash the last found to relink above */
1259 have = have->next;
1260 want = want->next;
1261 }
1262 else if (s > 0) /* addition reqd. */
1263 {
1264 if (add)
1265 {
1266 peer->update_routes = 1; /* only one add per packet */
1267 if (!have)
1268 break;
1269 }
1270 else
1271 add = want;
1272
1273 if (want)
1274 want = want->next;
1275 }
1276 }
1277 }
1278
1279 if (have || want)
1280 peer->update_routes = 1; /* more to do */
1281
1282 /* anything changed? */
1283 if (!(unf_len || add))
1284 return 1;
1285
1286 /* go back and insert unf_len */
1287 unf_len = htons(unf_len);
1288 memcpy(&peer->outbuf->packet.data, &unf_len, sizeof(unf_len));
1289
1290 if (add)
1291 {
1292 if (!(e = malloc(sizeof(*e))))
1293 {
1294 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
1295 fmtaddr(add->dest.prefix, 0), add->dest.len, strerror(errno));
1296
1297 return 0;
1298 }
1299
1300 memcpy(e, add, sizeof(*e));
1301 e->next = 0;
1302 peer->routes = bgp_insert_route(peer->routes, e);
1303
1304 attr_len = htons(peer->path_attr_len);
1305 memcpy(data, &attr_len, sizeof(attr_len));
1306 data += sizeof(attr_len);
1307 len += sizeof(attr_len);
1308
1309 memcpy(data, peer->path_attrs, peer->path_attr_len);
1310 data += peer->path_attr_len;
1311 len += peer->path_attr_len;
1312
1313 s = BGP_IP_PREFIX_SIZE(add->dest);
1314 memcpy(data, &add->dest, s);
1315 data += s;
1316 len += s;
1317
1318 LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
1319 fmtaddr(add->dest.prefix, 0), add->dest.len, peer->name);
1320 }
1321 else
1322 {
1323 attr_len = 0;
1324 memcpy(data, &attr_len, sizeof(attr_len));
1325 data += sizeof(attr_len);
1326 len += sizeof(attr_len);
1327 }
1328
1329 peer->outbuf->packet.header.len = htons(len);
1330 peer->outbuf->done = 0;
1331
1332 return bgp_write(peer);
1333 }
1334
/*
 * Send a NOTIFICATION that carries only an error code and subcode, with
 * no additional data.  Returns the result of
 * bgp_send_notification_full().
 */
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode)
{
    char *no_data = NULL;

    return bgp_send_notification_full(peer, code, subcode, no_data, 0);
}
1341
1342 static int bgp_send_notification_full(struct bgp_peer *peer, uint8_t code,
1343 uint8_t subcode, char *notification_data, uint16_t data_len)
1344 {
1345 struct bgp_data_notification data;
1346 uint16_t len = 0;
1347
1348 data.error_code = code;
1349 len += sizeof(data.error_code);
1350
1351 data.error_subcode = subcode;
1352 len += sizeof(data.error_code);
1353
1354 memcpy(data.data, notification_data, data_len);
1355 len += data_len;
1356
1357 memset(peer->outbuf->packet.header.marker, 0xff,
1358 sizeof(peer->outbuf->packet.header.marker));
1359
1360 peer->outbuf->packet.header.type = BGP_MSG_NOTIFICATION;
1361 peer->outbuf->packet.header.len =
1362 htons(sizeof(peer->outbuf->packet.header) + len);
1363
1364 memcpy(peer->outbuf->packet.data, &data, len);
1365
1366 peer->outbuf->done = 0;
1367 peer->next_state = code == BGP_ERR_CEASE ? Disabled : Idle;
1368
1369 /* we're dying; ignore any pending input */
1370 peer->inbuf->packet.header.len = 0;
1371 peer->inbuf->done = 0;
1372
1373 return bgp_write(peer);
1374 }