9d237c9a68817f0f87f4c59a32dae6e010245a9a
[l2tpns.git] / bgp.c
1 /*
2 * BGPv4
3 * Used to advertise routes for upstream (l2tp port, rather than gratuitous
4 * arp) and downstream--allowing routers to load-balance both.
5 *
6 * Implementation limitations:
7 * - We never listen for incoming connections (session always initiated by us).
8 * - Any routes advertised by the peer are accepted, but ignored.
9 * - No password support; neither RFC1771 (which no-one seems to do anyway)
10 * nor RFC2385 (which requires a kernel patch on 2.4 kernels).
11 */
12
/* CVS revision identifier string for this file */
const char *cvs_id_bgp = "$Id: bgp.c,v 1.12 2005/09/02 23:39:36 bodea Exp $";
14
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <string.h>
18 #include <time.h>
19 #include <errno.h>
20 #include <sys/socket.h>
21 #include <netinet/in.h>
22 #include <arpa/inet.h>
23 #include <netdb.h>
24 #include <fcntl.h>
25
26 #include "l2tpns.h"
27 #include "bgp.h"
28 #include "util.h"
29
/* peer state / retry handling */
static void bgp_clear(struct bgp_peer *peer);
static void bgp_set_retry(struct bgp_peer *peer);

/* route list helpers */
static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx);
static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
    struct bgp_route_list *new);
static void bgp_free_routes(struct bgp_route_list *routes);

/* logging helper */
static char const *bgp_msg_type_str(uint8_t type);

/* connection establishment and socket I/O */
static int bgp_connect(struct bgp_peer *peer);
static int bgp_handle_connect(struct bgp_peer *peer);
static int bgp_write(struct bgp_peer *peer);
static int bgp_read(struct bgp_peer *peer);
static int bgp_handle_input(struct bgp_peer *peer);

/* message builders; each one queues a message and calls bgp_write() */
static int bgp_send_open(struct bgp_peer *peer);
static int bgp_send_keepalive(struct bgp_peer *peer);
static int bgp_send_update(struct bgp_peer *peer);
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode);
static int bgp_send_notification_full(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode, char *notification_data, uint16_t data_len);

/* our AS number; zero means BGP is not set up (see bgp_setup) */
static uint16_t our_as;

/* sorted list of the routes we want to advertise to peers */
static struct bgp_route_list *bgp_routes = NULL;

/* set to 1 by bgp_start once a peer has been configured */
int bgp_configured = 0;

/* array of BGP_NUM_PEERS peer slots; not allocated in this file */
struct bgp_peer *bgp_peers = NULL;
56
57 /* prepare peer structure, globals */
58 int bgp_setup(int as)
59 {
60 int i;
61 struct bgp_peer *peer;
62
63 for (i = 0; i < BGP_NUM_PEERS; i++)
64 {
65 peer = &bgp_peers[i];
66 memset(peer, 0, sizeof(*peer));
67
68 peer->addr = INADDR_NONE;
69 peer->sock = -1;
70 peer->state = peer->next_state = Disabled;
71
72 if (!((peer->outbuf = malloc(sizeof(*peer->outbuf)))
73 && (peer->inbuf = malloc(sizeof(*peer->inbuf)))))
74 {
75 LOG(0, 0, 0, "Can't allocate buffers for bgp peer (%s)\n",
76 strerror(errno));
77
78 return 0;
79 }
80
81 peer->edata.type = FD_TYPE_BGP;
82 peer->edata.index = i;
83 peer->events = 0;
84 }
85
86 if (as < 1)
87 as = 0;
88
89 if ((our_as = as))
90 return 0;
91
92 bgp_routes = 0;
93 bgp_configured = 0; /* set by bgp_start */
94
95 return 1;
96 }
97
98 /* start connection with a peer */
99 int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
100 int hold, int enable)
101 {
102 struct hostent *h;
103 int ibgp;
104 int i;
105 struct bgp_path_attr a;
106 char path_attrs[64];
107 char *p = path_attrs;
108 in_addr_t ip;
109 uint32_t metric = htonl(BGP_METRIC);
110 uint32_t no_export = htonl(BGP_COMMUNITY_NO_EXPORT);
111
112 if (!our_as)
113 return 0;
114
115 if (peer->state != Disabled)
116 bgp_halt(peer);
117
118 snprintf(peer->name, sizeof(peer->name), "%s", name);
119
120 if (!(h = gethostbyname(name)) || h->h_addrtype != AF_INET)
121 {
122 LOG(0, 0, 0, "Can't get address for BGP peer %s (%s)\n",
123 name, h ? "no address" : hstrerror(h_errno));
124
125 return 0;
126 }
127
128 memcpy(&peer->addr, h->h_addr, sizeof(peer->addr));
129 peer->as = as > 0 ? as : our_as;
130 ibgp = peer->as == our_as;
131
132 /* set initial timer values */
133 peer->init_keepalive = keepalive == -1 ? BGP_KEEPALIVE_TIME : keepalive;
134 peer->init_hold = hold == -1 ? BGP_HOLD_TIME : hold;
135
136 if (peer->init_hold < 3)
137 peer->init_hold = 3;
138
139 if (peer->init_keepalive * 3 > peer->init_hold)
140 peer->init_keepalive = peer->init_hold / 3;
141
142 /* clear buffers, go to Idle state */
143 peer->next_state = Idle;
144 bgp_clear(peer);
145
146 /* set initial routing state */
147 peer->routing = enable;
148
149 /* all our routes use the same attributes, so prepare it in advance */
150 if (peer->path_attrs)
151 free(peer->path_attrs);
152
153 peer->path_attr_len = 0;
154
155 /* ORIGIN */
156 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
157 a.code = BGP_PATH_ATTR_CODE_ORIGIN;
158 a.data.s.len = 1;
159 a.data.s.value[0] = BGP_PATH_ATTR_CODE_ORIGIN_IGP;
160
161 #define ADD_ATTRIBUTE() do { \
162 i = BGP_PATH_ATTR_SIZE(a); \
163 memcpy(p, &a, i); \
164 p += i; \
165 peer->path_attr_len += i; } while (0)
166
167 ADD_ATTRIBUTE();
168
169 /* AS_PATH */
170 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
171 a.code = BGP_PATH_ATTR_CODE_AS_PATH;
172 if (ibgp)
173 {
174 /* empty path */
175 a.data.s.len = 0;
176 }
177 else
178 {
179 /* just our AS */
180 struct {
181 uint8_t type;
182 uint8_t len;
183 uint16_t value;
184 } as_path = {
185 BGP_PATH_ATTR_CODE_AS_PATH_AS_SEQUENCE,
186 1,
187 htons(our_as),
188 };
189
190 a.data.s.len = sizeof(as_path);
191 memcpy(&a.data.s.value, &as_path, sizeof(as_path));
192 }
193
194 ADD_ATTRIBUTE();
195
196 /* NEXT_HOP */
197 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
198 a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
199 ip = my_address; /* we're it */
200 a.data.s.len = sizeof(ip);
201 memcpy(a.data.s.value, &ip, sizeof(ip));
202
203 ADD_ATTRIBUTE();
204
205 /* MULTI_EXIT_DISC */
206 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
207 a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC;
208 a.data.s.len = sizeof(metric);
209 memcpy(a.data.s.value, &metric, sizeof(metric));
210
211 ADD_ATTRIBUTE();
212
213 if (ibgp)
214 {
215 uint32_t local_pref = htonl(BGP_LOCAL_PREF);
216
217 /* LOCAL_PREF */
218 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
219 a.code = BGP_PATH_ATTR_CODE_LOCAL_PREF;
220 a.data.s.len = sizeof(local_pref);
221 memcpy(a.data.s.value, &local_pref, sizeof(local_pref));
222
223 ADD_ATTRIBUTE();
224 }
225
226 /* COMMUNITIES */
227 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_TRANS;
228 a.code = BGP_PATH_ATTR_CODE_COMMUNITIES;
229 a.data.s.len = sizeof(no_export);
230 memcpy(a.data.s.value, &no_export, sizeof(no_export));
231
232 ADD_ATTRIBUTE();
233
234 if (!(peer->path_attrs = malloc(peer->path_attr_len)))
235 {
236 LOG(0, 0, 0, "Can't allocate path_attrs for %s (%s)\n",
237 name, strerror(errno));
238
239 return 0;
240 }
241
242 memcpy(peer->path_attrs, path_attrs, peer->path_attr_len);
243
244 LOG(4, 0, 0, "Initiating BGP connection to %s (routing %s)\n",
245 name, enable ? "enabled" : "suspended");
246
247 /* we have at least one peer configured */
248 bgp_configured = 1;
249
250 /* connect */
251 return bgp_connect(peer);
252 }
253
254 /* clear counters, timers, routes and buffers; close socket; move to
255 next_state, which may be Disabled or Idle */
256 static void bgp_clear(struct bgp_peer *peer)
257 {
258 if (peer->sock != -1)
259 {
260 close(peer->sock);
261 peer->sock = -1;
262 }
263
264 peer->keepalive_time = 0;
265 peer->expire_time = 0;
266
267 peer->keepalive = peer->init_keepalive;
268 peer->hold = peer->init_hold;
269
270 bgp_free_routes(peer->routes);
271 peer->routes = 0;
272
273 peer->outbuf->packet.header.len = 0;
274 peer->outbuf->done = 0;
275 peer->inbuf->packet.header.len = 0;
276 peer->inbuf->done = 0;
277
278 peer->cli_flag = 0;
279 peer->events = 0;
280
281 if (peer->state != peer->next_state)
282 {
283 peer->state = peer->next_state;
284 peer->state_time = time_now;
285
286 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
287 bgp_state_str(peer->next_state));
288 }
289 }
290
291 /* initiate a clean shutdown */
292 void bgp_stop(struct bgp_peer *peer)
293 {
294 LOG(4, 0, 0, "Terminating BGP connection to %s\n", peer->name);
295 bgp_send_notification(peer, BGP_ERR_CEASE, 0);
296 }
297
298 /* drop connection (if any) and set state to Disabled */
299 void bgp_halt(struct bgp_peer *peer)
300 {
301 LOG(4, 0, 0, "Aborting BGP connection to %s\n", peer->name);
302 peer->next_state = Disabled;
303 bgp_clear(peer);
304 }
305
306 /* drop connection (if any) and set to Idle for connection retry */
307 int bgp_restart(struct bgp_peer *peer)
308 {
309 peer->next_state = Idle;
310 bgp_clear(peer);
311
312 /* restart now */
313 peer->retry_time = time_now;
314 peer->retry_count = 0;
315
316 /* connect */
317 return bgp_connect(peer);
318 }
319
320 static void bgp_set_retry(struct bgp_peer *peer)
321 {
322 if (peer->retry_count++ < BGP_MAX_RETRY)
323 {
324 peer->retry_time = time_now + (BGP_RETRY_BACKOFF * peer->retry_count);
325 peer->next_state = Idle;
326 bgp_clear(peer);
327 }
328 else
329 bgp_halt(peer); /* give up */
330 }
331
332 /* convert ip/mask to CIDR notation */
333 static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx)
334 {
335 int i;
336 uint32_t b;
337
338 /* convert to prefix notation */
339 pfx->len = 32;
340 pfx->prefix = ip;
341
342 if (!mask) /* bogus */
343 mask = 0xffffffff;
344
345 for (i = 0; i < 32 && ((b = ntohl(1 << i)), !(mask & b)); i++)
346 {
347 pfx->len--;
348 pfx->prefix &= ~b;
349 }
350 }
351
352 /* insert route into list; sorted */
353 static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
354 struct bgp_route_list *new)
355 {
356 struct bgp_route_list *p = head;
357 struct bgp_route_list *e = 0;
358
359 while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0)
360 {
361 e = p;
362 p = p->next;
363 }
364
365 if (e)
366 {
367 new->next = e->next;
368 e->next = new;
369 }
370 else
371 {
372 new->next = head;
373 head = new;
374 }
375
376 return head;
377 }
378
379 /* add route to list for peers */
380 /*
381 * Note: this doesn't do route aggregation, nor drop routes if a less
382 * specific match already exists (partly because I'm lazy, but also so
383 * that if that route is later deleted we don't have to be concerned
384 * about adding back the more specific one).
385 */
386 int bgp_add_route(in_addr_t ip, in_addr_t mask)
387 {
388 struct bgp_route_list *r = bgp_routes;
389 struct bgp_route_list add;
390 int i;
391
392 bgp_cidr(ip, mask, &add.dest);
393 add.next = 0;
394
395 /* check for duplicate */
396 while (r)
397 {
398 i = memcmp(&r->dest, &add.dest, sizeof(r->dest));
399 if (!i)
400 return 1; /* already covered */
401
402 if (i > 0)
403 break;
404
405 r = r->next;
406 }
407
408 /* insert into route list; sorted */
409 if (!(r = malloc(sizeof(*r))))
410 {
411 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
412 fmtaddr(add.dest.prefix, 0), add.dest.len, strerror(errno));
413
414 return 0;
415 }
416
417 memcpy(r, &add, sizeof(*r));
418 bgp_routes = bgp_insert_route(bgp_routes, r);
419
420 /* flag established peers for update */
421 for (i = 0; i < BGP_NUM_PEERS; i++)
422 if (bgp_peers[i].state == Established)
423 bgp_peers[i].update_routes = 1;
424
425 LOG(4, 0, 0, "Registered BGP route %s/%d\n",
426 fmtaddr(add.dest.prefix, 0), add.dest.len);
427
428 return 1;
429 }
430
431 /* remove route from list for peers */
432 int bgp_del_route(in_addr_t ip, in_addr_t mask)
433 {
434 struct bgp_route_list *r = bgp_routes;
435 struct bgp_route_list *e = 0;
436 struct bgp_route_list del;
437 int i;
438
439 bgp_cidr(ip, mask, &del.dest);
440 del.next = 0;
441
442 /* find entry in routes list and remove */
443 while (r)
444 {
445 i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
446 if (!i)
447 {
448 if (e)
449 e->next = r->next;
450 else
451 bgp_routes = r->next;
452
453 free(r);
454 break;
455 }
456
457 e = r;
458
459 if (i > 0)
460 r = 0; /* stop */
461 else
462 r = r->next;
463 }
464
465 /* not found */
466 if (!r)
467 return 1;
468
469 /* flag established peers for update */
470 for (i = 0; i < BGP_NUM_PEERS; i++)
471 if (bgp_peers[i].state == Established)
472 bgp_peers[i].update_routes = 1;
473
474 LOG(4, 0, 0, "Removed BGP route %s/%d\n",
475 fmtaddr(del.dest.prefix, 0), del.dest.len);
476
477 return 1;
478 }
479
480 /* enable or disable routing */
481 void bgp_enable_routing(int enable)
482 {
483 int i;
484
485 for (i = 0; i < BGP_NUM_PEERS; i++)
486 {
487 bgp_peers[i].routing = enable;
488
489 /* flag established peers for update */
490 if (bgp_peers[i].state == Established)
491 bgp_peers[i].update_routes = 1;
492 }
493
494 LOG(4, 0, 0, "%s BGP routing\n", enable ? "Enabled" : "Suspended");
495 }
496
497 #ifdef HAVE_EPOLL
498 # include <sys/epoll.h>
499 #else
500 # include "fake_epoll.h"
501 #endif
502
503 /* return a bitmask of the events required to poll this peer's fd */
504 int bgp_set_poll()
505 {
506 int i;
507
508 if (!bgp_configured)
509 return 0;
510
511 for (i = 0; i < BGP_NUM_PEERS; i++)
512 {
513 struct bgp_peer *peer = &bgp_peers[i];
514 int events = 0;
515
516 if (peer->state == Disabled || peer->state == Idle)
517 continue;
518
519 if (peer->inbuf->done < BGP_MAX_PACKET_SIZE)
520 events |= EPOLLIN;
521
522 if (peer->state == Connect || /* connection in progress */
523 peer->update_routes || /* routing updates */
524 peer->outbuf->packet.header.len) /* pending output */
525 events |= EPOLLOUT;
526
527 if (peer->events != events)
528 {
529 struct epoll_event ev;
530
531 ev.events = peer->events = events;
532 ev.data.ptr = &peer->edata;
533 epoll_ctl(epollfd, EPOLL_CTL_MOD, peer->sock, &ev);
534 }
535 }
536
537 return 1;
538 }
539
540 /* process bgp events/timers */
541 int bgp_process(uint32_t events[])
542 {
543 int i;
544
545 if (!bgp_configured)
546 return 0;
547
548 for (i = 0; i < BGP_NUM_PEERS; i++)
549 {
550 struct bgp_peer *peer = &bgp_peers[i];
551
552 if (*peer->name && peer->cli_flag == BGP_CLI_RESTART)
553 {
554 bgp_restart(peer);
555 continue;
556 }
557
558 if (peer->state == Disabled)
559 continue;
560
561 if (peer->cli_flag)
562 {
563 switch (peer->cli_flag)
564 {
565 case BGP_CLI_SUSPEND:
566 if (peer->routing)
567 {
568 peer->routing = 0;
569 if (peer->state == Established)
570 peer->update_routes = 1;
571 }
572
573 break;
574
575 case BGP_CLI_ENABLE:
576 if (!peer->routing)
577 {
578 peer->routing = 1;
579 if (peer->state == Established)
580 peer->update_routes = 1;
581 }
582
583 break;
584 }
585
586 peer->cli_flag = 0;
587 }
588
589 /* handle empty/fill of buffers */
590 if (events[i] & EPOLLOUT)
591 {
592 int r = 1;
593 if (peer->state == Connect)
594 r = bgp_handle_connect(peer);
595 else if (peer->outbuf->packet.header.len)
596 r = bgp_write(peer);
597
598 if (!r)
599 continue;
600 }
601
602 if (events[i] & (EPOLLIN|EPOLLHUP))
603 {
604 if (!bgp_read(peer))
605 continue;
606 }
607
608 /* process input buffer contents */
609 while (peer->inbuf->done >= sizeof(peer->inbuf->packet.header)
610 && !peer->outbuf->packet.header.len) /* may need to queue a response */
611 {
612 if (bgp_handle_input(peer) < 0)
613 continue;
614 }
615
616 /* process pending updates */
617 if (peer->update_routes
618 && !peer->outbuf->packet.header.len) /* ditto */
619 {
620 if (!bgp_send_update(peer))
621 continue;
622 }
623
624 /* process timers */
625 if (peer->state == Established)
626 {
627 if (time_now > peer->expire_time)
628 {
629 LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
630 peer->name, peer->hold);
631
632 bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
633 continue;
634 }
635
636 if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
637 bgp_send_keepalive(peer);
638 }
639 else if (peer->state == Idle)
640 {
641 if (time_now > peer->retry_time)
642 bgp_connect(peer);
643 }
644 else if (time_now > peer->state_time + BGP_STATE_TIME)
645 {
646 LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
647 bgp_state_str(peer->state), peer->name);
648
649 bgp_restart(peer);
650 }
651 }
652
653 return 1;
654 }
655
656 static void bgp_free_routes(struct bgp_route_list *routes)
657 {
658 struct bgp_route_list *tmp;
659
660 while ((tmp = routes))
661 {
662 routes = tmp->next;
663 free(tmp);
664 }
665 }
666
667 char const *bgp_state_str(enum bgp_state state)
668 {
669 switch (state)
670 {
671 case Disabled: return "Disabled";
672 case Idle: return "Idle";
673 case Connect: return "Connect";
674 case Active: return "Active";
675 case OpenSent: return "OpenSent";
676 case OpenConfirm: return "OpenConfirm";
677 case Established: return "Established";
678 }
679
680 return "?";
681 }
682
683 static char const *bgp_msg_type_str(uint8_t type)
684 {
685 switch (type)
686 {
687 case BGP_MSG_OPEN: return "OPEN";
688 case BGP_MSG_UPDATE: return "UPDATE";
689 case BGP_MSG_NOTIFICATION: return "NOTIFICATION";
690 case BGP_MSG_KEEPALIVE: return "KEEPALIVE";
691 }
692
693 return "?";
694 }
695
696 /* attempt to connect to peer */
697 static int bgp_connect(struct bgp_peer *peer)
698 {
699 static int bgp_port = 0;
700 struct sockaddr_in addr;
701 struct epoll_event ev;
702
703 if (!bgp_port)
704 {
705 struct servent *serv;
706 if (!(serv = getservbyname("bgp", "tcp")))
707 {
708 LOG(0, 0, 0, "Can't get bgp service (%s)\n", strerror(errno));
709 return 0;
710 }
711
712 bgp_port = serv->s_port;
713 }
714
715 if ((peer->sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
716 {
717 LOG(0, 0, 0, "Can't create a socket for BGP peer %s (%s)\n",
718 peer->name, strerror(errno));
719
720 peer->state = peer->next_state = Disabled;
721 return 0;
722 }
723
724 /* add to poll set */
725 ev.events = peer->events = EPOLLOUT;
726 ev.data.ptr = &peer->edata;
727 epoll_ctl(epollfd, EPOLL_CTL_ADD, peer->sock, &ev);
728
729 /* set to non-blocking */
730 fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
731
732 /* try connect */
733 memset(&addr, 0, sizeof(addr));
734 addr.sin_family = AF_INET;
735 addr.sin_port = bgp_port;
736 addr.sin_addr.s_addr = peer->addr;
737
738 while (connect(peer->sock, (struct sockaddr *) &addr, sizeof(addr)) == -1)
739 {
740 if (errno == EINTR) /* SIGALARM handler */
741 continue;
742
743 if (errno != EINPROGRESS)
744 {
745 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n",
746 inet_ntoa(addr.sin_addr), strerror(errno));
747
748 bgp_set_retry(peer);
749 return 0;
750 }
751
752 peer->state = Connect;
753 peer->state_time = time_now;
754
755 LOG(4, 0, 0, "BGP peer %s: state Connect\n", peer->name);
756 return 1;
757 }
758
759 peer->state = Active;
760 peer->state_time = time_now;
761 peer->retry_time = peer->retry_count = 0;
762
763 LOG(4, 0, 0, "BGP peer %s: state Active\n", inet_ntoa(addr.sin_addr));
764
765 return bgp_send_open(peer);
766 }
767
768 /* complete partial connection (state = Connect) */
769 static int bgp_handle_connect(struct bgp_peer *peer)
770 {
771 int err = 0;
772 socklen_t len = sizeof(int);
773 getsockopt(peer->sock, SOL_SOCKET, SO_ERROR, &err, &len);
774 if (err)
775 {
776 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n", peer->name,
777 strerror(err));
778
779 bgp_set_retry(peer);
780 return 0;
781 }
782
783 peer->state = Active;
784 peer->state_time = time_now;
785
786 LOG(4, 0, 0, "BGP peer %s: state Active\n", peer->name);
787
788 return bgp_send_open(peer);
789 }
790
791 /* initiate a write */
792 static int bgp_write(struct bgp_peer *peer)
793 {
794 int len = htons(peer->outbuf->packet.header.len);
795 int r;
796
797 while ((r = write(peer->sock, &peer->outbuf->packet + peer->outbuf->done,
798 len - peer->outbuf->done)) == -1)
799 {
800 if (errno == EINTR)
801 continue;
802
803 if (errno == EAGAIN)
804 return 1;
805
806 if (errno == EPIPE)
807 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
808 else
809 LOG(1, 0, 0, "Can't write to BGP peer %s (%s)\n", peer->name,
810 strerror(errno));
811
812 bgp_set_retry(peer);
813 return 0;
814 }
815
816 if (r < len)
817 {
818 peer->outbuf->done += r;
819 return 1;
820 }
821
822 LOG(4, 0, 0, "Sent %s to BGP peer %s\n",
823 bgp_msg_type_str(peer->outbuf->packet.header.type), peer->name);
824
825 peer->outbuf->packet.header.len = 0;
826 peer->outbuf->done = 0;
827
828 if (peer->state == Established)
829 peer->keepalive_time = time_now + peer->keepalive;
830
831 if (peer->state != peer->next_state)
832 {
833 if (peer->next_state == Disabled || peer->next_state == Idle)
834 {
835 bgp_clear(peer);
836 return 0;
837 }
838
839 peer->state = peer->next_state;
840 peer->state_time = time_now;
841
842 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
843 bgp_state_str(peer->state));
844 }
845
846 return 1;
847 }
848
849 /* initiate a read */
850 static int bgp_read(struct bgp_peer *peer)
851 {
852 int r;
853
854 while ((r = read(peer->sock, &peer->inbuf->packet + peer->inbuf->done,
855 BGP_MAX_PACKET_SIZE - peer->inbuf->done)) < 1)
856 {
857 if (!r)
858 {
859 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
860 }
861 else
862 {
863 if (errno == EINTR)
864 continue;
865
866 if (errno == EAGAIN)
867 return 1;
868
869 LOG(1, 0, 0, "Can't read from BGP peer %s (%s)\n", peer->name,
870 strerror(errno));
871 }
872
873 bgp_set_retry(peer);
874 return 0;
875 }
876
877 peer->inbuf->done += r;
878 return 1;
879 }
880
881 /* process buffered packets */
882 static int bgp_handle_input(struct bgp_peer *peer)
883 {
884 struct bgp_packet *p = &peer->inbuf->packet;
885 int len = ntohs(p->header.len);
886
887 if (len > BGP_MAX_PACKET_SIZE)
888 {
889 LOG(1, 0, 0, "Bad header length from BGP %s\n", peer->name);
890 bgp_send_notification(peer, BGP_ERR_HEADER, BGP_ERR_HDR_BAD_LEN);
891 return 0;
892 }
893
894 if (peer->inbuf->done < len)
895 return 0;
896
897 LOG(4, 0, 0, "Received %s from BGP peer %s\n",
898 bgp_msg_type_str(p->header.type), peer->name);
899
900 switch (p->header.type)
901 {
902 case BGP_MSG_OPEN:
903 {
904 struct bgp_data_open data;
905 int hold;
906 int i;
907 off_t param_offset, capability_offset;
908 struct bgp_opt_param *param;
909 uint8_t capabilities_len;
910 char *capabilities = NULL;
911 struct bgp_capability *capability;
912 struct bgp_mp_cap_param *mp_cap;
913
914 for (i = 0; i < sizeof(p->header.marker); i++)
915 {
916 if ((unsigned char) p->header.marker[i] != 0xff)
917 {
918 LOG(1, 0, 0, "Invalid marker from BGP peer %s\n",
919 peer->name);
920
921 bgp_send_notification(peer, BGP_ERR_HEADER,
922 BGP_ERR_HDR_NOT_SYNC);
923
924 return 0;
925 }
926 }
927
928 if (peer->state != OpenSent)
929 {
930 LOG(1, 0, 0, "OPEN from BGP peer %s in %s state\n",
931 peer->name, bgp_state_str(peer->state));
932
933 bgp_send_notification(peer, BGP_ERR_FSM, 0);
934 return 0;
935 }
936
937 memcpy(&data, p->data, len - sizeof(p->header));
938
939 if (data.version != BGP_VERSION)
940 {
941 LOG(1, 0, 0, "Bad version (%d) sent by BGP peer %s\n",
942 (int) data.version, peer->name);
943
944 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_VERSION);
945 return 0;
946 }
947
948 if (ntohs(data.as) != peer->as)
949 {
950 LOG(1, 0, 0, "Bad AS sent by BGP peer %s (got %d, "
951 "expected %d)\n", peer->name, (int) htons(data.as),
952 (int) peer->as);
953
954 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_BAD_AS);
955 return 0;
956 }
957
958 if ((hold = ntohs(data.hold_time)) < 3)
959 {
960 LOG(1, 0, 0, "Bad hold time (%d) from BGP peer %s\n",
961 hold, peer->name);
962
963 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_HOLD_TIME);
964 return 0;
965 }
966
967 /* pick lowest hold time */
968 if (hold < peer->hold)
969 peer->hold = hold;
970
971 /* adjust our keepalive based on negotiated hold value */
972 if (peer->keepalive * 3 > peer->hold)
973 peer->keepalive = peer->hold / 3;
974
975 /* check for optional parameters */
976 /* 2 is for the size of type + len (both uint8_t) */
977 for (param_offset = 0;
978 param_offset < data.opt_len;
979 param_offset += 2 + param->len)
980 {
981 param = (struct bgp_opt_param *)(&data.opt_params + param_offset);
982
983 /* sensible check */
984 if (data.opt_len - param_offset < 2
985 || param->len > data.opt_len - param_offset - 2)
986 {
987 LOG(1, 0, 0, "Malformed Optional Parameter list from BGP peer %s\n",
988 peer->name);
989
990 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC);
991 return 0;
992 }
993
994 /* we know only one parameter type */
995 if (param->type != BGP_CAPABILITY_PARAM_TYPE)
996 {
997 LOG(1, 0, 0, "Unsupported Optional Parameter type %d from BGP peer %s\n",
998 param->type, peer->name);
999
1000 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_UNSUP_PARAM);
1001 return 0;
1002 }
1003
1004 capabilities_len = param->len;
1005 capabilities = (char *)&param->value;
1006 }
1007
1008 /* look for BGP multiprotocol capability */
1009 if (capabilities)
1010 {
1011 for (capability_offset = 0;
1012 capability_offset < capabilities_len;
1013 capability_offset += 2 + capability->len)
1014 {
1015 capability = (struct bgp_capability *)(capabilities + capability_offset);
1016
1017 /* sensible check */
1018 if (capabilities_len - capability_offset < 2
1019 || capability->len > capabilities_len - capability_offset - 2)
1020 {
1021 LOG(1, 0, 0, "Malformed Capabilities list from BGP peer %s\n",
1022 peer->name);
1023
1024 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC);
1025 return 0;
1026 }
1027
1028 /* we only know one capability code */
1029 if (capability->code != BGP_CAP_CODE_MP
1030 && capability->len != sizeof(struct bgp_mp_cap_param))
1031 {
1032 LOG(4, 0, 0, "Unsupported Capability code %d from BGP peer %s\n",
1033 capability->code, peer->name);
1034
1035 bgp_send_notification_full(peer, BGP_ERR_OPEN, BGP_ERR_OPN_UNSUP_CAP,
1036 capability, 2 + capability->len);
1037 /* we don't terminate, still; we just jump to the next one */
1038 continue;
1039 }
1040
1041 mp_cap = (struct bgp_mp_cap_param *)&capability->value;
1042 /* the only <AFI, SAFI> tuple we support */
1043 if (mp_cap->afi != AF_INET6 && mp_cap->safi != BGP_MP_SAFI_UNICAST)
1044 {
1045 LOG(4, 0, 0, "Unsupported multiprotocol AFI %d and SAFI %d from BGP peer %s\n",
1046 mp_cap->afi, mp_cap->safi, peer->name);
1047
1048 bgp_send_notification_full(peer, BGP_ERR_OPEN, BGP_ERR_OPN_UNSUP_CAP,
1049 capability, 2 + capability->len);
1050 /* we don't terminate, still; we just jump to the next one */
1051 continue;
1052 }
1053
1054 }
1055 }
1056
1057 /* next transition requires an exchange of keepalives */
1058 bgp_send_keepalive(peer);
1059 }
1060
1061 break;
1062
1063 case BGP_MSG_KEEPALIVE:
1064 if (peer->state == OpenConfirm)
1065 {
1066 peer->state = peer->next_state = Established;
1067 peer->state_time = time_now;
1068 peer->keepalive_time = time_now + peer->keepalive;
1069 peer->update_routes = 1;
1070 peer->retry_count = 0;
1071 peer->retry_time = 0;
1072
1073 LOG(4, 0, 0, "BGP peer %s: state Established\n", peer->name);
1074 }
1075
1076 break;
1077
1078 case BGP_MSG_NOTIFICATION:
1079 if (len > sizeof(p->header))
1080 {
1081 struct bgp_data_notification *notification =
1082 (struct bgp_data_notification *) p->data;
1083
1084 if (notification->error_code == BGP_ERR_CEASE)
1085 {
1086 LOG(4, 0, 0, "BGP peer %s sent CEASE\n", peer->name);
1087 bgp_restart(peer);
1088 return 0;
1089 }
1090
1091 /* FIXME: should handle more notifications */
1092 LOG(4, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n",
1093 peer->name, (int) notification->error_code);
1094 }
1095
1096 break;
1097 }
1098
1099 /* reset timer */
1100 peer->expire_time = time_now + peer->hold;
1101
1102 /* see if there's another message in the same packet/buffer */
1103 if (peer->inbuf->done > len)
1104 {
1105 peer->inbuf->done -= len;
1106 memmove(p, (char *) p + len, peer->inbuf->done);
1107 }
1108 else
1109 {
1110 peer->inbuf->packet.header.len = 0;
1111 peer->inbuf->done = 0;
1112 }
1113
1114 return peer->inbuf->done;
1115 }
1116
1117 /* send/buffer OPEN message */
1118 static int bgp_send_open(struct bgp_peer *peer)
1119 {
1120 struct bgp_data_open data;
1121 uint16_t len = sizeof(peer->outbuf->packet.header);
1122
1123 memset(peer->outbuf->packet.header.marker, 0xff,
1124 sizeof(peer->outbuf->packet.header.marker));
1125
1126 peer->outbuf->packet.header.type = BGP_MSG_OPEN;
1127
1128 data.version = BGP_VERSION;
1129 data.as = htons(our_as);
1130 data.hold_time = htons(peer->hold);
1131 data.identifier = my_address;
1132 data.opt_len = 0;
1133
1134 memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE);
1135 len += BGP_DATA_OPEN_SIZE;
1136
1137 peer->outbuf->packet.header.len = htons(len);
1138 peer->outbuf->done = 0;
1139 peer->next_state = OpenSent;
1140
1141 return bgp_write(peer);
1142 }
1143
1144 /* send/buffer KEEPALIVE message */
1145 static int bgp_send_keepalive(struct bgp_peer *peer)
1146 {
1147 memset(peer->outbuf->packet.header.marker, 0xff,
1148 sizeof(peer->outbuf->packet.header.marker));
1149
1150 peer->outbuf->packet.header.type = BGP_MSG_KEEPALIVE;
1151 peer->outbuf->packet.header.len =
1152 htons(sizeof(peer->outbuf->packet.header));
1153
1154 peer->outbuf->done = 0;
1155 peer->next_state = (peer->state == OpenSent) ? OpenConfirm : peer->state;
1156
1157 return bgp_write(peer);
1158 }
1159
1160 /* send/buffer UPDATE message */
1161 static int bgp_send_update(struct bgp_peer *peer)
1162 {
1163 uint16_t unf_len = 0;
1164 uint16_t attr_len;
1165 uint16_t len = sizeof(peer->outbuf->packet.header);
1166 struct bgp_route_list *have = peer->routes;
1167 struct bgp_route_list *want = peer->routing ? bgp_routes : 0;
1168 struct bgp_route_list *e = 0;
1169 struct bgp_route_list *add = 0;
1170 int s;
1171
1172 char *data = (char *) &peer->outbuf->packet.data;
1173
1174 /* need leave room for attr_len, bgp_path_attrs and one prefix */
1175 char *max = (char *) &peer->outbuf->packet.data
1176 + sizeof(peer->outbuf->packet.data)
1177 - sizeof(attr_len) - peer->path_attr_len - sizeof(struct bgp_ip_prefix);
1178
1179 /* skip over unf_len */
1180 data += sizeof(unf_len);
1181 len += sizeof(unf_len);
1182
1183 memset(peer->outbuf->packet.header.marker, 0xff,
1184 sizeof(peer->outbuf->packet.header.marker));
1185
1186 peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
1187
1188 peer->update_routes = 0; /* tentatively clear */
1189
1190 /* find differences */
1191 while ((have || want) && data < (max - sizeof(struct bgp_ip_prefix)))
1192 {
1193 if (have)
1194 s = want
1195 ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
1196 : -1;
1197 else
1198 s = 1;
1199
1200 if (s < 0) /* found one to delete */
1201 {
1202 struct bgp_route_list *tmp = have;
1203 have = have->next;
1204
1205 s = BGP_IP_PREFIX_SIZE(tmp->dest);
1206 memcpy(data, &tmp->dest, s);
1207 data += s;
1208 unf_len += s;
1209 len += s;
1210
1211 LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
1212 fmtaddr(tmp->dest.prefix, 0), tmp->dest.len, peer->name);
1213
1214 free(tmp);
1215
1216 if (e)
1217 e->next = have;
1218 else
1219 peer->routes = have;
1220 }
1221 else
1222 {
1223 if (!s) /* same */
1224 {
1225 e = have; /* stash the last found to relink above */
1226 have = have->next;
1227 want = want->next;
1228 }
1229 else if (s > 0) /* addition reqd. */
1230 {
1231 if (add)
1232 {
1233 peer->update_routes = 1; /* only one add per packet */
1234 if (!have)
1235 break;
1236 }
1237 else
1238 add = want;
1239
1240 if (want)
1241 want = want->next;
1242 }
1243 }
1244 }
1245
1246 if (have || want)
1247 peer->update_routes = 1; /* more to do */
1248
1249 /* anything changed? */
1250 if (!(unf_len || add))
1251 return 1;
1252
1253 /* go back and insert unf_len */
1254 unf_len = htons(unf_len);
1255 memcpy(&peer->outbuf->packet.data, &unf_len, sizeof(unf_len));
1256
1257 if (add)
1258 {
1259 if (!(e = malloc(sizeof(*e))))
1260 {
1261 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
1262 fmtaddr(add->dest.prefix, 0), add->dest.len, strerror(errno));
1263
1264 return 0;
1265 }
1266
1267 memcpy(e, add, sizeof(*e));
1268 e->next = 0;
1269 peer->routes = bgp_insert_route(peer->routes, e);
1270
1271 attr_len = htons(peer->path_attr_len);
1272 memcpy(data, &attr_len, sizeof(attr_len));
1273 data += sizeof(attr_len);
1274 len += sizeof(attr_len);
1275
1276 memcpy(data, peer->path_attrs, peer->path_attr_len);
1277 data += peer->path_attr_len;
1278 len += peer->path_attr_len;
1279
1280 s = BGP_IP_PREFIX_SIZE(add->dest);
1281 memcpy(data, &add->dest, s);
1282 data += s;
1283 len += s;
1284
1285 LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
1286 fmtaddr(add->dest.prefix, 0), add->dest.len, peer->name);
1287 }
1288 else
1289 {
1290 attr_len = 0;
1291 memcpy(data, &attr_len, sizeof(attr_len));
1292 data += sizeof(attr_len);
1293 len += sizeof(attr_len);
1294 }
1295
1296 peer->outbuf->packet.header.len = htons(len);
1297 peer->outbuf->done = 0;
1298
1299 return bgp_write(peer);
1300 }
1301
/*
 * Send a NOTIFICATION that carries only an error code and subcode (no
 * data).  Returns the result of bgp_send_notification_full().
 */
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode)
{
    char *no_data = NULL;

    return bgp_send_notification_full(peer, code, subcode, no_data, 0);
}
1308
1309 static int bgp_send_notification_full(struct bgp_peer *peer, uint8_t code,
1310 uint8_t subcode, char *notification_data, uint16_t data_len)
1311 {
1312 struct bgp_data_notification data;
1313 uint16_t len = 0;
1314
1315 data.error_code = code;
1316 len += sizeof(data.error_code);
1317
1318 data.error_subcode = subcode;
1319 len += sizeof(data.error_code);
1320
1321 memcpy(data.data, notification_data, data_len);
1322 len += data_len;
1323
1324 memset(peer->outbuf->packet.header.marker, 0xff,
1325 sizeof(peer->outbuf->packet.header.marker));
1326
1327 peer->outbuf->packet.header.type = BGP_MSG_NOTIFICATION;
1328 peer->outbuf->packet.header.len =
1329 htons(sizeof(peer->outbuf->packet.header) + len);
1330
1331 memcpy(peer->outbuf->packet.data, &data, len);
1332
1333 peer->outbuf->done = 0;
1334 peer->next_state = code == BGP_ERR_CEASE ? Disabled : Idle;
1335
1336 /* we're dying; ignore any pending input */
1337 peer->inbuf->packet.header.len = 0;
1338 peer->inbuf->done = 0;
1339
1340 return bgp_write(peer);
1341 }