d7701bed9155febfff262fccfc00c879da5b15f1
[l2tpns.git] / bgp.c
/*
 * BGPv4
 * Used to advertise routes for upstream (l2tp port, rather than gratuitous
 * arp) and downstream--allowing routers to load-balance both.
 *
 * Implementation limitations:
 * - We never listen for incoming connections (session always initiated by us).
 * - Any routes advertised by the peer are accepted, but ignored.
 * - No password support; neither RFC1771 (which no-one seems to do anyway)
 *   nor RFC2385 (which requires a kernel patch on 2.4 kernels).
 */
12
/* CVS revision identifier for this file */
const char *cvs_id_bgp = "$Id: bgp.c,v 1.12 2005/09/02 23:39:36 bodea Exp $";
14
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <string.h>
18 #include <time.h>
19 #include <errno.h>
20 #include <sys/socket.h>
21 #include <netinet/in.h>
22 #include <arpa/inet.h>
23 #include <netdb.h>
24 #include <fcntl.h>
25
26 #include "l2tpns.h"
27 #include "bgp.h"
28 #include "util.h"
29
/* peer state helpers */
static void bgp_clear(struct bgp_peer *peer);
static void bgp_set_retry(struct bgp_peer *peer);

/* route list helpers */
static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx);
static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
    struct bgp_route_list *new);
static void bgp_free_routes(struct bgp_route_list *routes);

/* diagnostics */
static char const *bgp_msg_type_str(uint8_t type);

/* connection and socket I/O */
static int bgp_connect(struct bgp_peer *peer);
static int bgp_handle_connect(struct bgp_peer *peer);
static int bgp_write(struct bgp_peer *peer);
static int bgp_read(struct bgp_peer *peer);
static int bgp_handle_input(struct bgp_peer *peer);

/* message builders; each one queues into the peer's outbuf and writes */
static int bgp_send_open(struct bgp_peer *peer);
static int bgp_send_keepalive(struct bgp_peer *peer);
static int bgp_send_update(struct bgp_peer *peer);
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode);

/* our AS number as stored by bgp_setup(); zero makes bgp_start() refuse */
static uint16_t our_as;

/* sorted list of the routes we want every peer to know about */
static struct bgp_route_list *bgp_routes = NULL;

/* set to 1 by bgp_start() once at least one peer has been started */
int bgp_configured = 0;

/* array of BGP_NUM_PEERS peer slots; not allocated in this file */
struct bgp_peer *bgp_peers = NULL;
54
55 /* prepare peer structure, globals */
56 int bgp_setup(int as)
57 {
58 int i;
59 struct bgp_peer *peer;
60
61 for (i = 0; i < BGP_NUM_PEERS; i++)
62 {
63 peer = &bgp_peers[i];
64 memset(peer, 0, sizeof(*peer));
65
66 peer->addr = INADDR_NONE;
67 peer->sock = -1;
68 peer->state = peer->next_state = Disabled;
69
70 if (!((peer->outbuf = malloc(sizeof(*peer->outbuf)))
71 && (peer->inbuf = malloc(sizeof(*peer->inbuf)))))
72 {
73 LOG(0, 0, 0, "Can't allocate buffers for bgp peer (%s)\n",
74 strerror(errno));
75
76 return 0;
77 }
78
79 peer->edata.type = FD_TYPE_BGP;
80 peer->edata.index = i;
81 peer->events = 0;
82 }
83
84 if (as < 1)
85 as = 0;
86
87 if ((our_as = as))
88 return 0;
89
90 bgp_routes = 0;
91 bgp_configured = 0; /* set by bgp_start */
92
93 return 1;
94 }
95
96 /* start connection with a peer */
97 int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
98 int hold, int enable)
99 {
100 struct hostent *h;
101 int ibgp;
102 int i;
103 struct bgp_path_attr a;
104 char path_attrs[64];
105 char *p = path_attrs;
106 in_addr_t ip;
107 uint32_t metric = htonl(BGP_METRIC);
108 uint32_t no_export = htonl(BGP_COMMUNITY_NO_EXPORT);
109
110 if (!our_as)
111 return 0;
112
113 if (peer->state != Disabled)
114 bgp_halt(peer);
115
116 snprintf(peer->name, sizeof(peer->name), "%s", name);
117
118 if (!(h = gethostbyname(name)) || h->h_addrtype != AF_INET)
119 {
120 LOG(0, 0, 0, "Can't get address for BGP peer %s (%s)\n",
121 name, h ? "no address" : hstrerror(h_errno));
122
123 return 0;
124 }
125
126 memcpy(&peer->addr, h->h_addr, sizeof(peer->addr));
127 peer->as = as > 0 ? as : our_as;
128 ibgp = peer->as == our_as;
129
130 /* set initial timer values */
131 peer->init_keepalive = keepalive == -1 ? BGP_KEEPALIVE_TIME : keepalive;
132 peer->init_hold = hold == -1 ? BGP_HOLD_TIME : hold;
133
134 if (peer->init_hold < 3)
135 peer->init_hold = 3;
136
137 if (peer->init_keepalive * 3 > peer->init_hold)
138 peer->init_keepalive = peer->init_hold / 3;
139
140 /* clear buffers, go to Idle state */
141 peer->next_state = Idle;
142 bgp_clear(peer);
143
144 /* set initial routing state */
145 peer->routing = enable;
146
147 /* all our routes use the same attributes, so prepare it in advance */
148 if (peer->path_attrs)
149 free(peer->path_attrs);
150
151 peer->path_attr_len = 0;
152
153 /* ORIGIN */
154 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
155 a.code = BGP_PATH_ATTR_CODE_ORIGIN;
156 a.data.s.len = 1;
157 a.data.s.value[0] = BGP_PATH_ATTR_CODE_ORIGIN_IGP;
158
159 #define ADD_ATTRIBUTE() do { \
160 i = BGP_PATH_ATTR_SIZE(a); \
161 memcpy(p, &a, i); \
162 p += i; \
163 peer->path_attr_len += i; } while (0)
164
165 ADD_ATTRIBUTE();
166
167 /* AS_PATH */
168 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
169 a.code = BGP_PATH_ATTR_CODE_AS_PATH;
170 if (ibgp)
171 {
172 /* empty path */
173 a.data.s.len = 0;
174 }
175 else
176 {
177 /* just our AS */
178 struct {
179 uint8_t type;
180 uint8_t len;
181 uint16_t value;
182 } as_path = {
183 BGP_PATH_ATTR_CODE_AS_PATH_AS_SEQUENCE,
184 1,
185 htons(our_as),
186 };
187
188 a.data.s.len = sizeof(as_path);
189 memcpy(&a.data.s.value, &as_path, sizeof(as_path));
190 }
191
192 ADD_ATTRIBUTE();
193
194 /* NEXT_HOP */
195 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
196 a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
197 ip = my_address; /* we're it */
198 a.data.s.len = sizeof(ip);
199 memcpy(a.data.s.value, &ip, sizeof(ip));
200
201 ADD_ATTRIBUTE();
202
203 /* MULTI_EXIT_DISC */
204 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
205 a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC;
206 a.data.s.len = sizeof(metric);
207 memcpy(a.data.s.value, &metric, sizeof(metric));
208
209 ADD_ATTRIBUTE();
210
211 if (ibgp)
212 {
213 uint32_t local_pref = htonl(BGP_LOCAL_PREF);
214
215 /* LOCAL_PREF */
216 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
217 a.code = BGP_PATH_ATTR_CODE_LOCAL_PREF;
218 a.data.s.len = sizeof(local_pref);
219 memcpy(a.data.s.value, &local_pref, sizeof(local_pref));
220
221 ADD_ATTRIBUTE();
222 }
223
224 /* COMMUNITIES */
225 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_TRANS;
226 a.code = BGP_PATH_ATTR_CODE_COMMUNITIES;
227 a.data.s.len = sizeof(no_export);
228 memcpy(a.data.s.value, &no_export, sizeof(no_export));
229
230 ADD_ATTRIBUTE();
231
232 if (!(peer->path_attrs = malloc(peer->path_attr_len)))
233 {
234 LOG(0, 0, 0, "Can't allocate path_attrs for %s (%s)\n",
235 name, strerror(errno));
236
237 return 0;
238 }
239
240 memcpy(peer->path_attrs, path_attrs, peer->path_attr_len);
241
242 LOG(4, 0, 0, "Initiating BGP connection to %s (routing %s)\n",
243 name, enable ? "enabled" : "suspended");
244
245 /* we have at least one peer configured */
246 bgp_configured = 1;
247
248 /* connect */
249 return bgp_connect(peer);
250 }
251
252 /* clear counters, timers, routes and buffers; close socket; move to
253 next_state, which may be Disabled or Idle */
254 static void bgp_clear(struct bgp_peer *peer)
255 {
256 if (peer->sock != -1)
257 {
258 close(peer->sock);
259 peer->sock = -1;
260 }
261
262 peer->keepalive_time = 0;
263 peer->expire_time = 0;
264
265 peer->keepalive = peer->init_keepalive;
266 peer->hold = peer->init_hold;
267
268 bgp_free_routes(peer->routes);
269 peer->routes = 0;
270
271 peer->outbuf->packet.header.len = 0;
272 peer->outbuf->done = 0;
273 peer->inbuf->packet.header.len = 0;
274 peer->inbuf->done = 0;
275
276 peer->cli_flag = 0;
277 peer->events = 0;
278
279 if (peer->state != peer->next_state)
280 {
281 peer->state = peer->next_state;
282 peer->state_time = time_now;
283
284 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
285 bgp_state_str(peer->next_state));
286 }
287 }
288
289 /* initiate a clean shutdown */
290 void bgp_stop(struct bgp_peer *peer)
291 {
292 LOG(4, 0, 0, "Terminating BGP connection to %s\n", peer->name);
293 bgp_send_notification(peer, BGP_ERR_CEASE, 0);
294 }
295
296 /* drop connection (if any) and set state to Disabled */
297 void bgp_halt(struct bgp_peer *peer)
298 {
299 LOG(4, 0, 0, "Aborting BGP connection to %s\n", peer->name);
300 peer->next_state = Disabled;
301 bgp_clear(peer);
302 }
303
304 /* drop connection (if any) and set to Idle for connection retry */
305 int bgp_restart(struct bgp_peer *peer)
306 {
307 peer->next_state = Idle;
308 bgp_clear(peer);
309
310 /* restart now */
311 peer->retry_time = time_now;
312 peer->retry_count = 0;
313
314 /* connect */
315 return bgp_connect(peer);
316 }
317
318 static void bgp_set_retry(struct bgp_peer *peer)
319 {
320 if (peer->retry_count++ < BGP_MAX_RETRY)
321 {
322 peer->retry_time = time_now + (BGP_RETRY_BACKOFF * peer->retry_count);
323 peer->next_state = Idle;
324 bgp_clear(peer);
325 }
326 else
327 bgp_halt(peer); /* give up */
328 }
329
330 /* convert ip/mask to CIDR notation */
331 static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx)
332 {
333 int i;
334 uint32_t b;
335
336 /* convert to prefix notation */
337 pfx->len = 32;
338 pfx->prefix = ip;
339
340 if (!mask) /* bogus */
341 mask = 0xffffffff;
342
343 for (i = 0; i < 32 && ((b = ntohl(1 << i)), !(mask & b)); i++)
344 {
345 pfx->len--;
346 pfx->prefix &= ~b;
347 }
348 }
349
350 /* insert route into list; sorted */
351 static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
352 struct bgp_route_list *new)
353 {
354 struct bgp_route_list *p = head;
355 struct bgp_route_list *e = 0;
356
357 while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0)
358 {
359 e = p;
360 p = p->next;
361 }
362
363 if (e)
364 {
365 new->next = e->next;
366 e->next = new;
367 }
368 else
369 {
370 new->next = head;
371 head = new;
372 }
373
374 return head;
375 }
376
377 /* add route to list for peers */
378 /*
379 * Note: this doesn't do route aggregation, nor drop routes if a less
380 * specific match already exists (partly because I'm lazy, but also so
381 * that if that route is later deleted we don't have to be concerned
382 * about adding back the more specific one).
383 */
384 int bgp_add_route(in_addr_t ip, in_addr_t mask)
385 {
386 struct bgp_route_list *r = bgp_routes;
387 struct bgp_route_list add;
388 int i;
389
390 bgp_cidr(ip, mask, &add.dest);
391 add.next = 0;
392
393 /* check for duplicate */
394 while (r)
395 {
396 i = memcmp(&r->dest, &add.dest, sizeof(r->dest));
397 if (!i)
398 return 1; /* already covered */
399
400 if (i > 0)
401 break;
402
403 r = r->next;
404 }
405
406 /* insert into route list; sorted */
407 if (!(r = malloc(sizeof(*r))))
408 {
409 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
410 fmtaddr(add.dest.prefix, 0), add.dest.len, strerror(errno));
411
412 return 0;
413 }
414
415 memcpy(r, &add, sizeof(*r));
416 bgp_routes = bgp_insert_route(bgp_routes, r);
417
418 /* flag established peers for update */
419 for (i = 0; i < BGP_NUM_PEERS; i++)
420 if (bgp_peers[i].state == Established)
421 bgp_peers[i].update_routes = 1;
422
423 LOG(4, 0, 0, "Registered BGP route %s/%d\n",
424 fmtaddr(add.dest.prefix, 0), add.dest.len);
425
426 return 1;
427 }
428
429 /* remove route from list for peers */
430 int bgp_del_route(in_addr_t ip, in_addr_t mask)
431 {
432 struct bgp_route_list *r = bgp_routes;
433 struct bgp_route_list *e = 0;
434 struct bgp_route_list del;
435 int i;
436
437 bgp_cidr(ip, mask, &del.dest);
438 del.next = 0;
439
440 /* find entry in routes list and remove */
441 while (r)
442 {
443 i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
444 if (!i)
445 {
446 if (e)
447 e->next = r->next;
448 else
449 bgp_routes = r->next;
450
451 free(r);
452 break;
453 }
454
455 e = r;
456
457 if (i > 0)
458 r = 0; /* stop */
459 else
460 r = r->next;
461 }
462
463 /* not found */
464 if (!r)
465 return 1;
466
467 /* flag established peers for update */
468 for (i = 0; i < BGP_NUM_PEERS; i++)
469 if (bgp_peers[i].state == Established)
470 bgp_peers[i].update_routes = 1;
471
472 LOG(4, 0, 0, "Removed BGP route %s/%d\n",
473 fmtaddr(del.dest.prefix, 0), del.dest.len);
474
475 return 1;
476 }
477
478 /* enable or disable routing */
479 void bgp_enable_routing(int enable)
480 {
481 int i;
482
483 for (i = 0; i < BGP_NUM_PEERS; i++)
484 {
485 bgp_peers[i].routing = enable;
486
487 /* flag established peers for update */
488 if (bgp_peers[i].state == Established)
489 bgp_peers[i].update_routes = 1;
490 }
491
492 LOG(4, 0, 0, "%s BGP routing\n", enable ? "Enabled" : "Suspended");
493 }
494
495 #ifdef HAVE_EPOLL
496 # include <sys/epoll.h>
497 #else
498 # include "fake_epoll.h"
499 #endif
500
501 /* return a bitmask of the events required to poll this peer's fd */
502 int bgp_set_poll()
503 {
504 int i;
505
506 if (!bgp_configured)
507 return 0;
508
509 for (i = 0; i < BGP_NUM_PEERS; i++)
510 {
511 struct bgp_peer *peer = &bgp_peers[i];
512 int events = 0;
513
514 if (peer->state == Disabled || peer->state == Idle)
515 continue;
516
517 if (peer->inbuf->done < BGP_MAX_PACKET_SIZE)
518 events |= EPOLLIN;
519
520 if (peer->state == Connect || /* connection in progress */
521 peer->update_routes || /* routing updates */
522 peer->outbuf->packet.header.len) /* pending output */
523 events |= EPOLLOUT;
524
525 if (peer->events != events)
526 {
527 struct epoll_event ev;
528
529 ev.events = peer->events = events;
530 ev.data.ptr = &peer->edata;
531 epoll_ctl(epollfd, EPOLL_CTL_MOD, peer->sock, &ev);
532 }
533 }
534
535 return 1;
536 }
537
538 /* process bgp events/timers */
539 int bgp_process(uint32_t events[])
540 {
541 int i;
542
543 if (!bgp_configured)
544 return 0;
545
546 for (i = 0; i < BGP_NUM_PEERS; i++)
547 {
548 struct bgp_peer *peer = &bgp_peers[i];
549
550 if (*peer->name && peer->cli_flag == BGP_CLI_RESTART)
551 {
552 bgp_restart(peer);
553 continue;
554 }
555
556 if (peer->state == Disabled)
557 continue;
558
559 if (peer->cli_flag)
560 {
561 switch (peer->cli_flag)
562 {
563 case BGP_CLI_SUSPEND:
564 if (peer->routing)
565 {
566 peer->routing = 0;
567 if (peer->state == Established)
568 peer->update_routes = 1;
569 }
570
571 break;
572
573 case BGP_CLI_ENABLE:
574 if (!peer->routing)
575 {
576 peer->routing = 1;
577 if (peer->state == Established)
578 peer->update_routes = 1;
579 }
580
581 break;
582 }
583
584 peer->cli_flag = 0;
585 }
586
587 /* handle empty/fill of buffers */
588 if (events[i] & EPOLLOUT)
589 {
590 int r = 1;
591 if (peer->state == Connect)
592 r = bgp_handle_connect(peer);
593 else if (peer->outbuf->packet.header.len)
594 r = bgp_write(peer);
595
596 if (!r)
597 continue;
598 }
599
600 if (events[i] & (EPOLLIN|EPOLLHUP))
601 {
602 if (!bgp_read(peer))
603 continue;
604 }
605
606 /* process input buffer contents */
607 while (peer->inbuf->done >= sizeof(peer->inbuf->packet.header)
608 && !peer->outbuf->packet.header.len) /* may need to queue a response */
609 {
610 if (bgp_handle_input(peer) < 0)
611 continue;
612 }
613
614 /* process pending updates */
615 if (peer->update_routes
616 && !peer->outbuf->packet.header.len) /* ditto */
617 {
618 if (!bgp_send_update(peer))
619 continue;
620 }
621
622 /* process timers */
623 if (peer->state == Established)
624 {
625 if (time_now > peer->expire_time)
626 {
627 LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
628 peer->name, peer->hold);
629
630 bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
631 continue;
632 }
633
634 if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
635 bgp_send_keepalive(peer);
636 }
637 else if (peer->state == Idle)
638 {
639 if (time_now > peer->retry_time)
640 bgp_connect(peer);
641 }
642 else if (time_now > peer->state_time + BGP_STATE_TIME)
643 {
644 LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
645 bgp_state_str(peer->state), peer->name);
646
647 bgp_restart(peer);
648 }
649 }
650
651 return 1;
652 }
653
654 static void bgp_free_routes(struct bgp_route_list *routes)
655 {
656 struct bgp_route_list *tmp;
657
658 while ((tmp = routes))
659 {
660 routes = tmp->next;
661 free(tmp);
662 }
663 }
664
665 char const *bgp_state_str(enum bgp_state state)
666 {
667 switch (state)
668 {
669 case Disabled: return "Disabled";
670 case Idle: return "Idle";
671 case Connect: return "Connect";
672 case Active: return "Active";
673 case OpenSent: return "OpenSent";
674 case OpenConfirm: return "OpenConfirm";
675 case Established: return "Established";
676 }
677
678 return "?";
679 }
680
681 static char const *bgp_msg_type_str(uint8_t type)
682 {
683 switch (type)
684 {
685 case BGP_MSG_OPEN: return "OPEN";
686 case BGP_MSG_UPDATE: return "UPDATE";
687 case BGP_MSG_NOTIFICATION: return "NOTIFICATION";
688 case BGP_MSG_KEEPALIVE: return "KEEPALIVE";
689 }
690
691 return "?";
692 }
693
694 /* attempt to connect to peer */
695 static int bgp_connect(struct bgp_peer *peer)
696 {
697 static int bgp_port = 0;
698 struct sockaddr_in addr;
699 struct epoll_event ev;
700
701 if (!bgp_port)
702 {
703 struct servent *serv;
704 if (!(serv = getservbyname("bgp", "tcp")))
705 {
706 LOG(0, 0, 0, "Can't get bgp service (%s)\n", strerror(errno));
707 return 0;
708 }
709
710 bgp_port = serv->s_port;
711 }
712
713 if ((peer->sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
714 {
715 LOG(0, 0, 0, "Can't create a socket for BGP peer %s (%s)\n",
716 peer->name, strerror(errno));
717
718 peer->state = peer->next_state = Disabled;
719 return 0;
720 }
721
722 /* add to poll set */
723 ev.events = peer->events = EPOLLOUT;
724 ev.data.ptr = &peer->edata;
725 epoll_ctl(epollfd, EPOLL_CTL_ADD, peer->sock, &ev);
726
727 /* set to non-blocking */
728 fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
729
730 /* try connect */
731 memset(&addr, 0, sizeof(addr));
732 addr.sin_family = AF_INET;
733 addr.sin_port = bgp_port;
734 addr.sin_addr.s_addr = peer->addr;
735
736 while (connect(peer->sock, (struct sockaddr *) &addr, sizeof(addr)) == -1)
737 {
738 if (errno == EINTR) /* SIGALARM handler */
739 continue;
740
741 if (errno != EINPROGRESS)
742 {
743 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n",
744 inet_ntoa(addr.sin_addr), strerror(errno));
745
746 bgp_set_retry(peer);
747 return 0;
748 }
749
750 peer->state = Connect;
751 peer->state_time = time_now;
752
753 LOG(4, 0, 0, "BGP peer %s: state Connect\n", peer->name);
754 return 1;
755 }
756
757 peer->state = Active;
758 peer->state_time = time_now;
759 peer->retry_time = peer->retry_count = 0;
760
761 LOG(4, 0, 0, "BGP peer %s: state Active\n", inet_ntoa(addr.sin_addr));
762
763 return bgp_send_open(peer);
764 }
765
766 /* complete partial connection (state = Connect) */
767 static int bgp_handle_connect(struct bgp_peer *peer)
768 {
769 int err = 0;
770 socklen_t len = sizeof(int);
771 getsockopt(peer->sock, SOL_SOCKET, SO_ERROR, &err, &len);
772 if (err)
773 {
774 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n", peer->name,
775 strerror(err));
776
777 bgp_set_retry(peer);
778 return 0;
779 }
780
781 peer->state = Active;
782 peer->state_time = time_now;
783
784 LOG(4, 0, 0, "BGP peer %s: state Active\n", peer->name);
785
786 return bgp_send_open(peer);
787 }
788
789 /* initiate a write */
790 static int bgp_write(struct bgp_peer *peer)
791 {
792 int len = htons(peer->outbuf->packet.header.len);
793 int r;
794
795 while ((r = write(peer->sock, &peer->outbuf->packet + peer->outbuf->done,
796 len - peer->outbuf->done)) == -1)
797 {
798 if (errno == EINTR)
799 continue;
800
801 if (errno == EAGAIN)
802 return 1;
803
804 if (errno == EPIPE)
805 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
806 else
807 LOG(1, 0, 0, "Can't write to BGP peer %s (%s)\n", peer->name,
808 strerror(errno));
809
810 bgp_set_retry(peer);
811 return 0;
812 }
813
814 if (r < len)
815 {
816 peer->outbuf->done += r;
817 return 1;
818 }
819
820 LOG(4, 0, 0, "Sent %s to BGP peer %s\n",
821 bgp_msg_type_str(peer->outbuf->packet.header.type), peer->name);
822
823 peer->outbuf->packet.header.len = 0;
824 peer->outbuf->done = 0;
825
826 if (peer->state == Established)
827 peer->keepalive_time = time_now + peer->keepalive;
828
829 if (peer->state != peer->next_state)
830 {
831 if (peer->next_state == Disabled || peer->next_state == Idle)
832 {
833 bgp_clear(peer);
834 return 0;
835 }
836
837 peer->state = peer->next_state;
838 peer->state_time = time_now;
839
840 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
841 bgp_state_str(peer->state));
842 }
843
844 return 1;
845 }
846
847 /* initiate a read */
848 static int bgp_read(struct bgp_peer *peer)
849 {
850 int r;
851
852 while ((r = read(peer->sock, &peer->inbuf->packet + peer->inbuf->done,
853 BGP_MAX_PACKET_SIZE - peer->inbuf->done)) < 1)
854 {
855 if (!r)
856 {
857 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
858 }
859 else
860 {
861 if (errno == EINTR)
862 continue;
863
864 if (errno == EAGAIN)
865 return 1;
866
867 LOG(1, 0, 0, "Can't read from BGP peer %s (%s)\n", peer->name,
868 strerror(errno));
869 }
870
871 bgp_set_retry(peer);
872 return 0;
873 }
874
875 peer->inbuf->done += r;
876 return 1;
877 }
878
879 /* process buffered packets */
880 static int bgp_handle_input(struct bgp_peer *peer)
881 {
882 struct bgp_packet *p = &peer->inbuf->packet;
883 int len = ntohs(p->header.len);
884
885 if (len > BGP_MAX_PACKET_SIZE)
886 {
887 LOG(1, 0, 0, "Bad header length from BGP %s\n", peer->name);
888 bgp_send_notification(peer, BGP_ERR_HEADER, BGP_ERR_HDR_BAD_LEN);
889 return 0;
890 }
891
892 if (peer->inbuf->done < len)
893 return 0;
894
895 LOG(4, 0, 0, "Received %s from BGP peer %s\n",
896 bgp_msg_type_str(p->header.type), peer->name);
897
898 switch (p->header.type)
899 {
900 case BGP_MSG_OPEN:
901 {
902 struct bgp_data_open data;
903 int hold;
904 int i;
905 off_t param_offset, capability_offset;
906 struct bgp_opt_param *param;
907 uint8_t capabilities_len;
908 char *capabilities = NULL;
909 struct bgp_capability *capability;
910
911 for (i = 0; i < sizeof(p->header.marker); i++)
912 {
913 if ((unsigned char) p->header.marker[i] != 0xff)
914 {
915 LOG(1, 0, 0, "Invalid marker from BGP peer %s\n",
916 peer->name);
917
918 bgp_send_notification(peer, BGP_ERR_HEADER,
919 BGP_ERR_HDR_NOT_SYNC);
920
921 return 0;
922 }
923 }
924
925 if (peer->state != OpenSent)
926 {
927 LOG(1, 0, 0, "OPEN from BGP peer %s in %s state\n",
928 peer->name, bgp_state_str(peer->state));
929
930 bgp_send_notification(peer, BGP_ERR_FSM, 0);
931 return 0;
932 }
933
934 memcpy(&data, p->data, len - sizeof(p->header));
935
936 if (data.version != BGP_VERSION)
937 {
938 LOG(1, 0, 0, "Bad version (%d) sent by BGP peer %s\n",
939 (int) data.version, peer->name);
940
941 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_VERSION);
942 return 0;
943 }
944
945 if (ntohs(data.as) != peer->as)
946 {
947 LOG(1, 0, 0, "Bad AS sent by BGP peer %s (got %d, "
948 "expected %d)\n", peer->name, (int) htons(data.as),
949 (int) peer->as);
950
951 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_BAD_AS);
952 return 0;
953 }
954
955 if ((hold = ntohs(data.hold_time)) < 3)
956 {
957 LOG(1, 0, 0, "Bad hold time (%d) from BGP peer %s\n",
958 hold, peer->name);
959
960 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_HOLD_TIME);
961 return 0;
962 }
963
964 /* pick lowest hold time */
965 if (hold < peer->hold)
966 peer->hold = hold;
967
968 /* adjust our keepalive based on negotiated hold value */
969 if (peer->keepalive * 3 > peer->hold)
970 peer->keepalive = peer->hold / 3;
971
972 /* check for optional parameters */
973 /* 2 is for the size of type + len (both uint8_t) */
974 for (param_offset = 0;
975 param_offset < data.opt_len;
976 param_offset += 2 + param->len)
977 {
978 param = (struct bgp_opt_param *)(&data.opt_params + param_offset);
979
980 /* sensible check */
981 if (data.opt_len - param_offset < 2
982 || param->len > data.opt_len - param_offset - 2) {
983 LOG(1, 0, 0, "Malformed Optional Parameter list from BGP peer %s\n",
984 peer->name);
985
986 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC);
987 return 0;
988 }
989
990 /* we know only one parameter type */
991 if (param->type != BGP_CAPABILITY_PARAM_TYPE) {
992 LOG(1, 0, 0, "Unsupported Optional Parameter type %d from BGP peer %s\n",
993 param->type, peer->name);
994
995 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_UNSUP_PARAM);
996 return 0;
997 }
998
999 capabilities_len = param->len;
1000 capabilities = (char *)&param->value;
1001 }
1002
1003 /* look for BGP multiprotocol capability */
1004 if (capabilities)
1005 {
1006 for (capability_offset = 0;
1007 capability_offset < capabilities_len;
1008 capability_offset += 2 + capability->len)
1009 {
1010 capability = (struct bgp_capability *)(capabilities + capability_offset);
1011
1012 /* sensible check */
1013 if (capabilities_len - capability_offset < 2
1014 || capability->len > capabilities_len - capability_offset - 2) {
1015 LOG(1, 0, 0, "Malformed Capabilities list from BGP peer %s\n",
1016 peer->name);
1017
1018 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC);
1019 return 0;
1020 }
1021
1022 /* we only know one capability code */
1023 if (capability->code != XXX) {
1024 LOG(4, 0, 0, "Unsupported Capability code %d from BGP peer %s\n",
1025 capability->code, peer->name);
1026
1027 /* TODO: send _which_ capability is unsupported */
1028 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_UNSUP_CAP);
1029 /* we don't terminate, still; we just jump to the next one */
1030 }
1031 }
1032 }
1033
1034 /* next transition requires an exchange of keepalives */
1035 bgp_send_keepalive(peer);
1036 }
1037
1038 break;
1039
1040 case BGP_MSG_KEEPALIVE:
1041 if (peer->state == OpenConfirm)
1042 {
1043 peer->state = peer->next_state = Established;
1044 peer->state_time = time_now;
1045 peer->keepalive_time = time_now + peer->keepalive;
1046 peer->update_routes = 1;
1047 peer->retry_count = 0;
1048 peer->retry_time = 0;
1049
1050 LOG(4, 0, 0, "BGP peer %s: state Established\n", peer->name);
1051 }
1052
1053 break;
1054
1055 case BGP_MSG_NOTIFICATION:
1056 if (len > sizeof(p->header))
1057 {
1058 struct bgp_data_notification *notification =
1059 (struct bgp_data_notification *) p->data;
1060
1061 if (notification->error_code == BGP_ERR_CEASE)
1062 {
1063 LOG(4, 0, 0, "BGP peer %s sent CEASE\n", peer->name);
1064 bgp_restart(peer);
1065 return 0;
1066 }
1067
1068 /* FIXME: should handle more notifications */
1069 LOG(4, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n",
1070 peer->name, (int) notification->error_code);
1071 }
1072
1073 break;
1074 }
1075
1076 /* reset timer */
1077 peer->expire_time = time_now + peer->hold;
1078
1079 /* see if there's another message in the same packet/buffer */
1080 if (peer->inbuf->done > len)
1081 {
1082 peer->inbuf->done -= len;
1083 memmove(p, (char *) p + len, peer->inbuf->done);
1084 }
1085 else
1086 {
1087 peer->inbuf->packet.header.len = 0;
1088 peer->inbuf->done = 0;
1089 }
1090
1091 return peer->inbuf->done;
1092 }
1093
1094 /* send/buffer OPEN message */
1095 static int bgp_send_open(struct bgp_peer *peer)
1096 {
1097 struct bgp_data_open data;
1098 uint16_t len = sizeof(peer->outbuf->packet.header);
1099
1100 memset(peer->outbuf->packet.header.marker, 0xff,
1101 sizeof(peer->outbuf->packet.header.marker));
1102
1103 peer->outbuf->packet.header.type = BGP_MSG_OPEN;
1104
1105 data.version = BGP_VERSION;
1106 data.as = htons(our_as);
1107 data.hold_time = htons(peer->hold);
1108 data.identifier = my_address;
1109 data.opt_len = 0;
1110
1111 memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE);
1112 len += BGP_DATA_OPEN_SIZE;
1113
1114 peer->outbuf->packet.header.len = htons(len);
1115 peer->outbuf->done = 0;
1116 peer->next_state = OpenSent;
1117
1118 return bgp_write(peer);
1119 }
1120
1121 /* send/buffer KEEPALIVE message */
1122 static int bgp_send_keepalive(struct bgp_peer *peer)
1123 {
1124 memset(peer->outbuf->packet.header.marker, 0xff,
1125 sizeof(peer->outbuf->packet.header.marker));
1126
1127 peer->outbuf->packet.header.type = BGP_MSG_KEEPALIVE;
1128 peer->outbuf->packet.header.len =
1129 htons(sizeof(peer->outbuf->packet.header));
1130
1131 peer->outbuf->done = 0;
1132 peer->next_state = (peer->state == OpenSent) ? OpenConfirm : peer->state;
1133
1134 return bgp_write(peer);
1135 }
1136
1137 /* send/buffer UPDATE message */
1138 static int bgp_send_update(struct bgp_peer *peer)
1139 {
1140 uint16_t unf_len = 0;
1141 uint16_t attr_len;
1142 uint16_t len = sizeof(peer->outbuf->packet.header);
1143 struct bgp_route_list *have = peer->routes;
1144 struct bgp_route_list *want = peer->routing ? bgp_routes : 0;
1145 struct bgp_route_list *e = 0;
1146 struct bgp_route_list *add = 0;
1147 int s;
1148
1149 char *data = (char *) &peer->outbuf->packet.data;
1150
1151 /* need leave room for attr_len, bgp_path_attrs and one prefix */
1152 char *max = (char *) &peer->outbuf->packet.data
1153 + sizeof(peer->outbuf->packet.data)
1154 - sizeof(attr_len) - peer->path_attr_len - sizeof(struct bgp_ip_prefix);
1155
1156 /* skip over unf_len */
1157 data += sizeof(unf_len);
1158 len += sizeof(unf_len);
1159
1160 memset(peer->outbuf->packet.header.marker, 0xff,
1161 sizeof(peer->outbuf->packet.header.marker));
1162
1163 peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
1164
1165 peer->update_routes = 0; /* tentatively clear */
1166
1167 /* find differences */
1168 while ((have || want) && data < (max - sizeof(struct bgp_ip_prefix)))
1169 {
1170 if (have)
1171 s = want
1172 ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
1173 : -1;
1174 else
1175 s = 1;
1176
1177 if (s < 0) /* found one to delete */
1178 {
1179 struct bgp_route_list *tmp = have;
1180 have = have->next;
1181
1182 s = BGP_IP_PREFIX_SIZE(tmp->dest);
1183 memcpy(data, &tmp->dest, s);
1184 data += s;
1185 unf_len += s;
1186 len += s;
1187
1188 LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
1189 fmtaddr(tmp->dest.prefix, 0), tmp->dest.len, peer->name);
1190
1191 free(tmp);
1192
1193 if (e)
1194 e->next = have;
1195 else
1196 peer->routes = have;
1197 }
1198 else
1199 {
1200 if (!s) /* same */
1201 {
1202 e = have; /* stash the last found to relink above */
1203 have = have->next;
1204 want = want->next;
1205 }
1206 else if (s > 0) /* addition reqd. */
1207 {
1208 if (add)
1209 {
1210 peer->update_routes = 1; /* only one add per packet */
1211 if (!have)
1212 break;
1213 }
1214 else
1215 add = want;
1216
1217 if (want)
1218 want = want->next;
1219 }
1220 }
1221 }
1222
1223 if (have || want)
1224 peer->update_routes = 1; /* more to do */
1225
1226 /* anything changed? */
1227 if (!(unf_len || add))
1228 return 1;
1229
1230 /* go back and insert unf_len */
1231 unf_len = htons(unf_len);
1232 memcpy(&peer->outbuf->packet.data, &unf_len, sizeof(unf_len));
1233
1234 if (add)
1235 {
1236 if (!(e = malloc(sizeof(*e))))
1237 {
1238 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
1239 fmtaddr(add->dest.prefix, 0), add->dest.len, strerror(errno));
1240
1241 return 0;
1242 }
1243
1244 memcpy(e, add, sizeof(*e));
1245 e->next = 0;
1246 peer->routes = bgp_insert_route(peer->routes, e);
1247
1248 attr_len = htons(peer->path_attr_len);
1249 memcpy(data, &attr_len, sizeof(attr_len));
1250 data += sizeof(attr_len);
1251 len += sizeof(attr_len);
1252
1253 memcpy(data, peer->path_attrs, peer->path_attr_len);
1254 data += peer->path_attr_len;
1255 len += peer->path_attr_len;
1256
1257 s = BGP_IP_PREFIX_SIZE(add->dest);
1258 memcpy(data, &add->dest, s);
1259 data += s;
1260 len += s;
1261
1262 LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
1263 fmtaddr(add->dest.prefix, 0), add->dest.len, peer->name);
1264 }
1265 else
1266 {
1267 attr_len = 0;
1268 memcpy(data, &attr_len, sizeof(attr_len));
1269 data += sizeof(attr_len);
1270 len += sizeof(attr_len);
1271 }
1272
1273 peer->outbuf->packet.header.len = htons(len);
1274 peer->outbuf->done = 0;
1275
1276 return bgp_write(peer);
1277 }
1278
1279 /* send/buffer NOTIFICATION message */
1280 static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
1281 uint8_t subcode)
1282 {
1283 struct bgp_data_notification data;
1284 uint16_t len = 0;
1285
1286 data.error_code = code;
1287 len += sizeof(data.error_code);
1288
1289 data.error_subcode = subcode;
1290 len += sizeof(data.error_code);
1291
1292 memset(peer->outbuf->packet.header.marker, 0xff,
1293 sizeof(peer->outbuf->packet.header.marker));
1294
1295 peer->outbuf->packet.header.type = BGP_MSG_NOTIFICATION;
1296 peer->outbuf->packet.header.len =
1297 htons(sizeof(peer->outbuf->packet.header) + len);
1298
1299 memcpy(peer->outbuf->packet.data, &data, len);
1300
1301 peer->outbuf->done = 0;
1302 peer->next_state = code == BGP_ERR_CEASE ? Disabled : Idle;
1303
1304 /* we're dying; ignore any pending input */
1305 peer->inbuf->packet.header.len = 0;
1306 peer->inbuf->done = 0;
1307
1308 return bgp_write(peer);
1309 }