4b7f94912dbec40a22c6fcac73c57af21c2e0724
[l2tpns.git] / bgp.c
1 /*
2 * BGPv4
 * Used to advertise routes for upstream (l2tp port, rather than gratuitous
 * arp) and downstream--allowing routers to load-balance both.
5 *
6 * Implementation limitations:
7 * - We never listen for incoming connections (session always initiated by us).
8 * - Any routes advertised by the peer are accepted, but ignored.
9 * - No password support; neither RFC1771 (which no-one seems to do anyway)
10 * nor RFC2385 (which requires a kernel patch on 2.4 kernels).
11 */
12
13 char const *cvs_id_bgp = "$Id: bgp.c,v 1.12 2005/09/02 23:39:36 bodea Exp $";
14
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <string.h>
18 #include <time.h>
19 #include <errno.h>
20 #include <sys/socket.h>
21 #include <netinet/in.h>
22 #include <arpa/inet.h>
23 #include <netdb.h>
24 #include <fcntl.h>
25
26 #include "l2tpns.h"
27 #include "bgp.h"
28 #include "util.h"
29
/* forward declarations of the file-local helpers defined further down */
static void bgp_clear(struct bgp_peer *peer);
static void bgp_set_retry(struct bgp_peer *peer);
static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx);
static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
    struct bgp_route_list *new);

static void bgp_free_routes(struct bgp_route_list *routes);
static char const *bgp_msg_type_str(uint8_t type);
static int bgp_connect(struct bgp_peer *peer);
static int bgp_handle_connect(struct bgp_peer *peer);
static int bgp_write(struct bgp_peer *peer);
static int bgp_read(struct bgp_peer *peer);
static int bgp_handle_input(struct bgp_peer *peer);
static int bgp_send_open(struct bgp_peer *peer);
static int bgp_send_keepalive(struct bgp_peer *peer);
static int bgp_send_update(struct bgp_peer *peer);
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode);

/* our AS number as stored by bgp_setup(); while 0, bgp_start() refuses to run */
static uint16_t our_as;

/* routes registered with bgp_add_route(); kept sorted by bgp_insert_route() */
static struct bgp_route_list *bgp_routes = 0;

/* set to 1 by bgp_start(); bgp_set_poll() and bgp_process() are no-ops while 0 */
int bgp_configured = 0;

/* table of BGP_NUM_PEERS peers; never allocated in this file -- presumably
   set up by the caller before bgp_setup() runs (TODO confirm) */
struct bgp_peer *bgp_peers = 0;
54
55 /* prepare peer structure, globals */
56 int bgp_setup(int as)
57 {
58 int i;
59 struct bgp_peer *peer;
60
61 for (i = 0; i < BGP_NUM_PEERS; i++)
62 {
63 peer = &bgp_peers[i];
64 memset(peer, 0, sizeof(*peer));
65
66 peer->addr = INADDR_NONE;
67 peer->sock = -1;
68 peer->state = peer->next_state = Disabled;
69
70 if (!((peer->outbuf = malloc(sizeof(*peer->outbuf)))
71 && (peer->inbuf = malloc(sizeof(*peer->inbuf)))))
72 {
73 LOG(0, 0, 0, "Can't allocate buffers for bgp peer (%s)\n",
74 strerror(errno));
75
76 return 0;
77 }
78
79 peer->edata.type = FD_TYPE_BGP;
80 peer->edata.index = i;
81 peer->events = 0;
82 }
83
84 if (as < 1)
85 as = 0;
86
87 if ((our_as = as))
88 return 0;
89
90 bgp_routes = 0;
91 bgp_configured = 0; /* set by bgp_start */
92
93 return 1;
94 }
95
96 /* start connection with a peer */
97 int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
98 int hold, struct in_addr update_source, int enable)
99 {
100 struct hostent *h;
101 int ibgp;
102 int i;
103 struct bgp_path_attr a;
104 char path_attrs[64];
105 char *p = path_attrs;
106 in_addr_t ip;
107 uint32_t metric = htonl(BGP_METRIC);
108 uint32_t no_export = htonl(BGP_COMMUNITY_NO_EXPORT);
109
110 if (!our_as)
111 return 0;
112
113 if (peer->state != Disabled)
114 bgp_halt(peer);
115
116 snprintf(peer->name, sizeof(peer->name), "%s", name);
117
118 if (!(h = gethostbyname(name)) || h->h_addrtype != AF_INET)
119 {
120 LOG(0, 0, 0, "Can't get address for BGP peer %s (%s)\n",
121 name, h ? "no address" : hstrerror(h_errno));
122
123 return 0;
124 }
125
126 memcpy(&peer->addr, h->h_addr, sizeof(peer->addr));
127 peer->source_addr = update_source.s_addr;
128 peer->as = as > 0 ? as : our_as;
129 ibgp = peer->as == our_as;
130
131 /* set initial timer values */
132 peer->init_keepalive = keepalive == -1 ? BGP_KEEPALIVE_TIME : keepalive;
133 peer->init_hold = hold == -1 ? BGP_HOLD_TIME : hold;
134
135 if (peer->init_hold < 3)
136 peer->init_hold = 3;
137
138 if (peer->init_keepalive * 3 > peer->init_hold)
139 peer->init_keepalive = peer->init_hold / 3;
140
141 /* clear buffers, go to Idle state */
142 peer->next_state = Idle;
143 bgp_clear(peer);
144
145 /* set initial routing state */
146 peer->routing = enable;
147
148 /* all our routes use the same attributes, so prepare it in advance */
149 if (peer->path_attrs)
150 free(peer->path_attrs);
151
152 peer->path_attr_len = 0;
153
154 /* ORIGIN */
155 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
156 a.code = BGP_PATH_ATTR_CODE_ORIGIN;
157 a.data.s.len = 1;
158 a.data.s.value[0] = BGP_PATH_ATTR_CODE_ORIGIN_IGP;
159
160 #define ADD_ATTRIBUTE() do { \
161 i = BGP_PATH_ATTR_SIZE(a); \
162 memcpy(p, &a, i); \
163 p += i; \
164 peer->path_attr_len += i; } while (0)
165
166 ADD_ATTRIBUTE();
167
168 /* AS_PATH */
169 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
170 a.code = BGP_PATH_ATTR_CODE_AS_PATH;
171 if (ibgp)
172 {
173 /* empty path */
174 a.data.s.len = 0;
175 }
176 else
177 {
178 /* just our AS */
179 struct {
180 uint8_t type;
181 uint8_t len;
182 uint16_t value;
183 } as_path = {
184 BGP_PATH_ATTR_CODE_AS_PATH_AS_SEQUENCE,
185 1,
186 htons(our_as),
187 };
188
189 a.data.s.len = sizeof(as_path);
190 memcpy(&a.data.s.value, &as_path, sizeof(as_path));
191 }
192
193 ADD_ATTRIBUTE();
194
195 /* NEXT_HOP */
196 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
197 a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
198 ip = my_address; /* we're it */
199 a.data.s.len = sizeof(ip);
200 memcpy(a.data.s.value, &ip, sizeof(ip));
201
202 ADD_ATTRIBUTE();
203
204 /* MULTI_EXIT_DISC */
205 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
206 a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC;
207 a.data.s.len = sizeof(metric);
208 memcpy(a.data.s.value, &metric, sizeof(metric));
209
210 ADD_ATTRIBUTE();
211
212 if (ibgp)
213 {
214 uint32_t local_pref = htonl(BGP_LOCAL_PREF);
215
216 /* LOCAL_PREF */
217 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
218 a.code = BGP_PATH_ATTR_CODE_LOCAL_PREF;
219 a.data.s.len = sizeof(local_pref);
220 memcpy(a.data.s.value, &local_pref, sizeof(local_pref));
221
222 ADD_ATTRIBUTE();
223 }
224
225 /* COMMUNITIES */
226 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_TRANS;
227 a.code = BGP_PATH_ATTR_CODE_COMMUNITIES;
228 a.data.s.len = sizeof(no_export);
229 memcpy(a.data.s.value, &no_export, sizeof(no_export));
230
231 ADD_ATTRIBUTE();
232
233 if (!(peer->path_attrs = malloc(peer->path_attr_len)))
234 {
235 LOG(0, 0, 0, "Can't allocate path_attrs for %s (%s)\n",
236 name, strerror(errno));
237
238 return 0;
239 }
240
241 memcpy(peer->path_attrs, path_attrs, peer->path_attr_len);
242
243 LOG(4, 0, 0, "Initiating BGP connection to %s (routing %s)\n",
244 name, enable ? "enabled" : "suspended");
245
246 /* we have at least one peer configured */
247 bgp_configured = 1;
248
249 /* connect */
250 return bgp_connect(peer);
251 }
252
253 /* clear counters, timers, routes and buffers; close socket; move to
254 next_state, which may be Disabled or Idle */
255 static void bgp_clear(struct bgp_peer *peer)
256 {
257 if (peer->sock != -1)
258 {
259 close(peer->sock);
260 peer->sock = -1;
261 }
262
263 peer->keepalive_time = 0;
264 peer->expire_time = 0;
265
266 peer->keepalive = peer->init_keepalive;
267 peer->hold = peer->init_hold;
268
269 bgp_free_routes(peer->routes);
270 peer->routes = 0;
271
272 peer->outbuf->packet.header.len = 0;
273 peer->outbuf->done = 0;
274 peer->inbuf->packet.header.len = 0;
275 peer->inbuf->done = 0;
276
277 peer->cli_flag = 0;
278 peer->events = 0;
279
280 if (peer->state != peer->next_state)
281 {
282 peer->state = peer->next_state;
283 peer->state_time = time_now;
284
285 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
286 bgp_state_str(peer->next_state));
287 }
288 }
289
290 /* initiate a clean shutdown */
291 void bgp_stop(struct bgp_peer *peer)
292 {
293 LOG(4, 0, 0, "Terminating BGP connection to %s\n", peer->name);
294 bgp_send_notification(peer, BGP_ERR_CEASE, 0);
295 }
296
297 /* drop connection (if any) and set state to Disabled */
298 void bgp_halt(struct bgp_peer *peer)
299 {
300 LOG(4, 0, 0, "Aborting BGP connection to %s\n", peer->name);
301 peer->next_state = Disabled;
302 bgp_clear(peer);
303 }
304
305 /* drop connection (if any) and set to Idle for connection retry */
306 int bgp_restart(struct bgp_peer *peer)
307 {
308 peer->next_state = Idle;
309 bgp_clear(peer);
310
311 /* restart now */
312 peer->retry_time = time_now;
313 peer->retry_count = 0;
314
315 /* connect */
316 return bgp_connect(peer);
317 }
318
319 static void bgp_set_retry(struct bgp_peer *peer)
320 {
321 if (peer->retry_count++ < BGP_MAX_RETRY)
322 {
323 peer->retry_time = time_now + (BGP_RETRY_BACKOFF * peer->retry_count);
324 peer->next_state = Idle;
325 bgp_clear(peer);
326 }
327 else
328 bgp_halt(peer); /* give up */
329 }
330
331 /* convert ip/mask to CIDR notation */
332 static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx)
333 {
334 int i;
335 uint32_t b;
336
337 /* convert to prefix notation */
338 pfx->len = 32;
339 pfx->prefix = ip;
340
341 if (!mask) /* bogus */
342 mask = 0xffffffff;
343
344 for (i = 0; i < 32 && ((b = ntohl(1 << i)), !(mask & b)); i++)
345 {
346 pfx->len--;
347 pfx->prefix &= ~b;
348 }
349 }
350
351 /* insert route into list; sorted */
352 static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
353 struct bgp_route_list *new)
354 {
355 struct bgp_route_list *p = head;
356 struct bgp_route_list *e = 0;
357
358 while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0)
359 {
360 e = p;
361 p = p->next;
362 }
363
364 if (e)
365 {
366 new->next = e->next;
367 e->next = new;
368 }
369 else
370 {
371 new->next = head;
372 head = new;
373 }
374
375 return head;
376 }
377
378 /* add route to list for peers */
379 /*
380 * Note: this doesn't do route aggregation, nor drop routes if a less
381 * specific match already exists (partly because I'm lazy, but also so
382 * that if that route is later deleted we don't have to be concerned
383 * about adding back the more specific one).
384 */
385 int bgp_add_route(in_addr_t ip, in_addr_t mask)
386 {
387 struct bgp_route_list *r = bgp_routes;
388 struct bgp_route_list add;
389 int i;
390
391 bgp_cidr(ip, mask, &add.dest);
392 add.next = 0;
393
394 /* check for duplicate */
395 while (r)
396 {
397 i = memcmp(&r->dest, &add.dest, sizeof(r->dest));
398 if (!i)
399 return 1; /* already covered */
400
401 if (i > 0)
402 break;
403
404 r = r->next;
405 }
406
407 /* insert into route list; sorted */
408 if (!(r = malloc(sizeof(*r))))
409 {
410 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
411 fmtaddr(add.dest.prefix, 0), add.dest.len, strerror(errno));
412
413 return 0;
414 }
415
416 memcpy(r, &add, sizeof(*r));
417 bgp_routes = bgp_insert_route(bgp_routes, r);
418
419 /* flag established peers for update */
420 for (i = 0; i < BGP_NUM_PEERS; i++)
421 if (bgp_peers[i].state == Established)
422 bgp_peers[i].update_routes = 1;
423
424 LOG(4, 0, 0, "Registered BGP route %s/%d\n",
425 fmtaddr(add.dest.prefix, 0), add.dest.len);
426
427 return 1;
428 }
429
430 /* remove route from list for peers */
431 int bgp_del_route(in_addr_t ip, in_addr_t mask)
432 {
433 struct bgp_route_list *r = bgp_routes;
434 struct bgp_route_list *e = 0;
435 struct bgp_route_list del;
436 int i;
437
438 bgp_cidr(ip, mask, &del.dest);
439 del.next = 0;
440
441 /* find entry in routes list and remove */
442 while (r)
443 {
444 i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
445 if (!i)
446 {
447 if (e)
448 e->next = r->next;
449 else
450 bgp_routes = r->next;
451
452 free(r);
453 break;
454 }
455
456 e = r;
457
458 if (i > 0)
459 r = 0; /* stop */
460 else
461 r = r->next;
462 }
463
464 /* not found */
465 if (!r)
466 return 1;
467
468 /* flag established peers for update */
469 for (i = 0; i < BGP_NUM_PEERS; i++)
470 if (bgp_peers[i].state == Established)
471 bgp_peers[i].update_routes = 1;
472
473 LOG(4, 0, 0, "Removed BGP route %s/%d\n",
474 fmtaddr(del.dest.prefix, 0), del.dest.len);
475
476 return 1;
477 }
478
479 /* enable or disable routing */
480 void bgp_enable_routing(int enable)
481 {
482 int i;
483
484 for (i = 0; i < BGP_NUM_PEERS; i++)
485 {
486 bgp_peers[i].routing = enable;
487
488 /* flag established peers for update */
489 if (bgp_peers[i].state == Established)
490 bgp_peers[i].update_routes = 1;
491 }
492
493 LOG(4, 0, 0, "%s BGP routing\n", enable ? "Enabled" : "Suspended");
494 }
495
496 #ifdef HAVE_EPOLL
497 # include <sys/epoll.h>
498 #else
499 # include "fake_epoll.h"
500 #endif
501
502 /* return a bitmask of the events required to poll this peer's fd */
503 int bgp_set_poll()
504 {
505 int i;
506
507 if (!bgp_configured)
508 return 0;
509
510 for (i = 0; i < BGP_NUM_PEERS; i++)
511 {
512 struct bgp_peer *peer = &bgp_peers[i];
513 int events = 0;
514
515 if (peer->state == Disabled || peer->state == Idle)
516 continue;
517
518 if (peer->inbuf->done < BGP_MAX_PACKET_SIZE)
519 events |= EPOLLIN;
520
521 if (peer->state == Connect || /* connection in progress */
522 peer->update_routes || /* routing updates */
523 peer->outbuf->packet.header.len) /* pending output */
524 events |= EPOLLOUT;
525
526 if (peer->events != events)
527 {
528 struct epoll_event ev;
529
530 ev.events = peer->events = events;
531 ev.data.ptr = &peer->edata;
532 epoll_ctl(epollfd, EPOLL_CTL_MOD, peer->sock, &ev);
533 }
534 }
535
536 return 1;
537 }
538
539 /* process bgp events/timers */
540 int bgp_process(uint32_t events[])
541 {
542 int i;
543
544 if (!bgp_configured)
545 return 0;
546
547 for (i = 0; i < BGP_NUM_PEERS; i++)
548 {
549 struct bgp_peer *peer = &bgp_peers[i];
550
551 if (*peer->name && peer->cli_flag == BGP_CLI_RESTART)
552 {
553 bgp_restart(peer);
554 continue;
555 }
556
557 if (peer->state == Disabled)
558 continue;
559
560 if (peer->cli_flag)
561 {
562 switch (peer->cli_flag)
563 {
564 case BGP_CLI_SUSPEND:
565 if (peer->routing)
566 {
567 peer->routing = 0;
568 if (peer->state == Established)
569 peer->update_routes = 1;
570 }
571
572 break;
573
574 case BGP_CLI_ENABLE:
575 if (!peer->routing)
576 {
577 peer->routing = 1;
578 if (peer->state == Established)
579 peer->update_routes = 1;
580 }
581
582 break;
583 }
584
585 peer->cli_flag = 0;
586 }
587
588 /* handle empty/fill of buffers */
589 if (events[i] & EPOLLOUT)
590 {
591 int r = 1;
592 if (peer->state == Connect)
593 r = bgp_handle_connect(peer);
594 else if (peer->outbuf->packet.header.len)
595 r = bgp_write(peer);
596
597 if (!r)
598 continue;
599 }
600
601 if (events[i] & (EPOLLIN|EPOLLHUP))
602 {
603 if (!bgp_read(peer))
604 continue;
605 }
606
607 /* process input buffer contents */
608 while (peer->inbuf->done >= sizeof(peer->inbuf->packet.header)
609 && !peer->outbuf->packet.header.len) /* may need to queue a response */
610 {
611 if (bgp_handle_input(peer) < 0)
612 continue;
613 }
614
615 /* process pending updates */
616 if (peer->update_routes
617 && !peer->outbuf->packet.header.len) /* ditto */
618 {
619 if (!bgp_send_update(peer))
620 continue;
621 }
622
623 /* process timers */
624 if (peer->state == Established)
625 {
626 if (time_now > peer->expire_time)
627 {
628 LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
629 peer->name, peer->hold);
630
631 bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
632 continue;
633 }
634
635 if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
636 bgp_send_keepalive(peer);
637 }
638 else if (peer->state == Idle)
639 {
640 if (time_now > peer->retry_time)
641 bgp_connect(peer);
642 }
643 else if (time_now > peer->state_time + BGP_STATE_TIME)
644 {
645 LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
646 bgp_state_str(peer->state), peer->name);
647
648 bgp_restart(peer);
649 }
650 }
651
652 return 1;
653 }
654
655 static void bgp_free_routes(struct bgp_route_list *routes)
656 {
657 struct bgp_route_list *tmp;
658
659 while ((tmp = routes))
660 {
661 routes = tmp->next;
662 free(tmp);
663 }
664 }
665
666 char const *bgp_state_str(enum bgp_state state)
667 {
668 switch (state)
669 {
670 case Disabled: return "Disabled";
671 case Idle: return "Idle";
672 case Connect: return "Connect";
673 case Active: return "Active";
674 case OpenSent: return "OpenSent";
675 case OpenConfirm: return "OpenConfirm";
676 case Established: return "Established";
677 }
678
679 return "?";
680 }
681
682 static char const *bgp_msg_type_str(uint8_t type)
683 {
684 switch (type)
685 {
686 case BGP_MSG_OPEN: return "OPEN";
687 case BGP_MSG_UPDATE: return "UPDATE";
688 case BGP_MSG_NOTIFICATION: return "NOTIFICATION";
689 case BGP_MSG_KEEPALIVE: return "KEEPALIVE";
690 }
691
692 return "?";
693 }
694
695 /* attempt to connect to peer */
696 static int bgp_connect(struct bgp_peer *peer)
697 {
698 static int bgp_port = 0;
699 struct sockaddr_in addr;
700 struct sockaddr_in source_addr;
701 struct epoll_event ev;
702
703 if (!bgp_port)
704 {
705 struct servent *serv;
706 if (!(serv = getservbyname("bgp", "tcp")))
707 {
708 LOG(0, 0, 0, "Can't get bgp service (%s)\n", strerror(errno));
709 return 0;
710 }
711
712 bgp_port = serv->s_port;
713 }
714
715 if ((peer->sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
716 {
717 LOG(0, 0, 0, "Can't create a socket for BGP peer %s (%s)\n",
718 peer->name, strerror(errno));
719
720 peer->state = peer->next_state = Disabled;
721 return 0;
722 }
723
724 /* add to poll set */
725 ev.events = peer->events = EPOLLOUT;
726 ev.data.ptr = &peer->edata;
727 epoll_ctl(epollfd, EPOLL_CTL_ADD, peer->sock, &ev);
728
729 /* set to non-blocking */
730 fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
731
732 /* set source address */
733 memset(&source_addr, 0, sizeof(source_addr));
734 source_addr.sin_family = AF_INET;
735 source_addr.sin_addr.s_addr = peer->source_addr; /* defaults to INADDR_ANY */
736 if (bind(peer->sock, (struct sockaddr *) &source_addr, sizeof(source_addr)) < 0)
737 {
738 LOG(1, 0, 0, "Can't set source address to %s: %s\n",
739 inet_ntoa(source_addr.sin_addr), strerror(errno));
740
741 bgp_set_retry(peer);
742 return 0;
743 }
744
745 /* try connect */
746 memset(&addr, 0, sizeof(addr));
747 addr.sin_family = AF_INET;
748 addr.sin_port = bgp_port;
749 addr.sin_addr.s_addr = peer->addr;
750
751 while (connect(peer->sock, (struct sockaddr *) &addr, sizeof(addr)) == -1)
752 {
753 if (errno == EINTR) /* SIGALARM handler */
754 continue;
755
756 if (errno != EINPROGRESS)
757 {
758 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n",
759 inet_ntoa(addr.sin_addr), strerror(errno));
760
761 bgp_set_retry(peer);
762 return 0;
763 }
764
765 peer->state = Connect;
766 peer->state_time = time_now;
767
768 LOG(4, 0, 0, "BGP peer %s: state Connect\n", peer->name);
769 return 1;
770 }
771
772 peer->state = Active;
773 peer->state_time = time_now;
774 peer->retry_time = peer->retry_count = 0;
775
776 LOG(4, 0, 0, "BGP peer %s: state Active\n", inet_ntoa(addr.sin_addr));
777
778 return bgp_send_open(peer);
779 }
780
781 /* complete partial connection (state = Connect) */
782 static int bgp_handle_connect(struct bgp_peer *peer)
783 {
784 int err = 0;
785 socklen_t len = sizeof(int);
786 getsockopt(peer->sock, SOL_SOCKET, SO_ERROR, &err, &len);
787 if (err)
788 {
789 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n", peer->name,
790 strerror(err));
791
792 bgp_set_retry(peer);
793 return 0;
794 }
795
796 peer->state = Active;
797 peer->state_time = time_now;
798
799 LOG(4, 0, 0, "BGP peer %s: state Active\n", peer->name);
800
801 return bgp_send_open(peer);
802 }
803
804 /* initiate a write */
805 static int bgp_write(struct bgp_peer *peer)
806 {
807 int len = htons(peer->outbuf->packet.header.len);
808 int r;
809
810 while ((r = write(peer->sock, &peer->outbuf->packet + peer->outbuf->done,
811 len - peer->outbuf->done)) == -1)
812 {
813 if (errno == EINTR)
814 continue;
815
816 if (errno == EAGAIN)
817 return 1;
818
819 if (errno == EPIPE)
820 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
821 else
822 LOG(1, 0, 0, "Can't write to BGP peer %s (%s)\n", peer->name,
823 strerror(errno));
824
825 bgp_set_retry(peer);
826 return 0;
827 }
828
829 if (r < len)
830 {
831 peer->outbuf->done += r;
832 return 1;
833 }
834
835 LOG(4, 0, 0, "Sent %s to BGP peer %s\n",
836 bgp_msg_type_str(peer->outbuf->packet.header.type), peer->name);
837
838 peer->outbuf->packet.header.len = 0;
839 peer->outbuf->done = 0;
840
841 if (peer->state == Established)
842 peer->keepalive_time = time_now + peer->keepalive;
843
844 if (peer->state != peer->next_state)
845 {
846 if (peer->next_state == Disabled || peer->next_state == Idle)
847 {
848 bgp_clear(peer);
849 return 0;
850 }
851
852 peer->state = peer->next_state;
853 peer->state_time = time_now;
854
855 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
856 bgp_state_str(peer->state));
857 }
858
859 return 1;
860 }
861
862 /* initiate a read */
863 static int bgp_read(struct bgp_peer *peer)
864 {
865 int r;
866
867 while ((r = read(peer->sock, &peer->inbuf->packet + peer->inbuf->done,
868 BGP_MAX_PACKET_SIZE - peer->inbuf->done)) < 1)
869 {
870 if (!r)
871 {
872 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
873 }
874 else
875 {
876 if (errno == EINTR)
877 continue;
878
879 if (errno == EAGAIN)
880 return 1;
881
882 LOG(1, 0, 0, "Can't read from BGP peer %s (%s)\n", peer->name,
883 strerror(errno));
884 }
885
886 bgp_set_retry(peer);
887 return 0;
888 }
889
890 peer->inbuf->done += r;
891 return 1;
892 }
893
894 /* process buffered packets */
895 static int bgp_handle_input(struct bgp_peer *peer)
896 {
897 struct bgp_packet *p = &peer->inbuf->packet;
898 int len = ntohs(p->header.len);
899
900 if (len > BGP_MAX_PACKET_SIZE)
901 {
902 LOG(1, 0, 0, "Bad header length from BGP %s\n", peer->name);
903 bgp_send_notification(peer, BGP_ERR_HEADER, BGP_ERR_HDR_BAD_LEN);
904 return 0;
905 }
906
907 if (peer->inbuf->done < len)
908 return 0;
909
910 LOG(4, 0, 0, "Received %s from BGP peer %s\n",
911 bgp_msg_type_str(p->header.type), peer->name);
912
913 switch (p->header.type)
914 {
915 case BGP_MSG_OPEN:
916 {
917 struct bgp_data_open data;
918 int hold;
919 int i;
920
921 for (i = 0; i < sizeof(p->header.marker); i++)
922 {
923 if ((unsigned char) p->header.marker[i] != 0xff)
924 {
925 LOG(1, 0, 0, "Invalid marker from BGP peer %s\n",
926 peer->name);
927
928 bgp_send_notification(peer, BGP_ERR_HEADER,
929 BGP_ERR_HDR_NOT_SYNC);
930
931 return 0;
932 }
933 }
934
935 if (peer->state != OpenSent)
936 {
937 LOG(1, 0, 0, "OPEN from BGP peer %s in %s state\n",
938 peer->name, bgp_state_str(peer->state));
939
940 bgp_send_notification(peer, BGP_ERR_FSM, 0);
941 return 0;
942 }
943
944 memcpy(&data, p->data, len - sizeof(p->header));
945
946 if (data.version != BGP_VERSION)
947 {
948 LOG(1, 0, 0, "Bad version (%d) sent by BGP peer %s\n",
949 (int) data.version, peer->name);
950
951 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_VERSION);
952 return 0;
953 }
954
955 if (ntohs(data.as) != peer->as)
956 {
957 LOG(1, 0, 0, "Bad AS sent by BGP peer %s (got %d, "
958 "expected %d)\n", peer->name, (int) htons(data.as),
959 (int) peer->as);
960
961 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_BAD_AS);
962 return 0;
963 }
964
965 if ((hold = ntohs(data.hold_time)) < 3)
966 {
967 LOG(1, 0, 0, "Bad hold time (%d) from BGP peer %s\n",
968 hold, peer->name);
969
970 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_HOLD_TIME);
971 return 0;
972 }
973
974 /* pick lowest hold time */
975 if (hold < peer->hold)
976 peer->hold = hold;
977
978 /* adjust our keepalive based on negotiated hold value */
979 if (peer->keepalive * 3 > peer->hold)
980 peer->keepalive = peer->hold / 3;
981
982 /* next transition requires an exchange of keepalives */
983 bgp_send_keepalive(peer);
984
985 /* FIXME: may need to check for optional params */
986 }
987
988 break;
989
990 case BGP_MSG_KEEPALIVE:
991 if (peer->state == OpenConfirm)
992 {
993 peer->state = peer->next_state = Established;
994 peer->state_time = time_now;
995 peer->keepalive_time = time_now + peer->keepalive;
996 peer->update_routes = 1;
997 peer->retry_count = 0;
998 peer->retry_time = 0;
999
1000 LOG(4, 0, 0, "BGP peer %s: state Established\n", peer->name);
1001 }
1002
1003 break;
1004
1005 case BGP_MSG_NOTIFICATION:
1006 if (len > sizeof(p->header))
1007 {
1008 struct bgp_data_notification *notification =
1009 (struct bgp_data_notification *) p->data;
1010
1011 if (notification->error_code == BGP_ERR_CEASE)
1012 {
1013 LOG(4, 0, 0, "BGP peer %s sent CEASE\n", peer->name);
1014 bgp_restart(peer);
1015 return 0;
1016 }
1017
1018 /* FIXME: should handle more notifications */
1019 LOG(4, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n",
1020 peer->name, (int) notification->error_code);
1021 }
1022
1023 break;
1024 }
1025
1026 /* reset timer */
1027 peer->expire_time = time_now + peer->hold;
1028
1029 /* see if there's another message in the same packet/buffer */
1030 if (peer->inbuf->done > len)
1031 {
1032 peer->inbuf->done -= len;
1033 memmove(p, (char *) p + len, peer->inbuf->done);
1034 }
1035 else
1036 {
1037 peer->inbuf->packet.header.len = 0;
1038 peer->inbuf->done = 0;
1039 }
1040
1041 return peer->inbuf->done;
1042 }
1043
1044 /* send/buffer OPEN message */
1045 static int bgp_send_open(struct bgp_peer *peer)
1046 {
1047 struct bgp_data_open data;
1048 uint16_t len = sizeof(peer->outbuf->packet.header);
1049
1050 memset(peer->outbuf->packet.header.marker, 0xff,
1051 sizeof(peer->outbuf->packet.header.marker));
1052
1053 peer->outbuf->packet.header.type = BGP_MSG_OPEN;
1054
1055 data.version = BGP_VERSION;
1056 data.as = htons(our_as);
1057 data.hold_time = htons(peer->hold);
1058 data.identifier = my_address;
1059 data.opt_len = 0;
1060
1061 memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE);
1062 len += BGP_DATA_OPEN_SIZE;
1063
1064 peer->outbuf->packet.header.len = htons(len);
1065 peer->outbuf->done = 0;
1066 peer->next_state = OpenSent;
1067
1068 return bgp_write(peer);
1069 }
1070
1071 /* send/buffer KEEPALIVE message */
1072 static int bgp_send_keepalive(struct bgp_peer *peer)
1073 {
1074 memset(peer->outbuf->packet.header.marker, 0xff,
1075 sizeof(peer->outbuf->packet.header.marker));
1076
1077 peer->outbuf->packet.header.type = BGP_MSG_KEEPALIVE;
1078 peer->outbuf->packet.header.len =
1079 htons(sizeof(peer->outbuf->packet.header));
1080
1081 peer->outbuf->done = 0;
1082 peer->next_state = (peer->state == OpenSent) ? OpenConfirm : peer->state;
1083
1084 return bgp_write(peer);
1085 }
1086
1087 /* send/buffer UPDATE message */
1088 static int bgp_send_update(struct bgp_peer *peer)
1089 {
1090 uint16_t unf_len = 0;
1091 uint16_t attr_len;
1092 uint16_t len = sizeof(peer->outbuf->packet.header);
1093 struct bgp_route_list *have = peer->routes;
1094 struct bgp_route_list *want = peer->routing ? bgp_routes : 0;
1095 struct bgp_route_list *e = 0;
1096 struct bgp_route_list *add = 0;
1097 int s;
1098
1099 char *data = (char *) &peer->outbuf->packet.data;
1100
1101 /* need leave room for attr_len, bgp_path_attrs and one prefix */
1102 char *max = (char *) &peer->outbuf->packet.data
1103 + sizeof(peer->outbuf->packet.data)
1104 - sizeof(attr_len) - peer->path_attr_len - sizeof(struct bgp_ip_prefix);
1105
1106 /* skip over unf_len */
1107 data += sizeof(unf_len);
1108 len += sizeof(unf_len);
1109
1110 memset(peer->outbuf->packet.header.marker, 0xff,
1111 sizeof(peer->outbuf->packet.header.marker));
1112
1113 peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
1114
1115 peer->update_routes = 0; /* tentatively clear */
1116
1117 /* find differences */
1118 while ((have || want) && data < (max - sizeof(struct bgp_ip_prefix)))
1119 {
1120 if (have)
1121 s = want
1122 ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
1123 : -1;
1124 else
1125 s = 1;
1126
1127 if (s < 0) /* found one to delete */
1128 {
1129 struct bgp_route_list *tmp = have;
1130 have = have->next;
1131
1132 s = BGP_IP_PREFIX_SIZE(tmp->dest);
1133 memcpy(data, &tmp->dest, s);
1134 data += s;
1135 unf_len += s;
1136 len += s;
1137
1138 LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
1139 fmtaddr(tmp->dest.prefix, 0), tmp->dest.len, peer->name);
1140
1141 free(tmp);
1142
1143 if (e)
1144 e->next = have;
1145 else
1146 peer->routes = have;
1147 }
1148 else
1149 {
1150 if (!s) /* same */
1151 {
1152 e = have; /* stash the last found to relink above */
1153 have = have->next;
1154 want = want->next;
1155 }
1156 else if (s > 0) /* addition reqd. */
1157 {
1158 if (add)
1159 {
1160 peer->update_routes = 1; /* only one add per packet */
1161 if (!have)
1162 break;
1163 }
1164 else
1165 add = want;
1166
1167 if (want)
1168 want = want->next;
1169 }
1170 }
1171 }
1172
1173 if (have || want)
1174 peer->update_routes = 1; /* more to do */
1175
1176 /* anything changed? */
1177 if (!(unf_len || add))
1178 return 1;
1179
1180 /* go back and insert unf_len */
1181 unf_len = htons(unf_len);
1182 memcpy(&peer->outbuf->packet.data, &unf_len, sizeof(unf_len));
1183
1184 if (add)
1185 {
1186 if (!(e = malloc(sizeof(*e))))
1187 {
1188 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
1189 fmtaddr(add->dest.prefix, 0), add->dest.len, strerror(errno));
1190
1191 return 0;
1192 }
1193
1194 memcpy(e, add, sizeof(*e));
1195 e->next = 0;
1196 peer->routes = bgp_insert_route(peer->routes, e);
1197
1198 attr_len = htons(peer->path_attr_len);
1199 memcpy(data, &attr_len, sizeof(attr_len));
1200 data += sizeof(attr_len);
1201 len += sizeof(attr_len);
1202
1203 memcpy(data, peer->path_attrs, peer->path_attr_len);
1204 data += peer->path_attr_len;
1205 len += peer->path_attr_len;
1206
1207 s = BGP_IP_PREFIX_SIZE(add->dest);
1208 memcpy(data, &add->dest, s);
1209 data += s;
1210 len += s;
1211
1212 LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
1213 fmtaddr(add->dest.prefix, 0), add->dest.len, peer->name);
1214 }
1215 else
1216 {
1217 attr_len = 0;
1218 memcpy(data, &attr_len, sizeof(attr_len));
1219 data += sizeof(attr_len);
1220 len += sizeof(attr_len);
1221 }
1222
1223 peer->outbuf->packet.header.len = htons(len);
1224 peer->outbuf->done = 0;
1225
1226 return bgp_write(peer);
1227 }
1228
1229 /* send/buffer NOTIFICATION message */
1230 static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
1231 uint8_t subcode)
1232 {
1233 struct bgp_data_notification data;
1234 uint16_t len = 0;
1235
1236 data.error_code = code;
1237 len += sizeof(data.error_code);
1238
1239 data.error_subcode = subcode;
1240 len += sizeof(data.error_code);
1241
1242 memset(peer->outbuf->packet.header.marker, 0xff,
1243 sizeof(peer->outbuf->packet.header.marker));
1244
1245 peer->outbuf->packet.header.type = BGP_MSG_NOTIFICATION;
1246 peer->outbuf->packet.header.len =
1247 htons(sizeof(peer->outbuf->packet.header) + len);
1248
1249 memcpy(peer->outbuf->packet.data, &data, len);
1250
1251 peer->outbuf->done = 0;
1252 peer->next_state = code == BGP_ERR_CEASE ? Disabled : Idle;
1253
1254 /* we're dying; ignore any pending input */
1255 peer->inbuf->packet.header.len = 0;
1256 peer->inbuf->done = 0;
1257
1258 return bgp_write(peer);
1259 }