Forgot to remove a prototype.
[l2tpns.git] / bgp.c
1 /*
2 * BGPv4
3 * Used to advertise routes for upstream (l2tp port, rather than gratuitous
4 * arp) and downstream--allowing routers to load-balance both.
5 *
6 * Implementation limitations:
7 * - We never listen for incoming connections (session always initiated by us).
8 * - Any routes advertised by the peer are accepted, but ignored.
9 * - No password support; neither RFC1771 (which no-one seems to do anyway)
10 * nor RFC2385 (which requires a kernel patch on 2.4 kernels).
11 */
12
/* CVS revision identifier string for this file */
const char *cvs_id_bgp = "$Id: bgp.c,v 1.12 2005/09/02 23:39:36 bodea Exp $";
14
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <string.h>
18 #include <time.h>
19 #include <errno.h>
20 #include <sys/socket.h>
21 #include <netinet/in.h>
22 #include <arpa/inet.h>
23 #include <netdb.h>
24 #include <fcntl.h>
25
26 #include "l2tpns.h"
27 #include "bgp.h"
28 #include "util.h"
29
/*
 * Forward declarations of the file-local helpers defined below.
 */

/* peer state handling */
static void bgp_clear(struct bgp_peer *peer);
static void bgp_set_retry(struct bgp_peer *peer);

/* route list maintenance */
static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
    struct bgp_route_list *new);
static void bgp_free_routes(struct bgp_route_list *routes);

/* logging helper */
static char const *bgp_msg_type_str(uint8_t type);

/* connection set-up and socket i/o */
static int bgp_connect(struct bgp_peer *peer);
static int bgp_handle_connect(struct bgp_peer *peer);
static int bgp_write(struct bgp_peer *peer);
static int bgp_read(struct bgp_peer *peer);
static int bgp_handle_input(struct bgp_peer *peer);

/* message builders */
static int bgp_send_open(struct bgp_peer *peer);
static int bgp_send_keepalive(struct bgp_peer *peer);
static int bgp_send_update(struct bgp_peer *peer);
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode);

/* our AS number as stored by bgp_setup(); 0 means none */
static uint16_t our_as;

/* sorted list of routes to advertise to peers that have routing enabled */
static struct bgp_route_list *bgp_routes = NULL;

/* set to 1 by bgp_start() */
int bgp_configured = 0;

/* peer table, indexed 0 .. BGP_NUM_PEERS-1; not allocated in this file */
struct bgp_peer *bgp_peers = NULL;
53
54 /* prepare peer structure, globals */
55 int bgp_setup(int as)
56 {
57 int i;
58 struct bgp_peer *peer;
59
60 for (i = 0; i < BGP_NUM_PEERS; i++)
61 {
62 peer = &bgp_peers[i];
63 memset(peer, 0, sizeof(*peer));
64
65 peer->addr = INADDR_NONE;
66 peer->sock = -1;
67 peer->state = peer->next_state = Disabled;
68
69 if (!((peer->outbuf = malloc(sizeof(*peer->outbuf)))
70 && (peer->inbuf = malloc(sizeof(*peer->inbuf)))))
71 {
72 LOG(0, 0, 0, "Can't allocate buffers for bgp peer (%s)\n",
73 strerror(errno));
74
75 return 0;
76 }
77
78 peer->edata.type = FD_TYPE_BGP;
79 peer->edata.index = i;
80 peer->events = 0;
81 }
82
83 if (as < 1)
84 as = 0;
85
86 if ((our_as = as))
87 return 0;
88
89 bgp_routes = 0;
90 bgp_configured = 0; /* set by bgp_start */
91
92 return 1;
93 }
94
95 /* start connection with a peer */
96 int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
97 int hold, int enable)
98 {
99 struct hostent *h;
100 int ibgp;
101 int i;
102 struct bgp_path_attr a;
103 char path_attrs[64];
104 char *p = path_attrs;
105 in_addr_t ip;
106 uint32_t metric = htonl(BGP_METRIC);
107 uint32_t no_export = htonl(BGP_COMMUNITY_NO_EXPORT);
108
109 if (!our_as)
110 return 0;
111
112 if (peer->state != Disabled)
113 bgp_halt(peer);
114
115 snprintf(peer->name, sizeof(peer->name), "%s", name);
116
117 if (!(h = gethostbyname(name)) || h->h_addrtype != AF_INET)
118 {
119 LOG(0, 0, 0, "Can't get address for BGP peer %s (%s)\n",
120 name, h ? "no address" : hstrerror(h_errno));
121
122 return 0;
123 }
124
125 memcpy(&peer->addr, h->h_addr, sizeof(peer->addr));
126 peer->as = as > 0 ? as : our_as;
127 ibgp = peer->as == our_as;
128
129 /* set initial timer values */
130 peer->init_keepalive = keepalive == -1 ? BGP_KEEPALIVE_TIME : keepalive;
131 peer->init_hold = hold == -1 ? BGP_HOLD_TIME : hold;
132
133 if (peer->init_hold < 3)
134 peer->init_hold = 3;
135
136 if (peer->init_keepalive * 3 > peer->init_hold)
137 peer->init_keepalive = peer->init_hold / 3;
138
139 /* clear buffers, go to Idle state */
140 peer->next_state = Idle;
141 bgp_clear(peer);
142
143 /* set initial routing state */
144 peer->routing = enable;
145
146 /* all our routes use the same attributes, so prepare it in advance */
147 if (peer->path_attrs)
148 free(peer->path_attrs);
149
150 peer->path_attr_len = 0;
151
152 /* ORIGIN */
153 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
154 a.code = BGP_PATH_ATTR_CODE_ORIGIN;
155 a.data.s.len = 1;
156 a.data.s.value[0] = BGP_PATH_ATTR_CODE_ORIGIN_IGP;
157
158 #define ADD_ATTRIBUTE() do { \
159 i = BGP_PATH_ATTR_SIZE(a); \
160 memcpy(p, &a, i); \
161 p += i; \
162 peer->path_attr_len += i; } while (0)
163
164 ADD_ATTRIBUTE();
165
166 /* AS_PATH */
167 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
168 a.code = BGP_PATH_ATTR_CODE_AS_PATH;
169 if (ibgp)
170 {
171 /* empty path */
172 a.data.s.len = 0;
173 }
174 else
175 {
176 /* just our AS */
177 struct {
178 uint8_t type;
179 uint8_t len;
180 uint16_t value;
181 } as_path = {
182 BGP_PATH_ATTR_CODE_AS_PATH_AS_SEQUENCE,
183 1,
184 htons(our_as),
185 };
186
187 a.data.s.len = sizeof(as_path);
188 memcpy(&a.data.s.value, &as_path, sizeof(as_path));
189 }
190
191 ADD_ATTRIBUTE();
192
193 /* NEXT_HOP */
194 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
195 a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
196 ip = my_address; /* we're it */
197 a.data.s.len = sizeof(ip);
198 memcpy(a.data.s.value, &ip, sizeof(ip));
199
200 ADD_ATTRIBUTE();
201
202 /* MULTI_EXIT_DISC */
203 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
204 a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC;
205 a.data.s.len = sizeof(metric);
206 memcpy(a.data.s.value, &metric, sizeof(metric));
207
208 ADD_ATTRIBUTE();
209
210 if (ibgp)
211 {
212 uint32_t local_pref = htonl(BGP_LOCAL_PREF);
213
214 /* LOCAL_PREF */
215 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
216 a.code = BGP_PATH_ATTR_CODE_LOCAL_PREF;
217 a.data.s.len = sizeof(local_pref);
218 memcpy(a.data.s.value, &local_pref, sizeof(local_pref));
219
220 ADD_ATTRIBUTE();
221 }
222
223 /* COMMUNITIES */
224 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_TRANS;
225 a.code = BGP_PATH_ATTR_CODE_COMMUNITIES;
226 a.data.s.len = sizeof(no_export);
227 memcpy(a.data.s.value, &no_export, sizeof(no_export));
228
229 ADD_ATTRIBUTE();
230
231 if (!(peer->path_attrs = malloc(peer->path_attr_len)))
232 {
233 LOG(0, 0, 0, "Can't allocate path_attrs for %s (%s)\n",
234 name, strerror(errno));
235
236 return 0;
237 }
238
239 memcpy(peer->path_attrs, path_attrs, peer->path_attr_len);
240
241 LOG(4, 0, 0, "Initiating BGP connection to %s (routing %s)\n",
242 name, enable ? "enabled" : "suspended");
243
244 /* we have at least one peer configured */
245 bgp_configured = 1;
246
247 /* connect */
248 return bgp_connect(peer);
249 }
250
251 /* clear counters, timers, routes and buffers; close socket; move to
252 next_state, which may be Disabled or Idle */
253 static void bgp_clear(struct bgp_peer *peer)
254 {
255 if (peer->sock != -1)
256 {
257 close(peer->sock);
258 peer->sock = -1;
259 }
260
261 peer->keepalive_time = 0;
262 peer->expire_time = 0;
263
264 peer->keepalive = peer->init_keepalive;
265 peer->hold = peer->init_hold;
266
267 bgp_free_routes(peer->routes);
268 peer->routes = 0;
269
270 peer->outbuf->packet.header.len = 0;
271 peer->outbuf->done = 0;
272 peer->inbuf->packet.header.len = 0;
273 peer->inbuf->done = 0;
274
275 peer->cli_flag = 0;
276 peer->events = 0;
277
278 if (peer->state != peer->next_state)
279 {
280 peer->state = peer->next_state;
281 peer->state_time = time_now;
282
283 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
284 bgp_state_str(peer->next_state));
285 }
286 }
287
288 /* initiate a clean shutdown */
289 void bgp_stop(struct bgp_peer *peer)
290 {
291 LOG(4, 0, 0, "Terminating BGP connection to %s\n", peer->name);
292 bgp_send_notification(peer, BGP_ERR_CEASE, 0);
293 }
294
295 /* drop connection (if any) and set state to Disabled */
296 void bgp_halt(struct bgp_peer *peer)
297 {
298 LOG(4, 0, 0, "Aborting BGP connection to %s\n", peer->name);
299 peer->next_state = Disabled;
300 bgp_clear(peer);
301 }
302
303 /* drop connection (if any) and set to Idle for connection retry */
304 int bgp_restart(struct bgp_peer *peer)
305 {
306 peer->next_state = Idle;
307 bgp_clear(peer);
308
309 /* restart now */
310 peer->retry_time = time_now;
311 peer->retry_count = 0;
312
313 /* connect */
314 return bgp_connect(peer);
315 }
316
317 static void bgp_set_retry(struct bgp_peer *peer)
318 {
319 if (peer->retry_count++ < BGP_MAX_RETRY)
320 {
321 peer->retry_time = time_now + (BGP_RETRY_BACKOFF * peer->retry_count);
322 peer->next_state = Idle;
323 bgp_clear(peer);
324 }
325 else
326 bgp_halt(peer); /* give up */
327 }
328
329 /* insert route into list; sorted */
330 static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
331 struct bgp_route_list *new)
332 {
333 struct bgp_route_list *p = head;
334 struct bgp_route_list *e = 0;
335
336 while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0)
337 {
338 e = p;
339 p = p->next;
340 }
341
342 if (e)
343 {
344 new->next = e->next;
345 e->next = new;
346 }
347 else
348 {
349 new->next = head;
350 head = new;
351 }
352
353 return head;
354 }
355
356 /* add route to list for peers */
357 /*
358 * Note: this doesn't do route aggregation, nor drop routes if a less
359 * specific match already exists (partly because I'm lazy, but also so
360 * that if that route is later deleted we don't have to be concerned
361 * about adding back the more specific one).
362 */
363 int bgp_add_route(in_addr_t ip, int prefixlen)
364 {
365 struct bgp_route_list *r = bgp_routes;
366 struct bgp_route_list add;
367 int i;
368
369 add.dest.prefix = ip;
370 add.dest.len = prefixlen;
371 add.next = 0;
372
373 /* check for duplicate */
374 while (r)
375 {
376 i = memcmp(&r->dest, &add.dest, sizeof(r->dest));
377 if (!i)
378 return 1; /* already covered */
379
380 if (i > 0)
381 break;
382
383 r = r->next;
384 }
385
386 /* insert into route list; sorted */
387 if (!(r = malloc(sizeof(*r))))
388 {
389 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
390 fmtaddr(add.dest.prefix, 0), add.dest.len, strerror(errno));
391
392 return 0;
393 }
394
395 memcpy(r, &add, sizeof(*r));
396 bgp_routes = bgp_insert_route(bgp_routes, r);
397
398 /* flag established peers for update */
399 for (i = 0; i < BGP_NUM_PEERS; i++)
400 if (bgp_peers[i].state == Established)
401 bgp_peers[i].update_routes = 1;
402
403 LOG(4, 0, 0, "Registered BGP route %s/%d\n",
404 fmtaddr(add.dest.prefix, 0), add.dest.len);
405
406 return 1;
407 }
408
409 /* remove route from list for peers */
410 int bgp_del_route(in_addr_t ip, int prefixlen)
411 {
412 struct bgp_route_list *r = bgp_routes;
413 struct bgp_route_list *e = 0;
414 struct bgp_route_list del;
415 int i;
416
417 del.dest.prefix = ip;
418 del.dest.len = prefixlen;
419 del.next = 0;
420
421 /* find entry in routes list and remove */
422 while (r)
423 {
424 i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
425 if (!i)
426 {
427 if (e)
428 e->next = r->next;
429 else
430 bgp_routes = r->next;
431
432 free(r);
433 break;
434 }
435
436 e = r;
437
438 if (i > 0)
439 r = 0; /* stop */
440 else
441 r = r->next;
442 }
443
444 /* not found */
445 if (!r)
446 return 1;
447
448 /* flag established peers for update */
449 for (i = 0; i < BGP_NUM_PEERS; i++)
450 if (bgp_peers[i].state == Established)
451 bgp_peers[i].update_routes = 1;
452
453 LOG(4, 0, 0, "Removed BGP route %s/%d\n",
454 fmtaddr(del.dest.prefix, 0), del.dest.len);
455
456 return 1;
457 }
458
459 /* enable or disable routing */
460 void bgp_enable_routing(int enable)
461 {
462 int i;
463
464 for (i = 0; i < BGP_NUM_PEERS; i++)
465 {
466 bgp_peers[i].routing = enable;
467
468 /* flag established peers for update */
469 if (bgp_peers[i].state == Established)
470 bgp_peers[i].update_routes = 1;
471 }
472
473 LOG(4, 0, 0, "%s BGP routing\n", enable ? "Enabled" : "Suspended");
474 }
475
476 #ifdef HAVE_EPOLL
477 # include <sys/epoll.h>
478 #else
479 # include "fake_epoll.h"
480 #endif
481
482 /* return a bitmask of the events required to poll this peer's fd */
483 int bgp_set_poll()
484 {
485 int i;
486
487 if (!bgp_configured)
488 return 0;
489
490 for (i = 0; i < BGP_NUM_PEERS; i++)
491 {
492 struct bgp_peer *peer = &bgp_peers[i];
493 int events = 0;
494
495 if (peer->state == Disabled || peer->state == Idle)
496 continue;
497
498 if (peer->inbuf->done < BGP_MAX_PACKET_SIZE)
499 events |= EPOLLIN;
500
501 if (peer->state == Connect || /* connection in progress */
502 peer->update_routes || /* routing updates */
503 peer->outbuf->packet.header.len) /* pending output */
504 events |= EPOLLOUT;
505
506 if (peer->events != events)
507 {
508 struct epoll_event ev;
509
510 ev.events = peer->events = events;
511 ev.data.ptr = &peer->edata;
512 epoll_ctl(epollfd, EPOLL_CTL_MOD, peer->sock, &ev);
513 }
514 }
515
516 return 1;
517 }
518
519 /* process bgp events/timers */
520 int bgp_process(uint32_t events[])
521 {
522 int i;
523
524 if (!bgp_configured)
525 return 0;
526
527 for (i = 0; i < BGP_NUM_PEERS; i++)
528 {
529 struct bgp_peer *peer = &bgp_peers[i];
530
531 if (*peer->name && peer->cli_flag == BGP_CLI_RESTART)
532 {
533 bgp_restart(peer);
534 continue;
535 }
536
537 if (peer->state == Disabled)
538 continue;
539
540 if (peer->cli_flag)
541 {
542 switch (peer->cli_flag)
543 {
544 case BGP_CLI_SUSPEND:
545 if (peer->routing)
546 {
547 peer->routing = 0;
548 if (peer->state == Established)
549 peer->update_routes = 1;
550 }
551
552 break;
553
554 case BGP_CLI_ENABLE:
555 if (!peer->routing)
556 {
557 peer->routing = 1;
558 if (peer->state == Established)
559 peer->update_routes = 1;
560 }
561
562 break;
563 }
564
565 peer->cli_flag = 0;
566 }
567
568 /* handle empty/fill of buffers */
569 if (events[i] & EPOLLOUT)
570 {
571 int r = 1;
572 if (peer->state == Connect)
573 r = bgp_handle_connect(peer);
574 else if (peer->outbuf->packet.header.len)
575 r = bgp_write(peer);
576
577 if (!r)
578 continue;
579 }
580
581 if (events[i] & (EPOLLIN|EPOLLHUP))
582 {
583 if (!bgp_read(peer))
584 continue;
585 }
586
587 /* process input buffer contents */
588 while (peer->inbuf->done >= sizeof(peer->inbuf->packet.header)
589 && !peer->outbuf->packet.header.len) /* may need to queue a response */
590 {
591 if (bgp_handle_input(peer) < 0)
592 continue;
593 }
594
595 /* process pending updates */
596 if (peer->update_routes
597 && !peer->outbuf->packet.header.len) /* ditto */
598 {
599 if (!bgp_send_update(peer))
600 continue;
601 }
602
603 /* process timers */
604 if (peer->state == Established)
605 {
606 if (time_now > peer->expire_time)
607 {
608 LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
609 peer->name, peer->hold);
610
611 bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
612 continue;
613 }
614
615 if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
616 bgp_send_keepalive(peer);
617 }
618 else if (peer->state == Idle)
619 {
620 if (time_now > peer->retry_time)
621 bgp_connect(peer);
622 }
623 else if (time_now > peer->state_time + BGP_STATE_TIME)
624 {
625 LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
626 bgp_state_str(peer->state), peer->name);
627
628 bgp_restart(peer);
629 }
630 }
631
632 return 1;
633 }
634
635 static void bgp_free_routes(struct bgp_route_list *routes)
636 {
637 struct bgp_route_list *tmp;
638
639 while ((tmp = routes))
640 {
641 routes = tmp->next;
642 free(tmp);
643 }
644 }
645
646 char const *bgp_state_str(enum bgp_state state)
647 {
648 switch (state)
649 {
650 case Disabled: return "Disabled";
651 case Idle: return "Idle";
652 case Connect: return "Connect";
653 case Active: return "Active";
654 case OpenSent: return "OpenSent";
655 case OpenConfirm: return "OpenConfirm";
656 case Established: return "Established";
657 }
658
659 return "?";
660 }
661
662 static char const *bgp_msg_type_str(uint8_t type)
663 {
664 switch (type)
665 {
666 case BGP_MSG_OPEN: return "OPEN";
667 case BGP_MSG_UPDATE: return "UPDATE";
668 case BGP_MSG_NOTIFICATION: return "NOTIFICATION";
669 case BGP_MSG_KEEPALIVE: return "KEEPALIVE";
670 }
671
672 return "?";
673 }
674
675 /* attempt to connect to peer */
676 static int bgp_connect(struct bgp_peer *peer)
677 {
678 static int bgp_port = 0;
679 struct sockaddr_in addr;
680 struct epoll_event ev;
681
682 if (!bgp_port)
683 {
684 struct servent *serv;
685 if (!(serv = getservbyname("bgp", "tcp")))
686 {
687 LOG(0, 0, 0, "Can't get bgp service (%s)\n", strerror(errno));
688 return 0;
689 }
690
691 bgp_port = serv->s_port;
692 }
693
694 if ((peer->sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
695 {
696 LOG(0, 0, 0, "Can't create a socket for BGP peer %s (%s)\n",
697 peer->name, strerror(errno));
698
699 peer->state = peer->next_state = Disabled;
700 return 0;
701 }
702
703 /* add to poll set */
704 ev.events = peer->events = EPOLLOUT;
705 ev.data.ptr = &peer->edata;
706 epoll_ctl(epollfd, EPOLL_CTL_ADD, peer->sock, &ev);
707
708 /* set to non-blocking */
709 fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
710
711 /* try connect */
712 memset(&addr, 0, sizeof(addr));
713 addr.sin_family = AF_INET;
714 addr.sin_port = bgp_port;
715 addr.sin_addr.s_addr = peer->addr;
716
717 while (connect(peer->sock, (struct sockaddr *) &addr, sizeof(addr)) == -1)
718 {
719 if (errno == EINTR) /* SIGALARM handler */
720 continue;
721
722 if (errno != EINPROGRESS)
723 {
724 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n",
725 inet_ntoa(addr.sin_addr), strerror(errno));
726
727 bgp_set_retry(peer);
728 return 0;
729 }
730
731 peer->state = Connect;
732 peer->state_time = time_now;
733
734 LOG(4, 0, 0, "BGP peer %s: state Connect\n", peer->name);
735 return 1;
736 }
737
738 peer->state = Active;
739 peer->state_time = time_now;
740 peer->retry_time = peer->retry_count = 0;
741
742 LOG(4, 0, 0, "BGP peer %s: state Active\n", inet_ntoa(addr.sin_addr));
743
744 return bgp_send_open(peer);
745 }
746
747 /* complete partial connection (state = Connect) */
748 static int bgp_handle_connect(struct bgp_peer *peer)
749 {
750 int err = 0;
751 socklen_t len = sizeof(int);
752 getsockopt(peer->sock, SOL_SOCKET, SO_ERROR, &err, &len);
753 if (err)
754 {
755 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n", peer->name,
756 strerror(err));
757
758 bgp_set_retry(peer);
759 return 0;
760 }
761
762 peer->state = Active;
763 peer->state_time = time_now;
764
765 LOG(4, 0, 0, "BGP peer %s: state Active\n", peer->name);
766
767 return bgp_send_open(peer);
768 }
769
770 /* initiate a write */
771 static int bgp_write(struct bgp_peer *peer)
772 {
773 int len = htons(peer->outbuf->packet.header.len);
774 int r;
775
776 while ((r = write(peer->sock, &peer->outbuf->packet + peer->outbuf->done,
777 len - peer->outbuf->done)) == -1)
778 {
779 if (errno == EINTR)
780 continue;
781
782 if (errno == EAGAIN)
783 return 1;
784
785 if (errno == EPIPE)
786 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
787 else
788 LOG(1, 0, 0, "Can't write to BGP peer %s (%s)\n", peer->name,
789 strerror(errno));
790
791 bgp_set_retry(peer);
792 return 0;
793 }
794
795 if (r < len)
796 {
797 peer->outbuf->done += r;
798 return 1;
799 }
800
801 LOG(4, 0, 0, "Sent %s to BGP peer %s\n",
802 bgp_msg_type_str(peer->outbuf->packet.header.type), peer->name);
803
804 peer->outbuf->packet.header.len = 0;
805 peer->outbuf->done = 0;
806
807 if (peer->state == Established)
808 peer->keepalive_time = time_now + peer->keepalive;
809
810 if (peer->state != peer->next_state)
811 {
812 if (peer->next_state == Disabled || peer->next_state == Idle)
813 {
814 bgp_clear(peer);
815 return 0;
816 }
817
818 peer->state = peer->next_state;
819 peer->state_time = time_now;
820
821 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
822 bgp_state_str(peer->state));
823 }
824
825 return 1;
826 }
827
828 /* initiate a read */
829 static int bgp_read(struct bgp_peer *peer)
830 {
831 int r;
832
833 while ((r = read(peer->sock, &peer->inbuf->packet + peer->inbuf->done,
834 BGP_MAX_PACKET_SIZE - peer->inbuf->done)) < 1)
835 {
836 if (!r)
837 {
838 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
839 }
840 else
841 {
842 if (errno == EINTR)
843 continue;
844
845 if (errno == EAGAIN)
846 return 1;
847
848 LOG(1, 0, 0, "Can't read from BGP peer %s (%s)\n", peer->name,
849 strerror(errno));
850 }
851
852 bgp_set_retry(peer);
853 return 0;
854 }
855
856 peer->inbuf->done += r;
857 return 1;
858 }
859
860 /* process buffered packets */
861 static int bgp_handle_input(struct bgp_peer *peer)
862 {
863 struct bgp_packet *p = &peer->inbuf->packet;
864 int len = ntohs(p->header.len);
865
866 if (len > BGP_MAX_PACKET_SIZE)
867 {
868 LOG(1, 0, 0, "Bad header length from BGP %s\n", peer->name);
869 bgp_send_notification(peer, BGP_ERR_HEADER, BGP_ERR_HDR_BAD_LEN);
870 return 0;
871 }
872
873 if (peer->inbuf->done < len)
874 return 0;
875
876 LOG(4, 0, 0, "Received %s from BGP peer %s\n",
877 bgp_msg_type_str(p->header.type), peer->name);
878
879 switch (p->header.type)
880 {
881 case BGP_MSG_OPEN:
882 {
883 struct bgp_data_open data;
884 int hold;
885 int i;
886
887 for (i = 0; i < sizeof(p->header.marker); i++)
888 {
889 if ((unsigned char) p->header.marker[i] != 0xff)
890 {
891 LOG(1, 0, 0, "Invalid marker from BGP peer %s\n",
892 peer->name);
893
894 bgp_send_notification(peer, BGP_ERR_HEADER,
895 BGP_ERR_HDR_NOT_SYNC);
896
897 return 0;
898 }
899 }
900
901 if (peer->state != OpenSent)
902 {
903 LOG(1, 0, 0, "OPEN from BGP peer %s in %s state\n",
904 peer->name, bgp_state_str(peer->state));
905
906 bgp_send_notification(peer, BGP_ERR_FSM, 0);
907 return 0;
908 }
909
910 memcpy(&data, p->data, len - sizeof(p->header));
911
912 if (data.version != BGP_VERSION)
913 {
914 LOG(1, 0, 0, "Bad version (%d) sent by BGP peer %s\n",
915 (int) data.version, peer->name);
916
917 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_VERSION);
918 return 0;
919 }
920
921 if (ntohs(data.as) != peer->as)
922 {
923 LOG(1, 0, 0, "Bad AS sent by BGP peer %s (got %d, "
924 "expected %d)\n", peer->name, (int) htons(data.as),
925 (int) peer->as);
926
927 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_BAD_AS);
928 return 0;
929 }
930
931 if ((hold = ntohs(data.hold_time)) < 3)
932 {
933 LOG(1, 0, 0, "Bad hold time (%d) from BGP peer %s\n",
934 hold, peer->name);
935
936 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_HOLD_TIME);
937 return 0;
938 }
939
940 /* pick lowest hold time */
941 if (hold < peer->hold)
942 peer->hold = hold;
943
944 /* adjust our keepalive based on negotiated hold value */
945 if (peer->keepalive * 3 > peer->hold)
946 peer->keepalive = peer->hold / 3;
947
948 /* next transition requires an exchange of keepalives */
949 bgp_send_keepalive(peer);
950
951 /* FIXME: may need to check for optional params */
952 }
953
954 break;
955
956 case BGP_MSG_KEEPALIVE:
957 if (peer->state == OpenConfirm)
958 {
959 peer->state = peer->next_state = Established;
960 peer->state_time = time_now;
961 peer->keepalive_time = time_now + peer->keepalive;
962 peer->update_routes = 1;
963 peer->retry_count = 0;
964 peer->retry_time = 0;
965
966 LOG(4, 0, 0, "BGP peer %s: state Established\n", peer->name);
967 }
968
969 break;
970
971 case BGP_MSG_NOTIFICATION:
972 if (len > sizeof(p->header))
973 {
974 struct bgp_data_notification *notification =
975 (struct bgp_data_notification *) p->data;
976
977 if (notification->error_code == BGP_ERR_CEASE)
978 {
979 LOG(4, 0, 0, "BGP peer %s sent CEASE\n", peer->name);
980 bgp_restart(peer);
981 return 0;
982 }
983
984 /* FIXME: should handle more notifications */
985 LOG(4, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n",
986 peer->name, (int) notification->error_code);
987 }
988
989 break;
990 }
991
992 /* reset timer */
993 peer->expire_time = time_now + peer->hold;
994
995 /* see if there's another message in the same packet/buffer */
996 if (peer->inbuf->done > len)
997 {
998 peer->inbuf->done -= len;
999 memmove(p, (char *) p + len, peer->inbuf->done);
1000 }
1001 else
1002 {
1003 peer->inbuf->packet.header.len = 0;
1004 peer->inbuf->done = 0;
1005 }
1006
1007 return peer->inbuf->done;
1008 }
1009
1010 /* send/buffer OPEN message */
1011 static int bgp_send_open(struct bgp_peer *peer)
1012 {
1013 struct bgp_data_open data;
1014 uint16_t len = sizeof(peer->outbuf->packet.header);
1015
1016 memset(peer->outbuf->packet.header.marker, 0xff,
1017 sizeof(peer->outbuf->packet.header.marker));
1018
1019 peer->outbuf->packet.header.type = BGP_MSG_OPEN;
1020
1021 data.version = BGP_VERSION;
1022 data.as = htons(our_as);
1023 data.hold_time = htons(peer->hold);
1024 data.identifier = my_address;
1025 data.opt_len = 0;
1026
1027 memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE);
1028 len += BGP_DATA_OPEN_SIZE;
1029
1030 peer->outbuf->packet.header.len = htons(len);
1031 peer->outbuf->done = 0;
1032 peer->next_state = OpenSent;
1033
1034 return bgp_write(peer);
1035 }
1036
1037 /* send/buffer KEEPALIVE message */
1038 static int bgp_send_keepalive(struct bgp_peer *peer)
1039 {
1040 memset(peer->outbuf->packet.header.marker, 0xff,
1041 sizeof(peer->outbuf->packet.header.marker));
1042
1043 peer->outbuf->packet.header.type = BGP_MSG_KEEPALIVE;
1044 peer->outbuf->packet.header.len =
1045 htons(sizeof(peer->outbuf->packet.header));
1046
1047 peer->outbuf->done = 0;
1048 peer->next_state = (peer->state == OpenSent) ? OpenConfirm : peer->state;
1049
1050 return bgp_write(peer);
1051 }
1052
1053 /* send/buffer UPDATE message */
1054 static int bgp_send_update(struct bgp_peer *peer)
1055 {
1056 uint16_t unf_len = 0;
1057 uint16_t attr_len;
1058 uint16_t len = sizeof(peer->outbuf->packet.header);
1059 struct bgp_route_list *have = peer->routes;
1060 struct bgp_route_list *want = peer->routing ? bgp_routes : 0;
1061 struct bgp_route_list *e = 0;
1062 struct bgp_route_list *add = 0;
1063 int s;
1064
1065 char *data = (char *) &peer->outbuf->packet.data;
1066
1067 /* need leave room for attr_len, bgp_path_attrs and one prefix */
1068 char *max = (char *) &peer->outbuf->packet.data
1069 + sizeof(peer->outbuf->packet.data)
1070 - sizeof(attr_len) - peer->path_attr_len - sizeof(struct bgp_ip_prefix);
1071
1072 /* skip over unf_len */
1073 data += sizeof(unf_len);
1074 len += sizeof(unf_len);
1075
1076 memset(peer->outbuf->packet.header.marker, 0xff,
1077 sizeof(peer->outbuf->packet.header.marker));
1078
1079 peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
1080
1081 peer->update_routes = 0; /* tentatively clear */
1082
1083 /* find differences */
1084 while ((have || want) && data < (max - sizeof(struct bgp_ip_prefix)))
1085 {
1086 if (have)
1087 s = want
1088 ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
1089 : -1;
1090 else
1091 s = 1;
1092
1093 if (s < 0) /* found one to delete */
1094 {
1095 struct bgp_route_list *tmp = have;
1096 have = have->next;
1097
1098 s = BGP_IP_PREFIX_SIZE(tmp->dest);
1099 memcpy(data, &tmp->dest, s);
1100 data += s;
1101 unf_len += s;
1102 len += s;
1103
1104 LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
1105 fmtaddr(tmp->dest.prefix, 0), tmp->dest.len, peer->name);
1106
1107 free(tmp);
1108
1109 if (e)
1110 e->next = have;
1111 else
1112 peer->routes = have;
1113 }
1114 else
1115 {
1116 if (!s) /* same */
1117 {
1118 e = have; /* stash the last found to relink above */
1119 have = have->next;
1120 want = want->next;
1121 }
1122 else if (s > 0) /* addition reqd. */
1123 {
1124 if (add)
1125 {
1126 peer->update_routes = 1; /* only one add per packet */
1127 if (!have)
1128 break;
1129 }
1130 else
1131 add = want;
1132
1133 if (want)
1134 want = want->next;
1135 }
1136 }
1137 }
1138
1139 if (have || want)
1140 peer->update_routes = 1; /* more to do */
1141
1142 /* anything changed? */
1143 if (!(unf_len || add))
1144 return 1;
1145
1146 /* go back and insert unf_len */
1147 unf_len = htons(unf_len);
1148 memcpy(&peer->outbuf->packet.data, &unf_len, sizeof(unf_len));
1149
1150 if (add)
1151 {
1152 if (!(e = malloc(sizeof(*e))))
1153 {
1154 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
1155 fmtaddr(add->dest.prefix, 0), add->dest.len, strerror(errno));
1156
1157 return 0;
1158 }
1159
1160 memcpy(e, add, sizeof(*e));
1161 e->next = 0;
1162 peer->routes = bgp_insert_route(peer->routes, e);
1163
1164 attr_len = htons(peer->path_attr_len);
1165 memcpy(data, &attr_len, sizeof(attr_len));
1166 data += sizeof(attr_len);
1167 len += sizeof(attr_len);
1168
1169 memcpy(data, peer->path_attrs, peer->path_attr_len);
1170 data += peer->path_attr_len;
1171 len += peer->path_attr_len;
1172
1173 s = BGP_IP_PREFIX_SIZE(add->dest);
1174 memcpy(data, &add->dest, s);
1175 data += s;
1176 len += s;
1177
1178 LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
1179 fmtaddr(add->dest.prefix, 0), add->dest.len, peer->name);
1180 }
1181 else
1182 {
1183 attr_len = 0;
1184 memcpy(data, &attr_len, sizeof(attr_len));
1185 data += sizeof(attr_len);
1186 len += sizeof(attr_len);
1187 }
1188
1189 peer->outbuf->packet.header.len = htons(len);
1190 peer->outbuf->done = 0;
1191
1192 return bgp_write(peer);
1193 }
1194
1195 /* send/buffer NOTIFICATION message */
1196 static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
1197 uint8_t subcode)
1198 {
1199 struct bgp_data_notification data;
1200 uint16_t len = 0;
1201
1202 data.error_code = code;
1203 len += sizeof(data.error_code);
1204
1205 data.error_subcode = subcode;
1206 len += sizeof(data.error_code);
1207
1208 memset(peer->outbuf->packet.header.marker, 0xff,
1209 sizeof(peer->outbuf->packet.header.marker));
1210
1211 peer->outbuf->packet.header.type = BGP_MSG_NOTIFICATION;
1212 peer->outbuf->packet.header.len =
1213 htons(sizeof(peer->outbuf->packet.header) + len);
1214
1215 memcpy(peer->outbuf->packet.data, &data, len);
1216
1217 peer->outbuf->done = 0;
1218 peer->next_state = code == BGP_ERR_CEASE ? Disabled : Idle;
1219
1220 /* we're dying; ignore any pending input */
1221 peer->inbuf->packet.header.len = 0;
1222 peer->inbuf->done = 0;
1223
1224 return bgp_write(peer);
1225 }