Merge remote-tracking branch 'new-upstream/master' into common-cvs-git-upstream
[l2tpns.git] / bgp.c
1 /*
2 * BGPv4
3 * Used to advertise routes for upstream (l2tp port, rather than gratuitous
4 * arp) and downstream--allowing routers to load-balance both.
5 *
6 * Implementation limitations:
7 * - We never listen for incoming connections (session always initiated by us).
8 * - Any routes advertised by the peer are accepted, but ignored.
9 * - No password support; neither RFC1771 (which no-one seems to do anyway)
10 * nor RFC2385 (which requires a kernel patch on 2.4 kernels).
11 */
12
13 #include <stdlib.h>
14 #include <unistd.h>
15 #include <string.h>
16 #include <time.h>
17 #include <errno.h>
18 #include <sys/socket.h>
19 #include <netinet/in.h>
20 #include <arpa/inet.h>
21 #include <netdb.h>
22 #include <fcntl.h>
23
24 #include "l2tpns.h"
25 #include "bgp.h"
26 #include "util.h"
27
/*
 * Forward declarations for the file-local helpers defined further down.
 */

/* peer state bookkeeping */
static void bgp_clear(struct bgp_peer *peer);
static void bgp_set_retry(struct bgp_peer *peer);

/* route list handling */
static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx);
static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
    struct bgp_route_list *new);
static void bgp_free_routes(struct bgp_route_list *routes);

/* logging helper */
static char const *bgp_msg_type_str(uint8_t type);

/* connection set-up and socket I/O */
static int bgp_connect(struct bgp_peer *peer);
static int bgp_handle_connect(struct bgp_peer *peer);
static int bgp_write(struct bgp_peer *peer);
static int bgp_read(struct bgp_peer *peer);
static int bgp_handle_input(struct bgp_peer *peer);

/* message builders; each one queues the message and calls bgp_write() */
static int bgp_send_open(struct bgp_peer *peer);
static int bgp_send_keepalive(struct bgp_peer *peer);
static int bgp_send_update(struct bgp_peer *peer);
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode);
46
/* local AS number as stored by bgp_setup(); zero makes bgp_start() refuse */
static uint16_t our_as;

/* sorted list of the routes registered through bgp_add_route() */
static struct bgp_route_list *bgp_routes = NULL;

/* becomes 1 once bgp_start() has configured a peer */
int bgp_configured = 0;

/* peer table with BGP_NUM_PEERS entries; storage is not allocated in this file */
struct bgp_peer *bgp_peers = NULL;
52
53 /* prepare peer structure, globals */
54 int bgp_setup(int as)
55 {
56 int i;
57 struct bgp_peer *peer;
58
59 for (i = 0; i < BGP_NUM_PEERS; i++)
60 {
61 peer = &bgp_peers[i];
62 memset(peer, 0, sizeof(*peer));
63
64 peer->addr = INADDR_NONE;
65 peer->sock = -1;
66 peer->state = peer->next_state = Disabled;
67
68 if (!((peer->outbuf = malloc(sizeof(*peer->outbuf)))
69 && (peer->inbuf = malloc(sizeof(*peer->inbuf)))))
70 {
71 LOG(0, 0, 0, "Can't allocate buffers for bgp peer (%s)\n",
72 strerror(errno));
73
74 return 0;
75 }
76
77 peer->edata.type = FD_TYPE_BGP;
78 peer->edata.index = i;
79 peer->events = 0;
80 }
81
82 if (as < 1)
83 as = 0;
84
85 if ((our_as = as))
86 return 0;
87
88 bgp_routes = 0;
89 bgp_configured = 0; /* set by bgp_start */
90
91 return 1;
92 }
93
94 /* start connection with a peer */
95 int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
96 int hold, int enable)
97 {
98 struct hostent *h;
99 int ibgp;
100 int i;
101 struct bgp_path_attr a;
102 char path_attrs[64];
103 char *p = path_attrs;
104 in_addr_t ip;
105 uint32_t metric = htonl(BGP_METRIC);
106 uint32_t no_export = htonl(BGP_COMMUNITY_NO_EXPORT);
107
108 if (!our_as)
109 return 0;
110
111 if (peer->state != Disabled)
112 bgp_halt(peer);
113
114 snprintf(peer->name, sizeof(peer->name), "%s", name);
115
116 if (!(h = gethostbyname(name)) || h->h_addrtype != AF_INET)
117 {
118 LOG(0, 0, 0, "Can't get address for BGP peer %s (%s)\n",
119 name, h ? "no address" : hstrerror(h_errno));
120
121 return 0;
122 }
123
124 memcpy(&peer->addr, h->h_addr, sizeof(peer->addr));
125 peer->as = as > 0 ? as : our_as;
126 ibgp = peer->as == our_as;
127
128 /* set initial timer values */
129 peer->init_keepalive = keepalive == -1 ? BGP_KEEPALIVE_TIME : keepalive;
130 peer->init_hold = hold == -1 ? BGP_HOLD_TIME : hold;
131
132 if (peer->init_hold < 3)
133 peer->init_hold = 3;
134
135 if (peer->init_keepalive * 3 > peer->init_hold)
136 peer->init_keepalive = peer->init_hold / 3;
137
138 /* clear buffers, go to Idle state */
139 peer->next_state = Idle;
140 bgp_clear(peer);
141
142 /* set initial routing state */
143 peer->routing = enable;
144
145 /* all our routes use the same attributes, so prepare it in advance */
146 if (peer->path_attrs)
147 free(peer->path_attrs);
148
149 peer->path_attr_len = 0;
150
151 /* ORIGIN */
152 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
153 a.code = BGP_PATH_ATTR_CODE_ORIGIN;
154 a.data.s.len = 1;
155 a.data.s.value[0] = BGP_PATH_ATTR_CODE_ORIGIN_IGP;
156
157 #define ADD_ATTRIBUTE() do { \
158 i = BGP_PATH_ATTR_SIZE(a); \
159 memcpy(p, &a, i); \
160 p += i; \
161 peer->path_attr_len += i; } while (0)
162
163 ADD_ATTRIBUTE();
164
165 /* AS_PATH */
166 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
167 a.code = BGP_PATH_ATTR_CODE_AS_PATH;
168 if (ibgp)
169 {
170 /* empty path */
171 a.data.s.len = 0;
172 }
173 else
174 {
175 /* just our AS */
176 struct {
177 uint8_t type;
178 uint8_t len;
179 uint16_t value;
180 } as_path = {
181 BGP_PATH_ATTR_CODE_AS_PATH_AS_SEQUENCE,
182 1,
183 htons(our_as),
184 };
185
186 a.data.s.len = sizeof(as_path);
187 memcpy(&a.data.s.value, &as_path, sizeof(as_path));
188 }
189
190 ADD_ATTRIBUTE();
191
192 /* NEXT_HOP */
193 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
194 a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
195 ip = my_address; /* we're it */
196 a.data.s.len = sizeof(ip);
197 memcpy(a.data.s.value, &ip, sizeof(ip));
198
199 ADD_ATTRIBUTE();
200
201 /* MULTI_EXIT_DISC */
202 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
203 a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC;
204 a.data.s.len = sizeof(metric);
205 memcpy(a.data.s.value, &metric, sizeof(metric));
206
207 ADD_ATTRIBUTE();
208
209 if (ibgp)
210 {
211 uint32_t local_pref = htonl(BGP_LOCAL_PREF);
212
213 /* LOCAL_PREF */
214 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
215 a.code = BGP_PATH_ATTR_CODE_LOCAL_PREF;
216 a.data.s.len = sizeof(local_pref);
217 memcpy(a.data.s.value, &local_pref, sizeof(local_pref));
218
219 ADD_ATTRIBUTE();
220 }
221
222 /* COMMUNITIES */
223 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_TRANS;
224 a.code = BGP_PATH_ATTR_CODE_COMMUNITIES;
225 a.data.s.len = sizeof(no_export);
226 memcpy(a.data.s.value, &no_export, sizeof(no_export));
227
228 ADD_ATTRIBUTE();
229
230 if (!(peer->path_attrs = malloc(peer->path_attr_len)))
231 {
232 LOG(0, 0, 0, "Can't allocate path_attrs for %s (%s)\n",
233 name, strerror(errno));
234
235 return 0;
236 }
237
238 memcpy(peer->path_attrs, path_attrs, peer->path_attr_len);
239
240 LOG(4, 0, 0, "Initiating BGP connection to %s (routing %s)\n",
241 name, enable ? "enabled" : "suspended");
242
243 /* we have at least one peer configured */
244 bgp_configured = 1;
245
246 /* connect */
247 return bgp_connect(peer);
248 }
249
250 /* clear counters, timers, routes and buffers; close socket; move to
251 next_state, which may be Disabled or Idle */
252 static void bgp_clear(struct bgp_peer *peer)
253 {
254 if (peer->sock != -1)
255 {
256 close(peer->sock);
257 peer->sock = -1;
258 }
259
260 peer->keepalive_time = 0;
261 peer->expire_time = 0;
262
263 peer->keepalive = peer->init_keepalive;
264 peer->hold = peer->init_hold;
265
266 bgp_free_routes(peer->routes);
267 peer->routes = 0;
268
269 peer->outbuf->packet.header.len = 0;
270 peer->outbuf->done = 0;
271 peer->inbuf->packet.header.len = 0;
272 peer->inbuf->done = 0;
273
274 peer->cli_flag = 0;
275 peer->events = 0;
276
277 if (peer->state != peer->next_state)
278 {
279 peer->state = peer->next_state;
280 peer->state_time = time_now;
281
282 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
283 bgp_state_str(peer->next_state));
284 }
285 }
286
287 /* initiate a clean shutdown */
288 void bgp_stop(struct bgp_peer *peer)
289 {
290 LOG(4, 0, 0, "Terminating BGP connection to %s\n", peer->name);
291 bgp_send_notification(peer, BGP_ERR_CEASE, 0);
292 }
293
294 /* drop connection (if any) and set state to Disabled */
295 void bgp_halt(struct bgp_peer *peer)
296 {
297 LOG(4, 0, 0, "Aborting BGP connection to %s\n", peer->name);
298 peer->next_state = Disabled;
299 bgp_clear(peer);
300 }
301
302 /* drop connection (if any) and set to Idle for connection retry */
303 int bgp_restart(struct bgp_peer *peer)
304 {
305 peer->next_state = Idle;
306 bgp_clear(peer);
307
308 /* restart now */
309 peer->retry_time = time_now;
310 peer->retry_count = 0;
311
312 /* connect */
313 return bgp_connect(peer);
314 }
315
316 static void bgp_set_retry(struct bgp_peer *peer)
317 {
318 if (peer->retry_count++ < BGP_MAX_RETRY)
319 {
320 peer->retry_time = time_now + (BGP_RETRY_BACKOFF * peer->retry_count);
321 peer->next_state = Idle;
322 bgp_clear(peer);
323 }
324 else
325 bgp_halt(peer); /* give up */
326 }
327
328 /* convert ip/mask to CIDR notation */
329 static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx)
330 {
331 int i;
332 uint32_t b;
333
334 /* convert to prefix notation */
335 pfx->len = 32;
336 pfx->prefix = ip;
337
338 if (!mask) /* bogus */
339 mask = 0xffffffff;
340
341 for (i = 0; i < 32 && ((b = ntohl(1 << i)), !(mask & b)); i++)
342 {
343 pfx->len--;
344 pfx->prefix &= ~b;
345 }
346 }
347
348 /* insert route into list; sorted */
349 static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
350 struct bgp_route_list *new)
351 {
352 struct bgp_route_list *p = head;
353 struct bgp_route_list *e = 0;
354
355 while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0)
356 {
357 e = p;
358 p = p->next;
359 }
360
361 if (e)
362 {
363 new->next = e->next;
364 e->next = new;
365 }
366 else
367 {
368 new->next = head;
369 head = new;
370 }
371
372 return head;
373 }
374
375 /* add route to list for peers */
376 /*
377 * Note: this doesn't do route aggregation, nor drop routes if a less
378 * specific match already exists (partly because I'm lazy, but also so
379 * that if that route is later deleted we don't have to be concerned
380 * about adding back the more specific one).
381 */
382 int bgp_add_route(in_addr_t ip, in_addr_t mask)
383 {
384 struct bgp_route_list *r = bgp_routes;
385 struct bgp_route_list add;
386 int i;
387
388 bgp_cidr(ip, mask, &add.dest);
389 add.next = 0;
390
391 /* check for duplicate */
392 while (r)
393 {
394 i = memcmp(&r->dest, &add.dest, sizeof(r->dest));
395 if (!i)
396 return 1; /* already covered */
397
398 if (i > 0)
399 break;
400
401 r = r->next;
402 }
403
404 /* insert into route list; sorted */
405 if (!(r = malloc(sizeof(*r))))
406 {
407 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
408 fmtaddr(add.dest.prefix, 0), add.dest.len, strerror(errno));
409
410 return 0;
411 }
412
413 memcpy(r, &add, sizeof(*r));
414 bgp_routes = bgp_insert_route(bgp_routes, r);
415
416 /* flag established peers for update */
417 for (i = 0; i < BGP_NUM_PEERS; i++)
418 if (bgp_peers[i].state == Established)
419 bgp_peers[i].update_routes = 1;
420
421 LOG(4, 0, 0, "Registered BGP route %s/%d\n",
422 fmtaddr(add.dest.prefix, 0), add.dest.len);
423
424 return 1;
425 }
426
427 /* remove route from list for peers */
428 int bgp_del_route(in_addr_t ip, in_addr_t mask)
429 {
430 struct bgp_route_list *r = bgp_routes;
431 struct bgp_route_list *e = 0;
432 struct bgp_route_list del;
433 int i;
434
435 bgp_cidr(ip, mask, &del.dest);
436 del.next = 0;
437
438 /* find entry in routes list and remove */
439 while (r)
440 {
441 i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
442 if (!i)
443 {
444 if (e)
445 e->next = r->next;
446 else
447 bgp_routes = r->next;
448
449 free(r);
450 break;
451 }
452
453 e = r;
454
455 if (i > 0)
456 r = 0; /* stop */
457 else
458 r = r->next;
459 }
460
461 /* not found */
462 if (!r)
463 return 1;
464
465 /* flag established peers for update */
466 for (i = 0; i < BGP_NUM_PEERS; i++)
467 if (bgp_peers[i].state == Established)
468 bgp_peers[i].update_routes = 1;
469
470 LOG(4, 0, 0, "Removed BGP route %s/%d\n",
471 fmtaddr(del.dest.prefix, 0), del.dest.len);
472
473 return 1;
474 }
475
476 /* enable or disable routing */
477 void bgp_enable_routing(int enable)
478 {
479 int i;
480
481 for (i = 0; i < BGP_NUM_PEERS; i++)
482 {
483 bgp_peers[i].routing = enable;
484
485 /* flag established peers for update */
486 if (bgp_peers[i].state == Established)
487 bgp_peers[i].update_routes = 1;
488 }
489
490 LOG(4, 0, 0, "%s BGP routing\n", enable ? "Enabled" : "Suspended");
491 }
492
493 #ifdef HAVE_EPOLL
494 # include <sys/epoll.h>
495 #else
496 # include "fake_epoll.h"
497 #endif
498
499 /* return a bitmask of the events required to poll this peer's fd */
500 int bgp_set_poll()
501 {
502 int i;
503
504 if (!bgp_configured)
505 return 0;
506
507 for (i = 0; i < BGP_NUM_PEERS; i++)
508 {
509 struct bgp_peer *peer = &bgp_peers[i];
510 int events = 0;
511
512 if (peer->state == Disabled || peer->state == Idle)
513 continue;
514
515 if (peer->inbuf->done < BGP_MAX_PACKET_SIZE)
516 events |= EPOLLIN;
517
518 if (peer->state == Connect || /* connection in progress */
519 peer->update_routes || /* routing updates */
520 peer->outbuf->packet.header.len) /* pending output */
521 events |= EPOLLOUT;
522
523 if (peer->events != events)
524 {
525 struct epoll_event ev;
526
527 ev.events = peer->events = events;
528 ev.data.ptr = &peer->edata;
529 epoll_ctl(epollfd, EPOLL_CTL_MOD, peer->sock, &ev);
530 }
531 }
532
533 return 1;
534 }
535
536 /* process bgp events/timers */
537 int bgp_process(uint32_t events[])
538 {
539 int i;
540
541 if (!bgp_configured)
542 return 0;
543
544 for (i = 0; i < BGP_NUM_PEERS; i++)
545 {
546 struct bgp_peer *peer = &bgp_peers[i];
547
548 if (*peer->name && peer->cli_flag == BGP_CLI_RESTART)
549 {
550 bgp_restart(peer);
551 continue;
552 }
553
554 if (peer->state == Disabled)
555 continue;
556
557 if (peer->cli_flag)
558 {
559 switch (peer->cli_flag)
560 {
561 case BGP_CLI_SUSPEND:
562 if (peer->routing)
563 {
564 peer->routing = 0;
565 if (peer->state == Established)
566 peer->update_routes = 1;
567 }
568
569 break;
570
571 case BGP_CLI_ENABLE:
572 if (!peer->routing)
573 {
574 peer->routing = 1;
575 if (peer->state == Established)
576 peer->update_routes = 1;
577 }
578
579 break;
580 }
581
582 peer->cli_flag = 0;
583 }
584
585 /* handle empty/fill of buffers */
586 if (events[i] & EPOLLOUT)
587 {
588 int r = 1;
589 if (peer->state == Connect)
590 r = bgp_handle_connect(peer);
591 else if (peer->outbuf->packet.header.len)
592 r = bgp_write(peer);
593
594 if (!r)
595 continue;
596 }
597
598 if (events[i] & (EPOLLIN|EPOLLHUP))
599 {
600 if (!bgp_read(peer))
601 continue;
602 }
603
604 /* process input buffer contents */
605 while (peer->inbuf->done >= sizeof(peer->inbuf->packet.header)
606 && !peer->outbuf->packet.header.len) /* may need to queue a response */
607 {
608 if (bgp_handle_input(peer) < 0)
609 continue;
610 }
611
612 /* process pending updates */
613 if (peer->update_routes
614 && !peer->outbuf->packet.header.len) /* ditto */
615 {
616 if (!bgp_send_update(peer))
617 continue;
618 }
619
620 /* process timers */
621 if (peer->state == Established)
622 {
623 if (time_now > peer->expire_time)
624 {
625 LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
626 peer->name, peer->hold);
627
628 bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
629 continue;
630 }
631
632 if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
633 bgp_send_keepalive(peer);
634 }
635 else if (peer->state == Idle)
636 {
637 if (time_now > peer->retry_time)
638 bgp_connect(peer);
639 }
640 else if (time_now > peer->state_time + BGP_STATE_TIME)
641 {
642 LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
643 bgp_state_str(peer->state), peer->name);
644
645 bgp_restart(peer);
646 }
647 }
648
649 return 1;
650 }
651
652 static void bgp_free_routes(struct bgp_route_list *routes)
653 {
654 struct bgp_route_list *tmp;
655
656 while ((tmp = routes))
657 {
658 routes = tmp->next;
659 free(tmp);
660 }
661 }
662
663 char const *bgp_state_str(enum bgp_state state)
664 {
665 switch (state)
666 {
667 case Disabled: return "Disabled";
668 case Idle: return "Idle";
669 case Connect: return "Connect";
670 case Active: return "Active";
671 case OpenSent: return "OpenSent";
672 case OpenConfirm: return "OpenConfirm";
673 case Established: return "Established";
674 }
675
676 return "?";
677 }
678
679 static char const *bgp_msg_type_str(uint8_t type)
680 {
681 switch (type)
682 {
683 case BGP_MSG_OPEN: return "OPEN";
684 case BGP_MSG_UPDATE: return "UPDATE";
685 case BGP_MSG_NOTIFICATION: return "NOTIFICATION";
686 case BGP_MSG_KEEPALIVE: return "KEEPALIVE";
687 }
688
689 return "?";
690 }
691
692 /* attempt to connect to peer */
693 static int bgp_connect(struct bgp_peer *peer)
694 {
695 static int bgp_port = 0;
696 struct sockaddr_in addr;
697 struct epoll_event ev;
698
699 if (!bgp_port)
700 {
701 struct servent *serv;
702 if (!(serv = getservbyname("bgp", "tcp")))
703 {
704 LOG(0, 0, 0, "Can't get bgp service (%s)\n", strerror(errno));
705 return 0;
706 }
707
708 bgp_port = serv->s_port;
709 }
710
711 if ((peer->sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
712 {
713 LOG(0, 0, 0, "Can't create a socket for BGP peer %s (%s)\n",
714 peer->name, strerror(errno));
715
716 peer->state = peer->next_state = Disabled;
717 return 0;
718 }
719
720 /* add to poll set */
721 ev.events = peer->events = EPOLLOUT;
722 ev.data.ptr = &peer->edata;
723 epoll_ctl(epollfd, EPOLL_CTL_ADD, peer->sock, &ev);
724
725 /* set to non-blocking */
726 fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
727
728 /* try connect */
729 memset(&addr, 0, sizeof(addr));
730 addr.sin_family = AF_INET;
731 addr.sin_port = bgp_port;
732 addr.sin_addr.s_addr = peer->addr;
733
734 while (connect(peer->sock, (struct sockaddr *) &addr, sizeof(addr)) == -1)
735 {
736 if (errno == EINTR) /* SIGALARM handler */
737 continue;
738
739 if (errno != EINPROGRESS)
740 {
741 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n",
742 inet_ntoa(addr.sin_addr), strerror(errno));
743
744 bgp_set_retry(peer);
745 return 0;
746 }
747
748 peer->state = Connect;
749 peer->state_time = time_now;
750
751 LOG(4, 0, 0, "BGP peer %s: state Connect\n", peer->name);
752 return 1;
753 }
754
755 peer->state = Active;
756 peer->state_time = time_now;
757 peer->retry_time = peer->retry_count = 0;
758
759 LOG(4, 0, 0, "BGP peer %s: state Active\n", inet_ntoa(addr.sin_addr));
760
761 return bgp_send_open(peer);
762 }
763
764 /* complete partial connection (state = Connect) */
765 static int bgp_handle_connect(struct bgp_peer *peer)
766 {
767 int err = 0;
768 socklen_t len = sizeof(int);
769 getsockopt(peer->sock, SOL_SOCKET, SO_ERROR, &err, &len);
770 if (err)
771 {
772 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n", peer->name,
773 strerror(err));
774
775 bgp_set_retry(peer);
776 return 0;
777 }
778
779 peer->state = Active;
780 peer->state_time = time_now;
781
782 LOG(4, 0, 0, "BGP peer %s: state Active\n", peer->name);
783
784 return bgp_send_open(peer);
785 }
786
787 /* initiate a write */
788 static int bgp_write(struct bgp_peer *peer)
789 {
790 int len = htons(peer->outbuf->packet.header.len);
791 int r;
792
793 while ((r = write(peer->sock, &peer->outbuf->packet + peer->outbuf->done,
794 len - peer->outbuf->done)) == -1)
795 {
796 if (errno == EINTR)
797 continue;
798
799 if (errno == EAGAIN)
800 return 1;
801
802 if (errno == EPIPE)
803 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
804 else
805 LOG(1, 0, 0, "Can't write to BGP peer %s (%s)\n", peer->name,
806 strerror(errno));
807
808 bgp_set_retry(peer);
809 return 0;
810 }
811
812 if (r < len)
813 {
814 peer->outbuf->done += r;
815 return 1;
816 }
817
818 LOG(4, 0, 0, "Sent %s to BGP peer %s\n",
819 bgp_msg_type_str(peer->outbuf->packet.header.type), peer->name);
820
821 peer->outbuf->packet.header.len = 0;
822 peer->outbuf->done = 0;
823
824 if (peer->state == Established)
825 peer->keepalive_time = time_now + peer->keepalive;
826
827 if (peer->state != peer->next_state)
828 {
829 if (peer->next_state == Disabled || peer->next_state == Idle)
830 {
831 bgp_clear(peer);
832 return 0;
833 }
834
835 peer->state = peer->next_state;
836 peer->state_time = time_now;
837
838 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
839 bgp_state_str(peer->state));
840 }
841
842 return 1;
843 }
844
845 /* initiate a read */
846 static int bgp_read(struct bgp_peer *peer)
847 {
848 int r;
849
850 while ((r = read(peer->sock, &peer->inbuf->packet + peer->inbuf->done,
851 BGP_MAX_PACKET_SIZE - peer->inbuf->done)) < 1)
852 {
853 if (!r)
854 {
855 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
856 }
857 else
858 {
859 if (errno == EINTR)
860 continue;
861
862 if (errno == EAGAIN)
863 return 1;
864
865 LOG(1, 0, 0, "Can't read from BGP peer %s (%s)\n", peer->name,
866 strerror(errno));
867 }
868
869 bgp_set_retry(peer);
870 return 0;
871 }
872
873 peer->inbuf->done += r;
874 return 1;
875 }
876
877 /* process buffered packets */
878 static int bgp_handle_input(struct bgp_peer *peer)
879 {
880 struct bgp_packet *p = &peer->inbuf->packet;
881 int len = ntohs(p->header.len);
882
883 if (len > BGP_MAX_PACKET_SIZE)
884 {
885 LOG(1, 0, 0, "Bad header length from BGP %s\n", peer->name);
886 bgp_send_notification(peer, BGP_ERR_HEADER, BGP_ERR_HDR_BAD_LEN);
887 return 0;
888 }
889
890 if (peer->inbuf->done < len)
891 return 0;
892
893 LOG(4, 0, 0, "Received %s from BGP peer %s\n",
894 bgp_msg_type_str(p->header.type), peer->name);
895
896 switch (p->header.type)
897 {
898 case BGP_MSG_OPEN:
899 {
900 struct bgp_data_open data;
901 int hold;
902 int i;
903
904 for (i = 0; i < sizeof(p->header.marker); i++)
905 {
906 if ((unsigned char) p->header.marker[i] != 0xff)
907 {
908 LOG(1, 0, 0, "Invalid marker from BGP peer %s\n",
909 peer->name);
910
911 bgp_send_notification(peer, BGP_ERR_HEADER,
912 BGP_ERR_HDR_NOT_SYNC);
913
914 return 0;
915 }
916 }
917
918 if (peer->state != OpenSent)
919 {
920 LOG(1, 0, 0, "OPEN from BGP peer %s in %s state\n",
921 peer->name, bgp_state_str(peer->state));
922
923 bgp_send_notification(peer, BGP_ERR_FSM, 0);
924 return 0;
925 }
926
927 memcpy(&data, p->data, len - sizeof(p->header));
928
929 if (data.version != BGP_VERSION)
930 {
931 LOG(1, 0, 0, "Bad version (%d) sent by BGP peer %s\n",
932 (int) data.version, peer->name);
933
934 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_VERSION);
935 return 0;
936 }
937
938 if (ntohs(data.as) != peer->as)
939 {
940 LOG(1, 0, 0, "Bad AS sent by BGP peer %s (got %d, "
941 "expected %d)\n", peer->name, (int) htons(data.as),
942 (int) peer->as);
943
944 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_BAD_AS);
945 return 0;
946 }
947
948 if ((hold = ntohs(data.hold_time)) < 3)
949 {
950 LOG(1, 0, 0, "Bad hold time (%d) from BGP peer %s\n",
951 hold, peer->name);
952
953 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_HOLD_TIME);
954 return 0;
955 }
956
957 /* pick lowest hold time */
958 if (hold < peer->hold)
959 peer->hold = hold;
960
961 /* adjust our keepalive based on negotiated hold value */
962 if (peer->keepalive * 3 > peer->hold)
963 peer->keepalive = peer->hold / 3;
964
965 /* next transition requires an exchange of keepalives */
966 bgp_send_keepalive(peer);
967
968 /* FIXME: may need to check for optional params */
969 }
970
971 break;
972
973 case BGP_MSG_KEEPALIVE:
974 if (peer->state == OpenConfirm)
975 {
976 peer->state = peer->next_state = Established;
977 peer->state_time = time_now;
978 peer->keepalive_time = time_now + peer->keepalive;
979 peer->update_routes = 1;
980 peer->retry_count = 0;
981 peer->retry_time = 0;
982
983 LOG(4, 0, 0, "BGP peer %s: state Established\n", peer->name);
984 }
985
986 break;
987
988 case BGP_MSG_NOTIFICATION:
989 if (len > sizeof(p->header))
990 {
991 struct bgp_data_notification *notification =
992 (struct bgp_data_notification *) p->data;
993
994 if (notification->error_code == BGP_ERR_CEASE)
995 {
996 LOG(4, 0, 0, "BGP peer %s sent CEASE\n", peer->name);
997 bgp_restart(peer);
998 return 0;
999 }
1000
1001 /* FIXME: should handle more notifications */
1002 LOG(4, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n",
1003 peer->name, (int) notification->error_code);
1004 }
1005
1006 break;
1007 }
1008
1009 /* reset timer */
1010 peer->expire_time = time_now + peer->hold;
1011
1012 /* see if there's another message in the same packet/buffer */
1013 if (peer->inbuf->done > len)
1014 {
1015 peer->inbuf->done -= len;
1016 memmove(p, (char *) p + len, peer->inbuf->done);
1017 }
1018 else
1019 {
1020 peer->inbuf->packet.header.len = 0;
1021 peer->inbuf->done = 0;
1022 }
1023
1024 return peer->inbuf->done;
1025 }
1026
1027 /* send/buffer OPEN message */
1028 static int bgp_send_open(struct bgp_peer *peer)
1029 {
1030 struct bgp_data_open data;
1031 uint16_t len = sizeof(peer->outbuf->packet.header);
1032
1033 memset(peer->outbuf->packet.header.marker, 0xff,
1034 sizeof(peer->outbuf->packet.header.marker));
1035
1036 peer->outbuf->packet.header.type = BGP_MSG_OPEN;
1037
1038 data.version = BGP_VERSION;
1039 data.as = htons(our_as);
1040 data.hold_time = htons(peer->hold);
1041 data.identifier = my_address;
1042 data.opt_len = 0;
1043
1044 memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE);
1045 len += BGP_DATA_OPEN_SIZE;
1046
1047 peer->outbuf->packet.header.len = htons(len);
1048 peer->outbuf->done = 0;
1049 peer->next_state = OpenSent;
1050
1051 return bgp_write(peer);
1052 }
1053
1054 /* send/buffer KEEPALIVE message */
1055 static int bgp_send_keepalive(struct bgp_peer *peer)
1056 {
1057 memset(peer->outbuf->packet.header.marker, 0xff,
1058 sizeof(peer->outbuf->packet.header.marker));
1059
1060 peer->outbuf->packet.header.type = BGP_MSG_KEEPALIVE;
1061 peer->outbuf->packet.header.len =
1062 htons(sizeof(peer->outbuf->packet.header));
1063
1064 peer->outbuf->done = 0;
1065 peer->next_state = (peer->state == OpenSent) ? OpenConfirm : peer->state;
1066
1067 return bgp_write(peer);
1068 }
1069
1070 /* send/buffer UPDATE message */
1071 static int bgp_send_update(struct bgp_peer *peer)
1072 {
1073 uint16_t unf_len = 0;
1074 uint16_t attr_len;
1075 uint16_t len = sizeof(peer->outbuf->packet.header);
1076 struct bgp_route_list *have = peer->routes;
1077 struct bgp_route_list *want = peer->routing ? bgp_routes : 0;
1078 struct bgp_route_list *e = 0;
1079 struct bgp_route_list *add = 0;
1080 int s;
1081
1082 char *data = (char *) &peer->outbuf->packet.data;
1083
1084 /* need leave room for attr_len, bgp_path_attrs and one prefix */
1085 char *max = (char *) &peer->outbuf->packet.data
1086 + sizeof(peer->outbuf->packet.data)
1087 - sizeof(attr_len) - peer->path_attr_len - sizeof(struct bgp_ip_prefix);
1088
1089 /* skip over unf_len */
1090 data += sizeof(unf_len);
1091 len += sizeof(unf_len);
1092
1093 memset(peer->outbuf->packet.header.marker, 0xff,
1094 sizeof(peer->outbuf->packet.header.marker));
1095
1096 peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
1097
1098 peer->update_routes = 0; /* tentatively clear */
1099
1100 /* find differences */
1101 while ((have || want) && data < (max - sizeof(struct bgp_ip_prefix)))
1102 {
1103 if (have)
1104 s = want
1105 ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
1106 : -1;
1107 else
1108 s = 1;
1109
1110 if (s < 0) /* found one to delete */
1111 {
1112 struct bgp_route_list *tmp = have;
1113 have = have->next;
1114
1115 s = BGP_IP_PREFIX_SIZE(tmp->dest);
1116 memcpy(data, &tmp->dest, s);
1117 data += s;
1118 unf_len += s;
1119 len += s;
1120
1121 LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
1122 fmtaddr(tmp->dest.prefix, 0), tmp->dest.len, peer->name);
1123
1124 free(tmp);
1125
1126 if (e)
1127 e->next = have;
1128 else
1129 peer->routes = have;
1130 }
1131 else
1132 {
1133 if (!s) /* same */
1134 {
1135 e = have; /* stash the last found to relink above */
1136 have = have->next;
1137 want = want->next;
1138 }
1139 else if (s > 0) /* addition reqd. */
1140 {
1141 if (add)
1142 {
1143 peer->update_routes = 1; /* only one add per packet */
1144 if (!have)
1145 break;
1146 }
1147 else
1148 add = want;
1149
1150 if (want)
1151 want = want->next;
1152 }
1153 }
1154 }
1155
1156 if (have || want)
1157 peer->update_routes = 1; /* more to do */
1158
1159 /* anything changed? */
1160 if (!(unf_len || add))
1161 return 1;
1162
1163 /* go back and insert unf_len */
1164 unf_len = htons(unf_len);
1165 memcpy(&peer->outbuf->packet.data, &unf_len, sizeof(unf_len));
1166
1167 if (add)
1168 {
1169 if (!(e = malloc(sizeof(*e))))
1170 {
1171 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
1172 fmtaddr(add->dest.prefix, 0), add->dest.len, strerror(errno));
1173
1174 return 0;
1175 }
1176
1177 memcpy(e, add, sizeof(*e));
1178 e->next = 0;
1179 peer->routes = bgp_insert_route(peer->routes, e);
1180
1181 attr_len = htons(peer->path_attr_len);
1182 memcpy(data, &attr_len, sizeof(attr_len));
1183 data += sizeof(attr_len);
1184 len += sizeof(attr_len);
1185
1186 memcpy(data, peer->path_attrs, peer->path_attr_len);
1187 data += peer->path_attr_len;
1188 len += peer->path_attr_len;
1189
1190 s = BGP_IP_PREFIX_SIZE(add->dest);
1191 memcpy(data, &add->dest, s);
1192 data += s;
1193 len += s;
1194
1195 LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
1196 fmtaddr(add->dest.prefix, 0), add->dest.len, peer->name);
1197 }
1198 else
1199 {
1200 attr_len = 0;
1201 memcpy(data, &attr_len, sizeof(attr_len));
1202 data += sizeof(attr_len);
1203 len += sizeof(attr_len);
1204 }
1205
1206 peer->outbuf->packet.header.len = htons(len);
1207 peer->outbuf->done = 0;
1208
1209 return bgp_write(peer);
1210 }
1211
1212 /* send/buffer NOTIFICATION message */
1213 static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
1214 uint8_t subcode)
1215 {
1216 struct bgp_data_notification data;
1217 uint16_t len = 0;
1218
1219 data.error_code = code;
1220 len += sizeof(data.error_code);
1221
1222 data.error_subcode = subcode;
1223 len += sizeof(data.error_code);
1224
1225 memset(peer->outbuf->packet.header.marker, 0xff,
1226 sizeof(peer->outbuf->packet.header.marker));
1227
1228 peer->outbuf->packet.header.type = BGP_MSG_NOTIFICATION;
1229 peer->outbuf->packet.header.len =
1230 htons(sizeof(peer->outbuf->packet.header) + len);
1231
1232 memcpy(peer->outbuf->packet.data, &data, len);
1233
1234 peer->outbuf->done = 0;
1235 peer->next_state = code == BGP_ERR_CEASE ? Disabled : Idle;
1236
1237 /* we're dying; ignore any pending input */
1238 peer->inbuf->packet.header.len = 0;
1239 peer->inbuf->done = 0;
1240
1241 return bgp_write(peer);
1242 }