improved load balancing algorithm.
[l2tpns.git] / bgp.c
1 /*
2 * BGPv4
3 * Used to advertise routes for upstream (l2tp port, rather than gratuitous
4 * arp) and downstream--allowing routers to load-balance both.
5 *
6 * Implementation limitations:
7 * - We never listen for incoming connections (session always initiated by us).
8 * - Any routes advertised by the peer are accepted, but ignored.
9 * - No password support; neither RFC1771 (which no-one seems to do anyway)
10 * nor RFC2385 (which requires a kernel patch on 2.4 kernels).
11 */
12
13 #include <stdlib.h>
14 #include <unistd.h>
15 #include <string.h>
16 #include <time.h>
17 #include <errno.h>
18 #include <sys/socket.h>
19 #include <netinet/in.h>
20 #include <arpa/inet.h>
21 #include <netdb.h>
22 #include <fcntl.h>
23 #include <linux/rtnetlink.h>
24
25 #include "l2tpns.h"
26 #include "bgp.h"
27 #include "util.h"
28
/* peer state handling */
static void bgp_clear(struct bgp_peer *peer);
static void bgp_set_retry(struct bgp_peer *peer);
static void bgp_process_timers(struct bgp_peer *peer);

/* route list maintenance */
static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
    struct bgp_route_list *new);
static struct bgp_route6_list *bgp_insert_route6(struct bgp_route6_list *head,
    struct bgp_route6_list *new);
static void bgp_free_routes(struct bgp_route_list *routes);
static void bgp_free_routes6(struct bgp_route6_list *routes);

/* connection set-up and socket I/O */
static int bgp_connect(struct bgp_peer *peer);
static int bgp_handle_connect(struct bgp_peer *peer);
static int bgp_write(struct bgp_peer *peer);
static int bgp_read(struct bgp_peer *peer);
static int bgp_handle_input(struct bgp_peer *peer);

/* message construction */
static char const *bgp_msg_type_str(uint8_t type);
static int bgp_send_open(struct bgp_peer *peer);
static int bgp_send_keepalive(struct bgp_peer *peer);
static int bgp_send_update(struct bgp_peer *peer);
static int bgp_send_update6(struct bgp_peer *peer);
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode);

/* our AS number as stored by bgp_setup(); 0 makes bgp_start() refuse to run */
static uint16_t our_as;

/* sorted lists of the IPv4 and IPv6 routes registered for advertisement */
static struct bgp_route_list *bgp_routes = NULL;
static struct bgp_route6_list *bgp_routes6 = NULL;

/* set to 1 by bgp_start() once at least one peer has been configured */
int bgp_configured = 0;

/* table of BGP_NUM_PEERS peer slots */
struct bgp_peer *bgp_peers = NULL;
58
59 /* prepare peer structure, globals */
60 int bgp_setup(int as)
61 {
62 int i;
63 struct bgp_peer *peer;
64
65 for (i = 0; i < BGP_NUM_PEERS; i++)
66 {
67 peer = &bgp_peers[i];
68 memset(peer, 0, sizeof(*peer));
69
70 peer->addr = INADDR_NONE;
71 peer->sock = -1;
72 peer->state = peer->next_state = Disabled;
73
74 if (!((peer->outbuf = malloc(sizeof(*peer->outbuf)))
75 && (peer->inbuf = malloc(sizeof(*peer->inbuf)))))
76 {
77 LOG(0, 0, 0, "Can't allocate buffers for bgp peer (%s)\n",
78 strerror(errno));
79
80 return 0;
81 }
82
83 peer->edata.type = FD_TYPE_BGP;
84 peer->edata.index = i;
85 peer->events = 0;
86 }
87
88 if (as < 1)
89 as = 0;
90
91 if ((our_as = as))
92 return 0;
93
94 bgp_routes = 0;
95 bgp_routes6 = 0;
96 bgp_configured = 0; /* set by bgp_start */
97
98 return 1;
99 }
100
101 /* start connection with a peer */
102 int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
103 int hold, struct in_addr update_source, int enable)
104 {
105 struct hostent *h;
106 int ibgp;
107 int i;
108 struct bgp_path_attr a;
109 char path_attrs[64];
110 char *p = path_attrs;
111 in_addr_t ip;
112 uint32_t metric = htonl(BGP_METRIC);
113 uint32_t no_export = htonl(BGP_COMMUNITY_NO_EXPORT);
114
115 if (!our_as)
116 return 0;
117
118 if (peer->state != Disabled)
119 bgp_halt(peer);
120
121 snprintf(peer->name, sizeof(peer->name), "%s", name);
122
123 if (!(h = gethostbyname(name)) || h->h_addrtype != AF_INET)
124 {
125 LOG(0, 0, 0, "Can't get address for BGP peer %s (%s)\n",
126 name, h ? "no address" : hstrerror(h_errno));
127
128 return 0;
129 }
130
131 memcpy(&peer->addr, h->h_addr, sizeof(peer->addr));
132 peer->source_addr = update_source.s_addr;
133 peer->as = as > 0 ? as : our_as;
134 ibgp = peer->as == our_as;
135
136 /* set initial timer values */
137 peer->init_keepalive = keepalive == -1 ? BGP_KEEPALIVE_TIME : keepalive;
138 peer->init_hold = hold == -1 ? BGP_HOLD_TIME : hold;
139
140 if (peer->init_hold < 3)
141 peer->init_hold = 3;
142
143 if (peer->init_keepalive * 3 > peer->init_hold)
144 peer->init_keepalive = peer->init_hold / 3;
145
146 /* clear buffers, go to Idle state */
147 peer->next_state = Idle;
148 bgp_clear(peer);
149
150 /* set initial routing state */
151 peer->routing = enable;
152
153 /* all our routes use the same attributes, so prepare it in advance */
154 if (peer->path_attrs)
155 free(peer->path_attrs);
156
157 peer->path_attr_len = 0;
158
159 /* ORIGIN */
160 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
161 a.code = BGP_PATH_ATTR_CODE_ORIGIN;
162 a.data.s.len = 1;
163 a.data.s.value[0] = BGP_PATH_ATTR_CODE_ORIGIN_IGP;
164
165 #define ADD_ATTRIBUTE() do { \
166 i = BGP_PATH_ATTR_SIZE(a); \
167 memcpy(p, &a, i); \
168 p += i; \
169 peer->path_attr_len += i; } while (0)
170
171 ADD_ATTRIBUTE();
172
173 /* AS_PATH */
174 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
175 a.code = BGP_PATH_ATTR_CODE_AS_PATH;
176 if (ibgp)
177 {
178 /* empty path */
179 a.data.s.len = 0;
180 }
181 else
182 {
183 /* just our AS */
184 struct {
185 uint8_t type;
186 uint8_t len;
187 uint16_t value;
188 } as_path = {
189 BGP_PATH_ATTR_CODE_AS_PATH_AS_SEQUENCE,
190 1,
191 htons(our_as),
192 };
193
194 a.data.s.len = sizeof(as_path);
195 memcpy(&a.data.s.value, &as_path, sizeof(as_path));
196 }
197
198 ADD_ATTRIBUTE();
199
200 /* MULTI_EXIT_DISC */
201 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
202 a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC;
203 a.data.s.len = sizeof(metric);
204 memcpy(a.data.s.value, &metric, sizeof(metric));
205
206 ADD_ATTRIBUTE();
207
208 if (ibgp)
209 {
210 uint32_t local_pref = htonl(BGP_LOCAL_PREF);
211
212 /* LOCAL_PREF */
213 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
214 a.code = BGP_PATH_ATTR_CODE_LOCAL_PREF;
215 a.data.s.len = sizeof(local_pref);
216 memcpy(a.data.s.value, &local_pref, sizeof(local_pref));
217
218 ADD_ATTRIBUTE();
219 }
220
221 /* COMMUNITIES */
222 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_TRANS;
223 a.code = BGP_PATH_ATTR_CODE_COMMUNITIES;
224 a.data.s.len = sizeof(no_export);
225 memcpy(a.data.s.value, &no_export, sizeof(no_export));
226
227 ADD_ATTRIBUTE();
228
229 /* remember the len before adding NEXT_HOP */
230 peer->path_attr_len_without_nexthop = peer->path_attr_len;
231
232 /* NEXT_HOP */
233 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
234 a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
235 if (config->nexthop_address)
236 {
237 ip = config->nexthop_address;
238 }
239 else
240 {
241 ip = my_address; /* we're it */
242 }
243 a.data.s.len = sizeof(ip);
244 memcpy(a.data.s.value, &ip, sizeof(ip));
245
246 ADD_ATTRIBUTE();
247
248 if (!(peer->path_attrs = malloc(peer->path_attr_len)))
249 {
250 LOG(0, 0, 0, "Can't allocate path_attrs for %s (%s)\n",
251 name, strerror(errno));
252
253 return 0;
254 }
255
256 memcpy(peer->path_attrs, path_attrs, peer->path_attr_len);
257
258 /* multiprotocol attributes initialization */
259 if (config->ipv6_prefix.s6_addr[0])
260 {
261 struct bgp_attr_mp_reach_nlri_partial mp_reach_nlri_partial;
262 struct bgp_attr_mp_unreach_nlri_partial mp_unreach_nlri_partial;
263
264 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
265 a.code = BGP_PATH_ATTR_CODE_MP_REACH_NLRI;
266 a.data.s.len = 0; /* will be set on UPDATE */
267
268 mp_reach_nlri_partial.afi = htons(BGP_MP_AFI_IPv6);
269 mp_reach_nlri_partial.safi = BGP_MP_SAFI_UNICAST;
270 mp_reach_nlri_partial.reserved = 0;
271 mp_reach_nlri_partial.next_hop_len = 16;
272
273 /* use the defined nexthop6, or our address in ipv6_prefix */
274 if (config->nexthop6_address.s6_addr[0])
275 memcpy(&mp_reach_nlri_partial.next_hop,
276 &config->nexthop6_address.s6_addr, 16);
277 else
278 {
279 /* our address is ipv6prefix::1 */
280 memcpy(&mp_reach_nlri_partial.next_hop,
281 &config->ipv6_prefix.s6_addr, 16);
282 mp_reach_nlri_partial.next_hop[15] = 1;
283 }
284
285 memcpy(&a.data.s.value, &mp_reach_nlri_partial,
286 sizeof(struct bgp_attr_mp_reach_nlri_partial));
287 memcpy(&peer->mp_reach_nlri_partial, &a,
288 BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE);
289
290 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_EXTLEN;
291 a.code = BGP_PATH_ATTR_CODE_MP_UNREACH_NLRI;
292 a.data.e.len = 0; /* will be set on UPDATE */
293
294 mp_unreach_nlri_partial.afi = htons(BGP_MP_AFI_IPv6);
295 mp_unreach_nlri_partial.safi = BGP_MP_SAFI_UNICAST;
296
297 memcpy(&a.data.e.value, &mp_unreach_nlri_partial,
298 sizeof(struct bgp_attr_mp_unreach_nlri_partial));
299 memcpy(&peer->mp_unreach_nlri_partial, &a,
300 BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE);
301 }
302
303 peer->mp_handling = HandlingUnknown;
304
305 LOG(4, 0, 0, "Initiating BGP connection to %s (routing %s)\n",
306 name, enable ? "enabled" : "suspended");
307
308 /* we have at least one peer configured */
309 bgp_configured = 1;
310
311 /* connect */
312 return bgp_connect(peer);
313 }
314
315 /* clear counters, timers, routes and buffers; close socket; move to
316 next_state, which may be Disabled or Idle */
317 static void bgp_clear(struct bgp_peer *peer)
318 {
319 if (peer->sock != -1)
320 {
321 close(peer->sock);
322 peer->sock = -1;
323 }
324
325 peer->keepalive_time = 0;
326 peer->expire_time = 0;
327
328 peer->keepalive = peer->init_keepalive;
329 peer->hold = peer->init_hold;
330
331 bgp_free_routes(peer->routes);
332 peer->routes = 0;
333 bgp_free_routes6(peer->routes6);
334 peer->routes6 = 0;
335
336 peer->outbuf->packet.header.len = 0;
337 peer->outbuf->done = 0;
338 peer->inbuf->packet.header.len = 0;
339 peer->inbuf->done = 0;
340
341 peer->cli_flag = 0;
342 peer->events = 0;
343
344 if (peer->state != peer->next_state)
345 {
346 peer->state = peer->next_state;
347 peer->state_time = time_now;
348
349 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
350 bgp_state_str(peer->next_state));
351 }
352 }
353
354 /* initiate a clean shutdown */
355 void bgp_stop(struct bgp_peer *peer)
356 {
357 LOG(4, 0, 0, "Terminating BGP connection to %s\n", peer->name);
358 bgp_send_notification(peer, BGP_ERR_CEASE, 0);
359 }
360
361 /* drop connection (if any) and set state to Disabled */
362 void bgp_halt(struct bgp_peer *peer)
363 {
364 LOG(4, 0, 0, "Aborting BGP connection to %s\n", peer->name);
365 peer->next_state = Disabled;
366 bgp_clear(peer);
367 }
368
369 /* drop connection (if any) and set to Idle for connection retry */
370 int bgp_restart(struct bgp_peer *peer)
371 {
372 peer->next_state = Idle;
373 bgp_clear(peer);
374
375 /* restart now */
376 peer->retry_time = time_now;
377 peer->retry_count = 0;
378
379 /* connect */
380 return bgp_connect(peer);
381 }
382
383 static void bgp_set_retry(struct bgp_peer *peer)
384 {
385 if (peer->retry_count++ < BGP_MAX_RETRY)
386 {
387 peer->retry_time = time_now + (BGP_RETRY_BACKOFF * peer->retry_count);
388 peer->next_state = Idle;
389 bgp_clear(peer);
390 }
391 else
392 bgp_halt(peer); /* give up */
393 }
394
395 /* insert route into list; sorted */
396 static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
397 struct bgp_route_list *new)
398 {
399 struct bgp_route_list *p = head;
400 struct bgp_route_list *e = 0;
401
402 while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0)
403 {
404 e = p;
405 p = p->next;
406 }
407
408 if (e)
409 {
410 new->next = e->next;
411 e->next = new;
412 }
413 else
414 {
415 new->next = head;
416 head = new;
417 }
418
419 return head;
420 }
421
422 /* insert route6 into list; sorted */
423 static struct bgp_route6_list *bgp_insert_route6(struct bgp_route6_list *head,
424 struct bgp_route6_list *new)
425 {
426 struct bgp_route6_list *p = head;
427 struct bgp_route6_list *e = 0;
428
429 while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0)
430 {
431 e = p;
432 p = p->next;
433 }
434
435 if (e)
436 {
437 new->next = e->next;
438 e->next = new;
439 }
440 else
441 {
442 new->next = head;
443 head = new;
444 }
445
446 return head;
447 }
448
449 /* add route to list for peers */
450 /*
451 * Note: this doesn't do route aggregation, nor drop routes if a less
452 * specific match already exists (partly because I'm lazy, but also so
453 * that if that route is later deleted we don't have to be concerned
454 * about adding back the more specific one).
455 */
456 int bgp_add_route(in_addr_t ip, int prefixlen)
457 {
458 struct bgp_route_list *r = bgp_routes;
459 struct bgp_route_list add;
460 int i;
461
462 add.dest.prefix = ip;
463 add.dest.len = prefixlen;
464 add.next = 0;
465
466 /* check for duplicate */
467 while (r)
468 {
469 i = memcmp(&r->dest, &add.dest, sizeof(r->dest));
470 if (!i)
471 return 1; /* already covered */
472
473 if (i > 0)
474 break;
475
476 r = r->next;
477 }
478
479 /* insert into route list; sorted */
480 if (!(r = malloc(sizeof(*r))))
481 {
482 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
483 fmtaddr(add.dest.prefix, 0), add.dest.len, strerror(errno));
484
485 return 0;
486 }
487
488 memcpy(r, &add, sizeof(*r));
489 bgp_routes = bgp_insert_route(bgp_routes, r);
490
491 /* flag established peers for update */
492 for (i = 0; i < BGP_NUM_PEERS; i++)
493 if (bgp_peers[i].state == Established)
494 bgp_peers[i].update_routes = 1;
495
496 LOG(4, 0, 0, "Registered BGP route %s/%d\n",
497 fmtaddr(add.dest.prefix, 0), add.dest.len);
498
499 return 1;
500 }
501
502 /* add route to list for peers */
503 /*
504 * Note: same provisions as above
505 */
506 int bgp_add_route6(struct in6_addr ip, int prefixlen)
507 {
508 struct bgp_route6_list *r = bgp_routes6;
509 struct bgp_route6_list add;
510 int i;
511 char ipv6addr[INET6_ADDRSTRLEN];
512
513 memcpy(&add.dest.prefix, &ip.s6_addr, 16);
514 add.dest.len = prefixlen;
515 add.next = 0;
516
517 /* check for duplicate */
518 while (r)
519 {
520 i = memcmp(&r->dest, &add.dest, sizeof(r->dest));
521 if (!i)
522 return 1; /* already covered */
523
524 if (i > 0)
525 break;
526
527 r = r->next;
528 }
529
530 /* insert into route list; sorted */
531 if (!(r = malloc(sizeof(*r))))
532 {
533 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
534 inet_ntop(AF_INET6, &ip, ipv6addr, INET6_ADDRSTRLEN), add.dest.len,
535 strerror(errno));
536
537 return 0;
538 }
539
540 memcpy(r, &add, sizeof(*r));
541 bgp_routes6 = bgp_insert_route6(bgp_routes6, r);
542
543 /* flag established peers for update */
544 for (i = 0; i < BGP_NUM_PEERS; i++)
545 if (bgp_peers[i].state == Established
546 && bgp_peers[i].mp_handling == HandleIPv6Routes)
547 bgp_peers[i].update_routes6 = 1;
548
549 LOG(4, 0, 0, "Registered BGP route %s/%d\n",
550 inet_ntop(AF_INET6, &ip, ipv6addr, INET6_ADDRSTRLEN), add.dest.len);
551
552 return 1;
553 }
554
555 /* remove route from list for peers */
556 int bgp_del_route(in_addr_t ip, int prefixlen)
557 {
558 struct bgp_route_list *r = bgp_routes;
559 struct bgp_route_list *e = 0;
560 struct bgp_route_list del;
561 int i;
562
563 del.dest.prefix = ip;
564 del.dest.len = prefixlen;
565 del.next = 0;
566
567 /* find entry in routes list and remove */
568 while (r)
569 {
570 i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
571 if (!i)
572 {
573 if (e)
574 e->next = r->next;
575 else
576 bgp_routes = r->next;
577
578 free(r);
579 break;
580 }
581
582 e = r;
583
584 if (i > 0)
585 r = 0; /* stop */
586 else
587 r = r->next;
588 }
589
590 /* not found */
591 if (!r)
592 return 1;
593
594 /* flag established peers for update */
595 for (i = 0; i < BGP_NUM_PEERS; i++)
596 if (bgp_peers[i].state == Established)
597 bgp_peers[i].update_routes = 1;
598
599 LOG(4, 0, 0, "Removed BGP route %s/%d\n",
600 fmtaddr(del.dest.prefix, 0), del.dest.len);
601
602 return 1;
603 }
604
605 /* remove route from list for peers */
606 int bgp_del_route6(struct in6_addr ip, int prefixlen)
607 {
608 struct bgp_route6_list *r = bgp_routes6;
609 struct bgp_route6_list *e = 0;
610 struct bgp_route6_list del;
611 int i;
612 char ipv6addr[INET6_ADDRSTRLEN];
613
614 memcpy(&del.dest.prefix, &ip.s6_addr, 16);
615 del.dest.len = prefixlen;
616 del.next = 0;
617
618 /* find entry in routes list and remove */
619 while (r)
620 {
621 i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
622 if (!i)
623 {
624 if (e)
625 e->next = r->next;
626 else
627 bgp_routes6 = r->next;
628
629 free(r);
630 break;
631 }
632
633 e = r;
634
635 if (i > 0)
636 r = 0; /* stop */
637 else
638 r = r->next;
639 }
640
641 /* not found */
642 if (!r)
643 return 1;
644
645 /* flag established peers for update */
646 for (i = 0; i < BGP_NUM_PEERS; i++)
647 if (bgp_peers[i].state == Established
648 && bgp_peers[i].mp_handling == HandleIPv6Routes)
649 bgp_peers[i].update_routes6 = 1;
650
651 LOG(4, 0, 0, "Removed BGP route %s/%d\n",
652 inet_ntop(AF_INET6, &ip, ipv6addr, INET6_ADDRSTRLEN), del.dest.len);
653
654 return 1;
655 }
656
657 /* enable or disable routing */
658 void bgp_enable_routing(int enable)
659 {
660 int i;
661
662 for (i = 0; i < BGP_NUM_PEERS; i++)
663 {
664 bgp_peers[i].routing = enable;
665
666 /* flag established peers for update */
667 if (bgp_peers[i].state == Established)
668 bgp_peers[i].update_routes = 1;
669 }
670
671 LOG(4, 0, 0, "%s BGP routing\n", enable ? "Enabled" : "Suspended");
672 }
673
674 #ifdef HAVE_EPOLL
675 # include <sys/epoll.h>
676 #else
677 # include "fake_epoll.h"
678 #endif
679
680 /* return a bitmask of the events required to poll this peer's fd */
681 int bgp_set_poll()
682 {
683 int i;
684
685 if (!bgp_configured)
686 return 0;
687
688 for (i = 0; i < BGP_NUM_PEERS; i++)
689 {
690 struct bgp_peer *peer = &bgp_peers[i];
691 int events = 0;
692
693 if (peer->state == Disabled || peer->state == Idle)
694 continue;
695
696 if (peer->inbuf->done < BGP_MAX_PACKET_SIZE)
697 events |= EPOLLIN;
698
699 if (peer->state == Connect || /* connection in progress */
700 peer->update_routes || /* routing updates */
701 peer->outbuf->packet.header.len) /* pending output */
702 events |= EPOLLOUT;
703
704 if (peer->events != events)
705 {
706 struct epoll_event ev;
707
708 ev.events = peer->events = events;
709 ev.data.ptr = &peer->edata;
710 epoll_ctl(epollfd, EPOLL_CTL_MOD, peer->sock, &ev);
711 }
712 }
713
714 return 1;
715 }
716
717 /* process bgp events/timers */
718 int bgp_process(uint32_t events[])
719 {
720 int i;
721
722 if (!bgp_configured)
723 return 0;
724
725 for (i = 0; i < BGP_NUM_PEERS; i++)
726 {
727 struct bgp_peer *peer = &bgp_peers[i];
728
729 if (*peer->name && peer->cli_flag == BGP_CLI_RESTART)
730 {
731 bgp_restart(peer);
732 continue;
733 }
734
735 if (peer->state == Disabled)
736 continue;
737
738 if (peer->cli_flag)
739 {
740 switch (peer->cli_flag)
741 {
742 case BGP_CLI_SUSPEND:
743 if (peer->routing)
744 {
745 peer->routing = 0;
746 if (peer->state == Established)
747 peer->update_routes = 1;
748 }
749
750 break;
751
752 case BGP_CLI_ENABLE:
753 if (!peer->routing)
754 {
755 peer->routing = 1;
756 if (peer->state == Established)
757 peer->update_routes = 1;
758 }
759
760 break;
761 }
762
763 peer->cli_flag = 0;
764 }
765
766 /* handle empty/fill of buffers */
767 if (events[i] & EPOLLOUT)
768 {
769 int r = 1;
770 if (peer->state == Connect)
771 r = bgp_handle_connect(peer);
772 else if (peer->outbuf->packet.header.len)
773 r = bgp_write(peer);
774
775 if (!r)
776 continue;
777 }
778
779 if (events[i] & (EPOLLIN|EPOLLHUP))
780 {
781 if (!bgp_read(peer))
782 continue;
783 }
784
785 /* process input buffer contents */
786 while (peer->inbuf->done >= sizeof(peer->inbuf->packet.header)
787 && !peer->outbuf->packet.header.len) /* may need to queue a response */
788 {
789 if (bgp_handle_input(peer) < 0)
790 continue;
791 }
792
793 /* process pending updates */
794 if (peer->update_routes
795 && !peer->outbuf->packet.header.len) /* ditto */
796 {
797 if (!bgp_send_update(peer))
798 continue;
799 }
800
801 /* process pending IPv6 updates */
802 if (peer->update_routes6
803 && !peer->outbuf->packet.header.len) /* ditto */
804 {
805 if (!bgp_send_update6(peer))
806 continue;
807 }
808
809 /* process timers */
810 bgp_process_timers(peer);
811 }
812
813 return 1;
814 }
815
816 /* process bgp timers only */
817 void bgp_process_peers_timers()
818 {
819 int i;
820
821 if (!bgp_configured)
822 return;
823
824 for (i = 0; i < BGP_NUM_PEERS; i++)
825 {
826 struct bgp_peer *peer = &bgp_peers[i];
827
828 if (peer->state == Disabled)
829 continue;
830
831 bgp_process_timers(peer);
832 }
833 }
834
835 static void bgp_process_timers(struct bgp_peer *peer)
836 {
837 if (peer->state == Established)
838 {
839 if (time_now > peer->expire_time)
840 {
841 LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
842 peer->name, peer->hold);
843
844 bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
845 return;
846 }
847
848 if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
849 bgp_send_keepalive(peer);
850 }
851 else if (peer->state == Idle)
852 {
853 if (time_now > peer->retry_time)
854 bgp_connect(peer);
855 }
856 else if (time_now > peer->state_time + BGP_STATE_TIME)
857 {
858 LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
859 bgp_state_str(peer->state), peer->name);
860
861 bgp_restart(peer);
862 }
863 }
864
865 static void bgp_free_routes(struct bgp_route_list *routes)
866 {
867 struct bgp_route_list *tmp;
868
869 while ((tmp = routes))
870 {
871 routes = tmp->next;
872 free(tmp);
873 }
874 }
875
876 static void bgp_free_routes6(struct bgp_route6_list *routes)
877 {
878 struct bgp_route6_list *tmp;
879
880 while ((tmp = routes))
881 {
882 routes = tmp->next;
883 free(tmp);
884 }
885 }
886
887 char const *bgp_state_str(enum bgp_state state)
888 {
889 switch (state)
890 {
891 case Disabled: return "Disabled";
892 case Idle: return "Idle";
893 case Connect: return "Connect";
894 case Active: return "Active";
895 case OpenSent: return "OpenSent";
896 case OpenConfirm: return "OpenConfirm";
897 case Established: return "Established";
898 }
899
900 return "?";
901 }
902
903 static char const *bgp_msg_type_str(uint8_t type)
904 {
905 switch (type)
906 {
907 case BGP_MSG_OPEN: return "OPEN";
908 case BGP_MSG_UPDATE: return "UPDATE";
909 case BGP_MSG_NOTIFICATION: return "NOTIFICATION";
910 case BGP_MSG_KEEPALIVE: return "KEEPALIVE";
911 }
912
913 return "?";
914 }
915
916 /* attempt to connect to peer */
917 static int bgp_connect(struct bgp_peer *peer)
918 {
919 static int bgp_port = 0;
920 struct sockaddr_in addr;
921 struct sockaddr_in source_addr;
922 struct epoll_event ev;
923
924 if (!bgp_port)
925 {
926 struct servent *serv;
927 if (!(serv = getservbyname("bgp", "tcp")))
928 {
929 LOG(0, 0, 0, "Can't get bgp service (%s)\n", strerror(errno));
930 return 0;
931 }
932
933 bgp_port = serv->s_port;
934 }
935
936 if ((peer->sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
937 {
938 LOG(0, 0, 0, "Can't create a socket for BGP peer %s (%s)\n",
939 peer->name, strerror(errno));
940
941 peer->state = peer->next_state = Disabled;
942 return 0;
943 }
944
945 /* add to poll set */
946 ev.events = peer->events = EPOLLOUT;
947 ev.data.ptr = &peer->edata;
948 epoll_ctl(epollfd, EPOLL_CTL_ADD, peer->sock, &ev);
949
950 /* set to non-blocking */
951 fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
952
953 /* set source address */
954 memset(&source_addr, 0, sizeof(source_addr));
955 source_addr.sin_family = AF_INET;
956 source_addr.sin_addr.s_addr = peer->source_addr; /* defaults to INADDR_ANY */
957 if (bind(peer->sock, (struct sockaddr *) &source_addr, sizeof(source_addr)) < 0)
958 {
959 LOG(1, 0, 0, "Can't set source address to %s: %s\n",
960 inet_ntoa(source_addr.sin_addr), strerror(errno));
961
962 bgp_set_retry(peer);
963 return 0;
964 }
965
966 /* try connect */
967 memset(&addr, 0, sizeof(addr));
968 addr.sin_family = AF_INET;
969 addr.sin_port = bgp_port;
970 addr.sin_addr.s_addr = peer->addr;
971
972 while (connect(peer->sock, (struct sockaddr *) &addr, sizeof(addr)) == -1)
973 {
974 if (errno == EINTR) /* SIGALARM handler */
975 continue;
976
977 if (errno != EINPROGRESS)
978 {
979 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n",
980 inet_ntoa(addr.sin_addr), strerror(errno));
981
982 bgp_set_retry(peer);
983 return 0;
984 }
985
986 peer->state = Connect;
987 peer->state_time = time_now;
988
989 LOG(4, 0, 0, "BGP peer %s: state Connect\n", peer->name);
990 return 1;
991 }
992
993 peer->state = Active;
994 peer->state_time = time_now;
995 peer->retry_time = peer->retry_count = 0;
996
997 LOG(4, 0, 0, "BGP peer %s: state Active\n", inet_ntoa(addr.sin_addr));
998
999 return bgp_send_open(peer);
1000 }
1001
1002 /* complete partial connection (state = Connect) */
1003 static int bgp_handle_connect(struct bgp_peer *peer)
1004 {
1005 int err = 0;
1006 socklen_t len = sizeof(int);
1007 getsockopt(peer->sock, SOL_SOCKET, SO_ERROR, &err, &len);
1008 if (err)
1009 {
1010 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n", peer->name,
1011 strerror(err));
1012
1013 bgp_set_retry(peer);
1014 return 0;
1015 }
1016
1017 peer->state = Active;
1018 peer->state_time = time_now;
1019
1020 LOG(4, 0, 0, "BGP peer %s: state Active\n", peer->name);
1021
1022 return bgp_send_open(peer);
1023 }
1024
1025 /* initiate a write */
1026 static int bgp_write(struct bgp_peer *peer)
1027 {
1028 int len = htons(peer->outbuf->packet.header.len);
1029 int r;
1030
1031 while ((r = write(peer->sock, &peer->outbuf->packet + peer->outbuf->done,
1032 len - peer->outbuf->done)) == -1)
1033 {
1034 if (errno == EINTR)
1035 continue;
1036
1037 if (errno == EAGAIN)
1038 return 1;
1039
1040 if (errno == EPIPE)
1041 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
1042 else
1043 LOG(1, 0, 0, "Can't write to BGP peer %s (%s)\n", peer->name,
1044 strerror(errno));
1045
1046 bgp_set_retry(peer);
1047 return 0;
1048 }
1049
1050 if (r < len)
1051 {
1052 peer->outbuf->done += r;
1053 return 1;
1054 }
1055
1056 LOG(4, 0, 0, "Sent %s to BGP peer %s\n",
1057 bgp_msg_type_str(peer->outbuf->packet.header.type), peer->name);
1058
1059 peer->outbuf->packet.header.len = 0;
1060 peer->outbuf->done = 0;
1061
1062 if (peer->state == Established)
1063 peer->keepalive_time = time_now + peer->keepalive;
1064
1065 if (peer->state != peer->next_state)
1066 {
1067 if (peer->next_state == Disabled || peer->next_state == Idle)
1068 {
1069 bgp_clear(peer);
1070 return 0;
1071 }
1072
1073 peer->state = peer->next_state;
1074 peer->state_time = time_now;
1075
1076 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
1077 bgp_state_str(peer->state));
1078 }
1079
1080 return 1;
1081 }
1082
1083 /* initiate a read */
1084 static int bgp_read(struct bgp_peer *peer)
1085 {
1086 int r;
1087
1088 while ((r = read(peer->sock, &peer->inbuf->packet + peer->inbuf->done,
1089 BGP_MAX_PACKET_SIZE - peer->inbuf->done)) < 1)
1090 {
1091 if (!r)
1092 {
1093 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
1094 }
1095 else
1096 {
1097 if (errno == EINTR)
1098 continue;
1099
1100 if (errno == EAGAIN)
1101 return 1;
1102
1103 LOG(1, 0, 0, "Can't read from BGP peer %s (%s)\n", peer->name,
1104 strerror(errno));
1105 }
1106
1107 bgp_set_retry(peer);
1108 return 0;
1109 }
1110
1111 peer->inbuf->done += r;
1112 return 1;
1113 }
1114
1115 /* process buffered packets */
1116 static int bgp_handle_input(struct bgp_peer *peer)
1117 {
1118 struct bgp_packet *p = &peer->inbuf->packet;
1119 int len = ntohs(p->header.len);
1120
1121 if (len > BGP_MAX_PACKET_SIZE)
1122 {
1123 LOG(1, 0, 0, "Bad header length from BGP %s\n", peer->name);
1124 bgp_send_notification(peer, BGP_ERR_HEADER, BGP_ERR_HDR_BAD_LEN);
1125 return 0;
1126 }
1127
1128 if (peer->inbuf->done < len)
1129 return 0;
1130
1131 LOG(4, 0, 0, "Received %s from BGP peer %s\n",
1132 bgp_msg_type_str(p->header.type), peer->name);
1133
1134 switch (p->header.type)
1135 {
1136 case BGP_MSG_OPEN:
1137 {
1138 struct bgp_data_open data;
1139 int hold;
1140 int i;
1141 off_t param_offset, capability_offset;
1142 struct bgp_opt_param *param;
1143 uint8_t capabilities_len;
1144 char *capabilities = NULL;
1145 struct bgp_capability *capability;
1146 struct bgp_mp_cap_param *mp_cap;
1147
1148 for (i = 0; i < sizeof(p->header.marker); i++)
1149 {
1150 if ((unsigned char) p->header.marker[i] != 0xff)
1151 {
1152 LOG(1, 0, 0, "Invalid marker from BGP peer %s\n",
1153 peer->name);
1154
1155 bgp_send_notification(peer, BGP_ERR_HEADER,
1156 BGP_ERR_HDR_NOT_SYNC);
1157
1158 return 0;
1159 }
1160 }
1161
1162 if (peer->state != OpenSent)
1163 {
1164 LOG(1, 0, 0, "OPEN from BGP peer %s in %s state\n",
1165 peer->name, bgp_state_str(peer->state));
1166
1167 bgp_send_notification(peer, BGP_ERR_FSM, 0);
1168 return 0;
1169 }
1170
1171 memcpy(&data, p->data, len - sizeof(p->header));
1172
1173 if (data.version != BGP_VERSION)
1174 {
1175 LOG(1, 0, 0, "Bad version (%d) sent by BGP peer %s\n",
1176 (int) data.version, peer->name);
1177
1178 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_VERSION);
1179 return 0;
1180 }
1181
1182 if (ntohs(data.as) != peer->as)
1183 {
1184 LOG(1, 0, 0, "Bad AS sent by BGP peer %s (got %d, "
1185 "expected %d)\n", peer->name, (int) htons(data.as),
1186 (int) peer->as);
1187
1188 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_BAD_AS);
1189 return 0;
1190 }
1191
1192 if ((hold = ntohs(data.hold_time)) < 3)
1193 {
1194 LOG(1, 0, 0, "Bad hold time (%d) from BGP peer %s\n",
1195 hold, peer->name);
1196
1197 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_HOLD_TIME);
1198 return 0;
1199 }
1200
1201 /* pick lowest hold time */
1202 if (hold < peer->hold)
1203 peer->hold = hold;
1204
1205 /* adjust our keepalive based on negotiated hold value */
1206 if (peer->keepalive * 3 > peer->hold)
1207 peer->keepalive = peer->hold / 3;
1208
1209 /* check for optional parameters */
1210 /* 2 is for the size of type + len (both uint8_t) */
1211 for (param_offset = 0;
1212 param_offset < data.opt_len;
1213 param_offset += 2 + param->len)
1214 {
1215 param = (struct bgp_opt_param *)((char *)&data.opt_params + param_offset);
1216
1217 /* sensible check */
1218 if (data.opt_len - param_offset < 2
1219 || param->len > data.opt_len - param_offset - 2)
1220 {
1221 LOG(1, 0, 0, "Malformed Optional Parameter list from BGP peer %s\n",
1222 peer->name);
1223
1224 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC);
1225 return 0;
1226 }
1227
1228 /* we know only one parameter type */
1229 if (param->type != BGP_PARAM_TYPE_CAPABILITY)
1230 {
1231 LOG(1, 0, 0, "Unsupported Optional Parameter type %d from BGP peer %s\n",
1232 param->type, peer->name);
1233
1234 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_UNSUP_PARAM);
1235 return 0;
1236 }
1237
1238 capabilities_len = param->len;
1239 capabilities = (char *)&param->value;
1240
1241 /* look for BGP multiprotocol capability */
1242 for (capability_offset = 0;
1243 capability_offset < capabilities_len;
1244 capability_offset += 2 + capability->len)
1245 {
1246 capability = (struct bgp_capability *)(capabilities + capability_offset);
1247
1248 /* sensible check */
1249 if (capabilities_len - capability_offset < 2
1250 || capability->len > capabilities_len - capability_offset - 2)
1251 {
1252 LOG(1, 0, 0, "Malformed Capabilities list from BGP peer %s\n",
1253 peer->name);
1254
1255 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_UNSPEC);
1256 return 0;
1257 }
1258
1259 /* we only know one capability code */
1260 if (capability->code != BGP_CAP_CODE_MP
1261 && capability->len != sizeof(struct bgp_mp_cap_param))
1262 {
1263 LOG(4, 0, 0, "Unsupported Capability code %d from BGP peer %s\n",
1264 capability->code, peer->name);
1265
1266 /* we don't terminate, still; we just jump to the next one */
1267 continue;
1268 }
1269
1270 mp_cap = (struct bgp_mp_cap_param *)&capability->value;
1271 /* the only <AFI, SAFI> tuple we support */
1272 if (ntohs(mp_cap->afi) != BGP_MP_AFI_IPv6 && mp_cap->safi != BGP_MP_SAFI_UNICAST)
1273 {
1274 LOG(4, 0, 0, "Unsupported multiprotocol AFI %d and SAFI %d from BGP peer %s\n",
1275 mp_cap->afi, mp_cap->safi, peer->name);
1276
1277 /* we don't terminate, still; we just jump to the next one */
1278 continue;
1279 }
1280
1281 /* yes it can! */
1282 peer->mp_handling = HandleIPv6Routes;
1283 }
1284 }
1285
1286 if (peer->mp_handling != HandleIPv6Routes)
1287 {
1288 peer->mp_handling = DoesntHandleIPv6Routes;
1289 if (config->ipv6_prefix.s6_addr[0])
1290 LOG(1, 0, 0, "Warning: BGP peer %s doesn't handle IPv6 prefixes updates\n",
1291 peer->name);
1292 }
1293
1294 /* next transition requires an exchange of keepalives */
1295 bgp_send_keepalive(peer);
1296 }
1297
1298 break;
1299
1300 case BGP_MSG_KEEPALIVE:
1301 if (peer->state == OpenConfirm)
1302 {
1303 peer->state = peer->next_state = Established;
1304 peer->state_time = time_now;
1305 peer->keepalive_time = time_now + peer->keepalive;
1306 peer->update_routes = 1;
1307 peer->retry_count = 0;
1308 peer->retry_time = 0;
1309
1310 LOG(4, 0, 0, "BGP peer %s: state Established\n", peer->name);
1311 }
1312
1313 break;
1314
1315 case BGP_MSG_NOTIFICATION:
1316 if (len > sizeof(p->header))
1317 {
1318 struct bgp_data_notification *notification =
1319 (struct bgp_data_notification *) p->data;
1320
1321 if (notification->error_code == BGP_ERR_CEASE)
1322 {
1323 LOG(4, 0, 0, "BGP peer %s sent CEASE\n", peer->name);
1324 bgp_set_retry(peer);
1325 return 0;
1326 }
1327
1328 if (notification->error_code == BGP_ERR_OPEN
1329 && notification->error_subcode == BGP_ERR_OPN_UNSUP_PARAM)
1330 {
1331 LOG(4, 0, 0, "BGP peer %s doesn't support BGP Capabilities\n", peer->name);
1332 peer->mp_handling = DoesntHandleIPv6Routes;
1333 bgp_set_retry(peer);
1334 return 0;
1335 }
1336
1337 if (notification->error_code == BGP_ERR_OPEN
1338 && notification->error_subcode == BGP_ERR_OPN_UNSUP_CAP)
1339 {
1340 /* the only capability we advertise is this one, so upon receiving
1341 an "unsupported capability" message, we disable IPv6 routes for
1342 this peer */
1343 LOG(4, 0, 0, "BGP peer %s doesn't support IPv6 routes advertisement\n", peer->name);
1344 peer->mp_handling = DoesntHandleIPv6Routes;
1345 break;
1346 }
1347
1348 /* FIXME: should handle more notifications */
1349 LOG(4, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n",
1350 peer->name, (int) notification->error_code);
1351 }
1352
1353 break;
1354 }
1355
1356 /* reset timer */
1357 peer->expire_time = time_now + peer->hold;
1358
1359 /* see if there's another message in the same packet/buffer */
1360 if (peer->inbuf->done > len)
1361 {
1362 peer->inbuf->done -= len;
1363 memmove(p, (char *) p + len, peer->inbuf->done);
1364 }
1365 else
1366 {
1367 peer->inbuf->packet.header.len = 0;
1368 peer->inbuf->done = 0;
1369 }
1370
1371 return peer->inbuf->done;
1372 }
1373
1374 /* send/buffer OPEN message */
1375 static int bgp_send_open(struct bgp_peer *peer)
1376 {
1377 struct bgp_data_open data;
1378 struct bgp_mp_cap_param mp_ipv6 = { htons(BGP_MP_AFI_IPv6), 0, BGP_MP_SAFI_UNICAST };
1379 struct bgp_capability cap_mp_ipv6;
1380 struct bgp_opt_param param_cap_mp_ipv6;
1381 uint16_t len = sizeof(peer->outbuf->packet.header);
1382
1383 memset(peer->outbuf->packet.header.marker, 0xff,
1384 sizeof(peer->outbuf->packet.header.marker));
1385
1386 peer->outbuf->packet.header.type = BGP_MSG_OPEN;
1387
1388 data.version = BGP_VERSION;
1389 data.as = htons(our_as);
1390 data.hold_time = htons(peer->hold);
1391 /* use the source IP we use as identifier, if available */
1392 if (peer->source_addr != INADDR_ANY)
1393 data.identifier = peer->source_addr;
1394 else
1395 data.identifier = my_address;
1396
1397 /* if we know peer doesn't support MP (mp_handling == DoesntHandleIPv6Routes)
1398 then don't add this parameter */
1399 if (config->ipv6_prefix.s6_addr[0]
1400 && (peer->mp_handling == HandlingUnknown
1401 || peer->mp_handling == HandleIPv6Routes))
1402 {
1403 /* construct the param and capability */
1404 cap_mp_ipv6.code = BGP_CAP_CODE_MP;
1405 cap_mp_ipv6.len = sizeof(mp_ipv6);
1406 memcpy(&cap_mp_ipv6.value, &mp_ipv6, cap_mp_ipv6.len);
1407
1408 param_cap_mp_ipv6.type = BGP_PARAM_TYPE_CAPABILITY;
1409 param_cap_mp_ipv6.len = 2 + sizeof(mp_ipv6);
1410 memcpy(&param_cap_mp_ipv6.value, &cap_mp_ipv6, param_cap_mp_ipv6.len);
1411
1412 data.opt_len = 2 + param_cap_mp_ipv6.len;
1413 memcpy(&data.opt_params, &param_cap_mp_ipv6, data.opt_len);
1414 }
1415 else
1416 data.opt_len = 0;
1417
1418 memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE + data.opt_len);
1419 len += BGP_DATA_OPEN_SIZE + data.opt_len;
1420
1421 peer->outbuf->packet.header.len = htons(len);
1422 peer->outbuf->done = 0;
1423 peer->next_state = OpenSent;
1424
1425 return bgp_write(peer);
1426 }
1427
1428 /* send/buffer KEEPALIVE message */
1429 static int bgp_send_keepalive(struct bgp_peer *peer)
1430 {
1431 memset(peer->outbuf->packet.header.marker, 0xff,
1432 sizeof(peer->outbuf->packet.header.marker));
1433
1434 peer->outbuf->packet.header.type = BGP_MSG_KEEPALIVE;
1435 peer->outbuf->packet.header.len =
1436 htons(sizeof(peer->outbuf->packet.header));
1437
1438 peer->outbuf->done = 0;
1439 peer->next_state = (peer->state == OpenSent) ? OpenConfirm : peer->state;
1440
1441 return bgp_write(peer);
1442 }
1443
1444 /* send/buffer UPDATE message */
1445 static int bgp_send_update(struct bgp_peer *peer)
1446 {
1447 uint16_t unf_len = 0;
1448 uint16_t attr_len;
1449 uint16_t len = sizeof(peer->outbuf->packet.header);
1450 struct bgp_route_list *have = peer->routes;
1451 struct bgp_route_list *want = peer->routing ? bgp_routes : 0;
1452 struct bgp_route_list *e = 0;
1453 struct bgp_route_list *add = 0;
1454 int s;
1455
1456 char *data = (char *) &peer->outbuf->packet.data;
1457
1458 /* need leave room for attr_len, bgp_path_attrs and one prefix */
1459 char *max = (char *) &peer->outbuf->packet.data
1460 + sizeof(peer->outbuf->packet.data)
1461 - sizeof(attr_len) - peer->path_attr_len - sizeof(struct bgp_ip_prefix);
1462
1463 /* skip over unf_len */
1464 data += sizeof(unf_len);
1465 len += sizeof(unf_len);
1466
1467 memset(peer->outbuf->packet.header.marker, 0xff,
1468 sizeof(peer->outbuf->packet.header.marker));
1469
1470 peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
1471
1472 peer->update_routes = 0; /* tentatively clear */
1473
1474 /* find differences */
1475 while ((have || want) && data < (max - sizeof(struct bgp_ip_prefix)))
1476 {
1477 if (have)
1478 s = want
1479 ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
1480 : -1;
1481 else
1482 s = 1;
1483
1484 if (s < 0) /* found one to delete */
1485 {
1486 struct bgp_route_list *tmp = have;
1487 have = have->next;
1488
1489 s = BGP_IP_PREFIX_SIZE(tmp->dest);
1490 memcpy(data, &tmp->dest, s);
1491 data += s;
1492 unf_len += s;
1493 len += s;
1494
1495 LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
1496 fmtaddr(tmp->dest.prefix, 0), tmp->dest.len, peer->name);
1497
1498 free(tmp);
1499
1500 if (e)
1501 e->next = have;
1502 else
1503 peer->routes = have;
1504 }
1505 else
1506 {
1507 if (!s) /* same */
1508 {
1509 e = have; /* stash the last found to relink above */
1510 have = have->next;
1511 want = want->next;
1512 }
1513 else if (s > 0) /* addition reqd. */
1514 {
1515 if (add)
1516 {
1517 peer->update_routes = 1; /* only one add per packet */
1518 if (!have)
1519 break;
1520 }
1521 else
1522 add = want;
1523
1524 if (want)
1525 want = want->next;
1526 }
1527 }
1528 }
1529
1530 if (have || want)
1531 peer->update_routes = 1; /* more to do */
1532
1533 /* anything changed? */
1534 if (!(unf_len || add))
1535 return 1;
1536
1537 /* go back and insert unf_len */
1538 unf_len = htons(unf_len);
1539 memcpy(&peer->outbuf->packet.data, &unf_len, sizeof(unf_len));
1540
1541 if (add)
1542 {
1543 if (!(e = malloc(sizeof(*e))))
1544 {
1545 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
1546 fmtaddr(add->dest.prefix, 0), add->dest.len, strerror(errno));
1547
1548 return 0;
1549 }
1550
1551 memcpy(e, add, sizeof(*e));
1552 e->next = 0;
1553 peer->routes = bgp_insert_route(peer->routes, e);
1554
1555 attr_len = htons(peer->path_attr_len);
1556 memcpy(data, &attr_len, sizeof(attr_len));
1557 data += sizeof(attr_len);
1558 len += sizeof(attr_len);
1559
1560 memcpy(data, peer->path_attrs, peer->path_attr_len);
1561 data += peer->path_attr_len;
1562 len += peer->path_attr_len;
1563
1564 s = BGP_IP_PREFIX_SIZE(add->dest);
1565 memcpy(data, &add->dest, s);
1566 data += s;
1567 len += s;
1568
1569 LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
1570 fmtaddr(add->dest.prefix, 0), add->dest.len, peer->name);
1571 }
1572 else
1573 {
1574 attr_len = 0;
1575 memcpy(data, &attr_len, sizeof(attr_len));
1576 data += sizeof(attr_len);
1577 len += sizeof(attr_len);
1578 }
1579
1580 peer->outbuf->packet.header.len = htons(len);
1581 peer->outbuf->done = 0;
1582
1583 return bgp_write(peer);
1584 }
1585
1586 /* send/buffer UPDATE message for IPv6 routes */
1587 static int bgp_send_update6(struct bgp_peer *peer)
1588 {
1589 uint16_t attr_len;
1590 uint16_t unreach_len = 0;
1591 char *unreach_len_pos;
1592 uint8_t reach_len;
1593 uint16_t len = sizeof(peer->outbuf->packet.header);
1594 struct bgp_route6_list *have = peer->routes6;
1595 struct bgp_route6_list *want = peer->routing ? bgp_routes6 : 0;
1596 struct bgp_route6_list *e = 0;
1597 struct bgp_route6_list *add = 0;
1598 int s;
1599 char ipv6addr[INET6_ADDRSTRLEN];
1600
1601 char *data = (char *) &peer->outbuf->packet.data;
1602
1603 /* need leave room for attr_len, bgp_path_attrs and one prefix */
1604 char *max = (char *) &peer->outbuf->packet.data
1605 + sizeof(peer->outbuf->packet.data)
1606 - sizeof(attr_len) - peer->path_attr_len_without_nexthop
1607 - BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE - sizeof(struct bgp_ip6_prefix);
1608
1609 memset(peer->outbuf->packet.header.marker, 0xff,
1610 sizeof(peer->outbuf->packet.header.marker));
1611
1612 peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
1613
1614 /* insert non-MP unfeasible routes length */
1615 memcpy(data, &unreach_len, sizeof(unreach_len));
1616 /* skip over it and attr_len too; it will be filled when known */
1617 data += sizeof(unreach_len) + sizeof(attr_len);
1618 len += sizeof(unreach_len) + sizeof(attr_len);
1619
1620 /* copy usual attributes */
1621 memcpy(data, peer->path_attrs, peer->path_attr_len_without_nexthop);
1622 data += peer->path_attr_len_without_nexthop;
1623 attr_len = peer->path_attr_len_without_nexthop;
1624
1625 /* copy MP unreachable NLRI heading */
1626 memcpy(data, peer->mp_unreach_nlri_partial,
1627 BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE);
1628 /* remember where to update this attr len */
1629 unreach_len_pos = data + 2;
1630 data += BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE;
1631 attr_len += BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE;
1632
1633 peer->update_routes6 = 0; /* tentatively clear */
1634
1635 /* find differences */
1636 while ((have || want) && data < (max - sizeof(struct bgp_ip6_prefix)))
1637 {
1638 if (have)
1639 s = want
1640 ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
1641 : -1;
1642 else
1643 s = 1;
1644
1645 if (s < 0) /* found one to delete */
1646 {
1647 struct bgp_route6_list *tmp = have;
1648 have = have->next;
1649
1650 s = BGP_IP_PREFIX_SIZE(tmp->dest);
1651 memcpy(data, &tmp->dest, s);
1652 data += s;
1653 unreach_len += s;
1654 attr_len += s;
1655
1656 LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
1657 inet_ntop(AF_INET6, &tmp->dest.prefix, ipv6addr, INET6_ADDRSTRLEN),
1658 tmp->dest.len, peer->name);
1659
1660 free(tmp);
1661
1662 if (e)
1663 e->next = have;
1664 else
1665 peer->routes6 = have;
1666 }
1667 else
1668 {
1669 if (!s) /* same */
1670 {
1671 e = have; /* stash the last found to relink above */
1672 have = have->next;
1673 want = want->next;
1674 }
1675 else if (s > 0) /* addition reqd. */
1676 {
1677 if (add)
1678 {
1679 peer->update_routes6 = 1; /* only one add per packet */
1680 if (!have)
1681 break;
1682 }
1683 else
1684 add = want;
1685
1686 if (want)
1687 want = want->next;
1688 }
1689 }
1690 }
1691
1692 if (have || want)
1693 peer->update_routes6 = 1; /* more to do */
1694
1695 /* anything changed? */
1696 if (!(unreach_len || add))
1697 return 1;
1698
1699 if (unreach_len)
1700 {
1701 /* go back and insert MP unreach_len */
1702 unreach_len += sizeof(struct bgp_attr_mp_unreach_nlri_partial);
1703 unreach_len = htons(unreach_len);
1704 memcpy(unreach_len_pos, &unreach_len, sizeof(unreach_len));
1705 }
1706 else
1707 {
1708 /* we can remove this attribute, then */
1709 data -= BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE;
1710 attr_len -= BGP_PATH_ATTR_MP_UNREACH_NLRI_PARTIAL_SIZE;
1711 }
1712
1713 if (add)
1714 {
1715 if (!(e = malloc(sizeof(*e))))
1716 {
1717 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
1718 inet_ntop(AF_INET6, &add->dest.prefix, ipv6addr, INET6_ADDRSTRLEN),
1719 add->dest.len, strerror(errno));
1720
1721 return 0;
1722 }
1723
1724 memcpy(e, add, sizeof(*e));
1725 e->next = 0;
1726 peer->routes6 = bgp_insert_route6(peer->routes6, e);
1727
1728 /* copy MP reachable NLRI heading */
1729 memcpy(data, peer->mp_reach_nlri_partial,
1730 BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE);
1731 /* with proper len */
1732 reach_len = BGP_IP_PREFIX_SIZE(add->dest);
1733 data[2] = sizeof(struct bgp_attr_mp_reach_nlri_partial) + reach_len;
1734 data += BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE;
1735 attr_len += BGP_PATH_ATTR_MP_REACH_NLRI_PARTIAL_SIZE;
1736
1737 memcpy(data, &add->dest, reach_len);
1738 data += reach_len;
1739 attr_len += reach_len;
1740
1741 LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
1742 inet_ntop(AF_INET6, &add->dest.prefix, ipv6addr, INET6_ADDRSTRLEN),
1743 add->dest.len, peer->name);
1744 }
1745
1746 /* update len with attributes we added */
1747 len += attr_len;
1748
1749 /* go back and insert attr_len */
1750 attr_len = htons(attr_len);
1751 memcpy((char *)&peer->outbuf->packet.data + 2, &attr_len, sizeof(attr_len));
1752
1753 peer->outbuf->packet.header.len = htons(len);
1754 peer->outbuf->done = 0;
1755
1756 return bgp_write(peer);
1757 }
1758
1759 /* send/buffer NOTIFICATION message */
1760 static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
1761 uint8_t subcode)
1762 {
1763 struct bgp_data_notification data;
1764 uint16_t len = 0;
1765
1766 data.error_code = code;
1767 len += sizeof(data.error_code);
1768
1769 data.error_subcode = subcode;
1770 len += sizeof(data.error_code);
1771
1772 memset(peer->outbuf->packet.header.marker, 0xff,
1773 sizeof(peer->outbuf->packet.header.marker));
1774
1775 peer->outbuf->packet.header.type = BGP_MSG_NOTIFICATION;
1776 peer->outbuf->packet.header.len =
1777 htons(sizeof(peer->outbuf->packet.header) + len);
1778
1779 memcpy(peer->outbuf->packet.data, &data, len);
1780
1781 peer->outbuf->done = 0;
1782 peer->next_state = code == BGP_ERR_CEASE ? Disabled : Idle;
1783
1784 /* we're dying; ignore any pending input */
1785 peer->inbuf->packet.header.len = 0;
1786 peer->inbuf->done = 0;
1787
1788 return bgp_write(peer);
1789 }