Add note about fragmentation in Docs/manual.html, and a sample
[l2tpns.git] / bgp.c
1 /*
2 * BGPv4
3 * Used to advertise routes for upstream (l2tp port, rather than gratuitous
4 * arp) and downstream--allowing routers to load-balance both.
5 *
6 * Implementation limitations:
7 * - We never listen for incoming connections (session always initiated by us).
8 * - Any routes advertised by the peer are accepted, but ignored.
9 * - No password support; neither RFC1771 (which no-one seems to do anyway)
10 * nor RFC2385 (which requires a kernel patch on 2.4 kernels).
11 */
12
/* CVS revision tag of this source file */
const char *cvs_id_bgp = "$Id: bgp.c,v 1.9 2004-12-16 08:49:52 bodea Exp $";
14
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <string.h>
18 #include <time.h>
19 #include <errno.h>
20 #include <sys/socket.h>
21 #include <netinet/in.h>
22 #include <arpa/inet.h>
23 #include <netdb.h>
24 #include <fcntl.h>
25
26 #include "l2tpns.h"
27 #include "bgp.h"
28 #include "util.h"
29
/* peer state helpers */
static void bgp_clear(struct bgp_peer *peer);
static void bgp_set_retry(struct bgp_peer *peer);

/* route list helpers */
static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx);
static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
    struct bgp_route_list *new);
static void bgp_free_routes(struct bgp_route_list *routes);

/* diagnostics */
static char const *bgp_msg_type_str(uint8_t type);

/* connection and buffer handling */
static int bgp_connect(struct bgp_peer *peer);
static int bgp_handle_connect(struct bgp_peer *peer);
static int bgp_write(struct bgp_peer *peer);
static int bgp_read(struct bgp_peer *peer);
static int bgp_handle_input(struct bgp_peer *peer);

/* message builders */
static int bgp_send_open(struct bgp_peer *peer);
static int bgp_send_keepalive(struct bgp_peer *peer);
static int bgp_send_update(struct bgp_peer *peer);
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode);

/* our AS number, assigned by bgp_setup() */
static uint16_t our_as;

/* sorted list of routes to advertise to peers */
static struct bgp_route_list *bgp_routes = NULL;

/* non-zero once bgp_start() has configured at least one peer */
int bgp_configured = 0;

/* peer table; BGP_NUM_PEERS entries are indexed by the code in this file */
struct bgp_peer *bgp_peers = NULL;
54
55 /* prepare peer structure, globals */
56 int bgp_setup(int as)
57 {
58 int i;
59 struct bgp_peer *peer;
60
61 for (i = 0; i < BGP_NUM_PEERS; i++)
62 {
63 peer = &bgp_peers[i];
64 memset(peer, 0, sizeof(*peer));
65
66 peer->addr = INADDR_NONE;
67 peer->sock = -1;
68 peer->state = peer->next_state = Disabled;
69
70 if (!((peer->outbuf = malloc(sizeof(*peer->outbuf)))
71 && (peer->inbuf = malloc(sizeof(*peer->inbuf)))))
72 {
73 LOG(0, 0, 0, "Can't allocate buffers for bgp peer (%s)\n",
74 strerror(errno));
75
76 return 0;
77 }
78 }
79
80 if (as < 1)
81 as = 0;
82
83 if ((our_as = as))
84 return 0;
85
86 bgp_routes = 0;
87 bgp_configured = 0; /* set by bgp_start */
88
89 return 1;
90 }
91
92 /* start connection with a peer */
93 int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
94 int hold, int enable)
95 {
96 struct hostent *h;
97 int ibgp;
98 int i;
99 struct bgp_path_attr a;
100 char path_attrs[64];
101 char *p = path_attrs;
102 in_addr_t ip;
103 uint32_t metric = htonl(BGP_METRIC);
104 uint32_t no_export = htonl(BGP_COMMUNITY_NO_EXPORT);
105
106 if (!our_as)
107 return 0;
108
109 if (peer->state != Disabled)
110 bgp_halt(peer);
111
112 snprintf(peer->name, sizeof(peer->name), "%s", name);
113
114 if (!(h = gethostbyname(name)) || h->h_addrtype != AF_INET)
115 {
116 LOG(0, 0, 0, "Can't get address for BGP peer %s (%s)\n",
117 name, h ? "no address" : hstrerror(h_errno));
118
119 return 0;
120 }
121
122 memcpy(&peer->addr, h->h_addr, sizeof(peer->addr));
123 peer->as = as > 0 ? as : our_as;
124 ibgp = peer->as == our_as;
125
126 /* set initial timer values */
127 peer->init_keepalive = keepalive == -1 ? BGP_KEEPALIVE_TIME : keepalive;
128 peer->init_hold = hold == -1 ? BGP_HOLD_TIME : hold;
129
130 if (peer->init_hold < 3)
131 peer->init_hold = 3;
132
133 if (peer->init_keepalive * 3 > peer->init_hold)
134 peer->init_keepalive = peer->init_hold / 3;
135
136 /* clear buffers, go to Idle state */
137 peer->next_state = Idle;
138 bgp_clear(peer);
139
140 /* set initial routing state */
141 peer->routing = enable;
142
143 /* all our routes use the same attributes, so prepare it in advance */
144 if (peer->path_attrs)
145 free(peer->path_attrs);
146
147 peer->path_attr_len = 0;
148
149 /* ORIGIN */
150 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
151 a.code = BGP_PATH_ATTR_CODE_ORIGIN;
152 a.data.s.len = 1;
153 a.data.s.value[0] = BGP_PATH_ATTR_CODE_ORIGIN_IGP;
154
155 #define ADD_ATTRIBUTE() do { \
156 i = BGP_PATH_ATTR_SIZE(a); \
157 memcpy(p, &a, i); \
158 p += i; \
159 peer->path_attr_len += i; } while (0)
160
161 ADD_ATTRIBUTE();
162
163 /* AS_PATH */
164 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
165 a.code = BGP_PATH_ATTR_CODE_AS_PATH;
166 if (ibgp)
167 {
168 /* empty path */
169 a.data.s.len = 0;
170 }
171 else
172 {
173 /* just our AS */
174 struct {
175 uint8_t type;
176 uint8_t len;
177 uint16_t value;
178 } as_path = {
179 BGP_PATH_ATTR_CODE_AS_PATH_AS_SEQUENCE,
180 1,
181 htons(our_as),
182 };
183
184 a.data.s.len = sizeof(as_path);
185 memcpy(&a.data.s.value, &as_path, sizeof(as_path));
186 }
187
188 ADD_ATTRIBUTE();
189
190 /* NEXT_HOP */
191 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
192 a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
193 ip = my_address; /* we're it */
194 a.data.s.len = sizeof(ip);
195 memcpy(a.data.s.value, &ip, sizeof(ip));
196
197 ADD_ATTRIBUTE();
198
199 /* MULTI_EXIT_DISC */
200 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
201 a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC;
202 a.data.s.len = sizeof(metric);
203 memcpy(a.data.s.value, &metric, sizeof(metric));
204
205 ADD_ATTRIBUTE();
206
207 if (ibgp)
208 {
209 uint32_t local_pref = htonl(BGP_LOCAL_PREF);
210
211 /* LOCAL_PREF */
212 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
213 a.code = BGP_PATH_ATTR_CODE_LOCAL_PREF;
214 a.data.s.len = sizeof(local_pref);
215 memcpy(a.data.s.value, &local_pref, sizeof(local_pref));
216
217 ADD_ATTRIBUTE();
218 }
219
220 /* COMMUNITIES */
221 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_TRANS;
222 a.code = BGP_PATH_ATTR_CODE_COMMUNITIES;
223 a.data.s.len = sizeof(no_export);
224 memcpy(a.data.s.value, &no_export, sizeof(no_export));
225
226 ADD_ATTRIBUTE();
227
228 if (!(peer->path_attrs = malloc(peer->path_attr_len)))
229 {
230 LOG(0, 0, 0, "Can't allocate path_attrs for %s (%s)\n",
231 name, strerror(errno));
232
233 return 0;
234 }
235
236 memcpy(peer->path_attrs, path_attrs, peer->path_attr_len);
237
238 LOG(4, 0, 0, "Initiating BGP connection to %s (routing %s)\n",
239 name, enable ? "enabled" : "suspended");
240
241 /* we have at least one peer configured */
242 bgp_configured = 1;
243
244 /* connect */
245 return bgp_connect(peer);
246 }
247
248 /* clear counters, timers, routes and buffers; close socket; move to
249 next_state, which may be Disabled or Idle */
250 static void bgp_clear(struct bgp_peer *peer)
251 {
252 if (peer->sock != -1)
253 {
254 close(peer->sock);
255 peer->sock = -1;
256 }
257
258 peer->keepalive_time = 0;
259 peer->expire_time = 0;
260
261 peer->keepalive = peer->init_keepalive;
262 peer->hold = peer->init_hold;
263
264 bgp_free_routes(peer->routes);
265 peer->routes = 0;
266
267 peer->outbuf->packet.header.len = 0;
268 peer->outbuf->done = 0;
269 peer->inbuf->packet.header.len = 0;
270 peer->inbuf->done = 0;
271
272 peer->cli_flag = 0;
273
274 if (peer->state != peer->next_state)
275 {
276 peer->state = peer->next_state;
277 peer->state_time = time_now;
278
279 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
280 bgp_state_str(peer->next_state));
281 }
282 }
283
284 /* initiate a clean shutdown */
285 void bgp_stop(struct bgp_peer *peer)
286 {
287 LOG(4, 0, 0, "Terminating BGP connection to %s\n", peer->name);
288 bgp_send_notification(peer, BGP_ERR_CEASE, 0);
289 }
290
291 /* drop connection (if any) and set state to Disabled */
292 void bgp_halt(struct bgp_peer *peer)
293 {
294 LOG(4, 0, 0, "Aborting BGP connection to %s\n", peer->name);
295 peer->next_state = Disabled;
296 bgp_clear(peer);
297 }
298
299 /* drop connection (if any) and set to Idle for connection retry */
300 int bgp_restart(struct bgp_peer *peer)
301 {
302 peer->next_state = Idle;
303 bgp_clear(peer);
304
305 /* restart now */
306 peer->retry_time = time_now;
307 peer->retry_count = 0;
308
309 /* connect */
310 return bgp_connect(peer);
311 }
312
313 static void bgp_set_retry(struct bgp_peer *peer)
314 {
315 if (peer->retry_count++ < BGP_MAX_RETRY)
316 {
317 peer->retry_time = time_now + (BGP_RETRY_BACKOFF * peer->retry_count);
318 peer->next_state = Idle;
319 bgp_clear(peer);
320 }
321 else
322 bgp_halt(peer); /* give up */
323 }
324
325 /* convert ip/mask to CIDR notation */
326 static void bgp_cidr(in_addr_t ip, in_addr_t mask, struct bgp_ip_prefix *pfx)
327 {
328 int i;
329 uint32_t b;
330
331 /* convert to prefix notation */
332 pfx->len = 32;
333 pfx->prefix = ip;
334
335 if (!mask) /* bogus */
336 mask = 0xffffffff;
337
338 for (i = 0; i < 32 && ((b = ntohl(1 << i)), !(mask & b)); i++)
339 {
340 pfx->len--;
341 pfx->prefix &= ~b;
342 }
343 }
344
345 /* insert route into list; sorted */
346 static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
347 struct bgp_route_list *new)
348 {
349 struct bgp_route_list *p = head;
350 struct bgp_route_list *e = 0;
351
352 while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0)
353 {
354 e = p;
355 p = p->next;
356 }
357
358 if (e)
359 {
360 new->next = e->next;
361 e->next = new;
362 }
363 else
364 {
365 new->next = head;
366 head = new;
367 }
368
369 return head;
370 }
371
372 /* add route to list for peers */
373 /*
374 * Note: this doesn't do route aggregation, nor drop routes if a less
375 * specific match already exists (partly because I'm lazy, but also so
376 * that if that route is later deleted we don't have to be concerned
377 * about adding back the more specific one).
378 */
379 int bgp_add_route(in_addr_t ip, in_addr_t mask)
380 {
381 struct bgp_route_list *r = bgp_routes;
382 struct bgp_route_list add;
383 int i;
384
385 bgp_cidr(ip, mask, &add.dest);
386 add.next = 0;
387
388 /* check for duplicate */
389 while (r)
390 {
391 i = memcmp(&r->dest, &add.dest, sizeof(r->dest));
392 if (!i)
393 return 1; /* already covered */
394
395 if (i > 0)
396 break;
397
398 r = r->next;
399 }
400
401 /* insert into route list; sorted */
402 if (!(r = malloc(sizeof(*r))))
403 {
404 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
405 fmtaddr(add.dest.prefix, 0), add.dest.len, strerror(errno));
406
407 return 0;
408 }
409
410 memcpy(r, &add, sizeof(*r));
411 bgp_routes = bgp_insert_route(bgp_routes, r);
412
413 /* flag established peers for update */
414 for (i = 0; i < BGP_NUM_PEERS; i++)
415 if (bgp_peers[i].state == Established)
416 bgp_peers[i].update_routes = 1;
417
418 LOG(4, 0, 0, "Registered BGP route %s/%d\n",
419 fmtaddr(add.dest.prefix, 0), add.dest.len);
420
421 return 1;
422 }
423
424 /* remove route from list for peers */
425 int bgp_del_route(in_addr_t ip, in_addr_t mask)
426 {
427 struct bgp_route_list *r = bgp_routes;
428 struct bgp_route_list *e = 0;
429 struct bgp_route_list del;
430 int i;
431
432 bgp_cidr(ip, mask, &del.dest);
433 del.next = 0;
434
435 /* find entry in routes list and remove */
436 while (r)
437 {
438 i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
439 if (!i)
440 {
441 if (e)
442 e->next = r->next;
443 else
444 bgp_routes = r->next;
445
446 free(r);
447 break;
448 }
449
450 e = r;
451
452 if (i > 0)
453 r = 0; /* stop */
454 else
455 r = r->next;
456 }
457
458 /* not found */
459 if (!r)
460 return 1;
461
462 /* flag established peers for update */
463 for (i = 0; i < BGP_NUM_PEERS; i++)
464 if (bgp_peers[i].state == Established)
465 bgp_peers[i].update_routes = 1;
466
467 LOG(4, 0, 0, "Removed BGP route %s/%d\n",
468 fmtaddr(del.dest.prefix, 0), del.dest.len);
469
470 return 1;
471 }
472
473 /* enable or disable routing */
474 void bgp_enable_routing(int enable)
475 {
476 int i;
477
478 for (i = 0; i < BGP_NUM_PEERS; i++)
479 {
480 bgp_peers[i].routing = enable;
481
482 /* flag established peers for update */
483 if (bgp_peers[i].state == Established)
484 bgp_peers[i].update_routes = 1;
485 }
486
487 LOG(4, 0, 0, "%s BGP routing\n", enable ? "Enabled" : "Suspended");
488 }
489
490 /* return a bitmask indicating if the socket should be added to the
491 read set (1) and or write set (2) for select */
492 int bgp_select_state(struct bgp_peer *peer)
493 {
494 int flags = 0;
495
496 if (!bgp_configured)
497 return 0;
498
499 if (peer->state == Disabled || peer->state == Idle)
500 return 0;
501
502 if (peer->inbuf->done < BGP_MAX_PACKET_SIZE)
503 flags |= 1;
504
505 if (peer->state == Connect || /* connection in progress */
506 peer->update_routes || /* routing updates */
507 peer->outbuf->packet.header.len) /* pending output */
508 flags |= 2;
509
510 return flags;
511 }
512
513 /* process bgp peer */
514 int bgp_process(struct bgp_peer *peer, int readable, int writable)
515 {
516 if (!bgp_configured)
517 return 0;
518
519 if (*peer->name && peer->cli_flag == BGP_CLI_RESTART)
520 return bgp_restart(peer);
521
522 if (peer->state == Disabled)
523 return 1;
524
525 if (peer->cli_flag)
526 {
527 switch (peer->cli_flag)
528 {
529 case BGP_CLI_SUSPEND:
530 if (peer->routing)
531 {
532 peer->routing = 0;
533 if (peer->state == Established)
534 peer->update_routes = 1;
535 }
536
537 break;
538
539 case BGP_CLI_ENABLE:
540 if (!peer->routing)
541 {
542 peer->routing = 1;
543 if (peer->state == Established)
544 peer->update_routes = 1;
545 }
546
547 break;
548 }
549
550 peer->cli_flag = 0;
551 }
552
553 /* handle empty/fill of buffers */
554 if (writable)
555 {
556 int r = 1;
557 if (peer->state == Connect)
558 r = bgp_handle_connect(peer);
559 else if (peer->outbuf->packet.header.len)
560 r = bgp_write(peer);
561
562 if (!r)
563 return 0;
564 }
565
566 if (readable)
567 {
568 if (!bgp_read(peer))
569 return 0;
570 }
571
572 /* process input buffer contents */
573 while (peer->inbuf->done >= sizeof(peer->inbuf->packet.header)
574 && !peer->outbuf->packet.header.len) /* may need to queue a response */
575 {
576 if (bgp_handle_input(peer) < 0)
577 return 0;
578 }
579
580 /* process pending updates */
581 if (peer->update_routes
582 && !peer->outbuf->packet.header.len) /* ditto */
583 {
584 if (!bgp_send_update(peer))
585 return 0;
586 }
587
588 /* process timers */
589 if (peer->state == Established)
590 {
591 if (time_now > peer->expire_time)
592 {
593 LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
594 peer->name, peer->hold);
595
596 bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
597 return 0;
598 }
599
600 if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
601 bgp_send_keepalive(peer);
602 }
603 else if (peer->state == Idle)
604 {
605 if (time_now > peer->retry_time)
606 return bgp_connect(peer);
607 }
608 else if (time_now > peer->state_time + BGP_STATE_TIME)
609 {
610 LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
611 bgp_state_str(peer->state), peer->name);
612
613 return bgp_restart(peer);
614 }
615
616 return 1;
617 }
618
619 static void bgp_free_routes(struct bgp_route_list *routes)
620 {
621 struct bgp_route_list *tmp;
622
623 while ((tmp = routes))
624 {
625 routes = tmp->next;
626 free(tmp);
627 }
628 }
629
630 char const *bgp_state_str(enum bgp_state state)
631 {
632 switch (state)
633 {
634 case Disabled: return "Disabled";
635 case Idle: return "Idle";
636 case Connect: return "Connect";
637 case Active: return "Active";
638 case OpenSent: return "OpenSent";
639 case OpenConfirm: return "OpenConfirm";
640 case Established: return "Established";
641 }
642
643 return "?";
644 }
645
646 static char const *bgp_msg_type_str(uint8_t type)
647 {
648 switch (type)
649 {
650 case BGP_MSG_OPEN: return "OPEN";
651 case BGP_MSG_UPDATE: return "UPDATE";
652 case BGP_MSG_NOTIFICATION: return "NOTIFICATION";
653 case BGP_MSG_KEEPALIVE: return "KEEPALIVE";
654 }
655
656 return "?";
657 }
658
659 /* attempt to connect to peer */
660 static int bgp_connect(struct bgp_peer *peer)
661 {
662 static int bgp_port = 0;
663 struct sockaddr_in addr;
664
665 if (!bgp_port)
666 {
667 struct servent *serv;
668 if (!(serv = getservbyname("bgp", "tcp")))
669 {
670 LOG(0, 0, 0, "Can't get bgp service (%s)\n", strerror(errno));
671 return 0;
672 }
673
674 bgp_port = serv->s_port;
675 }
676
677 if ((peer->sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
678 {
679 LOG(0, 0, 0, "Can't create a socket for BGP peer %s (%s)\n",
680 peer->name, strerror(errno));
681
682 peer->state = peer->next_state = Disabled;
683 return 0;
684 }
685
686 /* set to non-blocking */
687 fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
688
689 /* try connect */
690 memset(&addr, 0, sizeof(addr));
691 addr.sin_family = AF_INET;
692 addr.sin_port = bgp_port;
693 addr.sin_addr.s_addr = peer->addr;
694
695 while (connect(peer->sock, (struct sockaddr *) &addr, sizeof(addr)) == -1)
696 {
697 if (errno == EINTR) /* SIGALARM handler */
698 continue;
699
700 if (errno != EINPROGRESS)
701 {
702 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n",
703 inet_ntoa(addr.sin_addr), strerror(errno));
704
705 bgp_set_retry(peer);
706 return 0;
707 }
708
709 peer->state = Connect;
710 peer->state_time = time_now;
711
712 LOG(4, 0, 0, "BGP peer %s: state Connect\n", peer->name);
713 return 1;
714 }
715
716 peer->state = Active;
717 peer->state_time = time_now;
718 peer->retry_time = peer->retry_count = 0;
719
720 LOG(4, 0, 0, "BGP peer %s: state Active\n", inet_ntoa(addr.sin_addr));
721
722 return bgp_send_open(peer);
723 }
724
725 /* complete partial connection (state = Connect) */
726 static int bgp_handle_connect(struct bgp_peer *peer)
727 {
728 int err = 0;
729 int len = sizeof(int);
730 getsockopt(peer->sock, SOL_SOCKET, SO_ERROR, &err, &len);
731 if (err)
732 {
733 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n", peer->name,
734 strerror(err));
735
736 bgp_set_retry(peer);
737 return 0;
738 }
739
740 peer->state = Active;
741 peer->state_time = time_now;
742
743 LOG(4, 0, 0, "BGP peer %s: state Active\n", peer->name);
744
745 return bgp_send_open(peer);
746 }
747
748 /* initiate a write */
749 static int bgp_write(struct bgp_peer *peer)
750 {
751 int len = htons(peer->outbuf->packet.header.len);
752 int r;
753
754 while ((r = write(peer->sock, &peer->outbuf->packet + peer->outbuf->done,
755 len - peer->outbuf->done)) == -1)
756 {
757 if (errno == EINTR)
758 continue;
759
760 if (errno == EAGAIN)
761 return 1;
762
763 if (errno == EPIPE)
764 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
765 else
766 LOG(1, 0, 0, "Can't write to BGP peer %s (%s)\n", peer->name,
767 strerror(errno));
768
769 bgp_set_retry(peer);
770 return 0;
771 }
772
773 if (r < len)
774 {
775 peer->outbuf->done += r;
776 return 1;
777 }
778
779 LOG(4, 0, 0, "Sent %s to BGP peer %s\n",
780 bgp_msg_type_str(peer->outbuf->packet.header.type), peer->name);
781
782 peer->outbuf->packet.header.len = 0;
783 peer->outbuf->done = 0;
784
785 if (peer->state == Established)
786 peer->keepalive_time = time_now + peer->keepalive;
787
788 if (peer->state != peer->next_state)
789 {
790 if (peer->next_state == Disabled || peer->next_state == Idle)
791 {
792 bgp_clear(peer);
793 return 0;
794 }
795
796 peer->state = peer->next_state;
797 peer->state_time = time_now;
798
799 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
800 bgp_state_str(peer->state));
801 }
802
803 return 1;
804 }
805
806 /* initiate a read */
807 static int bgp_read(struct bgp_peer *peer)
808 {
809 int r;
810
811 while ((r = read(peer->sock, &peer->inbuf->packet + peer->inbuf->done,
812 BGP_MAX_PACKET_SIZE - peer->inbuf->done)) < 1)
813 {
814 if (!r)
815 {
816 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
817 }
818 else
819 {
820 if (errno == EINTR)
821 continue;
822
823 if (errno == EAGAIN)
824 return 1;
825
826 LOG(1, 0, 0, "Can't read from BGP peer %s (%s)\n", peer->name,
827 strerror(errno));
828 }
829
830 bgp_set_retry(peer);
831 return 0;
832 }
833
834 peer->inbuf->done += r;
835 return 1;
836 }
837
838 /* process buffered packets */
839 static int bgp_handle_input(struct bgp_peer *peer)
840 {
841 struct bgp_packet *p = &peer->inbuf->packet;
842 int len = ntohs(p->header.len);
843
844 if (len > BGP_MAX_PACKET_SIZE)
845 {
846 LOG(1, 0, 0, "Bad header length from BGP %s\n", peer->name);
847 bgp_send_notification(peer, BGP_ERR_HEADER, BGP_ERR_HDR_BAD_LEN);
848 return 0;
849 }
850
851 if (peer->inbuf->done < len)
852 return 0;
853
854 LOG(4, 0, 0, "Received %s from BGP peer %s\n",
855 bgp_msg_type_str(p->header.type), peer->name);
856
857 switch (p->header.type)
858 {
859 case BGP_MSG_OPEN:
860 {
861 struct bgp_data_open data;
862 int hold;
863 int i;
864
865 for (i = 0; i < sizeof(p->header.marker); i++)
866 {
867 if ((unsigned char) p->header.marker[i] != 0xff)
868 {
869 LOG(1, 0, 0, "Invalid marker from BGP peer %s\n",
870 peer->name);
871
872 bgp_send_notification(peer, BGP_ERR_HEADER,
873 BGP_ERR_HDR_NOT_SYNC);
874
875 return 0;
876 }
877 }
878
879 if (peer->state != OpenSent)
880 {
881 LOG(1, 0, 0, "OPEN from BGP peer %s in %s state\n",
882 peer->name, bgp_state_str(peer->state));
883
884 bgp_send_notification(peer, BGP_ERR_FSM, 0);
885 return 0;
886 }
887
888 memcpy(&data, p->data, len - sizeof(p->header));
889
890 if (data.version != BGP_VERSION)
891 {
892 LOG(1, 0, 0, "Bad version (%d) sent by BGP peer %s\n",
893 (int) data.version, peer->name);
894
895 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_VERSION);
896 return 0;
897 }
898
899 if (ntohs(data.as) != peer->as)
900 {
901 LOG(1, 0, 0, "Bad AS sent by BGP peer %s (got %d, "
902 "expected %d)\n", peer->name, (int) htons(data.as),
903 (int) peer->as);
904
905 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_BAD_AS);
906 return 0;
907 }
908
909 if ((hold = ntohs(data.hold_time)) < 3)
910 {
911 LOG(1, 0, 0, "Bad hold time (%d) from BGP peer %s\n",
912 hold, peer->name);
913
914 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_HOLD_TIME);
915 return 0;
916 }
917
918 /* pick lowest hold time */
919 if (hold < peer->hold)
920 peer->hold = hold;
921
922 /* adjust our keepalive based on negotiated hold value */
923 if (peer->keepalive * 3 > peer->hold)
924 peer->keepalive = peer->hold / 3;
925
926 /* next transition requires an exchange of keepalives */
927 bgp_send_keepalive(peer);
928
929 /* FIXME: may need to check for optional params */
930 }
931
932 break;
933
934 case BGP_MSG_KEEPALIVE:
935 if (peer->state == OpenConfirm)
936 {
937 peer->state = peer->next_state = Established;
938 peer->state_time = time_now;
939 peer->keepalive_time = time_now + peer->keepalive;
940 peer->update_routes = 1;
941 peer->retry_count = 0;
942 peer->retry_time = 0;
943
944 LOG(4, 0, 0, "BGP peer %s: state Established\n", peer->name);
945 }
946
947 break;
948
949 case BGP_MSG_NOTIFICATION:
950 if (len > sizeof(p->header))
951 {
952 struct bgp_data_notification *notification =
953 (struct bgp_data_notification *) p->data;
954
955 if (notification->error_code == BGP_ERR_CEASE)
956 {
957 LOG(4, 0, 0, "BGP peer %s sent CEASE\n", peer->name);
958 bgp_halt(peer);
959 return 0;
960 }
961
962 /* FIXME: should handle more notifications */
963 LOG(4, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n",
964 peer->name, (int) notification->error_code);
965 }
966
967 break;
968 }
969
970 /* reset timer */
971 peer->expire_time = time_now + peer->hold;
972
973 /* see if there's another message in the same packet/buffer */
974 if (peer->inbuf->done > len)
975 {
976 peer->inbuf->done -= len;
977 memmove(p, (char *) p + len, peer->inbuf->done);
978 }
979 else
980 {
981 peer->inbuf->packet.header.len = 0;
982 peer->inbuf->done = 0;
983 }
984
985 return peer->inbuf->done;
986 }
987
988 /* send/buffer OPEN message */
989 static int bgp_send_open(struct bgp_peer *peer)
990 {
991 struct bgp_data_open data;
992 uint16_t len = sizeof(peer->outbuf->packet.header);
993
994 memset(peer->outbuf->packet.header.marker, 0xff,
995 sizeof(peer->outbuf->packet.header.marker));
996
997 peer->outbuf->packet.header.type = BGP_MSG_OPEN;
998
999 data.version = BGP_VERSION;
1000 data.as = htons(our_as);
1001 data.hold_time = htons(peer->hold);
1002 data.identifier = my_address;
1003 data.opt_len = 0;
1004
1005 memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE);
1006 len += BGP_DATA_OPEN_SIZE;
1007
1008 peer->outbuf->packet.header.len = htons(len);
1009 peer->outbuf->done = 0;
1010 peer->next_state = OpenSent;
1011
1012 return bgp_write(peer);
1013 }
1014
1015 /* send/buffer KEEPALIVE message */
1016 static int bgp_send_keepalive(struct bgp_peer *peer)
1017 {
1018 memset(peer->outbuf->packet.header.marker, 0xff,
1019 sizeof(peer->outbuf->packet.header.marker));
1020
1021 peer->outbuf->packet.header.type = BGP_MSG_KEEPALIVE;
1022 peer->outbuf->packet.header.len =
1023 htons(sizeof(peer->outbuf->packet.header));
1024
1025 peer->outbuf->done = 0;
1026 peer->next_state = (peer->state == OpenSent) ? OpenConfirm : peer->state;
1027
1028 return bgp_write(peer);
1029 }
1030
1031 /* send/buffer UPDATE message */
1032 static int bgp_send_update(struct bgp_peer *peer)
1033 {
1034 uint16_t unf_len = 0;
1035 uint16_t attr_len;
1036 uint16_t len = sizeof(peer->outbuf->packet.header);
1037 struct bgp_route_list *have = peer->routes;
1038 struct bgp_route_list *want = peer->routing ? bgp_routes : 0;
1039 struct bgp_route_list *e = 0;
1040 struct bgp_route_list *add = 0;
1041 int s;
1042
1043 char *data = (char *) &peer->outbuf->packet.data;
1044
1045 /* need leave room for attr_len, bgp_path_attrs and one prefix */
1046 char *max = (char *) &peer->outbuf->packet.data
1047 + sizeof(peer->outbuf->packet.data)
1048 - sizeof(attr_len) - peer->path_attr_len - sizeof(struct bgp_ip_prefix);
1049
1050 /* skip over unf_len */
1051 data += sizeof(unf_len);
1052 len += sizeof(unf_len);
1053
1054 memset(peer->outbuf->packet.header.marker, 0xff,
1055 sizeof(peer->outbuf->packet.header.marker));
1056
1057 peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
1058
1059 peer->update_routes = 0; /* tentatively clear */
1060
1061 /* find differences */
1062 while ((have || want) && data < (max - sizeof(struct bgp_ip_prefix)))
1063 {
1064 if (have)
1065 s = want
1066 ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
1067 : -1;
1068 else
1069 s = 1;
1070
1071 if (s < 0) /* found one to delete */
1072 {
1073 struct bgp_route_list *tmp = have;
1074 have = have->next;
1075
1076 s = BGP_IP_PREFIX_SIZE(tmp->dest);
1077 memcpy(data, &tmp->dest, s);
1078 data += s;
1079 unf_len += s;
1080 len += s;
1081
1082 LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
1083 fmtaddr(tmp->dest.prefix, 0), tmp->dest.len, peer->name);
1084
1085 free(tmp);
1086
1087 if (e)
1088 e->next = have;
1089 else
1090 peer->routes = have;
1091 }
1092 else
1093 {
1094 if (!s) /* same */
1095 {
1096 e = have; /* stash the last found to relink above */
1097 have = have->next;
1098 want = want->next;
1099 }
1100 else if (s > 0) /* addition reqd. */
1101 {
1102 if (add)
1103 {
1104 peer->update_routes = 1; /* only one add per packet */
1105 if (!have)
1106 break;
1107 }
1108 else
1109 add = want;
1110
1111 if (want)
1112 want = want->next;
1113 }
1114 }
1115 }
1116
1117 if (have || want)
1118 peer->update_routes = 1; /* more to do */
1119
1120 /* anything changed? */
1121 if (!(unf_len || add))
1122 return 1;
1123
1124 /* go back and insert unf_len */
1125 unf_len = htons(unf_len);
1126 memcpy(&peer->outbuf->packet.data, &unf_len, sizeof(unf_len));
1127
1128 if (add)
1129 {
1130 if (!(e = malloc(sizeof(*e))))
1131 {
1132 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
1133 fmtaddr(add->dest.prefix, 0), add->dest.len, strerror(errno));
1134
1135 return 0;
1136 }
1137
1138 memcpy(e, add, sizeof(*e));
1139 e->next = 0;
1140 peer->routes = bgp_insert_route(peer->routes, e);
1141
1142 attr_len = htons(peer->path_attr_len);
1143 memcpy(data, &attr_len, sizeof(attr_len));
1144 data += sizeof(attr_len);
1145 len += sizeof(attr_len);
1146
1147 memcpy(data, peer->path_attrs, peer->path_attr_len);
1148 data += peer->path_attr_len;
1149 len += peer->path_attr_len;
1150
1151 s = BGP_IP_PREFIX_SIZE(add->dest);
1152 memcpy(data, &add->dest, s);
1153 data += s;
1154 len += s;
1155
1156 LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
1157 fmtaddr(add->dest.prefix, 0), add->dest.len, peer->name);
1158 }
1159 else
1160 {
1161 attr_len = 0;
1162 memcpy(data, &attr_len, sizeof(attr_len));
1163 data += sizeof(attr_len);
1164 len += sizeof(attr_len);
1165 }
1166
1167 peer->outbuf->packet.header.len = htons(len);
1168 peer->outbuf->done = 0;
1169
1170 return bgp_write(peer);
1171 }
1172
1173 /* send/buffer NOTIFICATION message */
1174 static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
1175 uint8_t subcode)
1176 {
1177 struct bgp_data_notification data;
1178 uint16_t len = 0;
1179
1180 data.error_code = code;
1181 len += sizeof(data.error_code);
1182
1183 data.error_subcode = subcode;
1184 len += sizeof(data.error_code);
1185
1186 memset(peer->outbuf->packet.header.marker, 0xff,
1187 sizeof(peer->outbuf->packet.header.marker));
1188
1189 peer->outbuf->packet.header.type = BGP_MSG_NOTIFICATION;
1190 peer->outbuf->packet.header.len =
1191 htons(sizeof(peer->outbuf->packet.header) + len);
1192
1193 memcpy(peer->outbuf->packet.data, &data, len);
1194
1195 peer->outbuf->done = 0;
1196 peer->next_state = code == BGP_ERR_CEASE ? Disabled : Idle;
1197
1198 /* we're dying; ignore any pending input */
1199 peer->inbuf->packet.header.len = 0;
1200 peer->inbuf->done = 0;
1201
1202 return bgp_write(peer);
1203 }