f1d0ec28634b134dd09fed93454e7638b0e5dea3
[l2tpns.git] / bgp.c
1 /*
2 * BGPv4
3 * Used to advertise routes for upstream (l2tp port, rather than gratuitous
4 * arp) and downstream--allowing routers to load-balance both.
5 *
6 * Implementation limitations:
7 * - We never listen for incoming connections (session always initiated by us).
8 * - Any routes advertised by the peer are accepted, but ignored.
9 * - No password support; neither RFC1771 (which no-one seems to do anyway)
10 * nor RFC2385 (which requires a kernel patch on 2.4 kernels).
11 */
12
/* revision identifier string for this source file */
const char *cvs_id_bgp = "$Id: bgp.c,v 1.12 2005/09/02 23:39:36 bodea Exp $";
14
15 #include <stdlib.h>
16 #include <unistd.h>
17 #include <string.h>
18 #include <time.h>
19 #include <errno.h>
20 #include <sys/socket.h>
21 #include <netinet/in.h>
22 #include <arpa/inet.h>
23 #include <netdb.h>
24 #include <fcntl.h>
25
26 #include "l2tpns.h"
27 #include "bgp.h"
28 #include "util.h"
29
/* peer state handling */
static void bgp_clear(struct bgp_peer *peer);
static void bgp_set_retry(struct bgp_peer *peer);

/* route list helpers */
static void bgp_cidr(in_addr_t ip, int prefixlen, struct bgp_ip_prefix *pfx);
static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
    struct bgp_route_list *new);
static void bgp_free_routes(struct bgp_route_list *routes);

/* logging helper */
static char const *bgp_msg_type_str(uint8_t type);

/* connection and socket I/O */
static int bgp_connect(struct bgp_peer *peer);
static int bgp_handle_connect(struct bgp_peer *peer);
static int bgp_write(struct bgp_peer *peer);
static int bgp_read(struct bgp_peer *peer);
static int bgp_handle_input(struct bgp_peer *peer);

/* outgoing messages */
static int bgp_send_open(struct bgp_peer *peer);
static int bgp_send_keepalive(struct bgp_peer *peer);
static int bgp_send_update(struct bgp_peer *peer);
static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
    uint8_t subcode);
48
/* our AS number; assigned by bgp_setup(), zero means BGP is not usable */
static uint16_t our_as;

/* sorted list of routes registered through bgp_add_route() */
static struct bgp_route_list *bgp_routes = NULL;

/* non-zero once bgp_start() has configured at least one peer */
int bgp_configured = 0;

/*
 * Peer table, indexed 0..BGP_NUM_PEERS-1.  Nothing in this file
 * allocates it -- presumably set up by the main program before
 * bgp_setup() runs; confirm against the caller.
 */
struct bgp_peer *bgp_peers = NULL;
54
55 /* prepare peer structure, globals */
56 int bgp_setup(int as)
57 {
58 int i;
59 struct bgp_peer *peer;
60
61 for (i = 0; i < BGP_NUM_PEERS; i++)
62 {
63 peer = &bgp_peers[i];
64 memset(peer, 0, sizeof(*peer));
65
66 peer->addr = INADDR_NONE;
67 peer->sock = -1;
68 peer->state = peer->next_state = Disabled;
69
70 if (!((peer->outbuf = malloc(sizeof(*peer->outbuf)))
71 && (peer->inbuf = malloc(sizeof(*peer->inbuf)))))
72 {
73 LOG(0, 0, 0, "Can't allocate buffers for bgp peer (%s)\n",
74 strerror(errno));
75
76 return 0;
77 }
78
79 peer->edata.type = FD_TYPE_BGP;
80 peer->edata.index = i;
81 peer->events = 0;
82 }
83
84 if (as < 1)
85 as = 0;
86
87 if ((our_as = as))
88 return 0;
89
90 bgp_routes = 0;
91 bgp_configured = 0; /* set by bgp_start */
92
93 return 1;
94 }
95
96 /* start connection with a peer */
97 int bgp_start(struct bgp_peer *peer, char *name, int as, int keepalive,
98 int hold, int enable)
99 {
100 struct hostent *h;
101 int ibgp;
102 int i;
103 struct bgp_path_attr a;
104 char path_attrs[64];
105 char *p = path_attrs;
106 in_addr_t ip;
107 uint32_t metric = htonl(BGP_METRIC);
108 uint32_t no_export = htonl(BGP_COMMUNITY_NO_EXPORT);
109
110 if (!our_as)
111 return 0;
112
113 if (peer->state != Disabled)
114 bgp_halt(peer);
115
116 snprintf(peer->name, sizeof(peer->name), "%s", name);
117
118 if (!(h = gethostbyname(name)) || h->h_addrtype != AF_INET)
119 {
120 LOG(0, 0, 0, "Can't get address for BGP peer %s (%s)\n",
121 name, h ? "no address" : hstrerror(h_errno));
122
123 return 0;
124 }
125
126 memcpy(&peer->addr, h->h_addr, sizeof(peer->addr));
127 peer->as = as > 0 ? as : our_as;
128 ibgp = peer->as == our_as;
129
130 /* set initial timer values */
131 peer->init_keepalive = keepalive == -1 ? BGP_KEEPALIVE_TIME : keepalive;
132 peer->init_hold = hold == -1 ? BGP_HOLD_TIME : hold;
133
134 if (peer->init_hold < 3)
135 peer->init_hold = 3;
136
137 if (peer->init_keepalive * 3 > peer->init_hold)
138 peer->init_keepalive = peer->init_hold / 3;
139
140 /* clear buffers, go to Idle state */
141 peer->next_state = Idle;
142 bgp_clear(peer);
143
144 /* set initial routing state */
145 peer->routing = enable;
146
147 /* all our routes use the same attributes, so prepare it in advance */
148 if (peer->path_attrs)
149 free(peer->path_attrs);
150
151 peer->path_attr_len = 0;
152
153 /* ORIGIN */
154 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
155 a.code = BGP_PATH_ATTR_CODE_ORIGIN;
156 a.data.s.len = 1;
157 a.data.s.value[0] = BGP_PATH_ATTR_CODE_ORIGIN_IGP;
158
159 #define ADD_ATTRIBUTE() do { \
160 i = BGP_PATH_ATTR_SIZE(a); \
161 memcpy(p, &a, i); \
162 p += i; \
163 peer->path_attr_len += i; } while (0)
164
165 ADD_ATTRIBUTE();
166
167 /* AS_PATH */
168 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
169 a.code = BGP_PATH_ATTR_CODE_AS_PATH;
170 if (ibgp)
171 {
172 /* empty path */
173 a.data.s.len = 0;
174 }
175 else
176 {
177 /* just our AS */
178 struct {
179 uint8_t type;
180 uint8_t len;
181 uint16_t value;
182 } as_path = {
183 BGP_PATH_ATTR_CODE_AS_PATH_AS_SEQUENCE,
184 1,
185 htons(our_as),
186 };
187
188 a.data.s.len = sizeof(as_path);
189 memcpy(&a.data.s.value, &as_path, sizeof(as_path));
190 }
191
192 ADD_ATTRIBUTE();
193
194 /* NEXT_HOP */
195 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
196 a.code = BGP_PATH_ATTR_CODE_NEXT_HOP;
197 ip = my_address; /* we're it */
198 a.data.s.len = sizeof(ip);
199 memcpy(a.data.s.value, &ip, sizeof(ip));
200
201 ADD_ATTRIBUTE();
202
203 /* MULTI_EXIT_DISC */
204 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL;
205 a.code = BGP_PATH_ATTR_CODE_MULTI_EXIT_DISC;
206 a.data.s.len = sizeof(metric);
207 memcpy(a.data.s.value, &metric, sizeof(metric));
208
209 ADD_ATTRIBUTE();
210
211 if (ibgp)
212 {
213 uint32_t local_pref = htonl(BGP_LOCAL_PREF);
214
215 /* LOCAL_PREF */
216 a.flags = BGP_PATH_ATTR_FLAG_TRANS;
217 a.code = BGP_PATH_ATTR_CODE_LOCAL_PREF;
218 a.data.s.len = sizeof(local_pref);
219 memcpy(a.data.s.value, &local_pref, sizeof(local_pref));
220
221 ADD_ATTRIBUTE();
222 }
223
224 /* COMMUNITIES */
225 a.flags = BGP_PATH_ATTR_FLAG_OPTIONAL | BGP_PATH_ATTR_FLAG_TRANS;
226 a.code = BGP_PATH_ATTR_CODE_COMMUNITIES;
227 a.data.s.len = sizeof(no_export);
228 memcpy(a.data.s.value, &no_export, sizeof(no_export));
229
230 ADD_ATTRIBUTE();
231
232 if (!(peer->path_attrs = malloc(peer->path_attr_len)))
233 {
234 LOG(0, 0, 0, "Can't allocate path_attrs for %s (%s)\n",
235 name, strerror(errno));
236
237 return 0;
238 }
239
240 memcpy(peer->path_attrs, path_attrs, peer->path_attr_len);
241
242 LOG(4, 0, 0, "Initiating BGP connection to %s (routing %s)\n",
243 name, enable ? "enabled" : "suspended");
244
245 /* we have at least one peer configured */
246 bgp_configured = 1;
247
248 /* connect */
249 return bgp_connect(peer);
250 }
251
252 /* clear counters, timers, routes and buffers; close socket; move to
253 next_state, which may be Disabled or Idle */
254 static void bgp_clear(struct bgp_peer *peer)
255 {
256 if (peer->sock != -1)
257 {
258 close(peer->sock);
259 peer->sock = -1;
260 }
261
262 peer->keepalive_time = 0;
263 peer->expire_time = 0;
264
265 peer->keepalive = peer->init_keepalive;
266 peer->hold = peer->init_hold;
267
268 bgp_free_routes(peer->routes);
269 peer->routes = 0;
270
271 peer->outbuf->packet.header.len = 0;
272 peer->outbuf->done = 0;
273 peer->inbuf->packet.header.len = 0;
274 peer->inbuf->done = 0;
275
276 peer->cli_flag = 0;
277 peer->events = 0;
278
279 if (peer->state != peer->next_state)
280 {
281 peer->state = peer->next_state;
282 peer->state_time = time_now;
283
284 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
285 bgp_state_str(peer->next_state));
286 }
287 }
288
289 /* initiate a clean shutdown */
290 void bgp_stop(struct bgp_peer *peer)
291 {
292 LOG(4, 0, 0, "Terminating BGP connection to %s\n", peer->name);
293 bgp_send_notification(peer, BGP_ERR_CEASE, 0);
294 }
295
296 /* drop connection (if any) and set state to Disabled */
297 void bgp_halt(struct bgp_peer *peer)
298 {
299 LOG(4, 0, 0, "Aborting BGP connection to %s\n", peer->name);
300 peer->next_state = Disabled;
301 bgp_clear(peer);
302 }
303
304 /* drop connection (if any) and set to Idle for connection retry */
305 int bgp_restart(struct bgp_peer *peer)
306 {
307 peer->next_state = Idle;
308 bgp_clear(peer);
309
310 /* restart now */
311 peer->retry_time = time_now;
312 peer->retry_count = 0;
313
314 /* connect */
315 return bgp_connect(peer);
316 }
317
318 static void bgp_set_retry(struct bgp_peer *peer)
319 {
320 if (peer->retry_count++ < BGP_MAX_RETRY)
321 {
322 peer->retry_time = time_now + (BGP_RETRY_BACKOFF * peer->retry_count);
323 peer->next_state = Idle;
324 bgp_clear(peer);
325 }
326 else
327 bgp_halt(peer); /* give up */
328 }
329
330 /* insert route into list; sorted */
331 static struct bgp_route_list *bgp_insert_route(struct bgp_route_list *head,
332 struct bgp_route_list *new)
333 {
334 struct bgp_route_list *p = head;
335 struct bgp_route_list *e = 0;
336
337 while (p && memcmp(&p->dest, &new->dest, sizeof(p->dest)) < 0)
338 {
339 e = p;
340 p = p->next;
341 }
342
343 if (e)
344 {
345 new->next = e->next;
346 e->next = new;
347 }
348 else
349 {
350 new->next = head;
351 head = new;
352 }
353
354 return head;
355 }
356
357 /* add route to list for peers */
358 /*
359 * Note: this doesn't do route aggregation, nor drop routes if a less
360 * specific match already exists (partly because I'm lazy, but also so
361 * that if that route is later deleted we don't have to be concerned
362 * about adding back the more specific one).
363 */
364 int bgp_add_route(in_addr_t ip, int prefixlen)
365 {
366 struct bgp_route_list *r = bgp_routes;
367 struct bgp_route_list add;
368 int i;
369
370 add.dest.prefix = ip;
371 add.dest.len = prefixlen;
372 add.next = 0;
373
374 /* check for duplicate */
375 while (r)
376 {
377 i = memcmp(&r->dest, &add.dest, sizeof(r->dest));
378 if (!i)
379 return 1; /* already covered */
380
381 if (i > 0)
382 break;
383
384 r = r->next;
385 }
386
387 /* insert into route list; sorted */
388 if (!(r = malloc(sizeof(*r))))
389 {
390 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
391 fmtaddr(add.dest.prefix, 0), add.dest.len, strerror(errno));
392
393 return 0;
394 }
395
396 memcpy(r, &add, sizeof(*r));
397 bgp_routes = bgp_insert_route(bgp_routes, r);
398
399 /* flag established peers for update */
400 for (i = 0; i < BGP_NUM_PEERS; i++)
401 if (bgp_peers[i].state == Established)
402 bgp_peers[i].update_routes = 1;
403
404 LOG(4, 0, 0, "Registered BGP route %s/%d\n",
405 fmtaddr(add.dest.prefix, 0), add.dest.len);
406
407 return 1;
408 }
409
410 /* remove route from list for peers */
411 int bgp_del_route(in_addr_t ip, int prefixlen)
412 {
413 struct bgp_route_list *r = bgp_routes;
414 struct bgp_route_list *e = 0;
415 struct bgp_route_list del;
416 int i;
417
418 del.dest.prefix = ip;
419 del.dest.len = prefixlen;
420 del.next = 0;
421
422 /* find entry in routes list and remove */
423 while (r)
424 {
425 i = memcmp(&r->dest, &del.dest, sizeof(r->dest));
426 if (!i)
427 {
428 if (e)
429 e->next = r->next;
430 else
431 bgp_routes = r->next;
432
433 free(r);
434 break;
435 }
436
437 e = r;
438
439 if (i > 0)
440 r = 0; /* stop */
441 else
442 r = r->next;
443 }
444
445 /* not found */
446 if (!r)
447 return 1;
448
449 /* flag established peers for update */
450 for (i = 0; i < BGP_NUM_PEERS; i++)
451 if (bgp_peers[i].state == Established)
452 bgp_peers[i].update_routes = 1;
453
454 LOG(4, 0, 0, "Removed BGP route %s/%d\n",
455 fmtaddr(del.dest.prefix, 0), del.dest.len);
456
457 return 1;
458 }
459
460 /* enable or disable routing */
461 void bgp_enable_routing(int enable)
462 {
463 int i;
464
465 for (i = 0; i < BGP_NUM_PEERS; i++)
466 {
467 bgp_peers[i].routing = enable;
468
469 /* flag established peers for update */
470 if (bgp_peers[i].state == Established)
471 bgp_peers[i].update_routes = 1;
472 }
473
474 LOG(4, 0, 0, "%s BGP routing\n", enable ? "Enabled" : "Suspended");
475 }
476
477 #ifdef HAVE_EPOLL
478 # include <sys/epoll.h>
479 #else
480 # include "fake_epoll.h"
481 #endif
482
483 /* return a bitmask of the events required to poll this peer's fd */
484 int bgp_set_poll()
485 {
486 int i;
487
488 if (!bgp_configured)
489 return 0;
490
491 for (i = 0; i < BGP_NUM_PEERS; i++)
492 {
493 struct bgp_peer *peer = &bgp_peers[i];
494 int events = 0;
495
496 if (peer->state == Disabled || peer->state == Idle)
497 continue;
498
499 if (peer->inbuf->done < BGP_MAX_PACKET_SIZE)
500 events |= EPOLLIN;
501
502 if (peer->state == Connect || /* connection in progress */
503 peer->update_routes || /* routing updates */
504 peer->outbuf->packet.header.len) /* pending output */
505 events |= EPOLLOUT;
506
507 if (peer->events != events)
508 {
509 struct epoll_event ev;
510
511 ev.events = peer->events = events;
512 ev.data.ptr = &peer->edata;
513 epoll_ctl(epollfd, EPOLL_CTL_MOD, peer->sock, &ev);
514 }
515 }
516
517 return 1;
518 }
519
520 /* process bgp events/timers */
521 int bgp_process(uint32_t events[])
522 {
523 int i;
524
525 if (!bgp_configured)
526 return 0;
527
528 for (i = 0; i < BGP_NUM_PEERS; i++)
529 {
530 struct bgp_peer *peer = &bgp_peers[i];
531
532 if (*peer->name && peer->cli_flag == BGP_CLI_RESTART)
533 {
534 bgp_restart(peer);
535 continue;
536 }
537
538 if (peer->state == Disabled)
539 continue;
540
541 if (peer->cli_flag)
542 {
543 switch (peer->cli_flag)
544 {
545 case BGP_CLI_SUSPEND:
546 if (peer->routing)
547 {
548 peer->routing = 0;
549 if (peer->state == Established)
550 peer->update_routes = 1;
551 }
552
553 break;
554
555 case BGP_CLI_ENABLE:
556 if (!peer->routing)
557 {
558 peer->routing = 1;
559 if (peer->state == Established)
560 peer->update_routes = 1;
561 }
562
563 break;
564 }
565
566 peer->cli_flag = 0;
567 }
568
569 /* handle empty/fill of buffers */
570 if (events[i] & EPOLLOUT)
571 {
572 int r = 1;
573 if (peer->state == Connect)
574 r = bgp_handle_connect(peer);
575 else if (peer->outbuf->packet.header.len)
576 r = bgp_write(peer);
577
578 if (!r)
579 continue;
580 }
581
582 if (events[i] & (EPOLLIN|EPOLLHUP))
583 {
584 if (!bgp_read(peer))
585 continue;
586 }
587
588 /* process input buffer contents */
589 while (peer->inbuf->done >= sizeof(peer->inbuf->packet.header)
590 && !peer->outbuf->packet.header.len) /* may need to queue a response */
591 {
592 if (bgp_handle_input(peer) < 0)
593 continue;
594 }
595
596 /* process pending updates */
597 if (peer->update_routes
598 && !peer->outbuf->packet.header.len) /* ditto */
599 {
600 if (!bgp_send_update(peer))
601 continue;
602 }
603
604 /* process timers */
605 if (peer->state == Established)
606 {
607 if (time_now > peer->expire_time)
608 {
609 LOG(1, 0, 0, "No message from BGP peer %s in %ds\n",
610 peer->name, peer->hold);
611
612 bgp_send_notification(peer, BGP_ERR_HOLD_TIMER_EXP, 0);
613 continue;
614 }
615
616 if (time_now > peer->keepalive_time && !peer->outbuf->packet.header.len)
617 bgp_send_keepalive(peer);
618 }
619 else if (peer->state == Idle)
620 {
621 if (time_now > peer->retry_time)
622 bgp_connect(peer);
623 }
624 else if (time_now > peer->state_time + BGP_STATE_TIME)
625 {
626 LOG(1, 0, 0, "%s timer expired for BGP peer %s\n",
627 bgp_state_str(peer->state), peer->name);
628
629 bgp_restart(peer);
630 }
631 }
632
633 return 1;
634 }
635
636 static void bgp_free_routes(struct bgp_route_list *routes)
637 {
638 struct bgp_route_list *tmp;
639
640 while ((tmp = routes))
641 {
642 routes = tmp->next;
643 free(tmp);
644 }
645 }
646
647 char const *bgp_state_str(enum bgp_state state)
648 {
649 switch (state)
650 {
651 case Disabled: return "Disabled";
652 case Idle: return "Idle";
653 case Connect: return "Connect";
654 case Active: return "Active";
655 case OpenSent: return "OpenSent";
656 case OpenConfirm: return "OpenConfirm";
657 case Established: return "Established";
658 }
659
660 return "?";
661 }
662
663 static char const *bgp_msg_type_str(uint8_t type)
664 {
665 switch (type)
666 {
667 case BGP_MSG_OPEN: return "OPEN";
668 case BGP_MSG_UPDATE: return "UPDATE";
669 case BGP_MSG_NOTIFICATION: return "NOTIFICATION";
670 case BGP_MSG_KEEPALIVE: return "KEEPALIVE";
671 }
672
673 return "?";
674 }
675
676 /* attempt to connect to peer */
677 static int bgp_connect(struct bgp_peer *peer)
678 {
679 static int bgp_port = 0;
680 struct sockaddr_in addr;
681 struct epoll_event ev;
682
683 if (!bgp_port)
684 {
685 struct servent *serv;
686 if (!(serv = getservbyname("bgp", "tcp")))
687 {
688 LOG(0, 0, 0, "Can't get bgp service (%s)\n", strerror(errno));
689 return 0;
690 }
691
692 bgp_port = serv->s_port;
693 }
694
695 if ((peer->sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
696 {
697 LOG(0, 0, 0, "Can't create a socket for BGP peer %s (%s)\n",
698 peer->name, strerror(errno));
699
700 peer->state = peer->next_state = Disabled;
701 return 0;
702 }
703
704 /* add to poll set */
705 ev.events = peer->events = EPOLLOUT;
706 ev.data.ptr = &peer->edata;
707 epoll_ctl(epollfd, EPOLL_CTL_ADD, peer->sock, &ev);
708
709 /* set to non-blocking */
710 fcntl(peer->sock, F_SETFL, fcntl(peer->sock, F_GETFL, 0) | O_NONBLOCK);
711
712 /* try connect */
713 memset(&addr, 0, sizeof(addr));
714 addr.sin_family = AF_INET;
715 addr.sin_port = bgp_port;
716 addr.sin_addr.s_addr = peer->addr;
717
718 while (connect(peer->sock, (struct sockaddr *) &addr, sizeof(addr)) == -1)
719 {
720 if (errno == EINTR) /* SIGALARM handler */
721 continue;
722
723 if (errno != EINPROGRESS)
724 {
725 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n",
726 inet_ntoa(addr.sin_addr), strerror(errno));
727
728 bgp_set_retry(peer);
729 return 0;
730 }
731
732 peer->state = Connect;
733 peer->state_time = time_now;
734
735 LOG(4, 0, 0, "BGP peer %s: state Connect\n", peer->name);
736 return 1;
737 }
738
739 peer->state = Active;
740 peer->state_time = time_now;
741 peer->retry_time = peer->retry_count = 0;
742
743 LOG(4, 0, 0, "BGP peer %s: state Active\n", inet_ntoa(addr.sin_addr));
744
745 return bgp_send_open(peer);
746 }
747
748 /* complete partial connection (state = Connect) */
749 static int bgp_handle_connect(struct bgp_peer *peer)
750 {
751 int err = 0;
752 socklen_t len = sizeof(int);
753 getsockopt(peer->sock, SOL_SOCKET, SO_ERROR, &err, &len);
754 if (err)
755 {
756 LOG(1, 0, 0, "Can't connect to BGP peer %s (%s)\n", peer->name,
757 strerror(err));
758
759 bgp_set_retry(peer);
760 return 0;
761 }
762
763 peer->state = Active;
764 peer->state_time = time_now;
765
766 LOG(4, 0, 0, "BGP peer %s: state Active\n", peer->name);
767
768 return bgp_send_open(peer);
769 }
770
771 /* initiate a write */
772 static int bgp_write(struct bgp_peer *peer)
773 {
774 int len = htons(peer->outbuf->packet.header.len);
775 int r;
776
777 while ((r = write(peer->sock, &peer->outbuf->packet + peer->outbuf->done,
778 len - peer->outbuf->done)) == -1)
779 {
780 if (errno == EINTR)
781 continue;
782
783 if (errno == EAGAIN)
784 return 1;
785
786 if (errno == EPIPE)
787 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
788 else
789 LOG(1, 0, 0, "Can't write to BGP peer %s (%s)\n", peer->name,
790 strerror(errno));
791
792 bgp_set_retry(peer);
793 return 0;
794 }
795
796 if (r < len)
797 {
798 peer->outbuf->done += r;
799 return 1;
800 }
801
802 LOG(4, 0, 0, "Sent %s to BGP peer %s\n",
803 bgp_msg_type_str(peer->outbuf->packet.header.type), peer->name);
804
805 peer->outbuf->packet.header.len = 0;
806 peer->outbuf->done = 0;
807
808 if (peer->state == Established)
809 peer->keepalive_time = time_now + peer->keepalive;
810
811 if (peer->state != peer->next_state)
812 {
813 if (peer->next_state == Disabled || peer->next_state == Idle)
814 {
815 bgp_clear(peer);
816 return 0;
817 }
818
819 peer->state = peer->next_state;
820 peer->state_time = time_now;
821
822 LOG(4, 0, 0, "BGP peer %s: state %s\n", peer->name,
823 bgp_state_str(peer->state));
824 }
825
826 return 1;
827 }
828
829 /* initiate a read */
830 static int bgp_read(struct bgp_peer *peer)
831 {
832 int r;
833
834 while ((r = read(peer->sock, &peer->inbuf->packet + peer->inbuf->done,
835 BGP_MAX_PACKET_SIZE - peer->inbuf->done)) < 1)
836 {
837 if (!r)
838 {
839 LOG(1, 0, 0, "Connection to BGP peer %s closed\n", peer->name);
840 }
841 else
842 {
843 if (errno == EINTR)
844 continue;
845
846 if (errno == EAGAIN)
847 return 1;
848
849 LOG(1, 0, 0, "Can't read from BGP peer %s (%s)\n", peer->name,
850 strerror(errno));
851 }
852
853 bgp_set_retry(peer);
854 return 0;
855 }
856
857 peer->inbuf->done += r;
858 return 1;
859 }
860
861 /* process buffered packets */
862 static int bgp_handle_input(struct bgp_peer *peer)
863 {
864 struct bgp_packet *p = &peer->inbuf->packet;
865 int len = ntohs(p->header.len);
866
867 if (len > BGP_MAX_PACKET_SIZE)
868 {
869 LOG(1, 0, 0, "Bad header length from BGP %s\n", peer->name);
870 bgp_send_notification(peer, BGP_ERR_HEADER, BGP_ERR_HDR_BAD_LEN);
871 return 0;
872 }
873
874 if (peer->inbuf->done < len)
875 return 0;
876
877 LOG(4, 0, 0, "Received %s from BGP peer %s\n",
878 bgp_msg_type_str(p->header.type), peer->name);
879
880 switch (p->header.type)
881 {
882 case BGP_MSG_OPEN:
883 {
884 struct bgp_data_open data;
885 int hold;
886 int i;
887
888 for (i = 0; i < sizeof(p->header.marker); i++)
889 {
890 if ((unsigned char) p->header.marker[i] != 0xff)
891 {
892 LOG(1, 0, 0, "Invalid marker from BGP peer %s\n",
893 peer->name);
894
895 bgp_send_notification(peer, BGP_ERR_HEADER,
896 BGP_ERR_HDR_NOT_SYNC);
897
898 return 0;
899 }
900 }
901
902 if (peer->state != OpenSent)
903 {
904 LOG(1, 0, 0, "OPEN from BGP peer %s in %s state\n",
905 peer->name, bgp_state_str(peer->state));
906
907 bgp_send_notification(peer, BGP_ERR_FSM, 0);
908 return 0;
909 }
910
911 memcpy(&data, p->data, len - sizeof(p->header));
912
913 if (data.version != BGP_VERSION)
914 {
915 LOG(1, 0, 0, "Bad version (%d) sent by BGP peer %s\n",
916 (int) data.version, peer->name);
917
918 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_VERSION);
919 return 0;
920 }
921
922 if (ntohs(data.as) != peer->as)
923 {
924 LOG(1, 0, 0, "Bad AS sent by BGP peer %s (got %d, "
925 "expected %d)\n", peer->name, (int) htons(data.as),
926 (int) peer->as);
927
928 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_BAD_AS);
929 return 0;
930 }
931
932 if ((hold = ntohs(data.hold_time)) < 3)
933 {
934 LOG(1, 0, 0, "Bad hold time (%d) from BGP peer %s\n",
935 hold, peer->name);
936
937 bgp_send_notification(peer, BGP_ERR_OPEN, BGP_ERR_OPN_HOLD_TIME);
938 return 0;
939 }
940
941 /* pick lowest hold time */
942 if (hold < peer->hold)
943 peer->hold = hold;
944
945 /* adjust our keepalive based on negotiated hold value */
946 if (peer->keepalive * 3 > peer->hold)
947 peer->keepalive = peer->hold / 3;
948
949 /* next transition requires an exchange of keepalives */
950 bgp_send_keepalive(peer);
951
952 /* FIXME: may need to check for optional params */
953 }
954
955 break;
956
957 case BGP_MSG_KEEPALIVE:
958 if (peer->state == OpenConfirm)
959 {
960 peer->state = peer->next_state = Established;
961 peer->state_time = time_now;
962 peer->keepalive_time = time_now + peer->keepalive;
963 peer->update_routes = 1;
964 peer->retry_count = 0;
965 peer->retry_time = 0;
966
967 LOG(4, 0, 0, "BGP peer %s: state Established\n", peer->name);
968 }
969
970 break;
971
972 case BGP_MSG_NOTIFICATION:
973 if (len > sizeof(p->header))
974 {
975 struct bgp_data_notification *notification =
976 (struct bgp_data_notification *) p->data;
977
978 if (notification->error_code == BGP_ERR_CEASE)
979 {
980 LOG(4, 0, 0, "BGP peer %s sent CEASE\n", peer->name);
981 bgp_restart(peer);
982 return 0;
983 }
984
985 /* FIXME: should handle more notifications */
986 LOG(4, 0, 0, "BGP peer %s sent unhandled NOTIFICATION %d\n",
987 peer->name, (int) notification->error_code);
988 }
989
990 break;
991 }
992
993 /* reset timer */
994 peer->expire_time = time_now + peer->hold;
995
996 /* see if there's another message in the same packet/buffer */
997 if (peer->inbuf->done > len)
998 {
999 peer->inbuf->done -= len;
1000 memmove(p, (char *) p + len, peer->inbuf->done);
1001 }
1002 else
1003 {
1004 peer->inbuf->packet.header.len = 0;
1005 peer->inbuf->done = 0;
1006 }
1007
1008 return peer->inbuf->done;
1009 }
1010
1011 /* send/buffer OPEN message */
1012 static int bgp_send_open(struct bgp_peer *peer)
1013 {
1014 struct bgp_data_open data;
1015 uint16_t len = sizeof(peer->outbuf->packet.header);
1016
1017 memset(peer->outbuf->packet.header.marker, 0xff,
1018 sizeof(peer->outbuf->packet.header.marker));
1019
1020 peer->outbuf->packet.header.type = BGP_MSG_OPEN;
1021
1022 data.version = BGP_VERSION;
1023 data.as = htons(our_as);
1024 data.hold_time = htons(peer->hold);
1025 data.identifier = my_address;
1026 data.opt_len = 0;
1027
1028 memcpy(peer->outbuf->packet.data, &data, BGP_DATA_OPEN_SIZE);
1029 len += BGP_DATA_OPEN_SIZE;
1030
1031 peer->outbuf->packet.header.len = htons(len);
1032 peer->outbuf->done = 0;
1033 peer->next_state = OpenSent;
1034
1035 return bgp_write(peer);
1036 }
1037
1038 /* send/buffer KEEPALIVE message */
1039 static int bgp_send_keepalive(struct bgp_peer *peer)
1040 {
1041 memset(peer->outbuf->packet.header.marker, 0xff,
1042 sizeof(peer->outbuf->packet.header.marker));
1043
1044 peer->outbuf->packet.header.type = BGP_MSG_KEEPALIVE;
1045 peer->outbuf->packet.header.len =
1046 htons(sizeof(peer->outbuf->packet.header));
1047
1048 peer->outbuf->done = 0;
1049 peer->next_state = (peer->state == OpenSent) ? OpenConfirm : peer->state;
1050
1051 return bgp_write(peer);
1052 }
1053
1054 /* send/buffer UPDATE message */
1055 static int bgp_send_update(struct bgp_peer *peer)
1056 {
1057 uint16_t unf_len = 0;
1058 uint16_t attr_len;
1059 uint16_t len = sizeof(peer->outbuf->packet.header);
1060 struct bgp_route_list *have = peer->routes;
1061 struct bgp_route_list *want = peer->routing ? bgp_routes : 0;
1062 struct bgp_route_list *e = 0;
1063 struct bgp_route_list *add = 0;
1064 int s;
1065
1066 char *data = (char *) &peer->outbuf->packet.data;
1067
1068 /* need leave room for attr_len, bgp_path_attrs and one prefix */
1069 char *max = (char *) &peer->outbuf->packet.data
1070 + sizeof(peer->outbuf->packet.data)
1071 - sizeof(attr_len) - peer->path_attr_len - sizeof(struct bgp_ip_prefix);
1072
1073 /* skip over unf_len */
1074 data += sizeof(unf_len);
1075 len += sizeof(unf_len);
1076
1077 memset(peer->outbuf->packet.header.marker, 0xff,
1078 sizeof(peer->outbuf->packet.header.marker));
1079
1080 peer->outbuf->packet.header.type = BGP_MSG_UPDATE;
1081
1082 peer->update_routes = 0; /* tentatively clear */
1083
1084 /* find differences */
1085 while ((have || want) && data < (max - sizeof(struct bgp_ip_prefix)))
1086 {
1087 if (have)
1088 s = want
1089 ? memcmp(&have->dest, &want->dest, sizeof(have->dest))
1090 : -1;
1091 else
1092 s = 1;
1093
1094 if (s < 0) /* found one to delete */
1095 {
1096 struct bgp_route_list *tmp = have;
1097 have = have->next;
1098
1099 s = BGP_IP_PREFIX_SIZE(tmp->dest);
1100 memcpy(data, &tmp->dest, s);
1101 data += s;
1102 unf_len += s;
1103 len += s;
1104
1105 LOG(5, 0, 0, "Withdrawing route %s/%d from BGP peer %s\n",
1106 fmtaddr(tmp->dest.prefix, 0), tmp->dest.len, peer->name);
1107
1108 free(tmp);
1109
1110 if (e)
1111 e->next = have;
1112 else
1113 peer->routes = have;
1114 }
1115 else
1116 {
1117 if (!s) /* same */
1118 {
1119 e = have; /* stash the last found to relink above */
1120 have = have->next;
1121 want = want->next;
1122 }
1123 else if (s > 0) /* addition reqd. */
1124 {
1125 if (add)
1126 {
1127 peer->update_routes = 1; /* only one add per packet */
1128 if (!have)
1129 break;
1130 }
1131 else
1132 add = want;
1133
1134 if (want)
1135 want = want->next;
1136 }
1137 }
1138 }
1139
1140 if (have || want)
1141 peer->update_routes = 1; /* more to do */
1142
1143 /* anything changed? */
1144 if (!(unf_len || add))
1145 return 1;
1146
1147 /* go back and insert unf_len */
1148 unf_len = htons(unf_len);
1149 memcpy(&peer->outbuf->packet.data, &unf_len, sizeof(unf_len));
1150
1151 if (add)
1152 {
1153 if (!(e = malloc(sizeof(*e))))
1154 {
1155 LOG(0, 0, 0, "Can't allocate route for %s/%d (%s)\n",
1156 fmtaddr(add->dest.prefix, 0), add->dest.len, strerror(errno));
1157
1158 return 0;
1159 }
1160
1161 memcpy(e, add, sizeof(*e));
1162 e->next = 0;
1163 peer->routes = bgp_insert_route(peer->routes, e);
1164
1165 attr_len = htons(peer->path_attr_len);
1166 memcpy(data, &attr_len, sizeof(attr_len));
1167 data += sizeof(attr_len);
1168 len += sizeof(attr_len);
1169
1170 memcpy(data, peer->path_attrs, peer->path_attr_len);
1171 data += peer->path_attr_len;
1172 len += peer->path_attr_len;
1173
1174 s = BGP_IP_PREFIX_SIZE(add->dest);
1175 memcpy(data, &add->dest, s);
1176 data += s;
1177 len += s;
1178
1179 LOG(5, 0, 0, "Advertising route %s/%d to BGP peer %s\n",
1180 fmtaddr(add->dest.prefix, 0), add->dest.len, peer->name);
1181 }
1182 else
1183 {
1184 attr_len = 0;
1185 memcpy(data, &attr_len, sizeof(attr_len));
1186 data += sizeof(attr_len);
1187 len += sizeof(attr_len);
1188 }
1189
1190 peer->outbuf->packet.header.len = htons(len);
1191 peer->outbuf->done = 0;
1192
1193 return bgp_write(peer);
1194 }
1195
1196 /* send/buffer NOTIFICATION message */
1197 static int bgp_send_notification(struct bgp_peer *peer, uint8_t code,
1198 uint8_t subcode)
1199 {
1200 struct bgp_data_notification data;
1201 uint16_t len = 0;
1202
1203 data.error_code = code;
1204 len += sizeof(data.error_code);
1205
1206 data.error_subcode = subcode;
1207 len += sizeof(data.error_code);
1208
1209 memset(peer->outbuf->packet.header.marker, 0xff,
1210 sizeof(peer->outbuf->packet.header.marker));
1211
1212 peer->outbuf->packet.header.type = BGP_MSG_NOTIFICATION;
1213 peer->outbuf->packet.header.len =
1214 htons(sizeof(peer->outbuf->packet.header) + len);
1215
1216 memcpy(peer->outbuf->packet.data, &data, len);
1217
1218 peer->outbuf->done = 0;
1219 peer->next_state = code == BGP_ERR_CEASE ? Disabled : Idle;
1220
1221 /* we're dying; ignore any pending input */
1222 peer->inbuf->packet.header.len = 0;
1223 peer->inbuf->done = 0;
1224
1225 return bgp_write(peer);
1226 }