1 | /* |
2 | This file is part of Mtproto-proxy Library. |
3 | |
4 | Mtproto-proxy Library is free software: you can redistribute it and/or modify |
5 | it under the terms of the GNU Lesser General Public License as published by |
6 | the Free Software Foundation, either version 2 of the License, or |
7 | (at your option) any later version. |
8 | |
9 | Mtproto-proxy Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | GNU Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public License |
15 | along with Mtproto-proxy Library. If not, see <http://www.gnu.org/licenses/>. |
16 | |
17 | Copyright 2009-2013 Vkontakte Ltd |
18 | 2008-2013 Nikolai Durov |
19 | 2008-2013 Andrey Lopatin |
20 | |
21 | Copyright 2014-2016 Telegram Messenger Inc |
22 | 2016 Vitaly Valtman |
23 | */ |
24 | |
25 | #include <arpa/inet.h> |
26 | #include <assert.h> |
27 | #include <errno.h> |
28 | #include <fcntl.h> |
29 | #include <ifaddrs.h> |
30 | #include <limits.h> |
31 | #include <netinet/in.h> |
32 | #include <netinet/tcp.h> |
33 | #include <pwd.h> |
34 | #include <signal.h> |
35 | #include <stdio.h> |
36 | #include <stdlib.h> |
37 | #include <string.h> |
38 | #include <sys/epoll.h> |
39 | #include <sys/io.h> |
40 | #include <sys/socket.h> |
41 | #include <time.h> |
42 | #include <unistd.h> |
43 | |
44 | #include "engine/engine.h" |
45 | #include "net/net-events.h" |
46 | #include "kprintf.h" |
47 | #include "precise-time.h" |
48 | #include "vv/vv-io.h" |
49 | |
50 | |
51 | /* |
52 | * generic events (epoll-based) machinery |
53 | */ |
/* Idle-time accounting, updated in epoll_work() around the epoll wait:
   tot_idle_time is a running total, a_idle_time / a_idle_quotient are
   decayed once per elapsed second. */
double tot_idle_time, a_idle_time, a_idle_quotient;

/* Set to 1 by epoll_fetch_events() just before it sleeps/waits and reset
   to 0 right after epoll_wait() returns. */
volatile int main_thread_interrupt_status;

/* Per-descriptor event records, indexed by file descriptor. */
event_t Events[MAX_EVENTS];
/* Descriptor of the epoll instance; 0 until init_epoll() succeeds. */
int epoll_fd;
/* Logical clock of the run queue; advanced by 2 on every epoll_runqueue() pass. */
static long long ev_timestamp;

/* Binary min-heap of queued events (ordered by greater_ev); slot 0 is unused,
   live entries occupy indices 1..ev_heap_size. */
static event_t *ev_heap[MAX_EVENTS+1];
int ev_heap_size;

/* Statistics: epoll_calls counts epoll_fetch_events() invocations, epoll_intr
   counts waits interrupted by EINTR.  The two timer counters are not touched
   in this part of the file. */
long long epoll_calls;
long long epoll_intr;
long long event_timer_insert_ops;
long long event_timer_remove_ops;

/* Forward declaration: epoll_insert() calls it before its definition. */
int epoll_remove (int fd);
71 | |
72 | int init_epoll (void) { |
73 | int fd; |
74 | if (epoll_fd) { |
75 | return 0; |
76 | } |
77 | Events[0].fd = -1; |
78 | fd = epoll_create (MAX_EVENTS); |
79 | if (fd < 0) { |
80 | perror ("epoll_create()" ); |
81 | return -1; |
82 | } |
83 | epoll_fd = fd; |
84 | assert (fd > 0); |
85 | return fd; |
86 | } |
87 | |
88 | /* returns positive value if ev1 is greater than ev2 */ |
89 | /* since we use only "greater_ev(x,y) > 0" and "greater_ev(x,y) <= 0" compares, */ |
90 | /* it is unimportant to distinguish "x<y" and "x==y" cases */ |
91 | static int greater_ev (event_t *ev1, event_t *ev2) { |
92 | int x = ev1->priority - ev2->priority; |
93 | if (x) return x; |
94 | return (ev1->timestamp > ev2->timestamp) ? 1 : 0; |
95 | } |
96 | |
97 | static event_t *pop_heap_head (void) { |
98 | int i, j, N = ev_heap_size; |
99 | event_t *ev, *x, *y; |
100 | if (!N) return 0; |
101 | ev = ev_heap[1]; |
102 | assert (ev && ev->in_queue == 1); |
103 | ev->in_queue = 0; |
104 | if (!--ev_heap_size) return ev; |
105 | x = ev_heap[N--]; |
106 | i = 1; |
107 | while (1) { |
108 | j = (i << 1); |
109 | if (j > N) break; |
110 | if (j < N && greater_ev (ev_heap[j], ev_heap[j+1]) > 0) j++; |
111 | y = ev_heap[j]; |
112 | if (greater_ev (x, y) <= 0) break; |
113 | ev_heap[i] = y; |
114 | y->in_queue = i; |
115 | i = j; |
116 | } |
117 | ev_heap[i] = x; |
118 | x->in_queue = i; |
119 | return ev; |
120 | } |
121 | |
122 | int remove_event_from_heap (event_t *ev, int allow_hole) { |
123 | int v = ev->fd, i, j, N = ev_heap_size; |
124 | event_t *x; |
125 | assert (v >= 0 && v < MAX_EVENTS && Events + v == ev); |
126 | i = ev->in_queue; |
127 | if (!i) return 0; |
128 | assert (i > 0 && i <= N); |
129 | ev->in_queue = 0; |
130 | do { |
131 | j = (i << 1); |
132 | if (j > N) break; |
133 | if (j < N && greater_ev (ev_heap[j], ev_heap[j+1]) > 0) j++; |
134 | ev_heap[i] = x = ev_heap[j]; |
135 | x->in_queue = i; |
136 | i = j; |
137 | } while(1); |
138 | if (allow_hole) { |
139 | ev_heap[i] = 0; |
140 | return i; |
141 | } |
142 | if (i < N) { |
143 | ev = ev_heap[N]; |
144 | ev_heap[N] = 0; |
145 | while (i > 1) { |
146 | j = (i >> 1); |
147 | x = ev_heap[j]; |
148 | if (greater_ev (x, ev) <= 0) break; |
149 | ev_heap[i] = x; |
150 | x->in_queue = i; |
151 | i = j; |
152 | } |
153 | ev_heap[i] = ev; |
154 | ev->in_queue = i; |
155 | } |
156 | ev_heap_size--; |
157 | return N; |
158 | } |
159 | |
160 | int put_event_into_heap (event_t *ev) { |
161 | int v = ev->fd, i, j; |
162 | event_t *x; |
163 | assert (v >= 0 && v < MAX_EVENTS && Events + v == ev); |
164 | i = ev->in_queue ? remove_event_from_heap (ev, 1) : ++ev_heap_size; |
165 | assert (i <= MAX_EVENTS); |
166 | while (i > 1) { |
167 | j = (i >> 1); |
168 | x = ev_heap[j]; |
169 | if (greater_ev (x, ev) <= 0) break; |
170 | ev_heap[i] = x; |
171 | x->in_queue = i; |
172 | i = j; |
173 | } |
174 | ev_heap[i] = ev; |
175 | ev->in_queue = i; |
176 | return i; |
177 | } |
178 | |
179 | int put_event_into_heap_tail (event_t *ev, int ts_delta) { |
180 | ev->timestamp = ev_timestamp + ts_delta; |
181 | return put_event_into_heap (ev); |
182 | } |
183 | |
184 | int epoll_sethandler (int fd, int prio, event_handler_t handler, void *data) { |
185 | event_t *ev; |
186 | assert (fd >= 0 && fd < MAX_EVENTS); |
187 | ev = Events + fd; |
188 | if (ev->fd != fd) { |
189 | memset (ev, 0, sizeof (*ev)); |
190 | ev->fd = fd; |
191 | } |
192 | assert (!ev->refcnt); |
193 | __sync_fetch_and_add (&ev->refcnt, 1); |
194 | ev->priority = prio; |
195 | ev->data = data; |
196 | ev->work = handler; |
197 | return 0; |
198 | } |
199 | |
200 | int epoll_conv_flags (int flags) { |
201 | if (!flags) { |
202 | return 0; |
203 | } |
204 | int r = EPOLLERR; |
205 | |
206 | // no need |
207 | // it is always set |
208 | //if (!(flags & EVT_NOHUP)) { |
209 | // r |= EPOLLHUP; |
210 | //} |
211 | if (flags & EVT_READ) { |
212 | r |= EPOLLIN; |
213 | } |
214 | if (flags & EVT_WRITE) { |
215 | r |= EPOLLOUT; |
216 | } |
217 | if (flags & EVT_SPEC) { |
218 | r |= EPOLLRDHUP | EPOLLPRI; |
219 | } |
220 | if (!(flags & EVT_LEVEL)) { |
221 | r |= EPOLLET; |
222 | } |
223 | return r; |
224 | } |
225 | |
226 | int epoll_unconv_flags (int f) { |
227 | int r = EVT_FROM_EPOLL; |
228 | if (f & (EPOLLIN | EPOLLERR)) { |
229 | r |= EVT_READ; |
230 | } |
231 | if (f & EPOLLOUT) { |
232 | r |= EVT_WRITE; |
233 | } |
234 | if (f & (EPOLLRDHUP | EPOLLPRI)) { |
235 | r |= EVT_SPEC; |
236 | } |
237 | return r; |
238 | } |
239 | |
240 | int epoll_insert (int fd, int flags) { |
241 | event_t *ev; |
242 | int ef; |
243 | struct epoll_event ee; |
244 | if (!flags) { |
245 | return epoll_remove (fd); |
246 | } |
247 | assert (fd >= 0 && fd < MAX_EVENTS); |
248 | ev = Events + fd; |
249 | if (ev->fd != fd) { |
250 | memset (ev, 0, sizeof(event_t)); |
251 | ev->fd = fd; |
252 | } |
253 | flags &= EVT_NEW | EVT_NOHUP | EVT_LEVEL | EVT_RWX; |
254 | ev->ready = 0; // !!! this bugfix led to some AIO-related bugs, now fixed with the aid of C_REPARSE flag |
255 | if ((ev->state & (EVT_LEVEL | EVT_RWX | EVT_IN_EPOLL)) == flags + EVT_IN_EPOLL) { |
256 | return 0; |
257 | } |
258 | ev->state = (ev->state & ~(EVT_LEVEL | EVT_RWX)) | (flags & (EVT_LEVEL | EVT_RWX)); |
259 | ef = epoll_conv_flags (flags); |
260 | if (ef != ev->epoll_state || (flags & EVT_NEW) || !(ev->state & EVT_IN_EPOLL)) { |
261 | ev->epoll_state = ef; |
262 | memset (&ee, 0, sizeof (ee)); |
263 | ee.events = ef; |
264 | ee.data.fd = fd; |
265 | |
266 | vkprintf (2, "epoll_mod(%d,0x%08x,%d,%d,%08x)\n" , epoll_fd, ev->state, fd, ee.data.fd, ee.events); |
267 | |
268 | if (epoll_ctl (epoll_fd, (ev->state & EVT_IN_EPOLL) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD, fd, &ee) < 0) { |
269 | vkprintf (0, "epoll_ctl(%d,0x%x,%d,%d,%08x): %m\n" , epoll_fd, ev->state, fd, ee.data.fd, ee.events); |
270 | } |
271 | ev->state |= EVT_IN_EPOLL; |
272 | } |
273 | return 0; |
274 | } |
275 | |
276 | int epoll_remove (int fd) { |
277 | event_t *ev; |
278 | assert (fd >= 0 && fd < MAX_EVENTS); |
279 | ev = Events + fd; |
280 | if (ev->fd != fd) { return -1; } |
281 | if (ev->state & EVT_IN_EPOLL) { |
282 | ev->state &= ~EVT_IN_EPOLL; |
283 | vkprintf (2, "epoll_del(%d,0x%08x,%d,%d,%08x)\n" , epoll_fd, EPOLL_CTL_DEL, fd, 0, 0); |
284 | if (epoll_ctl (epoll_fd, EPOLL_CTL_DEL, fd, 0) < 0) { |
285 | perror ("epoll_ctl(DEL)" ); |
286 | } |
287 | } |
288 | return 0; |
289 | } |
290 | |
291 | int epoll_close (int fd) { |
292 | event_t *ev; |
293 | assert (fd >= 0 && fd < MAX_EVENTS); |
294 | ev = Events + fd; |
295 | if (ev->fd != fd) { |
296 | return -1; |
297 | } |
298 | epoll_remove (fd); |
299 | if (ev->in_queue) { |
300 | remove_event_from_heap (ev, 0); |
301 | } |
302 | memset (ev, 0, sizeof (event_t)); |
303 | ev->fd = -1; |
304 | return 0; |
305 | } |
306 | |
int thread_run_timers (void);

/* Thin wrapper: runs the timers via thread_run_timers() and passes its result through. */
int epoll_run_timers (void) {
  int result = thread_run_timers ();
  return result;
}
311 | |
/* Returns 1 if SIGINT or SIGTERM is pending, 0 otherwise (SIGINT is checked first). */
int term_signal_received (void) {
  if (signal_check_pending (SIGINT)) {
    return 1;
  }
  return signal_check_pending (SIGTERM) ? 1 : 0;
}
315 | |
316 | int epoll_runqueue (void) { |
317 | event_t *ev; |
318 | int res, fd, cnt = 0; |
319 | if (!ev_heap_size) { |
320 | return 0; |
321 | } |
322 | |
323 | vkprintf (3, "epoll_runqueue: %d events\n" , ev_heap_size); |
324 | |
325 | ev_timestamp += 2; |
326 | while (ev_heap_size && (ev = ev_heap[1])->timestamp < ev_timestamp && !term_signal_received ()) { |
327 | pop_heap_head(); |
328 | fd = ev->fd; |
329 | assert (ev == Events + fd && fd >= 0 && fd < MAX_EVENTS); |
330 | if (ev->work) { |
331 | res = ev->work(fd, ev->data, ev); |
332 | } else { |
333 | res = EVA_REMOVE; |
334 | } |
335 | if (res == EVA_REMOVE || res == EVA_DESTROY || res <= EVA_ERROR) { |
336 | remove_event_from_heap (ev, 0); |
337 | epoll_remove (ev->fd); |
338 | if (res == EVA_DESTROY) { |
339 | if (!(ev->state & EVT_CLOSED)) { |
340 | close (ev->fd); |
341 | } |
342 | memset (ev, 0, sizeof(event_t)); |
343 | } |
344 | if (res <= EVA_FATAL) { |
345 | perror ("fatal" ); |
346 | exit(1); |
347 | } |
348 | } else if (res == EVA_RERUN) { |
349 | ev->timestamp = ev_timestamp; |
350 | put_event_into_heap (ev); |
351 | } else if (res > 0) { |
352 | epoll_insert (fd, res & 0xf); |
353 | } else if (res == EVA_CONTINUE) { |
354 | ev->ready = 0; |
355 | } |
356 | cnt++; |
357 | } |
358 | return cnt; |
359 | } |
360 | |
361 | double last_epoll_wait_at; |
362 | struct epoll_event new_ev_list[MAX_EVENTS]; |
363 | int epoll_sleep_ns = 0; |
364 | |
365 | int epoll_fetch_events (int timeout) { |
366 | epoll_calls ++; |
367 | int fd, i; |
368 | main_thread_interrupt_status = 1; |
369 | struct timespec ts; |
370 | ts.tv_sec = 0; |
371 | ts.tv_nsec = epoll_sleep_ns; |
372 | nanosleep (&ts, NULL); |
373 | int res = epoll_wait (epoll_fd, new_ev_list, MAX_EVENTS, timeout); |
374 | main_thread_interrupt_status = 0; |
375 | if (res < 0 && errno == EINTR) { |
376 | epoll_intr ++; |
377 | res = 0; |
378 | } |
379 | if (res < 0) { |
380 | perror ("epoll_wait()" ); |
381 | } |
382 | if (verbosity > 2 && res) { |
383 | kprintf ("epoll_wait(%d, ...) = %d\n" , epoll_fd, res); |
384 | } |
385 | for (i = 0; i < res; i++) { |
386 | fd = new_ev_list[i].data.fd; |
387 | assert (fd >= 0 && fd < MAX_EVENTS); |
388 | event_t *ev = Events + fd; |
389 | assert (ev->fd == fd); |
390 | ev->ready |= epoll_unconv_flags (ev->epoll_ready = new_ev_list[i].events); |
391 | ev->timestamp = ev_timestamp; |
392 | put_event_into_heap (ev); |
393 | } |
394 | return res; |
395 | } |
396 | |
397 | void jobs_check_all_timers (void); |
398 | int epoll_work (int timeout) { |
399 | int timeout2 = 10000; |
400 | if (1) { |
401 | now = time (0); |
402 | get_utime_monotonic (); |
403 | do { |
404 | epoll_runqueue (); |
405 | timeout2 = epoll_run_timers (); |
406 | } while ((timeout2 <= 0 || ev_heap_size) && !term_signal_received ()); |
407 | } |
408 | if (term_signal_received ()) { |
409 | return 0; |
410 | } |
411 | |
412 | double epoll_wait_start = get_utime_monotonic (); |
413 | |
414 | epoll_fetch_events (1); |
415 | |
416 | last_epoll_wait_at = get_utime_monotonic (); |
417 | double epoll_wait_time = last_epoll_wait_at - epoll_wait_start; |
418 | tot_idle_time += epoll_wait_time; |
419 | a_idle_time += epoll_wait_time; |
420 | |
421 | now = time (0); |
422 | static int prev_now = 0; |
423 | if (now > prev_now && now < prev_now + 60) { |
424 | while (prev_now < now) { |
425 | a_idle_time *= 100.0 / 101; |
426 | a_idle_quotient = a_idle_quotient * (100.0/101) + 1; |
427 | prev_now++; |
428 | } |
429 | } else { |
430 | prev_now = now; |
431 | } |
432 | |
433 | epoll_run_timers (); |
434 | |
435 | jobs_check_all_timers (); |
436 | return epoll_runqueue(); |
437 | } |
438 | |
439 | // ------- end of definitions ---------- |
440 | |
441 | /* |
442 | * end (events) |
443 | */ |
444 | |
445 | |
446 | // From memcached.c: socket functions |
447 | |
448 | int new_socket (int mode, int nonblock) { |
449 | int socket_fd; |
450 | int flags; |
451 | |
452 | if ((socket_fd = socket (mode & SM_IPV6 ? AF_INET6 : AF_INET, mode & SM_UDP ? SOCK_DGRAM : SOCK_STREAM, 0)) == -1) { |
453 | perror ("socket()" ); |
454 | return -1; |
455 | } |
456 | |
457 | if (mode & SM_IPV6) { |
458 | flags = (mode & SM_IPV6_ONLY) != 0; |
459 | if (setsockopt (socket_fd, IPPROTO_IPV6, IPV6_V6ONLY, &flags, 4) < 0) { |
460 | perror ("setting IPV6_V6ONLY" ); |
461 | close (socket_fd); |
462 | return -1; |
463 | } |
464 | } |
465 | |
466 | if (!nonblock) { |
467 | return socket_fd; |
468 | } |
469 | |
470 | if ((flags = fcntl (socket_fd, F_GETFL, 0)) < 0 || fcntl (socket_fd, F_SETFL, flags | O_NONBLOCK) < 0) { |
471 | perror ("setting O_NONBLOCK" ); |
472 | close (socket_fd); |
473 | return -1; |
474 | } |
475 | return socket_fd; |
476 | } |
477 | |
478 | |
479 | /* |
480 | * Sets a socket's send buffer size to the maximum allowed by the system. |
481 | */ |
482 | void maximize_sndbuf (int socket_fd, int max) { |
483 | socklen_t intsize = sizeof(int); |
484 | int last_good = 0; |
485 | int min, avg; |
486 | int old_size; |
487 | |
488 | if (max <= 0) { |
489 | max = MAX_UDP_SENDBUF_SIZE; |
490 | } |
491 | |
492 | /* Start with the default size. */ |
493 | if (getsockopt (socket_fd, SOL_SOCKET, SO_SNDBUF, &old_size, &intsize)) { |
494 | if (verbosity > 0) { |
495 | perror ("getsockopt (SO_SNDBUF)" ); |
496 | } |
497 | return; |
498 | } |
499 | |
500 | /* Binary-search for the real maximum. */ |
501 | min = last_good = old_size; |
502 | max = MAX_UDP_SENDBUF_SIZE; |
503 | |
504 | while (min <= max) { |
505 | avg = ((unsigned int) min + max) / 2; |
506 | if (setsockopt (socket_fd, SOL_SOCKET, SO_SNDBUF, &avg, intsize) == 0) { |
507 | last_good = avg; |
508 | min = avg + 1; |
509 | } else { |
510 | max = avg - 1; |
511 | } |
512 | } |
513 | |
514 | vkprintf (2, "<%d send buffer was %d, now %d\n" , socket_fd, old_size, last_good); |
515 | } |
516 | |
517 | /* |
518 | * Sets a socket's receive buffer size to the maximum allowed by the system. |
519 | */ |
520 | void maximize_rcvbuf (int socket_fd, int max) { |
521 | socklen_t intsize = sizeof(int); |
522 | int last_good = 0; |
523 | int min, avg; |
524 | int old_size; |
525 | |
526 | if (max <= 0) { |
527 | max = MAX_UDP_RCVBUF_SIZE; |
528 | } |
529 | |
530 | /* Start with the default size. */ |
531 | if (getsockopt (socket_fd, SOL_SOCKET, SO_RCVBUF, &old_size, &intsize)) { |
532 | if (verbosity > 0) { |
533 | perror ("getsockopt (SO_RCVBUF)" ); |
534 | } |
535 | return; |
536 | } |
537 | |
538 | /* Binary-search for the real maximum. */ |
539 | min = last_good = old_size; |
540 | max = MAX_UDP_RCVBUF_SIZE; |
541 | |
542 | while (min <= max) { |
543 | avg = ((unsigned int) min + max) / 2; |
544 | if (setsockopt (socket_fd, SOL_SOCKET, SO_RCVBUF, &avg, intsize) == 0) { |
545 | last_good = avg; |
546 | min = avg + 1; |
547 | } else { |
548 | max = avg - 1; |
549 | } |
550 | } |
551 | |
552 | vkprintf (2, ">%d receive buffer was %d, now %d\n" , socket_fd, old_size, last_good); |
553 | } |
554 | |
555 | int tcp_maximize_buffers; |
556 | struct in_addr settings_addr; |
557 | |
558 | int server_socket (int port, struct in_addr in_addr, int backlog, int mode) { |
559 | int socket_fd; |
560 | struct linger ling = {0, 0}; |
561 | int flags = 1; |
562 | |
563 | if ((socket_fd = new_socket (mode, 1)) == -1) { |
564 | return -1; |
565 | } |
566 | |
567 | if (mode & SM_UDP) { |
568 | maximize_sndbuf (socket_fd, 0); |
569 | maximize_rcvbuf (socket_fd, 0); |
570 | setsockopt (socket_fd, SOL_IP, IP_RECVERR, &flags, sizeof (flags)); |
571 | |
572 | } else { |
573 | setsockopt (socket_fd, SOL_SOCKET, SO_REUSEADDR, &flags, sizeof (flags)); |
574 | if (tcp_maximize_buffers) { |
575 | maximize_sndbuf (socket_fd, 0); |
576 | maximize_rcvbuf (socket_fd, 0); |
577 | } |
578 | assert (setsockopt (socket_fd, SOL_SOCKET, SO_KEEPALIVE, &flags, sizeof (flags)) >= 0); |
579 | assert (flags == 1); |
580 | setsockopt (socket_fd, SOL_SOCKET, SO_LINGER, &ling, sizeof (ling)); |
581 | setsockopt (socket_fd, IPPROTO_TCP, TCP_NODELAY, &flags, sizeof (flags)); |
582 | |
583 | int x = 40; |
584 | assert (setsockopt (socket_fd, IPPROTO_TCP, TCP_KEEPIDLE, &x, sizeof (x)) >= 0); |
585 | assert (setsockopt (socket_fd, IPPROTO_TCP, TCP_KEEPINTVL, &x, sizeof (x)) >= 0); |
586 | x = 5; |
587 | assert (setsockopt (socket_fd, IPPROTO_TCP, TCP_KEEPCNT, &x, sizeof (x)) >= 0); |
588 | } |
589 | |
590 | if (mode & SM_REUSE) { |
591 | setsockopt (socket_fd, SOL_SOCKET, SO_REUSEADDR, &flags, sizeof (flags)); |
592 | } |
593 | |
594 | if (!(mode & SM_IPV6)) { |
595 | struct sockaddr_in addr; |
596 | memset (&addr, 0, sizeof (addr)); |
597 | |
598 | addr.sin_family = AF_INET; |
599 | addr.sin_port = htons (port); |
600 | addr.sin_addr = in_addr; |
601 | if (bind (socket_fd, (struct sockaddr *) &addr, sizeof (addr)) == -1) { |
602 | perror ("bind()" ); |
603 | close (socket_fd); |
604 | return -1; |
605 | } |
606 | } else { |
607 | struct sockaddr_in6 addr; |
608 | memset (&addr, 0, sizeof (addr)); |
609 | |
610 | addr.sin6_family = AF_INET6; |
611 | addr.sin6_port = htons (port); |
612 | addr.sin6_addr = in6addr_any; |
613 | |
614 | if (bind (socket_fd, (struct sockaddr *) &addr, sizeof (addr)) == -1) { |
615 | perror ("bind()" ); |
616 | close (socket_fd); |
617 | return -1; |
618 | } |
619 | } |
620 | if (!(mode & SM_UDP) && listen (socket_fd, backlog) == -1) { |
621 | // perror("listen()"); |
622 | close (socket_fd); |
623 | return -1; |
624 | } |
625 | return socket_fd; |
626 | } |
627 | |
628 | int client_socket (in_addr_t in_addr, int port, int mode) { |
629 | int socket_fd; |
630 | struct sockaddr_in addr; |
631 | int flags = 1; |
632 | |
633 | if (mode & SM_IPV6) { |
634 | return -1; |
635 | } |
636 | |
637 | if ((socket_fd = new_socket (mode, 1)) == -1) { |
638 | return -1; |
639 | } |
640 | |
641 | if (mode & SM_UDP) { |
642 | maximize_sndbuf (socket_fd, 0); |
643 | maximize_rcvbuf (socket_fd, 0); |
644 | setsockopt (socket_fd, SOL_IP, IP_RECVERR, &flags, sizeof (flags)); |
645 | } else { |
646 | setsockopt (socket_fd, SOL_SOCKET, SO_REUSEADDR, &flags, sizeof (flags)); |
647 | if (tcp_maximize_buffers) { |
648 | maximize_sndbuf (socket_fd, 0); |
649 | maximize_rcvbuf (socket_fd, 0); |
650 | } |
651 | assert (setsockopt (socket_fd, SOL_SOCKET, SO_KEEPALIVE, &flags, sizeof (flags)) >= 0); |
652 | assert (flags == 1); |
653 | setsockopt (socket_fd, IPPROTO_TCP, TCP_NODELAY, &flags, sizeof (flags)); |
654 | |
655 | int x = 40; |
656 | assert (setsockopt (socket_fd, IPPROTO_TCP, TCP_KEEPIDLE, &x, sizeof (x)) >= 0); |
657 | assert (setsockopt (socket_fd, IPPROTO_TCP, TCP_KEEPINTVL, &x, sizeof (x)) >= 0); |
658 | x = 5; |
659 | assert (setsockopt (socket_fd, IPPROTO_TCP, TCP_KEEPCNT, &x, sizeof (x)) >= 0); |
660 | } |
661 | |
662 | if (!(mode & SM_IPV6)) { |
663 | engine_t *E = engine_state; |
664 | if (E && E->settings_addr.s_addr) { |
665 | struct sockaddr_in localaddr; |
666 | memset (&localaddr, 0, sizeof (localaddr)); |
667 | |
668 | localaddr.sin_family = AF_INET; |
669 | localaddr.sin_port = 0; |
670 | localaddr.sin_addr = E->settings_addr; |
671 | if (bind (socket_fd, (struct sockaddr *) &localaddr, sizeof (localaddr)) == -1) { |
672 | perror ("bind()" ); |
673 | close (socket_fd); |
674 | return -1; |
675 | } |
676 | } |
677 | } |
678 | |
679 | memset (&addr, 0, sizeof (addr)); |
680 | |
681 | addr.sin_family = AF_INET; |
682 | addr.sin_port = htons (port); |
683 | addr.sin_addr.s_addr = in_addr; |
684 | |
685 | if (connect (socket_fd, (struct sockaddr *) &addr, sizeof (addr)) == -1 && errno != EINPROGRESS) { |
686 | perror ("connect()" ); |
687 | close (socket_fd); |
688 | return -1; |
689 | } |
690 | |
691 | return socket_fd; |
692 | |
693 | } |
694 | |
695 | int client_socket_ipv6 (const unsigned char in6_addr_ptr[16], int port, int mode) { |
696 | int socket_fd; |
697 | struct sockaddr_in6 addr; |
698 | int flags = 1; |
699 | |
700 | if (!(mode & SM_IPV6)) { |
701 | return -1; |
702 | } |
703 | |
704 | if ((socket_fd = new_socket (mode, 1)) == -1) { |
705 | return -1; |
706 | } |
707 | |
708 | if (mode & SM_UDP) { |
709 | maximize_sndbuf (socket_fd, 0); |
710 | maximize_rcvbuf (socket_fd, 0); |
711 | } else { |
712 | setsockopt (socket_fd, SOL_SOCKET, SO_REUSEADDR, &flags, sizeof (flags)); |
713 | if (tcp_maximize_buffers) { |
714 | maximize_sndbuf (socket_fd, 0); |
715 | maximize_rcvbuf (socket_fd, 0); |
716 | } |
717 | setsockopt (socket_fd, SOL_SOCKET, SO_KEEPALIVE, &flags, sizeof (flags)); |
718 | setsockopt (socket_fd, IPPROTO_TCP, TCP_NODELAY, &flags, sizeof (flags)); |
719 | } |
720 | |
721 | memset (&addr, 0, sizeof (addr)); |
722 | |
723 | addr.sin6_family = AF_INET6; |
724 | addr.sin6_port = htons (port); |
725 | memcpy (&addr.sin6_addr, in6_addr_ptr, 16); |
726 | |
727 | if (connect (socket_fd, (struct sockaddr *) &addr, sizeof (addr)) == -1 && errno != EINPROGRESS) { |
728 | perror ("connect()" ); |
729 | close (socket_fd); |
730 | return -1; |
731 | } |
732 | |
733 | return socket_fd; |
734 | |
735 | } |
736 | |
/*
 * Picks the "main" IPv4 address of this host and returns it in host byte
 * order (0 if getifaddrs() fails or nothing suitable is found).
 *
 * Interfaces whose name starts with "lo" are skipped.  Addresses in 10.0.0.0/8
 * are preferred over everything else; within the same class the address with
 * the numerically smallest netmask wins.  127.x.x.x addresses are never chosen.
 */
unsigned get_my_ipv4 (void) {
  struct ifaddrs *ifa_first, *ifa;
  unsigned my_ip = 0, my_netmask = -1;
  char *my_iface = 0;
  if (getifaddrs (&ifa_first) < 0) {
    perror ("getifaddrs()" );
    return 0;
  }
  for (ifa = ifa_first; ifa; ifa = ifa->ifa_next) {
    if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != AF_INET) {
      continue;
    }
    /* getifaddrs() may report an entry without a netmask */
    if (ifa->ifa_netmask == NULL) {
      continue;
    }
    if (!strncmp (ifa->ifa_name, "lo" , 2)) {
      continue;
    }
    unsigned ip = ntohl (((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr);
    unsigned mask = ntohl (((struct sockaddr_in *) ifa->ifa_netmask)->sin_addr.s_addr);
    /* compare the first octet directly; (-1 << 24) would shift a negative value */
    unsigned first_octet = ip >> 24;
    // fprintf (stderr, "%08x %08x\t%s\n", ip, mask, ifa->ifa_name);
    if (first_octet == 10 && (mask < my_netmask || (my_ip >> 24) != 10)) {
      my_ip = ip;
      my_netmask = mask;
      my_iface = ifa->ifa_name;
    } else if (first_octet != 127 && mask < my_netmask && (my_ip >> 24) != 10) {
      my_ip = ip;
      my_netmask = mask;
      my_iface = ifa->ifa_name;
    }
  }
  /* __builtin_clz(0) is undefined, so an all-ones mask is handled explicitly */
  int prefix_len = (my_netmask == 0xffffffffu) ? 32 : __builtin_clz (~my_netmask);
  vkprintf (1, "using main IP %d.%d.%d.%d/%d at interface %s\n" , (int) (my_ip >> 24), (int) ((my_ip >> 16) & 255), (int) ((my_ip >> 8) & 255), (int) (my_ip & 255),
            prefix_len, my_iface ?: "(none)" );
  freeifaddrs (ifa_first);
  return my_ip;
}
770 | |
771 | int get_my_ipv6 (unsigned char ipv6[16]) { |
772 | struct ifaddrs *ifa_first, *ifa; |
773 | char *my_iface = 0; |
774 | unsigned char ip[16]; |
775 | unsigned char mask[16]; |
776 | memset (mask, 0, sizeof (mask)); |
777 | if (getifaddrs (&ifa_first) < 0) { |
778 | perror ("getifaddrs()" ); |
779 | return 0; |
780 | } |
781 | int found_auto = 0; |
782 | for (ifa = ifa_first; ifa; ifa = ifa->ifa_next) { |
783 | if (ifa->ifa_addr == NULL || ifa->ifa_addr->sa_family != AF_INET6) { |
784 | continue; |
785 | } |
786 | memcpy (ip, &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr, 16); |
787 | vkprintf (2, "test IP " IPV6_PRINT_STR " at interface %s\n" , IPV6_TO_PRINT (ip), ifa->ifa_name); |
788 | |
789 | if ((ip[0] & 0xf0) != 0x30 && (ip[0] & 0xf0) != 0x20) { |
790 | vkprintf (2, "not a global ipv6 address\n" ); |
791 | continue; |
792 | } |
793 | |
794 | if (ip[11] == 0xff && ip[12] == 0xfe && (ip[8] & 2)) { |
795 | if (found_auto) { continue; } |
796 | my_iface = ifa->ifa_name; |
797 | memcpy (ipv6, ip, 16); |
798 | memcpy (mask, &((struct sockaddr_in6 *)ifa->ifa_netmask)->sin6_addr, 16); |
799 | found_auto = 1; |
800 | } else { |
801 | my_iface = ifa->ifa_name; |
802 | memcpy (ipv6, ip, 16); |
803 | memcpy (mask, &((struct sockaddr_in6 *)ifa->ifa_netmask)->sin6_addr, 16); |
804 | break; |
805 | } |
806 | } |
807 | int m = 0; |
808 | while (m < 128 && mask[m / 8] == 0xff) { m += 8; } |
809 | if (m < 128) { |
810 | unsigned char c = mask[m / 8]; |
811 | while (c & 1) { |
812 | c /= 2; |
813 | m ++; |
814 | } |
815 | } |
816 | vkprintf (1, "using main IP " IPV6_PRINT_STR "/%d at interface %s\n" , IPV6_TO_PRINT (ipv6), m, my_iface); |
817 | freeifaddrs (ifa_first); |
818 | return 1; |
819 | } |
820 | |
821 | /* IPv4/IPv6 address formatting functions */ |
822 | |
/*
 * Formats an IPv4 address as dotted decimal, lowest-order byte of `a` first
 * (i.e. `a` as stored in memory on a little-endian host / network byte order).
 * Writes into buf, or into an internal static buffer when buf is NULL, and
 * returns the buffer used.
 */
const char *conv_addr (in_addr_t a, char *buf) {
  static char abuf[64];
  char *out = buf ? buf : abuf;
  int o0 = a & 255;
  int o1 = (a >> 8) & 255;
  int o2 = (a >> 16) & 255;
  int o3 = a >> 24;
  sprintf (out, "%d.%d.%d.%d" , o0, o1, o2, o3);
  return out;
}
831 | |
/*
 * Formats an IPv6 address given as eight 16-bit groups in network byte order.
 *
 * The longest run of zero groups (the first one on ties) is compressed to
 * "::", including a run that reaches the last group.  An all-zero address is
 * "::", and ::ffff:a.b.c.d addresses are printed in mixed IPv4 notation.
 * buf must have room for 40 bytes.  Returns the string length written
 * (excluding the terminating NUL).
 */
int conv_ipv6_internal (const unsigned short a[8], char *buf) {
  int i, run = 0, best_end = 0, best_len = 0;
  for (i = 0; i < 8; i++) {
    if (a[i]) {
      if (run > best_len) {
        best_len = run;
        best_end = i;
      }
      run = 0;
    } else {
      run++;
    }
  }
  if (run == 8) {
    memcpy (buf, "::" , 3);
    return 2;
  }
  /* the loop only records a run when a non-zero group ends it; a run that
     extends to the last group is still pending here */
  if (run > best_len) {
    best_len = run;
    best_end = 8;
  }
  if (best_len == 5 && a[5] == 0xffff) {
    /* IPv4-mapped address; ntohs makes the octet order endian-independent */
    int hi = ntohs (a[6]), lo = ntohs (a[7]);
    return sprintf (buf, "::ffff:%d.%d.%d.%d" , hi >> 8, hi & 255, lo >> 8, lo & 255);
  }
  char *ptr = buf;
  if (best_len) {
    int run_start = best_end - best_len;
    for (i = 0; i < run_start; i++) {
      ptr += sprintf (ptr, "%x:" , ntohs (a[i]));
    }
    /* a run at either end needs the second ':' written explicitly */
    if (!run_start || best_end == 8) {
      *ptr++ = ':';
    }
    for (i = best_end; i < 8; i++) {
      ptr += sprintf (ptr, ":%x" , ntohs (a[i]));
    }
    /* for a trailing run the ':' above is the last character written */
    *ptr = 0;
  } else {
    for (i = 0; i < 7; i++) {
      ptr += sprintf (ptr, "%x:" , ntohs (a[i]));
    }
    ptr += sprintf (ptr, "%x" , ntohs (a[i]));
  }
  return (int) (ptr - buf);
}
871 | |
/*
 * Formats the 16-byte IPv6 address `a` as text.
 * Writes into buf, or into an internal static buffer when buf is NULL, and
 * returns the buffer used.
 */
const char *conv_addr6 (const unsigned char a[16], char *buf) {
  static char abuf[64];
  unsigned short groups[8];
  if (!buf) {
    buf = abuf;
  }
  /* copy into a properly aligned array instead of reading the bytes through
     an unsigned short pointer (alignment / strict-aliasing hazard) */
  memcpy (groups, a, sizeof (groups));
  conv_ipv6_internal (groups, buf);
  return buf;
}
880 | |
/*
 * Formats an IPv4 address given in host byte order as dotted decimal.
 * Results are carved out of a 256-byte static ring buffer, so several results
 * stay valid at once (e.g. within one printf call) but are overwritten once
 * the buffer wraps around.
 */
const char *show_ip (unsigned ip) {
  static char ring[256], *next = ring;
  if (next - ring > 200) {
    next = ring;
  }
  char *res = next;
  int len = sprintf (res, "%d.%d.%d.%d" , ip >> 24, (ip >> 16) & 0xff, (ip >> 8) & 0xff, ip & 0xff);
  next = res + len + 1;
  return res;
}
891 | |
/*
 * Formats the 16-byte IPv6 address as text.
 * Results are carved out of a 256-byte static ring buffer, so several results
 * stay valid at once (e.g. within one printf call) but are overwritten once
 * the buffer wraps around.
 */
const char *show_ipv6 (const unsigned char ipv6[16]) {
  static char abuf[256], *ptr = abuf;
  char *res;
  unsigned short groups[8];
  if (ptr > abuf + 200) {
    ptr = abuf;
  }
  res = ptr;
  /* copy into a properly aligned array instead of reading the bytes through
     an unsigned short pointer (alignment / strict-aliasing hazard) */
  memcpy (groups, ipv6, sizeof (groups));
  ptr += conv_ipv6_internal (groups, ptr) + 1;
  return res;
}
902 | |
903 | |