2 * nghttp2 - HTTP/2 C Library
4 * Copyright (c) 2012 Tatsuhiro Tsujikawa
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice shall be
15 * included in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #include "shrpx_worker.h"
29 #endif // HAVE_UNISTD_H
34 #include <openssl/rand.h>
38 # include <bpf/libbpf.h>
41 #include "shrpx_tls.h"
42 #include "shrpx_log.h"
43 #include "shrpx_client_handler.h"
44 #include "shrpx_http2_session.h"
45 #include "shrpx_log_config.h"
46 #include "shrpx_memcached_dispatcher.h"
48 # include "shrpx_mruby.h"
51 # include "shrpx_quic_listener.h"
52 #endif // ENABLE_HTTP3
53 #include "shrpx_connection_handler.h"
56 #include "xsi_strerror.h"
// libev async-watcher callback (registered on Worker::w_).  The watcher's
// data pointer holds the owning Worker; the callback simply calls that
// worker's process_events().
61 void eventcb(struct ev_loop *loop, ev_async *w, int revents) {
62 auto worker = static_cast<Worker *>(w->data);
63 worker->process_events();
// Timer callback for the memchunk-pool clear timer; w->data is the owning
// Worker.  It inspects the worker's connection count and, when every chunk
// of the pool is on the free list (freelistsize == poolsize), clears the
// pool.
// NOTE(review): the body of the num_connections check is missing from this
// extract; presumably it returns early while connections remain -- confirm.
68 void mcpool_clear_cb(struct ev_loop *loop, ev_timer *w, int revents) {
69 auto worker = static_cast<Worker *>(w->data);
70 if (worker->get_worker_stat()->num_connections != 0) {
73 auto mcpool = worker->get_mcpool();
// Only release memory when no chunk is currently handed out.
74 if (mcpool->freelistsize == mcpool->poolsize) {
75 worker->get_mcpool()->clear();
// Timer callback for the worker-event timer (proc_wev_timer_); w->data is
// the owning Worker.  Calls the worker's process_events() so that the next
// queued event is handled.
81 void proc_wev_cb(struct ev_loop *loop, ev_timer *w, int revents) {
82 auto worker = static_cast<Worker *>(w->data);
83 worker->process_events();
// A freshly constructed group starts with its |retired| flag cleared.
87 DownstreamAddrGroup::DownstreamAddrGroup() : retired{false} {}
// Destructor with an empty body; members clean up after themselves.
89 DownstreamAddrGroup::~DownstreamAddrGroup() {}
91 // DownstreamKey is used to index SharedDownstreamAddr in order to
92 // find the same configuration.
//
// Slot layout, as filled in by create_downstream_key():
//   <0> one tuple per backend address, holding in order: host, sni, group,
//       fall, rise, proto, port, weight, group_weight, host_unix, tls, dns,
//       upgrade_scheme
//   <1> redirect_if_not_tls
//   <2> session affinity type
//   <3> affinity cookie name
//   <4> affinity cookie path
//   <5> affinity cookie "secure" setting
//   <6> read timeout
//   <7> write timeout
//   <8> mruby file
//   <9> dnf
// NOTE(review): the line introducing the alias name is missing from this
// extract.
94 std::tuple<std::vector<std::tuple<StringRef, StringRef, StringRef, size_t,
95 size_t, Proto, uint32_t, uint32_t,
96 uint32_t, bool, bool, bool, bool>>,
97 bool, SessionAffinity, StringRef, StringRef,
98 SessionAffinityCookieSecure, int64_t, int64_t, StringRef, bool>;
// Builds the lookup key for |shared_addr| combined with |mruby_file|.
//
// One tuple is filled in per backend address and the tuples are then
// sorted, so two groups listing the same addresses in a different order
// produce the same key.  The remaining slots carry the group-wide settings
// (redirect_if_not_tls, session affinity, timeouts, mruby file, dnf).
102 create_downstream_key(const std::shared_ptr<SharedDownstreamAddr> &shared_addr,
103 const StringRef &mruby_file) {
106 auto &addrs = std::get<0>(dkey);
107 addrs.resize(shared_addr->addrs.size());
108 auto p = std::begin(addrs);
// Copy the per-address attributes that make two backends "the same".
109 for (auto &a : shared_addr->addrs) {
110 std::get<0>(*p) = a.host;
111 std::get<1>(*p) = a.sni;
112 std::get<2>(*p) = a.group;
113 std::get<3>(*p) = a.fall;
114 std::get<4>(*p) = a.rise;
115 std::get<5>(*p) = a.proto;
116 std::get<6>(*p) = a.port;
117 std::get<7>(*p) = a.weight;
118 std::get<8>(*p) = a.group_weight;
119 std::get<9>(*p) = a.host_unix;
120 std::get<10>(*p) = a.tls;
121 std::get<11>(*p) = a.dns;
122 std::get<12>(*p) = a.upgrade_scheme;
// Normalise the address order so it does not influence key comparison.
125 std::sort(std::begin(addrs), std::end(addrs));
127 std::get<1>(dkey) = shared_addr->redirect_if_not_tls;
129 auto &affinity = shared_addr->affinity;
130 std::get<2>(dkey) = affinity.type;
131 std::get<3>(dkey) = affinity.cookie.name;
132 std::get<4>(dkey) = affinity.cookie.path;
133 std::get<5>(dkey) = affinity.cookie.secure;
134 auto &timeout = shared_addr->timeout;
135 std::get<6>(dkey) = timeout.read;
136 std::get<7>(dkey) = timeout.write;
137 std::get<8>(dkey) = mruby_file;
138 std::get<9>(dkey) = shared_addr->dnf;
// Constructs a worker bound to event loop |loop|.
//
// Stores the supplied TLS contexts, certificate lookup trees, ticket keys
// and connection handler, sets up the async watcher and the two timers used
// for event processing and memchunk-pool clearing, optionally creates the
// TLS session cache memcached dispatcher, and finally installs
// |downstreamconf| through replace_downstream_config().
// NOTE(review): several parameter and initializer lines (including the
// preprocessor guards that open the HTTP/3 sections) are missing from this
// extract.
144 Worker::Worker(struct ev_loop *loop, SSL_CTX *sv_ssl_ctx, SSL_CTX *cl_ssl_ctx,
145 SSL_CTX *tls_session_cache_memcached_ssl_ctx,
146 tls::CertLookupTree *cert_tree,
148 SSL_CTX *quic_sv_ssl_ctx, tls::CertLookupTree *quic_cert_tree,
149 const uint8_t *cid_prefix, size_t cid_prefixlen,
152 # endif // HAVE_LIBBPF
153 #endif // ENABLE_HTTP3
154 const std::shared_ptr<TicketKeys> &ticket_keys,
155 ConnectionHandler *conn_handler,
156 std::shared_ptr<DownstreamConfig> downstreamconf)
158 #if defined(ENABLE_HTTP3) && defined(HAVE_LIBBPF)
160 #endif // ENABLE_HTTP3 && HAVE_LIBBPF
161 randgen_(util::make_mt19937()),
165 quic_upstream_addrs_{get_config()->conn.quic_listener.addrs},
166 #endif // ENABLE_HTTP3
168 sv_ssl_ctx_(sv_ssl_ctx),
169 cl_ssl_ctx_(cl_ssl_ctx),
170 cert_tree_(cert_tree),
171 conn_handler_(conn_handler),
173 quic_sv_ssl_ctx_{quic_sv_ssl_ctx},
174 quic_cert_tree_{quic_cert_tree},
175 quic_conn_handler_{this},
176 #endif // ENABLE_HTTP3
177 ticket_keys_(ticket_keys),
179 std::make_unique<ConnectBlocker>(randgen_, loop_, nullptr, nullptr)),
180 graceful_shutdown_(false) {
// Keep a private copy of the connection ID prefix handed in by the caller.
182 std::copy_n(cid_prefix, cid_prefixlen, std::begin(cid_prefix_));
183 #endif // ENABLE_HTTP3
// Async watcher used by send() to wake this worker's loop.
185 ev_async_init(&w_, eventcb);
187 ev_async_start(loop_, &w_);
// Both timers are only initialised here (zero timeouts); they are started
// elsewhere when needed.
189 ev_timer_init(&mcpool_clear_timer_, mcpool_clear_cb, 0., 0.);
190 mcpool_clear_timer_.data = this;
192 ev_timer_init(&proc_wev_timer_, proc_wev_cb, 0., 0.);
193 proc_wev_timer_.data = this;
195 auto &session_cacheconf = get_config()->tls.session_cache;
// The memcached dispatcher exists only when a memcached host is configured.
197 if (!session_cacheconf.memcached.host.empty()) {
198 session_cache_memcached_dispatcher_ = std::make_unique<MemcachedDispatcher>(
199 &session_cacheconf.memcached.addr, loop,
200 tls_session_cache_memcached_ssl_ctx,
201 StringRef{session_cacheconf.memcached.host}, &mcpool_, randgen_);
204 replace_downstream_config(std::move(downstreamconf));
// Puts |addr| back into its weight group |wg|'s address queue and puts |wg|
// into the weight-group queue |wgpq|, clearing the pending_penalty of each
// as it is pushed.
// NOTE(review): the conditions guarding the two pushes (and what is done
// with the current queue tops) are missing from this extract; presumably
// each push happens only when the item is not already queued -- confirm.
208 void ensure_enqueue_addr(
209 std::priority_queue<WeightGroupEntry, std::vector<WeightGroupEntry>,
210 WeightGroupEntryGreater> &wgpq,
211 WeightGroup *wg, DownstreamAddr *addr) {
213 if (!wg->pq.empty()) {
214 auto &top = wg->pq.top();
221 addr->pending_penalty = 0;
222 wg->pq.push(DownstreamAddrEntry{addr, addr->seq, addr->cycle});
227 auto &top = wgpq.top();
234 wg->pending_penalty = 0;
235 wgpq.push(WeightGroupEntry{wg, wg->seq, wg->cycle});
// Replaces the worker's backend configuration with |downstreamconf|.
//
// Connections pooled for the current groups are dropped, then
// downstream_addr_groups_ is rebuilt with one DownstreamAddrGroup per entry
// of downstreamconf->addr_groups.  Groups whose DownstreamKey is identical
// share a single SharedDownstreamAddr (and therefore its connections);
// mruby contexts are likewise shared between groups naming the same file.
// NOTE(review): a number of lines (loop/branch terminators, some assignment
// targets) are missing from this extract.
241 void Worker::replace_downstream_config(
242 std::shared_ptr<DownstreamConfig> downstreamconf) {
// Drop every pooled backend connection belonging to the old configuration.
243 for (auto &g : downstream_addr_groups_) {
246 auto &shared_addr = g->shared_addr;
247 for (auto &addr : shared_addr->addrs) {
248 addr.dconn_pool->remove_all();
252 downstreamconf_ = downstreamconf;
254 // Making a copy is much faster with multiple thread on
255 // backendconfig API call.
256 auto groups = downstreamconf->addr_groups;
258 downstream_addr_groups_ =
259 std::vector<std::shared_ptr<DownstreamAddrGroup>>(groups.size());
// Maps a backend-set key to the index of the first group that uses it.
261 std::map<DownstreamKey, size_t> addr_groups_indexer;
263 // TODO It is a bit less efficient because
264 // mruby::create_mruby_context returns std::unique_ptr and we cannot
265 // use std::make_shared.
266 std::map<StringRef, std::shared_ptr<mruby::MRubyContext>> shared_mruby_ctxs;
269 for (size_t i = 0; i < groups.size(); ++i) {
270 auto &src = groups[i];
271 auto &dst = downstream_addr_groups_[i];
273 dst = std::make_shared<DownstreamAddrGroup>();
275 ImmutableString{std::begin(src.pattern), std::end(src.pattern)};
277 auto shared_addr = std::make_shared<SharedDownstreamAddr>();
// Copy the group-wide settings; strings are duplicated into the
// SharedDownstreamAddr's own allocator.
279 shared_addr->addrs.resize(src.addrs.size());
280 shared_addr->affinity.type = src.affinity.type;
281 if (src.affinity.type == SessionAffinity::COOKIE) {
282 shared_addr->affinity.cookie.name =
283 make_string_ref(shared_addr->balloc, src.affinity.cookie.name);
284 if (!src.affinity.cookie.path.empty()) {
285 shared_addr->affinity.cookie.path =
286 make_string_ref(shared_addr->balloc, src.affinity.cookie.path);
288 shared_addr->affinity.cookie.secure = src.affinity.cookie.secure;
290 shared_addr->affinity_hash = src.affinity_hash;
291 shared_addr->redirect_if_not_tls = src.redirect_if_not_tls;
292 shared_addr->dnf = src.dnf;
293 shared_addr->timeout.read = src.timeout.read;
294 shared_addr->timeout.write = src.timeout.write;
// Copy the per-address settings.
296 for (size_t j = 0; j < src.addrs.size(); ++j) {
297 auto &src_addr = src.addrs[j];
298 auto &dst_addr = shared_addr->addrs[j];
300 dst_addr.addr = src_addr.addr;
301 dst_addr.host = make_string_ref(shared_addr->balloc, src_addr.host);
303 make_string_ref(shared_addr->balloc, src_addr.hostport);
304 dst_addr.port = src_addr.port;
305 dst_addr.host_unix = src_addr.host_unix;
306 dst_addr.weight = src_addr.weight;
307 dst_addr.group = make_string_ref(shared_addr->balloc, src_addr.group);
308 dst_addr.group_weight = src_addr.group_weight;
309 dst_addr.proto = src_addr.proto;
310 dst_addr.tls = src_addr.tls;
311 dst_addr.sni = make_string_ref(shared_addr->balloc, src_addr.sni);
312 dst_addr.fall = src_addr.fall;
313 dst_addr.rise = src_addr.rise;
314 dst_addr.dns = src_addr.dns;
315 dst_addr.upgrade_scheme = src_addr.upgrade_scheme;
// Create the mruby context for this file once and reuse it afterwards.
319 auto mruby_ctx_it = shared_mruby_ctxs.find(src.mruby_file);
320 if (mruby_ctx_it == std::end(shared_mruby_ctxs)) {
321 shared_addr->mruby_ctx = mruby::create_mruby_context(src.mruby_file);
322 assert(shared_addr->mruby_ctx);
323 shared_mruby_ctxs.emplace(src.mruby_file, shared_addr->mruby_ctx);
325 shared_addr->mruby_ctx = (*mruby_ctx_it).second;
329 // share the connection if patterns have the same set of backend
332 auto dkey = create_downstream_key(shared_addr, src.mruby_file);
333 auto it = addr_groups_indexer.find(dkey);
// First group with this backend set: finish initialising shared_addr.
335 if (it == std::end(addr_groups_indexer)) {
336 std::shuffle(std::begin(shared_addr->addrs), std::end(shared_addr->addrs),
339 auto shared_addr_ptr = shared_addr.get();
341 for (auto &addr : shared_addr->addrs) {
// The callback captures |addr| by reference and shared_addr by raw pointer;
// both refer to storage owned by shared_addr, which also owns the blocker.
342 addr.connect_blocker = std::make_unique<ConnectBlocker>(
343 randgen_, loop_, nullptr, [shared_addr_ptr, &addr]() {
348 ensure_enqueue_addr(shared_addr_ptr->pq, addr.wg, &addr);
352 addr.live_check = std::make_unique<LiveCheck>(loop_, cl_ssl_ctx_, this,
357 for (auto &addr : shared_addr->addrs) {
358 addr.dconn_pool = std::make_unique<DownstreamConnectionPool>();
// Weight groups are only built when no session affinity is configured.
362 if (shared_addr->affinity.type == SessionAffinity::NONE) {
363 std::map<StringRef, WeightGroup *> wgs;
// First pass: collect the distinct group names.
365 for (auto &addr : shared_addr->addrs) {
366 if (wgs.find(addr.group) == std::end(wgs)) {
368 wgs.emplace(addr.group, nullptr);
372 shared_addr->wgs = std::vector<WeightGroup>(num_wgs);
// Second pass: assign each name a WeightGroup slot and queue its addresses.
374 for (auto &addr : shared_addr->addrs) {
375 auto &wg = wgs[addr.group];
377 wg = &shared_addr->wgs[--num_wgs];
381 wg->weight = addr.group_weight;
382 wg->pq.push(DownstreamAddrEntry{&addr, addr.seq, addr.cycle});
// Every slot must have been handed out exactly once.
387 assert(num_wgs == 0);
389 for (auto &kv : wgs) {
390 shared_addr->pq.push(
391 WeightGroupEntry{kv.second, kv.second->seq, kv.second->cycle});
392 kv.second->queued = true;
396 dst->shared_addr = shared_addr;
398 addr_groups_indexer.emplace(std::move(dkey), i);
// Otherwise reuse the SharedDownstreamAddr of the earlier, identical group.
400 auto &g = *(std::begin(downstream_addr_groups_) + (*it).second);
401 if (LOG_ENABLED(INFO)) {
402 LOG(INFO) << dst->pattern << " shares the same backend group with "
405 dst->shared_addr = g->shared_addr;
// Stops the async watcher and both timers on loop_.
// NOTE(review): the enclosing function header is missing from this extract;
// presumably this is the body of Worker's destructor -- confirm.
411 ev_async_stop(loop_, &w_);
412 ev_timer_stop(loop_, &mcpool_clear_timer_);
413 ev_timer_stop(loop_, &proc_wev_timer_);
// Arms the memchunk-pool clear timer so that mcpool_clear_cb runs on loop_.
// Safe to call repeatedly; see the note below.
416 void Worker::schedule_clear_mcpool() {
417 // libev manual says: "If the watcher is already active nothing will
418 // happen." Since we don't change any timeout here, we don't have
419 // to worry about querying ev_is_active.
420 ev_timer_start(loop_, &mcpool_clear_timer_);
// NOTE(review): the body is missing from this extract; presumably it blocks
// until the task started by run_async() has finished -- confirm.
423 void Worker::wait() {
// Launches the worker on its own thread via std::async(std::launch::async)
// and keeps the resulting future in fut_.  The task begins by reopening the
// log files; the result of that call is deliberately ignored.
// NOTE(review): the remainder of the task body is missing from this extract.
429 void Worker::run_async() {
431 fut_ = std::async(std::launch::async, [this] {
432 (void)reopen_log_files(get_config()->logging);
// Hands |event| to this worker: the event is appended to q_ while holding
// m_, then the worker's loop is woken through the async watcher so that
// eventcb() runs process_events().
439 void Worker::send(WorkerEvent event) {
441 std::lock_guard<std::mutex> g(m_);
443 q_.emplace_back(std::move(event));
446 ev_async_send(loop_, &w_);
// Takes one event from q_ (under m_) and handles it according to its type:
// accepting a new client connection, reopening log files, starting graceful
// shutdown, replacing the backend configuration, or forwarding a QUIC
// packet.  proc_wev_timer_ is (re)started so that further queued events are
// handled on later loop iterations.
// NOTE(review): several lines (queue-empty handling, breaks/returns, the
// switch header) are missing from this extract.
449 void Worker::process_events() {
452 std::lock_guard<std::mutex> g(m_);
454 // Process event one at a time. This is important for
455 // WorkerEventType::NEW_CONNECTION event since accepting large
456 // number of new connections at once may delay time to 1st byte
457 // for existing connections.
460 ev_timer_stop(loop_, &proc_wev_timer_);
464 wev = std::move(q_.front());
468 ev_timer_start(loop_, &proc_wev_timer_);
470 auto config = get_config();
472 auto worker_connections = config->conn.upstream.worker_connections;
475 case WorkerEventType::NEW_CONNECTION: {
476 if (LOG_ENABLED(INFO)) {
477 WLOG(INFO, this) << "WorkerEvent: client_fd=" << wev.client_fd
478 << ", addrlen=" << wev.client_addrlen;
// Refuse the connection once the per-worker connection limit is reached.
481 if (worker_stat_.num_connections >= worker_connections) {
483 if (LOG_ENABLED(INFO)) {
484 WLOG(INFO, this) << "Too many connections >= " << worker_connections;
487 close(wev.client_fd);
492 auto client_handler =
493 tls::accept_connection(this, wev.client_fd, &wev.client_addr.sa,
494 wev.client_addrlen, wev.faddr);
495 if (!client_handler) {
// NOTE(review): this ERROR-level message is only emitted when INFO logging
// is enabled.
496 if (LOG_ENABLED(INFO)) {
497 WLOG(ERROR, this) << "ClientHandler creation failed";
499 close(wev.client_fd);
503 if (LOG_ENABLED(INFO)) {
504 WLOG(INFO, this) << "CLIENT_HANDLER:" << client_handler << " created ";
509 case WorkerEventType::REOPEN_LOG:
510 WLOG(NOTICE, this) << "Reopening log files: worker process (thread " << this
513 reopen_log_files(config->logging);
516 case WorkerEventType::GRACEFUL_SHUTDOWN:
517 WLOG(NOTICE, this) << "Graceful shutdown commencing";
519 graceful_shutdown_ = true;
// Nothing left to wait for when there are no connections and no close-waits.
521 if (worker_stat_.num_connections == 0 &&
522 worker_stat_.num_close_waits == 0) {
529 case WorkerEventType::REPLACE_DOWNSTREAM:
530 WLOG(NOTICE, this) << "Replace downstream";
532 replace_downstream_config(wev.downstreamconf);
536 case WorkerEventType::QUIC_PKT_FORWARD: {
537 const UpstreamAddr *faddr;
// An index of size_t(-1) means "unknown": look the frontend address up by
// the packet's local address instead.
539 if (wev.quic_pkt->upstream_addr_index == static_cast<size_t>(-1)) {
540 faddr = find_quic_upstream_addr(wev.quic_pkt->local_addr);
541 if (faddr == nullptr) {
542 LOG(ERROR) << "No suitable upstream address found";
546 } else if (quic_upstream_addrs_.size() <=
547 wev.quic_pkt->upstream_addr_index) {
548 LOG(ERROR) << "upstream_addr_index is too large";
552 faddr = &quic_upstream_addrs_[wev.quic_pkt->upstream_addr_index];
555 quic_conn_handler_.handle_packet(
556 faddr, wev.quic_pkt->remote_addr, wev.quic_pkt->local_addr,
557 wev.quic_pkt->data.data(), wev.quic_pkt->data.size());
561 #endif // ENABLE_HTTP3
563 if (LOG_ENABLED(INFO)) {
564 WLOG(INFO, this) << "unknown event type " << static_cast<int>(wev.type);
// Returns the certificate lookup tree passed to the constructor (not owned).
569 tls::CertLookupTree *Worker::get_cert_lookup_tree() const { return cert_tree_; }
// Returns the QUIC certificate lookup tree passed to the constructor.
572 tls::CertLookupTree *Worker::get_quic_cert_lookup_tree() const {
573 return quic_cert_tree_;
575 #endif // ENABLE_HTTP3
// Returns the current TLS ticket keys.  With atomic shared_ptr support the
// pointer is read with an acquire load (pairing with the release store in
// set_ticket_keys()); otherwise access is serialised by ticket_keys_m_.
// NOTE(review): the return statement of the mutex branch is missing from
// this extract.
577 std::shared_ptr<TicketKeys> Worker::get_ticket_keys() {
578 #ifdef HAVE_ATOMIC_STD_SHARED_PTR
579 return std::atomic_load_explicit(&ticket_keys_, std::memory_order_acquire);
580 #else // !HAVE_ATOMIC_STD_SHARED_PTR
581 std::lock_guard<std::mutex> g(ticket_keys_m_);
583 #endif // !HAVE_ATOMIC_STD_SHARED_PTR
// Replaces the TLS ticket keys with |ticket_keys|.  With atomic shared_ptr
// support the pointer is published with a release store; otherwise the
// assignment is made while holding ticket_keys_m_.
586 void Worker::set_ticket_keys(std::shared_ptr<TicketKeys> ticket_keys) {
587 #ifdef HAVE_ATOMIC_STD_SHARED_PTR
588 // This is single writer
589 std::atomic_store_explicit(&ticket_keys_, std::move(ticket_keys),
590 std::memory_order_release);
591 #else // !HAVE_ATOMIC_STD_SHARED_PTR
592 std::lock_guard<std::mutex> g(ticket_keys_m_);
593 ticket_keys_ = std::move(ticket_keys);
594 #endif // !HAVE_ATOMIC_STD_SHARED_PTR
// Returns a pointer to this worker's statistics object (owned by the worker).
597 WorkerStat *Worker::get_worker_stat() { return &worker_stat_; }
// NOTE(review): the body is missing from this extract; presumably it
// returns the event loop this worker runs on (loop_) -- confirm.
599 struct ev_loop *Worker::get_loop() const {
// Returns the sv_ssl_ctx SSL_CTX passed to the constructor (not owned).
603 SSL_CTX *Worker::get_sv_ssl_ctx() const { return sv_ssl_ctx_; }
// Returns the cl_ssl_ctx SSL_CTX passed to the constructor (not owned).
605 SSL_CTX *Worker::get_cl_ssl_ctx() const { return cl_ssl_ctx_; }
// Returns the quic_sv_ssl_ctx SSL_CTX passed to the constructor (not owned).
608 SSL_CTX *Worker::get_quic_sv_ssl_ctx() const { return quic_sv_ssl_ctx_; }
609 #endif // ENABLE_HTTP3
// Sets the graceful-shutdown flag to |f|.
611 void Worker::set_graceful_shutdown(bool f) { graceful_shutdown_ = f; }
// Returns the current value of the graceful-shutdown flag.
613 bool Worker::get_graceful_shutdown() const { return graceful_shutdown_; }
// Returns a pointer to this worker's memchunk pool (owned by the worker).
615 MemchunkPool *Worker::get_mcpool() { return &mcpool_; }
// Returns the TLS session cache memcached dispatcher, or nullptr when none
// was created (the constructor creates it only if a memcached host is
// configured).
617 MemcachedDispatcher *Worker::get_session_cache_memcached_dispatcher() {
618 return session_cache_memcached_dispatcher_.get();
// Returns a mutable reference to this worker's random number generator.
621 std::mt19937 &Worker::get_randgen() { return randgen_; }
// Creates the worker-wide mruby context from the configured mruby_file and
// stores it in mruby_ctx_.
// NOTE(review): the result check and return statements are missing from
// this extract; the meaning of the int return value cannot be confirmed
// from here.
624 int Worker::create_mruby_context() {
625 mruby_ctx_ = mruby::create_mruby_context(StringRef{get_config()->mruby_file});
// Returns the worker-wide mruby context (owned by the worker); nullptr if
// none has been stored in mruby_ctx_.
633 mruby::MRubyContext *Worker::get_mruby_context() const {
634 return mruby_ctx_.get();
// Returns a mutable reference to the current backend address groups.  The
// vector is reassigned by replace_downstream_config().
638 std::vector<std::shared_ptr<DownstreamAddrGroup>> &
639 Worker::get_downstream_addr_groups() {
640 return downstream_addr_groups_;
// Returns the worker-level ConnectBlocker (owned by the worker).
643 ConnectBlocker *Worker::get_connect_blocker() const {
644 return connect_blocker_.get();
// Returns the backend configuration most recently installed by
// replace_downstream_config().
647 const DownstreamConfig *Worker::get_downstream_config() const {
648 return downstreamconf_.get();
// Returns the ConnectionHandler passed to the constructor (not owned).
651 ConnectionHandler *Worker::get_connection_handler() const {
652 return conn_handler_;
// Returns a pointer to this worker's QUIC connection handler (a member of
// the worker).
656 QUICConnectionHandler *Worker::get_quic_connection_handler() {
657 return &quic_conn_handler_;
659 #endif // ENABLE_HTTP3
// Returns a pointer to this worker's DNS tracker (a member of the worker).
661 DNSTracker *Worker::get_dns_tracker() { return &dns_tracker_; }
// Decides whether this worker is the one that loads and attaches the BPF
// program in create_quic_server_socket().  The decision depends on whether
// BPF is disabled in the QUIC configuration and on the combination of
// multi-threaded mode with the API being enabled.
// NOTE(review): the return statements are missing from this extract, so the
// exact rule (which worker is chosen) cannot be confirmed from here.
665 bool Worker::should_attach_bpf() const {
666 auto config = get_config();
667 auto &quicconf = config->quic;
668 auto &apiconf = config->api;
670 if (quicconf.bpf.disabled) {
674 if (!config->single_thread && apiconf.enabled) {
// Returns true unless BPF is disabled in the QUIC configuration.
// NOTE(review): lines are missing from this extract, so an earlier return
// may exist -- confirm.
681 bool Worker::should_update_bpf_map() const {
682 auto config = get_config();
683 auto &quicconf = config->quic;
685 return !quicconf.bpf.disabled;
// Computes the index used as this worker's key in the BPF reuseport_array
// (see create_quic_server_socket()).  The value depends on whether the
// process is multi-threaded with the API enabled.
// NOTE(review): the return statements are missing from this extract, so the
// actual index values cannot be confirmed from here.
688 uint32_t Worker::compute_sk_index() const {
689 auto config = get_config();
690 auto &apiconf = config->api;
692 if (!config->single_thread && apiconf.enabled) {
698 # endif // HAVE_LIBBPF
// Creates one QUIC server socket and one QUICListener per configured QUIC
// frontend address.  UNIX domain addresses are not allowed here (asserted),
// and the resolved host:port of every endpoint must be unique.
// NOTE(review): the return statements and the declaration of |n| are
// missing from this extract; presumably a non-zero value is returned on
// failure -- confirm.
700 int Worker::setup_quic_server_socket() {
703 for (auto &addr : quic_upstream_addrs_) {
704 assert(!addr.host_unix);
705 if (create_quic_server_socket(addr) != 0) {
709 // Make sure that each endpoint has a unique address.
710 for (size_t i = 0; i < n; ++i) {
711 const auto &a = quic_upstream_addrs_[i];
713 if (addr.hostport == a.hostport) {
715 << "QUIC frontend endpoint must be unique: a duplicate found for "
724 quic_listeners_.emplace_back(std::make_unique<QUICListener>(&addr, this));
// Creates, configures and binds the UDP socket for QUIC frontend |faddr|.
//
// The address is resolved with getaddrinfo() (host "*" means the wildcard
// address) and each result is tried in turn: a non-blocking, close-on-exec
// datagram socket is created, the SO_REUSEADDR / SO_REUSEPORT / IPV6_V6ONLY
// and packet-info options are set, and the socket is bound.  When built
// with libbpf, the reuseport BPF program is loaded and attached by the
// worker selected by should_attach_bpf(), and this worker's socket and CID
// prefix are registered in the BPF maps.  On success faddr.hostport is
// updated with the numeric host and port.
// NOTE(review): many lines (error-path cleanup, continue/return statements,
// assignment of faddr.fd) are missing from this extract; the return-value
// convention cannot be confirmed from here.
730 int Worker::create_quic_server_socket(UpstreamAddr &faddr) {
731 std::array<char, STRERROR_BUFSIZE> errbuf;
735 auto service = util::utos(faddr.port);
737 hints.ai_family = faddr.family;
738 hints.ai_socktype = SOCK_DGRAM;
739 hints.ai_flags = AI_PASSIVE;
740 # ifdef AI_ADDRCONFIG
741 hints.ai_flags |= AI_ADDRCONFIG;
742 # endif // AI_ADDRCONFIG
// "*" selects the wildcard address (nullptr node together with AI_PASSIVE).
745 faddr.host == StringRef::from_lit("*") ? nullptr : faddr.host.c_str();
748 rv = getaddrinfo(node, service.c_str(), &hints, &res);
749 # ifdef AI_ADDRCONFIG
751 // Retry without AI_ADDRCONFIG
752 hints.ai_flags &= ~AI_ADDRCONFIG;
753 rv = getaddrinfo(node, service.c_str(), &hints, &res);
755 # endif // AI_ADDRCONFIG
757 LOG(FATAL) << "Unable to get IPv" << (faddr.family == AF_INET ? "4" : "6")
758 << " address for " << faddr.host << ", port " << faddr.port
759 << ": " << gai_strerror(rv);
// Release the getaddrinfo() result when leaving this function.
763 auto res_d = defer(freeaddrinfo, res);
765 std::array<char, NI_MAXHOST> host;
// Try each resolved address until one can be set up and bound.
767 for (rp = res; rp; rp = rp->ai_next) {
768 rv = getnameinfo(rp->ai_addr, rp->ai_addrlen, host.data(), host.size(),
769 nullptr, 0, NI_NUMERICHOST);
771 LOG(WARN) << "getnameinfo() failed: " << gai_strerror(rv);
775 # ifdef SOCK_NONBLOCK
776 fd = socket(rp->ai_family, rp->ai_socktype | SOCK_NONBLOCK | SOCK_CLOEXEC,
780 LOG(WARN) << "socket() syscall failed: "
781 << xsi_strerror(error, errbuf.data(), errbuf.size());
784 # else // !SOCK_NONBLOCK
785 fd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
788 LOG(WARN) << "socket() syscall failed: "
789 << xsi_strerror(error, errbuf.data(), errbuf.size());
// Without SOCK_NONBLOCK/SOCK_CLOEXEC the flags are set after creation.
792 util::make_socket_nonblocking(fd);
793 util::make_socket_closeonexec(fd);
794 # endif // !SOCK_NONBLOCK
797 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val,
798 static_cast<socklen_t>(sizeof(val))) == -1) {
800 LOG(WARN) << "Failed to set SO_REUSEADDR option to listener socket: "
801 << xsi_strerror(error, errbuf.data(), errbuf.size());
806 if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val,
807 static_cast<socklen_t>(sizeof(val))) == -1) {
809 LOG(WARN) << "Failed to set SO_REUSEPORT option to listener socket: "
810 << xsi_strerror(error, errbuf.data(), errbuf.size());
815 if (faddr.family == AF_INET6) {
817 if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val,
818 static_cast<socklen_t>(sizeof(val))) == -1) {
820 LOG(WARN) << "Failed to set IPV6_V6ONLY option to listener socket: "
821 << xsi_strerror(error, errbuf.data(), errbuf.size());
825 # endif // IPV6_V6ONLY
827 if (setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &val,
828 static_cast<socklen_t>(sizeof(val))) == -1) {
831 << "Failed to set IPV6_RECVPKTINFO option to listener socket: "
832 << xsi_strerror(error, errbuf.data(), errbuf.size());
837 if (setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &val,
838 static_cast<socklen_t>(sizeof(val))) == -1) {
840 LOG(WARN) << "Failed to set IP_PKTINFO option to listener socket: "
841 << xsi_strerror(error, errbuf.data(), errbuf.size());
849 if (bind(fd, rp->ai_addr, rp->ai_addrlen) == -1) {
851 LOG(WARN) << "bind() syscall failed: "
852 << xsi_strerror(error, errbuf.data(), errbuf.size());
858 auto config = get_config();
860 auto &quic_bpf_refs = conn_handler_->get_quic_bpf_refs();
// Only the worker chosen by should_attach_bpf() loads and attaches the
// program; the map descriptors are recorded in quic_bpf_refs[faddr.index].
863 if (should_attach_bpf()) {
864 auto &bpfconf = config->quic.bpf;
866 auto obj = bpf_object__open_file(bpfconf.prog_file.c_str(), nullptr);
867 err = libbpf_get_error(obj);
869 LOG(FATAL) << "Failed to open bpf object file: "
870 << xsi_strerror(-err, errbuf.data(), errbuf.size());
875 if (bpf_object__load(obj)) {
876 LOG(FATAL) << "Failed to load bpf object file: "
877 << xsi_strerror(errno, errbuf.data(), errbuf.size());
882 auto prog = bpf_object__find_program_by_name(obj, "select_reuseport");
883 err = libbpf_get_error(prog);
885 LOG(FATAL) << "Failed to find sk_reuseport program: "
886 << xsi_strerror(-err, errbuf.data(), errbuf.size());
891 auto &ref = quic_bpf_refs[faddr.index];
895 auto reuseport_array =
896 bpf_object__find_map_by_name(obj, "reuseport_array");
897 err = libbpf_get_error(reuseport_array);
899 LOG(FATAL) << "Failed to get reuseport_array: "
900 << xsi_strerror(-err, errbuf.data(), errbuf.size());
905 ref.reuseport_array = bpf_map__fd(reuseport_array);
907 auto cid_prefix_map = bpf_object__find_map_by_name(obj, "cid_prefix_map");
908 err = libbpf_get_error(cid_prefix_map);
910 LOG(FATAL) << "Failed to get cid_prefix_map: "
911 << xsi_strerror(-err, errbuf.data(), errbuf.size());
916 ref.cid_prefix_map = bpf_map__fd(cid_prefix_map);
918 auto sk_info = bpf_object__find_map_by_name(obj, "sk_info");
919 err = libbpf_get_error(sk_info);
921 LOG(FATAL) << "Failed to get sk_info: "
922 << xsi_strerror(-err, errbuf.data(), errbuf.size());
// sk_info slot 0 holds the number of sockets (one per worker).
927 constexpr uint32_t zero = 0;
928 uint64_t num_socks = config->num_worker;
930 if (bpf_map_update_elem(bpf_map__fd(sk_info), &zero, &num_socks,
932 LOG(FATAL) << "Failed to update sk_info: "
933 << xsi_strerror(errno, errbuf.data(), errbuf.size());
// sk_info slots 1 and 2 receive the CID encryption key in two parts: the
// bytes starting at offset 0 and the bytes starting at offset 8.
938 constexpr uint32_t key_high_idx = 1;
939 constexpr uint32_t key_low_idx = 2;
941 auto &qkms = conn_handler_->get_quic_keying_materials();
942 auto &qkm = qkms->keying_materials.front();
944 if (bpf_map_update_elem(bpf_map__fd(sk_info), &key_high_idx,
945 qkm.cid_encryption_key.data(), BPF_ANY) != 0) {
946 LOG(FATAL) << "Failed to update key_high_idx sk_info: "
947 << xsi_strerror(errno, errbuf.data(), errbuf.size());
952 if (bpf_map_update_elem(bpf_map__fd(sk_info), &key_low_idx,
953 qkm.cid_encryption_key.data() + 8,
955 LOG(FATAL) << "Failed to update key_low_idx sk_info: "
956 << xsi_strerror(errno, errbuf.data(), errbuf.size());
961 auto prog_fd = bpf_program__fd(prog);
963 if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &prog_fd,
964 static_cast<socklen_t>(sizeof(prog_fd))) == -1) {
965 LOG(FATAL) << "Failed to attach bpf program: "
966 << xsi_strerror(errno, errbuf.data(), errbuf.size());
// Register this worker's socket under its sk_index, and map its CID prefix
// to that index.
972 if (should_update_bpf_map()) {
973 const auto &ref = quic_bpf_refs[faddr.index];
974 auto sk_index = compute_sk_index();
976 if (bpf_map_update_elem(ref.reuseport_array, &sk_index, &fd,
978 LOG(FATAL) << "Failed to update reuseport_array: "
979 << xsi_strerror(errno, errbuf.data(), errbuf.size());
984 if (bpf_map_update_elem(ref.cid_prefix_map, cid_prefix_.data(), &sk_index,
986 LOG(FATAL) << "Failed to update cid_prefix_map: "
987 << xsi_strerror(errno, errbuf.data(), errbuf.size());
992 # endif // HAVE_LIBBPF
998 LOG(FATAL) << "Listening " << (faddr.family == AF_INET ? "IPv4" : "IPv6")
// Record the numeric host:port actually bound; it is later compared against
// in setup_quic_server_socket() and find_quic_upstream_addr().
1005 faddr.hostport = util::make_http_hostport(mod_config()->balloc,
1006 StringRef{host.data()}, faddr.port);
1008 LOG(NOTICE) << "Listening on " << faddr.hostport << ", quic";
// Returns a pointer to the first byte of this worker's connection ID prefix
// (storage owned by the worker).
1013 const uint8_t *Worker::get_cid_prefix() const { return cid_prefix_.data(); }
// Finds the QUIC frontend address that corresponds to |local_addr|.
//
// The numeric host and port of |local_addr| are formatted as host:port and
// compared with each frontend's hostport.  A frontend bound to the wildcard
// address (0.0.0.0 or [::]) with the same port and address family is
// remembered as a fallback, which is what the final return yields.
// NOTE(review): the return for an exact match, the case labels and the
// error returns are missing from this extract; presumably an exact match is
// returned immediately and nullptr signals failure -- confirm.
1015 const UpstreamAddr *Worker::find_quic_upstream_addr(const Address &local_addr) {
1016 std::array<char, NI_MAXHOST> host;
1018 auto rv = getnameinfo(&local_addr.su.sa, local_addr.len, host.data(),
1019 host.size(), nullptr, 0, NI_NUMERICHOST);
1021 LOG(ERROR) << "getnameinfo: " << gai_strerror(rv);
// NOTE(review): a port taken from a sockaddr is conventionally converted
// with ntohs(); htons() performs the same byte swap, so the value is the
// same.
1028 switch (local_addr.su.sa.sa_family) {
1030 port = htons(local_addr.su.in.sin_port);
1034 port = htons(local_addr.su.in6.sin6_port);
1042 std::array<char, util::max_hostport> hostport_buf;
1044 auto hostport = util::make_http_hostport(std::begin(hostport_buf),
1045 StringRef{host.data()}, port);
1046 const UpstreamAddr *fallback_faddr = nullptr;
1048 for (auto &faddr : quic_upstream_addrs_) {
1049 if (faddr.hostport == hostport) {
// Only frontends with the same port and address family can be a fallback.
1053 if (faddr.port != port || faddr.family != local_addr.su.sa.sa_family) {
// For ports 443 and 80 the wildcard hostport is compared without a ":port"
// suffix; for every other port it is matched by its "host:" prefix.
1057 if (faddr.port == 443 || faddr.port == 80) {
1058 switch (faddr.family) {
1060 if (util::streq(faddr.hostport, StringRef::from_lit("0.0.0.0"))) {
1061 fallback_faddr = &faddr;
1066 if (util::streq(faddr.hostport, StringRef::from_lit("[::]"))) {
1067 fallback_faddr = &faddr;
1075 switch (faddr.family) {
1077 if (util::starts_with(faddr.hostport,
1078 StringRef::from_lit("0.0.0.0:"))) {
1079 fallback_faddr = &faddr;
1084 if (util::starts_with(faddr.hostport, StringRef::from_lit("[::]:"))) {
1085 fallback_faddr = &faddr;
1095 return fallback_faddr;
1097 #endif // ENABLE_HTTP3
// Resolves |host| and |path| to an index into |groups| using |routerconf|.
//
// Matching is attempted in this order: the exact host + path router, the
// wildcard host patterns (looked up through a router keyed on the reversed
// host with its first character removed), and finally the path alone with
// an empty host.  The log message at the end shows that the catch-all
// pattern is used when nothing matches.
// NOTE(review): the result checks, returns and the wildcard loop header are
// missing from this extract; presumably |catch_all| is the value returned
// in the "None match" case -- confirm.
1100 size_t match_downstream_addr_group_host(
1101 const RouterConfig &routerconf, const StringRef &host,
1102 const StringRef &path,
1103 const std::vector<std::shared_ptr<DownstreamAddrGroup>> &groups,
1104 size_t catch_all, BlockAllocator &balloc) {
1106 const auto &router = routerconf.router;
1107 const auto &rev_wildcard_router = routerconf.rev_wildcard_router;
1108 const auto &wildcard_patterns = routerconf.wildcard_patterns;
1110 if (LOG_ENABLED(INFO)) {
1111 LOG(INFO) << "Perform mapping selection, using host=" << host
1112 << ", path=" << path;
1115 auto group = router.match(host, path);
1117 if (LOG_ENABLED(INFO)) {
1118 LOG(INFO) << "Found pattern with query " << host << path
1119 << ", matched pattern=" << groups[group]->pattern;
// Wildcard matching: build the reversed host without its first character
// (host is known to be non-empty here, so size() - 1 cannot underflow).
1124 if (!wildcard_patterns.empty() && !host.empty()) {
1125 auto rev_host_src = make_byte_ref(balloc, host.size() - 1);
1127 std::copy(std::begin(host) + 1, std::end(host), rev_host_src.base);
1128 std::reverse(rev_host_src.base, ep);
1129 auto rev_host = StringRef{rev_host_src.base, ep};
1131 ssize_t best_group = -1;
1132 const RNode *last_node = nullptr;
1137 rev_wildcard_router.match_prefix(&nread, &last_node, rev_host);
// Continue the prefix search with the part of rev_host not yet consumed.
1142 rev_host = StringRef{std::begin(rev_host) + nread, std::end(rev_host)};
1144 auto &wc = wildcard_patterns[wcidx];
1145 auto group = wc.router.match(StringRef{}, path);
1147 // We sorted wildcard_patterns in a way that first match is the
1148 // longest host pattern.
1149 if (LOG_ENABLED(INFO)) {
1150 LOG(INFO) << "Found wildcard pattern with query " << host << path
1151 << ", matched pattern=" << groups[group]->pattern;
1158 if (best_group != -1) {
// Last resort before catch-all: match on the path with an empty host.
1163 group = router.match(StringRef::from_lit(""), path);
1165 if (LOG_ENABLED(INFO)) {
1166 LOG(INFO) << "Found pattern with query " << path
1167 << ", matched pattern=" << groups[group]->pattern;
1172 if (LOG_ENABLED(INFO)) {
1173 LOG(INFO) << "None match. Use catch-all pattern";
1179 size_t match_downstream_addr_group(
1180 const RouterConfig &routerconf, const StringRef &hostport,
1181 const StringRef &raw_path,
1182 const std::vector<std::shared_ptr<DownstreamAddrGroup>> &groups,
1183 size_t catch_all, BlockAllocator &balloc) {
1184 if (std::find(std::begin(hostport), std::end(hostport), '/') !=
1185 std::end(hostport)) {
1186 // We use '/' specially, and if '/' is included in host, it breaks
1187 // our code. Select catch-all case.
1191 auto fragment = std::find(std::begin(raw_path), std::end(raw_path), '#');
1192 auto query = std::find(std::begin(raw_path), fragment, '?');
1193 auto path = StringRef{std::begin(raw_path), query};
1195 if (path.empty() || path[0] != '/') {
1196 path = StringRef::from_lit("/");
1199 if (hostport.empty()) {
1200 return match_downstream_addr_group_host(routerconf, hostport, path, groups,
1205 if (hostport[0] == '[') {
1206 // assume this is IPv6 numeric address
1207 auto p = std::find(std::begin(hostport), std::end(hostport), ']');
1208 if (p == std::end(hostport)) {
1211 if (p + 1 < std::end(hostport) && *(p + 1) != ':') {
1214 host = StringRef{std::begin(hostport), p + 1};
1216 auto p = std::find(std::begin(hostport), std::end(hostport), ':');
1217 if (p == std::begin(hostport)) {
1220 host = StringRef{std::begin(hostport), p};
1223 if (std::find_if(std::begin(host), std::end(host), [](char c) {
1224 return 'A' <= c || c <= 'Z';
1225 }) != std::end(host)) {
1226 auto low_host = make_byte_ref(balloc, host.size() + 1);
1227 auto ep = std::copy(std::begin(host), std::end(host), low_host.base);
1229 util::inp_strlower(low_host.base, ep);
1230 host = StringRef{low_host.base, ep};
1232 return match_downstream_addr_group_host(routerconf, host, path, groups,
// Records a connection failure for backend |addr|.
//
// The failure is reported to the address's ConnectBlocker.  When |addr|
// has a non-zero |fall| threshold and the blocker's failure count has
// reached it, a warning is logged, the blocker is switched to offline and
// the address's live check is scheduled.  |raddr| appears in the warning
// in numeric form in one of the two log branches.
// NOTE(review): the early returns and the condition choosing between the
// two log messages are missing from this extract; presumably nothing is
// done when the blocker is already offline, and |raddr| is used only when
// non-null -- confirm.
1236 void downstream_failure(DownstreamAddr *addr, const Address *raddr) {
1237 const auto &connect_blocker = addr->connect_blocker;
1239 if (connect_blocker->in_offline()) {
1243 connect_blocker->on_failure();
1245 if (addr->fall == 0) {
1249 auto fail_count = connect_blocker->get_fail_count();
1251 if (fail_count >= addr->fall) {
1253 LOG(WARN) << "Could not connect to " << util::to_numeric_addr(raddr)
1254 << " " << fail_count
1255 << " times in a row; considered as offline";
1257 LOG(WARN) << "Could not connect to " << addr->host << ":" << addr->port
1258 << " " << fail_count
1259 << " times in a row; considered as offline";
1262 connect_blocker->offline();
1265 addr->live_check->schedule();
// Fills |cid_prefix| with a connection ID prefix: the first
// SHRPX_QUIC_SERVER_IDLEN bytes are copied from |server_id| and the
// remaining SHRPX_QUIC_CID_PREFIXLEN - SHRPX_QUIC_SERVER_IDLEN bytes are
// random bytes from OpenSSL's RAND_bytes().  |cid_prefix| must therefore
// have room for SHRPX_QUIC_CID_PREFIXLEN bytes.
// NOTE(review): the return statements are missing from this extract;
// presumably 0 is returned on success and a negative value when
// RAND_bytes() fails -- confirm.
1271 int create_cid_prefix(uint8_t *cid_prefix, const uint8_t *server_id) {
1272 auto p = std::copy_n(server_id, SHRPX_QUIC_SERVER_IDLEN, cid_prefix);
1274 if (RAND_bytes(p, SHRPX_QUIC_CID_PREFIXLEN - SHRPX_QUIC_SERVER_IDLEN) != 1) {
1280 #endif // ENABLE_HTTP3
1282 } // namespace shrpx