3 * Copyright 2004--2005, Google Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13 * 3. The name of the author may not be used to endorse or promote products
14 * derived from this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #if defined(_MSC_VER) && _MSC_VER < 1300
29 #pragma warning(disable:4786)
44 #define WIN32_LEAN_AND_MEAN
54 #include "talk/base/basictypes.h"
55 #include "talk/base/byteorder.h"
56 #include "talk/base/common.h"
57 #include "talk/base/logging.h"
58 #include "talk/base/nethelpers.h"
59 #include "talk/base/physicalsocketserver.h"
60 #include "talk/base/timeutils.h"
61 #include "talk/base/winping.h"
62 #include "talk/base/win32socketinit.h"
64 // stm: this will tell us if we are on OSX
70 #include <netinet/tcp.h> // for TCP_NODELAY
71 #define IP_MTU 14 // Until this is integrated from linux/in.h to netinet/in.h
72 typedef void* SockOptArg;
76 typedef char* SockOptArg;
81 // Standard MTUs, from RFC 1191
82 const uint16 PACKET_MAXIMUMS[] = {
83 65535, // Theoretical maximum, Hyperchannel
85 17914, // 16Mb IBM Token Ring
87 //4464, // IEEE 802.5 (4Mb max)
89 //2048, // Wideband Network
90 2002, // IEEE 802.5 (4Mb recommended)
91 //1536, // Experimental Ethernet Networks
92 //1500, // Ethernet, Point-to-Point (default)
94 1006, // SLIP, ARPANET
95 //576, // X.25 Networks
96 //544, // DEC IP Portal
98 508, // IEEE 802/Source-Rt Bridge, ARCNET
99 296, // Point-to-Point (low delay)
100 68, // Official minimum
101 0, // End of list marker
// Header sizes that EstimateMTU() subtracts from each candidate MTU to obtain
// the ping payload size: the ICMP header is always counted, and the IPv4 or
// IPv6 header is added depending on the remote address family.
// NOTE(review): the initializers carry an unsigned suffix although the
// constants are declared as int.
104 static const int IP_HEADER_SIZE = 20u;
105 static const int IPV6_HEADER_SIZE = 40u;
106 static const int ICMP_HEADER_SIZE = 8u;
// Timeout handed to WinPing::Ping() for every MTU probe in EstimateMTU()
// (milliseconds, going by the constant's name).
107 static const int ICMP_PING_TIMEOUT_MILLIS = 10000u;
109 class PhysicalSocket : public AsyncSocket, public sigslot::has_slots<> {
111 PhysicalSocket(PhysicalSocketServer* ss, SOCKET s = INVALID_SOCKET)
112 : ss_(ss), s_(s), enabled_events_(0), error_(0),
113 state_((s == INVALID_SOCKET) ? CS_CLOSED : CS_CONNECTED),
116 // EnsureWinsockInit() ensures that winsock is initialized. The default
117 // version of this function doesn't do anything because winsock is
118 // initialized by constructor of a static object. If necessary, libjingle
119 // users can link it with a different version of this function by replacing
120 // win32socketinit.cc. See win32socketinit.cc for more details.
123 if (s_ != INVALID_SOCKET) {
124 enabled_events_ = DE_READ | DE_WRITE;
126 int type = SOCK_STREAM;
127 socklen_t len = sizeof(type);
128 VERIFY(0 == getsockopt(s_, SOL_SOCKET, SO_TYPE, (SockOptArg)&type, &len));
129 udp_ = (SOCK_DGRAM == type);
133 virtual ~PhysicalSocket() {
137 // Creates the underlying OS socket (same as the "socket" function).
138 virtual bool Create(int family, int type) {
140 s_ = ::socket(family, type, 0);
141 udp_ = (SOCK_DGRAM == type);
144 enabled_events_ = DE_READ | DE_WRITE;
145 return s_ != INVALID_SOCKET;
148 SocketAddress GetLocalAddress() const {
149 sockaddr_storage addr_storage = {0};
150 socklen_t addrlen = sizeof(addr_storage);
151 sockaddr* addr = reinterpret_cast<sockaddr*>(&addr_storage);
152 int result = ::getsockname(s_, addr, &addrlen);
153 SocketAddress address;
155 SocketAddressFromSockAddrStorage(addr_storage, &address);
157 LOG(LS_WARNING) << "GetLocalAddress: unable to get local addr, socket="
163 SocketAddress GetRemoteAddress() const {
164 sockaddr_storage addr_storage = {0};
165 socklen_t addrlen = sizeof(addr_storage);
166 sockaddr* addr = reinterpret_cast<sockaddr*>(&addr_storage);
167 int result = ::getpeername(s_, addr, &addrlen);
168 SocketAddress address;
170 SocketAddressFromSockAddrStorage(addr_storage, &address);
172 LOG(LS_WARNING) << "GetRemoteAddress: unable to get remote addr, socket="
178 int Bind(const SocketAddress& bind_addr) {
179 sockaddr_storage addr_storage;
180 size_t len = bind_addr.ToSockAddrStorage(&addr_storage);
181 sockaddr* addr = reinterpret_cast<sockaddr*>(&addr_storage);
182 int err = ::bind(s_, addr, static_cast<int>(len));
186 dbg_addr_ = "Bound @ ";
187 dbg_addr_.append(GetLocalAddress().ToString());
193 int Connect(const SocketAddress& addr) {
194 // TODO: Implicit creation is required to reconnect...
195 // ...but should we make it more explicit?
196 if (state_ != CS_CLOSED) {
200 if (addr.IsUnresolved()) {
201 LOG(LS_VERBOSE) << "Resolving addr in PhysicalSocket::Connect";
202 resolver_ = new AsyncResolver();
203 resolver_->set_address(addr);
204 resolver_->SignalWorkDone.connect(this, &PhysicalSocket::OnResolveResult);
206 state_ = CS_CONNECTING;
210 return DoConnect(addr);
213 int DoConnect(const SocketAddress& connect_addr) {
214 if ((s_ == INVALID_SOCKET) &&
215 !Create(connect_addr.family(), SOCK_STREAM)) {
218 sockaddr_storage addr_storage;
219 size_t len = connect_addr.ToSockAddrStorage(&addr_storage);
220 sockaddr* addr = reinterpret_cast<sockaddr*>(&addr_storage);
221 int err = ::connect(s_, addr, static_cast<int>(len));
224 state_ = CS_CONNECTED;
225 } else if (IsBlockingError(GetError())) {
226 state_ = CS_CONNECTING;
227 enabled_events_ |= DE_CONNECT;
232 enabled_events_ |= DE_READ | DE_WRITE;
236 int GetError() const {
237 CritScope cs(&crit_);
241 void SetError(int error) {
242 CritScope cs(&crit_);
246 ConnState GetState() const {
250 int GetOption(Option opt, int* value) {
253 if (TranslateOption(opt, &slevel, &sopt) == -1)
255 socklen_t optlen = sizeof(*value);
256 int ret = ::getsockopt(s_, slevel, sopt, (SockOptArg)value, &optlen);
257 if (ret != -1 && opt == OPT_DONTFRAGMENT) {
259 *value = (*value != IP_PMTUDISC_DONT) ? 1 : 0;
265 int SetOption(Option opt, int value) {
268 if (TranslateOption(opt, &slevel, &sopt) == -1)
270 if (opt == OPT_DONTFRAGMENT) {
272 value = (value) ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
275 return ::setsockopt(s_, slevel, sopt, (SockOptArg)&value, sizeof(value));
278 int Send(const void *pv, size_t cb) {
279 int sent = ::send(s_, reinterpret_cast<const char *>(pv), (int)cb,
281 // Suppress SIGPIPE. Without this, attempting to send on a socket whose
282 // other end is closed will result in a SIGPIPE signal being raised to
283 // our process, which by default will terminate the process, which we
284 // don't want. By specifying this flag, we'll just get the error EPIPE
285 // instead and can handle the error gracefully.
292 MaybeRemapSendError();
293 // We have seen minidumps where this may be false.
294 ASSERT(sent <= static_cast<int>(cb));
295 if ((sent < 0) && IsBlockingError(GetError())) {
296 enabled_events_ |= DE_WRITE;
301 int SendTo(const void* buffer, size_t length, const SocketAddress& addr) {
302 sockaddr_storage saddr;
303 size_t len = addr.ToSockAddrStorage(&saddr);
305 s_, static_cast<const char *>(buffer), static_cast<int>(length),
307 // Suppress SIGPIPE. See above for explanation.
312 reinterpret_cast<sockaddr*>(&saddr), static_cast<int>(len));
314 MaybeRemapSendError();
315 // We have seen minidumps where this may be false.
316 ASSERT(sent <= static_cast<int>(length));
317 if ((sent < 0) && IsBlockingError(GetError())) {
318 enabled_events_ |= DE_WRITE;
323 int Recv(void* buffer, size_t length) {
324 int received = ::recv(s_, static_cast<char*>(buffer),
325 static_cast<int>(length), 0);
326 if ((received == 0) && (length != 0)) {
327 // Note: on graceful shutdown, recv can return 0. In this case, we
328 // pretend it is blocking, and then signal close, so that simplifying
329 // assumptions can be made about Recv.
330 LOG(LS_WARNING) << "EOF from socket; deferring close event";
331 // Must turn this back on so that the select() loop will notice the close
333 enabled_events_ |= DE_READ;
334 SetError(EWOULDBLOCK);
338 int error = GetError();
339 bool success = (received >= 0) || IsBlockingError(error);
340 if (udp_ || success) {
341 enabled_events_ |= DE_READ;
344 LOG_F(LS_VERBOSE) << "Error = " << error;
349 int RecvFrom(void* buffer, size_t length, SocketAddress *out_addr) {
350 sockaddr_storage addr_storage;
351 socklen_t addr_len = sizeof(addr_storage);
352 sockaddr* addr = reinterpret_cast<sockaddr*>(&addr_storage);
353 int received = ::recvfrom(s_, static_cast<char*>(buffer),
354 static_cast<int>(length), 0, addr, &addr_len);
356 if ((received >= 0) && (out_addr != NULL))
357 SocketAddressFromSockAddrStorage(addr_storage, out_addr);
358 int error = GetError();
359 bool success = (received >= 0) || IsBlockingError(error);
360 if (udp_ || success) {
361 enabled_events_ |= DE_READ;
364 LOG_F(LS_VERBOSE) << "Error = " << error;
369 int Listen(int backlog) {
370 int err = ::listen(s_, backlog);
373 state_ = CS_CONNECTING;
374 enabled_events_ |= DE_ACCEPT;
376 dbg_addr_ = "Listening @ ";
377 dbg_addr_.append(GetLocalAddress().ToString());
383 AsyncSocket* Accept(SocketAddress *out_addr) {
384 sockaddr_storage addr_storage;
385 socklen_t addr_len = sizeof(addr_storage);
386 sockaddr* addr = reinterpret_cast<sockaddr*>(&addr_storage);
387 SOCKET s = ::accept(s_, addr, &addr_len);
389 if (s == INVALID_SOCKET)
391 enabled_events_ |= DE_ACCEPT;
392 if (out_addr != NULL)
393 SocketAddressFromSockAddrStorage(addr_storage, out_addr);
394 return ss_->WrapSocket(s);
398 if (s_ == INVALID_SOCKET)
400 int err = ::closesocket(s_);
406 resolver_->Destroy(false);
412 int EstimateMTU(uint16* mtu) {
413 SocketAddress addr = GetRemoteAddress();
420 // Gets the interface MTU (TTL=1) for the interface used to reach |addr|.
422 if (!ping.IsValid()) {
423 SetError(EINVAL); // can't think of a better error ID
426 int header_size = ICMP_HEADER_SIZE;
427 if (addr.family() == AF_INET6) {
428 header_size += IPV6_HEADER_SIZE;
429 } else if (addr.family() == AF_INET) {
430 header_size += IP_HEADER_SIZE;
433 for (int level = 0; PACKET_MAXIMUMS[level + 1] > 0; ++level) {
434 int32 size = PACKET_MAXIMUMS[level] - header_size;
435 WinPing::PingResult result = ping.Ping(addr.ipaddr(), size,
436 ICMP_PING_TIMEOUT_MILLIS,
438 if (result == WinPing::PING_FAIL) {
439 SetError(EINVAL); // can't think of a better error ID
441 } else if (result != WinPing::PING_TOO_LARGE) {
442 *mtu = PACKET_MAXIMUMS[level];
449 #elif defined(IOS) || defined(OSX)
450 // No simple way to do this on Mac OS X.
451 // SIOCGIFMTU would work if we knew which interface would be used, but
452 // figuring that out is pretty complicated. For now we'll return an error
453 // and let the caller pick a default MTU.
456 #elif defined(LINUX) || defined(ANDROID)
457 // Gets the path MTU.
459 socklen_t vlen = sizeof(value);
460 int err = getsockopt(s_, IPPROTO_IP, IP_MTU, &value, &vlen);
466 ASSERT((0 <= value) && (value <= 65536));
469 #elif defined(__native_client__)
470 // Most socket operations, including this, will fail in NaCl's sandbox.
// Returns the server this socket was constructed with (ss_), exposed through
// the SocketServer interface.
476 SocketServer* socketserver() { return ss_; }
479 void OnResolveResult(SignalThread* thread) {
480 if (thread != resolver_) {
484 int error = resolver_->error();
486 error = DoConnect(resolver_->address());
493 SignalCloseEvent(this, error);
497 void UpdateLastError() {
498 SetError(LAST_SYSTEM_ERROR);
501 void MaybeRemapSendError() {
503 // https://developer.apple.com/library/mac/documentation/Darwin/
504 // Reference/ManPages/man2/sendto.2.html
505 // ENOBUFS - The output queue for a network interface is full.
506 // This generally indicates that the interface has stopped sending,
507 // but may be caused by transient congestion.
508 if (GetError() == ENOBUFS) {
509 SetError(EWOULDBLOCK);
514 static int TranslateOption(Option opt, int* slevel, int* sopt) {
516 case OPT_DONTFRAGMENT:
518 *slevel = IPPROTO_IP;
519 *sopt = IP_DONTFRAGMENT;
521 #elif defined(IOS) || defined(OSX) || defined(BSD)
522 LOG(LS_WARNING) << "Socket::OPT_DONTFRAGMENT not supported.";
525 *slevel = IPPROTO_IP;
526 *sopt = IP_MTU_DISCOVER;
530 *slevel = SOL_SOCKET;
534 *slevel = SOL_SOCKET;
538 *slevel = IPPROTO_TCP;
542 LOG(LS_WARNING) << "Socket::OPT_DSCP not supported.";
551 PhysicalSocketServer* ss_;
553 uint8 enabled_events_;
556 // Protects |error_| that is accessed from different threads.
557 mutable CriticalSection crit_;
559 AsyncResolver* resolver_;
562 std::string dbg_addr_;
567 class EventDispatcher : public Dispatcher {
569 EventDispatcher(PhysicalSocketServer* ss) : ss_(ss), fSignaled_(false) {
571 LOG(LERROR) << "pipe failed";
575 virtual ~EventDispatcher() {
581 virtual void Signal() {
582 CritScope cs(&crit_);
584 const uint8 b[1] = { 0 };
585 if (VERIFY(1 == write(afd_[1], b, sizeof(b)))) {
591 virtual uint32 GetRequestedEvents() {
595 virtual void OnPreEvent(uint32 ff) {
596 // It is not possible to perfectly emulate an auto-resetting event with
597 // pipes. This simulates it by resetting before the event is handled.
599 CritScope cs(&crit_);
601 uint8 b[4]; // Allow for reading more than 1 byte, but expect 1.
602 VERIFY(1 == read(afd_[0], b, sizeof(b)));
607 virtual void OnEvent(uint32 ff, int err) {
611 virtual int GetDescriptor() {
615 virtual bool IsDescriptorClosed() {
620 PhysicalSocketServer *ss_;
623 CriticalSection crit_;
626 // These two classes use the self-pipe trick to deliver POSIX signals to our
627 // select loop. This is the only safe, reliable, cross-platform way to do
628 // non-trivial things with a POSIX signal in an event-driven program (until
629 // proper pselect() implementations become ubiquitous).
631 class PosixSignalHandler {
633 // POSIX only specifies 32 signals, but in principle the system might have
634 // more and the programmer might choose to use them, so we size our array
636 static const int kNumPosixSignals = 128;
638 // There is just a single global instance. (Signal handlers do not get any
639 // sort of user-defined void * parameter, so they can't access anything that
641 static PosixSignalHandler* Instance() {
642 LIBJINGLE_DEFINE_STATIC_LOCAL(PosixSignalHandler, instance, ());
646 // Returns true if the given signal number is set.
647 bool IsSignalSet(int signum) const {
648 ASSERT(signum < ARRAY_SIZE(received_signal_));
649 if (signum < ARRAY_SIZE(received_signal_)) {
650 return received_signal_[signum];
656 // Clears the given signal number.
657 void ClearSignal(int signum) {
658 ASSERT(signum < ARRAY_SIZE(received_signal_));
659 if (signum < ARRAY_SIZE(received_signal_)) {
660 received_signal_[signum] = false;
664 // Returns the file descriptor to monitor for signal events.
665 int GetDescriptor() const {
669 // This is called directly from our real signal handler, so it must be
670 // signal-handler-safe. That means it cannot assume anything about the
671 // user-level state of the process, since the handler could be executed at any
672 // time on any thread.
673 void OnPosixSignalReceived(int signum) {
674 if (signum >= ARRAY_SIZE(received_signal_)) {
675 // We don't have space in our array for this.
678 // Set a flag saying we've seen this signal.
679 received_signal_[signum] = true;
680 // Notify application code that we got a signal.
681 const uint8 b[1] = { 0 };
682 if (-1 == write(afd_[1], b, sizeof(b))) {
683 // Nothing we can do here. If there's an error somehow then there's
684 // nothing we can safely do from a signal handler.
685 // No, we can't even safely log it.
686 // But, we still have to check the return value here. Otherwise,
687 // GCC 4.4.1 complains about ignoring the return value. Even (void) doesn't help.
693 PosixSignalHandler() {
694 if (pipe(afd_) < 0) {
695 LOG_ERR(LS_ERROR) << "pipe failed";
698 if (fcntl(afd_[0], F_SETFL, O_NONBLOCK) < 0) {
699 LOG_ERR(LS_WARNING) << "fcntl #1 failed";
701 if (fcntl(afd_[1], F_SETFL, O_NONBLOCK) < 0) {
702 LOG_ERR(LS_WARNING) << "fcntl #2 failed";
704 memset(const_cast<void *>(static_cast<volatile void *>(received_signal_)),
706 sizeof(received_signal_));
709 ~PosixSignalHandler() {
712 // We clobber the stored file descriptor numbers here or else in principle
713 // a signal that happens to be delivered during application termination
714 // could erroneously write a zero byte to an unrelated file handle in
715 // OnPosixSignalReceived() if some other file happens to be opened later
716 // during shutdown and happens to be given the same file descriptor number
717 // as our pipe had. Unfortunately even with this precaution there is still a
718 // race where that could occur if said signal happens to be handled
719 // concurrently with this code and happens to have already read the value of
720 // afd_[1] from memory before we clobber it, but that's unlikely.
728 // These are boolean flags that will be set in our signal handler and read
729 // and cleared from Wait(). There is a race involved in this, but it is
730 // benign. The signal handler sets the flag before signaling the pipe, so
731 // we'll never end up blocking in select() while a flag is still true.
732 // However, if two of the same signal arrive close to each other then it's
733 // possible that the second time the handler may set the flag while it's still
734 // true, meaning that signal will be missed. But the first occurrence of it
735 // will still be handled, so this isn't a problem.
736 // Volatile is not necessary here for correctness, but this data _is_ volatile
737 // so I've marked it as such.
738 volatile uint8 received_signal_[kNumPosixSignals];
741 class PosixSignalDispatcher : public Dispatcher {
743 PosixSignalDispatcher(PhysicalSocketServer *owner) : owner_(owner) {
747 virtual ~PosixSignalDispatcher() {
748 owner_->Remove(this);
751 virtual uint32 GetRequestedEvents() {
755 virtual void OnPreEvent(uint32 ff) {
756 // Events might get grouped if signals come very fast, so we read out up to
757 // 16 bytes to make sure we keep the pipe empty.
759 ssize_t ret = read(GetDescriptor(), b, sizeof(b));
761 LOG_ERR(LS_WARNING) << "Error in read()";
762 } else if (ret == 0) {
763 LOG(LS_WARNING) << "Should have read at least one byte";
767 virtual void OnEvent(uint32 ff, int err) {
768 for (int signum = 0; signum < PosixSignalHandler::kNumPosixSignals;
770 if (PosixSignalHandler::Instance()->IsSignalSet(signum)) {
771 PosixSignalHandler::Instance()->ClearSignal(signum);
772 HandlerMap::iterator i = handlers_.find(signum);
773 if (i == handlers_.end()) {
774 // This can happen if a signal is delivered to our process at around
775 // the same time as we unset our handler for it. It is not an error
776 // condition, but it's unusual enough to be worth logging.
777 LOG(LS_INFO) << "Received signal with no handler: " << signum;
779 // Otherwise, execute our handler.
780 (*i->second)(signum);
786 virtual int GetDescriptor() {
787 return PosixSignalHandler::Instance()->GetDescriptor();
790 virtual bool IsDescriptorClosed() {
// Stores |handler| as the user-level callback for |signum|. Any handler
// already stored for that signal number is replaced.
794 void SetHandler(int signum, void (*handler)(int)) {
795 handlers_[signum] = handler;
// Drops the callback stored for |signum|. Erasing by key means this is a
// no-op when no callback is registered for that signal.
798 void ClearHandler(int signum) {
799 handlers_.erase(signum);
803 return !handlers_.empty();
807 typedef std::map<int, void (*)(int)> HandlerMap;
809 HandlerMap handlers_;
811 PhysicalSocketServer *owner_;
814 class SocketDispatcher : public Dispatcher, public PhysicalSocket {
816 explicit SocketDispatcher(PhysicalSocketServer *ss) : PhysicalSocket(ss) {
818 SocketDispatcher(SOCKET s, PhysicalSocketServer *ss) : PhysicalSocket(ss, s) {
821 virtual ~SocketDispatcher() {
827 fcntl(s_, F_SETFL, fcntl(s_, F_GETFL, 0) | O_NONBLOCK);
// Convenience overload: creates a socket of the given |type| in the AF_INET
// family by forwarding to Create(family, type). Returns that call's result.
831 virtual bool Create(int type) {
832 return Create(AF_INET, type);
835 virtual bool Create(int family, int type) {
836 // Change the socket to be non-blocking.
837 if (!PhysicalSocket::Create(family, type))
843 virtual int GetDescriptor() {
847 virtual bool IsDescriptorClosed() {
848 // We don't have a reliable way of distinguishing end-of-stream
849 // from readability. So test on each readable call. Is this
850 // inefficient? Probably.
852 ssize_t res = ::recv(s_, &ch, 1, MSG_PEEK);
854 // Data available, so not closed.
856 } else if (res == 0) {
861 // Returned if we've already closed s_.
863 // Returned during ungraceful peer shutdown.
867 // Assume that all other errors are just blocking errors, meaning the
868 // connection is still good but we just can't read from it right now.
869 // This should only happen when connecting (and at most once), because
870 // in all other cases this function is only called if the file
871 // descriptor is already known to be in the readable state. However,
872 // it's not necessarily a problem if we spuriously interpret a
873 // "connection lost"-type error as a blocking error, because typically
874 // the next recv() will get EOF, so we'll still eventually notice that
875 // the socket is closed.
876 LOG_ERR(LS_WARNING) << "Assuming benign blocking error";
// Reports the DE_* events currently enabled on this socket. Wait() uses the
// returned mask to decide whether the descriptor goes into the read and/or
// write fd_set.
882 virtual uint32 GetRequestedEvents() {
883 return enabled_events_;
886 virtual void OnPreEvent(uint32 ff) {
887 if ((ff & DE_CONNECT) != 0)
888 state_ = CS_CONNECTED;
889 if ((ff & DE_CLOSE) != 0)
893 virtual void OnEvent(uint32 ff, int err) {
894 // Make sure we deliver connect/accept first. Otherwise, consumers may see
895 // something like a READ followed by a CONNECT, which would be odd.
896 if ((ff & DE_CONNECT) != 0) {
897 enabled_events_ &= ~DE_CONNECT;
898 SignalConnectEvent(this);
900 if ((ff & DE_ACCEPT) != 0) {
901 enabled_events_ &= ~DE_ACCEPT;
902 SignalReadEvent(this);
904 if ((ff & DE_READ) != 0) {
905 enabled_events_ &= ~DE_READ;
906 SignalReadEvent(this);
908 if ((ff & DE_WRITE) != 0) {
909 enabled_events_ &= ~DE_WRITE;
910 SignalWriteEvent(this);
912 if ((ff & DE_CLOSE) != 0) {
913 // The socket is now dead to us, so stop checking it.
915 SignalCloseEvent(this, err);
919 virtual int Close() {
920 if (s_ == INVALID_SOCKET)
924 return PhysicalSocket::Close();
928 class FileDispatcher: public Dispatcher, public AsyncFile {
930 FileDispatcher(int fd, PhysicalSocketServer *ss) : ss_(ss), fd_(fd) {
935 fcntl(fd_, F_SETFL, fcntl(fd_, F_GETFL, 0) | O_NONBLOCK);
938 virtual ~FileDispatcher() {
942 SocketServer* socketserver() { return ss_; }
944 virtual int GetDescriptor() {
948 virtual bool IsDescriptorClosed() {
952 virtual uint32 GetRequestedEvents() {
956 virtual void OnPreEvent(uint32 ff) {
959 virtual void OnEvent(uint32 ff, int err) {
960 if ((ff & DE_READ) != 0)
961 SignalReadEvent(this);
962 if ((ff & DE_WRITE) != 0)
963 SignalWriteEvent(this);
964 if ((ff & DE_CLOSE) != 0)
965 SignalCloseEvent(this, err);
// Returns true when the DE_READ bit is set in flags_.
968 virtual bool readable() {
969 return (flags_ & DE_READ) != 0;
// Sets (value == true) or clears (value == false) the DE_READ bit in flags_,
// leaving every other bit unchanged.
972 virtual void set_readable(bool value) {
973 flags_ = value ? (flags_ | DE_READ) : (flags_ & ~DE_READ);
// Returns true when the DE_WRITE bit is set in flags_.
976 virtual bool writable() {
977 return (flags_ & DE_WRITE) != 0;
// Sets (value == true) or clears (value == false) the DE_WRITE bit in flags_,
// leaving every other bit unchanged.
980 virtual void set_writable(bool value) {
981 flags_ = value ? (flags_ | DE_WRITE) : (flags_ & ~DE_WRITE);
985 PhysicalSocketServer* ss_;
// Wraps the file descriptor |fd| in a heap-allocated FileDispatcher bound to
// this server and returns it through the AsyncFile interface.
// NOTE(review): the object is created with a raw `new`; presumably the caller
// is responsible for deleting it -- confirm against callers.
990 AsyncFile* PhysicalSocketServer::CreateFile(int fd) {
991 return new FileDispatcher(fd, this);
997 static uint32 FlagsToEvents(uint32 events) {
998 uint32 ffFD = FD_CLOSE;
999 if (events & DE_READ)
1001 if (events & DE_WRITE)
1003 if (events & DE_CONNECT)
1005 if (events & DE_ACCEPT)
1010 class EventDispatcher : public Dispatcher {
1012 EventDispatcher(PhysicalSocketServer *ss) : ss_(ss) {
1013 hev_ = WSACreateEvent();
1019 ~EventDispatcher() {
1022 WSACloseEvent(hev_);
1027 virtual void Signal() {
1032 virtual uint32 GetRequestedEvents() {
1036 virtual void OnPreEvent(uint32 ff) {
1037 WSAResetEvent(hev_);
1040 virtual void OnEvent(uint32 ff, int err) {
1043 virtual WSAEVENT GetWSAEvent() {
1047 virtual SOCKET GetSocket() {
1048 return INVALID_SOCKET;
1051 virtual bool CheckSignalClose() { return false; }
1054 PhysicalSocketServer* ss_;
1058 class SocketDispatcher : public Dispatcher, public PhysicalSocket {
1060 static int next_id_;
1065 SocketDispatcher(PhysicalSocketServer* ss)
1066 : PhysicalSocket(ss),
1068 signal_close_(false) {
1071 SocketDispatcher(SOCKET s, PhysicalSocketServer* ss)
1072 : PhysicalSocket(ss, s),
1074 signal_close_(false) {
1077 virtual ~SocketDispatcher() {
1082 ASSERT(s_ != INVALID_SOCKET);
1083 // Must be non-blocking
1085 ioctlsocket(s_, FIONBIO, &argp);
// Convenience overload: creates a socket of the given |type| in the AF_INET
// family by forwarding to Create(family, type). Returns that call's result.
1090 virtual bool Create(int type) {
1091 return Create(AF_INET, type);
1094 virtual bool Create(int family, int type) {
1096 if (!PhysicalSocket::Create(family, type))
1102 do { id_ = ++next_id_; } while (id_ == 0);
1106 virtual int Close() {
1107 if (s_ == INVALID_SOCKET)
1111 signal_close_ = false;
1113 return PhysicalSocket::Close();
1116 virtual uint32 GetRequestedEvents() {
1117 return enabled_events_;
// Pre-event hook: when |ff| includes DE_CONNECT, moves state_ to
// CS_CONNECTED. No other flag changes state here.
1120 virtual void OnPreEvent(uint32 ff) {
1121 if ((ff & DE_CONNECT) != 0)
1122 state_ = CS_CONNECTED;
1123 // We set CS_CLOSED from CheckSignalClose.
1126 virtual void OnEvent(uint32 ff, int err) {
1128 // Make sure we deliver connect/accept first. Otherwise, consumers may see
1129 // something like a READ followed by a CONNECT, which would be odd.
1130 if (((ff & DE_CONNECT) != 0) && (id_ == cache_id)) {
1131 if (ff != DE_CONNECT)
1132 LOG(LS_VERBOSE) << "Signalled with DE_CONNECT: " << ff;
1133 enabled_events_ &= ~DE_CONNECT;
1135 dbg_addr_ = "Connected @ ";
1136 dbg_addr_.append(GetRemoteAddress().ToString());
1138 SignalConnectEvent(this);
1140 if (((ff & DE_ACCEPT) != 0) && (id_ == cache_id)) {
1141 enabled_events_ &= ~DE_ACCEPT;
1142 SignalReadEvent(this);
1144 if ((ff & DE_READ) != 0) {
1145 enabled_events_ &= ~DE_READ;
1146 SignalReadEvent(this);
1148 if (((ff & DE_WRITE) != 0) && (id_ == cache_id)) {
1149 enabled_events_ &= ~DE_WRITE;
1150 SignalWriteEvent(this);
1152 if (((ff & DE_CLOSE) != 0) && (id_ == cache_id)) {
1153 signal_close_ = true;
1158 virtual WSAEVENT GetWSAEvent() {
1159 return WSA_INVALID_EVENT;
1162 virtual SOCKET GetSocket() {
1166 virtual bool CheckSignalClose() {
1171 if (recv(s_, &ch, 1, MSG_PEEK) > 0)
1175 signal_close_ = false;
1176 SignalCloseEvent(this, signal_err_);
1181 int SocketDispatcher::next_id_ = 0;
1185 // Sets a boolean flag to false when signaled.
1186 class Signaler : public EventDispatcher {
1188 Signaler(PhysicalSocketServer* ss, bool* pf)
1189 : EventDispatcher(ss), pf_(pf) {
1191 virtual ~Signaler() { }
1193 void OnEvent(uint32 ff, int err) {
1202 PhysicalSocketServer::PhysicalSocketServer()
1204 last_tick_tracked_(0),
1205 last_tick_dispatch_count_(0) {
1206 signal_wakeup_ = new Signaler(this, &fWait_);
1208 socket_ev_ = WSACreateEvent();
1212 PhysicalSocketServer::~PhysicalSocketServer() {
1214 WSACloseEvent(socket_ev_);
1217 signal_dispatcher_.reset();
1219 delete signal_wakeup_;
1220 ASSERT(dispatchers_.empty());
// Signals the wake-up dispatcher (signal_wakeup_, the Signaler created in the
// constructor).
1223 void PhysicalSocketServer::WakeUp() {
1224 signal_wakeup_->Signal();
// Convenience overload: creates a socket of the given |type| in the AF_INET
// family by forwarding to CreateSocket(family, type).
1227 Socket* PhysicalSocketServer::CreateSocket(int type) {
1228 return CreateSocket(AF_INET, type);
1231 Socket* PhysicalSocketServer::CreateSocket(int family, int type) {
1232 PhysicalSocket* socket = new PhysicalSocket(this);
1233 if (socket->Create(family, type)) {
// Convenience overload: creates an async socket of the given |type| in the
// AF_INET family by forwarding to CreateAsyncSocket(family, type).
1241 AsyncSocket* PhysicalSocketServer::CreateAsyncSocket(int type) {
1242 return CreateAsyncSocket(AF_INET, type);
1245 AsyncSocket* PhysicalSocketServer::CreateAsyncSocket(int family, int type) {
1246 SocketDispatcher* dispatcher = new SocketDispatcher(this);
1247 if (dispatcher->Create(family, type)) {
1255 AsyncSocket* PhysicalSocketServer::WrapSocket(SOCKET s) {
1256 SocketDispatcher* dispatcher = new SocketDispatcher(s, this);
1257 if (dispatcher->Initialize()) {
1265 void PhysicalSocketServer::Add(Dispatcher *pdispatcher) {
1266 CritScope cs(&crit_);
1267 // Prevent duplicates. This can cause dead dispatchers to stick around.
1268 DispatcherList::iterator pos = std::find(dispatchers_.begin(),
1271 if (pos != dispatchers_.end())
1273 dispatchers_.push_back(pdispatcher);
1276 void PhysicalSocketServer::Remove(Dispatcher *pdispatcher) {
1277 CritScope cs(&crit_);
1278 DispatcherList::iterator pos = std::find(dispatchers_.begin(),
1281 // We silently ignore duplicate calls to Add, so we should silently ignore
1282 // the (expected) symmetric calls to Remove. Note that this may still hide
1283 // a real issue, so we at least log a warning about it.
1284 if (pos == dispatchers_.end()) {
1285 LOG(LS_WARNING) << "PhysicalSocketServer asked to remove a unknown "
1286 << "dispatcher, potentially from a duplicate call to Add.";
1289 size_t index = pos - dispatchers_.begin();
1290 dispatchers_.erase(pos);
1291 for (IteratorList::iterator it = iterators_.begin(); it != iterators_.end();
1300 bool PhysicalSocketServer::Wait(int cmsWait, bool process_io) {
1301 // Calculate timing information
1303 struct timeval *ptvWait = NULL;
1304 struct timeval tvWait;
1305 struct timeval tvStop;
1306 if (cmsWait != kForever) {
1307 // Calculate wait timeval
1308 tvWait.tv_sec = cmsWait / 1000;
1309 tvWait.tv_usec = (cmsWait % 1000) * 1000;
1312 // Calculate when to return in a timeval
1313 gettimeofday(&tvStop, NULL);
1314 tvStop.tv_sec += tvWait.tv_sec;
1315 tvStop.tv_usec += tvWait.tv_usec;
1316 if (tvStop.tv_usec >= 1000000) {
1317 tvStop.tv_usec -= 1000000;
1322 // Zero all fd_sets. Don't need to do this inside the loop since
1323 // select() zeros the descriptors not signaled
1335 CritScope cr(&crit_);
1336 for (size_t i = 0; i < dispatchers_.size(); ++i) {
1337 // Query dispatchers for read and write wait state
1338 Dispatcher *pdispatcher = dispatchers_[i];
1339 ASSERT(pdispatcher);
1340 if (!process_io && (pdispatcher != signal_wakeup_))
1342 int fd = pdispatcher->GetDescriptor();
1346 uint32 ff = pdispatcher->GetRequestedEvents();
1347 if (ff & (DE_READ | DE_ACCEPT))
1348 FD_SET(fd, &fdsRead);
1349 if (ff & (DE_WRITE | DE_CONNECT))
1350 FD_SET(fd, &fdsWrite);
1354 // Wait then call handlers as appropriate
1357 // > 0 means count of descriptors ready
1358 int n = select(fdmax + 1, &fdsRead, &fdsWrite, NULL, ptvWait);
1360 // If error, return error.
1362 if (errno != EINTR) {
1363 LOG_E(LS_ERROR, EN, errno) << "select";
1366 // Else ignore the error and keep going. If this EINTR was for one of the
1367 // signals managed by this PhysicalSocketServer, the
1368 // PosixSignalDispatcher will be in the signaled state in the next
1370 } else if (n == 0) {
1371 // If timeout, return success
1374 // We have signaled descriptors
1375 CritScope cr(&crit_);
1376 for (size_t i = 0; i < dispatchers_.size(); ++i) {
1377 Dispatcher *pdispatcher = dispatchers_[i];
1378 int fd = pdispatcher->GetDescriptor();
1382 // Reap any error code, which can be signaled through reads or writes.
1383 // TODO: Should we set errcode if getsockopt fails?
1384 if (FD_ISSET(fd, &fdsRead) || FD_ISSET(fd, &fdsWrite)) {
1385 socklen_t len = sizeof(errcode);
1386 ::getsockopt(fd, SOL_SOCKET, SO_ERROR, &errcode, &len);
1389 // Check readable descriptors. If we're waiting on an accept, signal
1390 // that. Otherwise we're waiting for data, check to see if we're
1391 // readable or really closed.
1392 // TODO: Only peek at TCP descriptors.
1393 if (FD_ISSET(fd, &fdsRead)) {
1394 FD_CLR(fd, &fdsRead);
1395 if (pdispatcher->GetRequestedEvents() & DE_ACCEPT) {
1397 } else if (errcode || pdispatcher->IsDescriptorClosed()) {
1404 // Check writable descriptors. If we're waiting on a connect, detect
1405 // success versus failure by the reaped error code.
1406 if (FD_ISSET(fd, &fdsWrite)) {
1407 FD_CLR(fd, &fdsWrite);
1408 if (pdispatcher->GetRequestedEvents() & DE_CONNECT) {
1419 // Tell the descriptor about the event.
1421 pdispatcher->OnPreEvent(ff);
1422 pdispatcher->OnEvent(ff, errcode);
1427 // Recalc the time remaining to wait. Doing it here means it doesn't get
1428 // calced twice the first time through the loop
1430 ptvWait->tv_sec = 0;
1431 ptvWait->tv_usec = 0;
1433 gettimeofday(&tvT, NULL);
1434 if ((tvStop.tv_sec > tvT.tv_sec)
1435 || ((tvStop.tv_sec == tvT.tv_sec)
1436 && (tvStop.tv_usec > tvT.tv_usec))) {
1437 ptvWait->tv_sec = tvStop.tv_sec - tvT.tv_sec;
1438 ptvWait->tv_usec = tvStop.tv_usec - tvT.tv_usec;
1439 if (ptvWait->tv_usec < 0) {
1440 ASSERT(ptvWait->tv_sec > 0);
1441 ptvWait->tv_usec += 1000000;
1442 ptvWait->tv_sec -= 1;
1451 static void GlobalSignalHandler(int signum) {
1452 PosixSignalHandler::Instance()->OnPosixSignalReceived(signum);
1455 bool PhysicalSocketServer::SetPosixSignalHandler(int signum,
1456 void (*handler)(int)) {
1457 // If handler is SIG_IGN or SIG_DFL then clear our user-level handler,
1458 // otherwise set one.
1459 if (handler == SIG_IGN || handler == SIG_DFL) {
1460 if (!InstallSignal(signum, handler)) {
1463 if (signal_dispatcher_) {
1464 signal_dispatcher_->ClearHandler(signum);
1465 if (!signal_dispatcher_->HasHandlers()) {
1466 signal_dispatcher_.reset();
1470 if (!signal_dispatcher_) {
1471 signal_dispatcher_.reset(new PosixSignalDispatcher(this));
1473 signal_dispatcher_->SetHandler(signum, handler);
1474 if (!InstallSignal(signum, &GlobalSignalHandler)) {
1481 Dispatcher* PhysicalSocketServer::signal_dispatcher() {
1482 return signal_dispatcher_.get();
1485 bool PhysicalSocketServer::InstallSignal(int signum, void (*handler)(int)) {
1486 struct sigaction act;
1487 // It doesn't really matter what we set this mask to.
1488 if (sigemptyset(&act.sa_mask) != 0) {
1489 LOG_ERR(LS_ERROR) << "Couldn't set mask";
1492 act.sa_handler = handler;
1493 // Use SA_RESTART so that our syscalls don't get EINTR, since we don't need it
1494 // and it's a nuisance. Though some syscalls still return EINTR and there's no
1495 // real standard for which ones. :(
1496 act.sa_flags = SA_RESTART;
1497 if (sigaction(signum, &act, NULL) != 0) {
1498 LOG_ERR(LS_ERROR) << "Couldn't set sigaction";
// Winsock implementation of Wait(): collects WSAEVENT handles for the
// registered dispatchers, waits on them with WSAWaitForMultipleEvents(), and
// delivers OnPreEvent()/OnEvent() to the dispatcher(s) that became ready.
//
//   cmsWait    - how long to wait; kForever is handled as a special case. It is
//                compared against TimeSince(msStart), so presumably
//                milliseconds.
//   process_io - when false, dispatchers other than signal_wakeup_ are filtered
//                while building the wait set, and the per-socket network-event
//                branch further down is not taken.
//
// NOTE(review): this copy of the function is missing lines (closing braces,
// the statements under several `if`s, and the declarations of i, cmsNext,
// cmsElapsed, ff and errcode are not visible). The comments below describe only
// what the visible lines establish; the return value cannot be documented from
// here.
1506 bool PhysicalSocketServer::Wait(int cmsWait, bool process_io) {
// Total wait budget; used below to compute the remaining wait (cmsNext).
1507 int cmsTotal = cmsWait;
// Start time; elapsed time is recomputed from it with TimeSince(msStart).
1509 uint32 msStart = Time();
// A dispatch count of zero means a new 1000-event measurement window begins
// now (see the logging block after the wait call).
1512 if (last_tick_dispatch_count_ == 0) {
1513 last_tick_tracked_ = msStart;
// Wait set for this pass. events[0] is always socket_ev_; each later entry in
// events is pushed together with its dispatcher in event_owners, so
// event_owners[k] belongs to events[k + 1].
1519 std::vector<WSAEVENT> events;
1520 std::vector<Dispatcher *> event_owners;
1522 events.push_back(socket_ev_);
// NOTE(review): CritScope presumably holds crit_ for the enclosing scope while
// dispatchers_ is walked - confirm against its definition.
1525 CritScope cr(&crit_);
// Publish the address of the loop index in iterators_ for the duration of the
// walk. NOTE(review): presumably so that code adding/removing dispatchers can
// adjust it - the consumer of iterators_ is not visible here.
1527 iterators_.push_back(&i);
1528 // Don't track dispatchers_.size(), because we want to pick up any new
1529 // dispatchers that were added while processing the loop.
1530 while (i < dispatchers_.size()) {
1531 Dispatcher* disp = dispatchers_[i++];
// With process_io false only signal_wakeup_ passes this filter (the statement
// guarded by this `if` is not visible - presumably `continue`).
1532 if (!process_io && (disp != signal_wakeup_))
1534 SOCKET s = disp->GetSocket();
1535 if (disp->CheckSignalClose()) {
1536 // We just signalled close, don't poll this socket
1537 } else if (s != INVALID_SOCKET) {
// Tail of a call that passes the dispatcher's requested events, translated by
// FlagsToEvents(); the head of the call is not visible (presumably a
// WSAEventSelect on socket s - confirm).
1540 FlagsToEvents(disp->GetRequestedEvents()));
// Dispatcher contributes its own WSAEVENT to the wait set, with itself
// recorded as the owner (presumably in an `else` branch that is not visible,
// i.e. for dispatchers without a socket).
1542 events.push_back(disp->GetWSAEvent());
1543 event_owners.push_back(disp);
// Unregister the loop index; the ASSERT checks it is still the most recent
// entry in iterators_.
1546 ASSERT(iterators_.back() == &i);
1547 iterators_.pop_back();
1550 // Which is shorter, the delay wait or the asked wait?
// The kForever branch body is not visible; for a finite wait the remaining
// time is cmsTotal - cmsElapsed, clamped at zero.
1553 if (cmsWait == kForever) {
1556 cmsNext = _max(0, cmsTotal - cmsElapsed);
1559 // Wait for one of the events to signal
1560 DWORD dw = WSAWaitForMultipleEvents(static_cast<DWORD>(events.size()),
1567 // we track this information purely for logging purposes.
// Every 1000 returns from the wait, log how long they took and restart the
// measurement window.
1568 last_tick_dispatch_count_++;
1569 if (last_tick_dispatch_count_ >= 1000) {
1570 int32 elapsed = TimeSince(last_tick_tracked_);
1571 LOG(INFO) << "PhysicalSocketServer took " << elapsed
1572 << "ms for 1000 events";
1574 // If we get more than 1000 events in a second, we are spinning badly
1575 // (normally it should take about 8-20 seconds).
1576 ASSERT(elapsed > 1000);
1578 last_tick_tracked_ = Time();
1579 last_tick_dispatch_count_ = 0;
// Outcome 1: the wait itself failed; the Winsock error code is fetched (what
// is done with it is not visible here).
1583 if (dw == WSA_WAIT_FAILED) {
1585 // TODO: need a better strategy than this!
1586 int error = WSAGetLastError();
// Outcome 2: the wait timed out (handling not visible here).
1589 } else if (dw == WSA_WAIT_TIMEOUT) {
// Outcome 3: an event was signaled.
1593 // Figure out which one it is and call it
1594 CritScope cr(&crit_);
// Position of the signaled handle within `events`.
1595 int index = dw - WSA_WAIT_EVENT_0;
// A dispatcher-owned event: shift by one to index event_owners, then notify
// that dispatcher with no event flags and no error code. (The condition
// guarding this branch is not visible - presumably index > 0.)
1597 --index; // The first event is the socket event
1598 event_owners[index]->OnPreEvent(0);
1599 event_owners[index]->OnEvent(0, 0);
// Otherwise the shared socket event fired: poll every socket dispatcher for
// its pending network events, but only when I/O processing was requested.
1600 } else if (process_io) {
// Both the index and the captured end are published in iterators_ while the
// loop runs.
1601 size_t i = 0, end = dispatchers_.size();
1602 iterators_.push_back(&i);
1603 iterators_.push_back(&end); // Don't iterate over new dispatchers.
1605 Dispatcher* disp = dispatchers_[i++];
1606 SOCKET s = disp->GetSocket();
// Dispatchers without a socket are not polled here (guarded statement not
// visible - presumably `continue`).
1607 if (s == INVALID_SOCKET)
// Fetch the network events recorded for this socket against events[0]
// (socket_ev_). Handling of a non-zero `err` is not visible here.
1610 WSANETWORKEVENTS wsaEvents;
1611 int err = WSAEnumNetworkEvents(s, events[0], &wsaEvents);
// Diagnostics only: log a warning for each reported event type that carries
// a non-zero error code.
1616 if ((wsaEvents.lNetworkEvents & FD_READ) &&
1617 wsaEvents.iErrorCode[FD_READ_BIT] != 0) {
1618 LOG(WARNING) << "PhysicalSocketServer got FD_READ_BIT error "
1619 << wsaEvents.iErrorCode[FD_READ_BIT];
1621 if ((wsaEvents.lNetworkEvents & FD_WRITE) &&
1622 wsaEvents.iErrorCode[FD_WRITE_BIT] != 0) {
1623 LOG(WARNING) << "PhysicalSocketServer got FD_WRITE_BIT error "
1624 << wsaEvents.iErrorCode[FD_WRITE_BIT];
1626 if ((wsaEvents.lNetworkEvents & FD_CONNECT) &&
1627 wsaEvents.iErrorCode[FD_CONNECT_BIT] != 0) {
1628 LOG(WARNING) << "PhysicalSocketServer got FD_CONNECT_BIT error "
1629 << wsaEvents.iErrorCode[FD_CONNECT_BIT];
1631 if ((wsaEvents.lNetworkEvents & FD_ACCEPT) &&
1632 wsaEvents.iErrorCode[FD_ACCEPT_BIT] != 0) {
1633 LOG(WARNING) << "PhysicalSocketServer got FD_ACCEPT_BIT error "
1634 << wsaEvents.iErrorCode[FD_ACCEPT_BIT];
1636 if ((wsaEvents.lNetworkEvents & FD_CLOSE) &&
1637 wsaEvents.iErrorCode[FD_CLOSE_BIT] != 0) {
1638 LOG(WARNING) << "PhysicalSocketServer got FD_CLOSE_BIT error "
1639 << wsaEvents.iErrorCode[FD_CLOSE_BIT];
// Translate the Winsock event bits into the flags (ff) and error code passed
// to the dispatcher. The statements that set ff are not visible here;
// errcode is taken from the FD_CONNECT_BIT slot (presumably only when the
// connect failed) and from the FD_CLOSE_BIT slot on close.
1645 if (wsaEvents.lNetworkEvents & FD_READ)
1647 if (wsaEvents.lNetworkEvents & FD_WRITE)
1649 if (wsaEvents.lNetworkEvents & FD_CONNECT) {
1650 if (wsaEvents.iErrorCode[FD_CONNECT_BIT] == 0) {
1654 errcode = wsaEvents.iErrorCode[FD_CONNECT_BIT];
1657 if (wsaEvents.lNetworkEvents & FD_ACCEPT)
1659 if (wsaEvents.lNetworkEvents & FD_CLOSE) {
1661 errcode = wsaEvents.iErrorCode[FD_CLOSE_BIT];
// Deliver the translated events to the dispatcher.
1664 disp->OnPreEvent(ff);
1665 disp->OnEvent(ff, errcode);
// Unregister `end` and `i` in the reverse of the order they were pushed.
1669 ASSERT(iterators_.back() == &end);
1670 iterators_.pop_back();
1671 ASSERT(iterators_.back() == &i);
1672 iterators_.pop_back();
1675 // Reset the network event until new activity occurs
1676 WSAResetEvent(socket_ev_);
// Refresh the elapsed time and check whether a finite wait has used up its
// budget (the action taken is not visible here).
1682 cmsElapsed = TimeSince(msStart);
1683 if ((cmsWait != kForever) && (cmsElapsed >= cmsWait)) {
1693 } // namespace talk_base