Upstream version 10.39.225.0
[platform/framework/web/crosswalk.git] / src / net / socket / tcp_socket_win.cc
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/socket/tcp_socket.h"
6 #include "net/socket/tcp_socket_win.h"
7
8 #include <mstcpip.h>
9
10 #include "base/callback_helpers.h"
11 #include "base/logging.h"
12 #include "base/metrics/stats_counters.h"
13 #include "base/win/windows_version.h"
14 #include "net/base/address_list.h"
15 #include "net/base/connection_type_histograms.h"
16 #include "net/base/io_buffer.h"
17 #include "net/base/ip_endpoint.h"
18 #include "net/base/net_errors.h"
19 #include "net/base/net_util.h"
20 #include "net/base/network_change_notifier.h"
21 #include "net/base/winsock_init.h"
22 #include "net/base/winsock_util.h"
23 #include "net/socket/socket_descriptor.h"
24 #include "net/socket/socket_net_log_params.h"
25
26 namespace net {
27
28 namespace {
29
30 const int kTCPKeepAliveSeconds = 45;
31
32 int SetSocketReceiveBufferSize(SOCKET socket, int32 size) {
33   int rv = setsockopt(socket, SOL_SOCKET, SO_RCVBUF,
34                       reinterpret_cast<const char*>(&size), sizeof(size));
35   int net_error = (rv == 0) ? OK : MapSystemError(WSAGetLastError());
36   DCHECK(!rv) << "Could not set socket receive buffer size: " << net_error;
37   return net_error;
38 }
39
40 int SetSocketSendBufferSize(SOCKET socket, int32 size) {
41   int rv = setsockopt(socket, SOL_SOCKET, SO_SNDBUF,
42                       reinterpret_cast<const char*>(&size), sizeof(size));
43   int net_error = (rv == 0) ? OK : MapSystemError(WSAGetLastError());
44   DCHECK(!rv) << "Could not set socket send buffer size: " << net_error;
45   return net_error;
46 }
47
48 // Disable Nagle.
49 // The Nagle implementation on windows is governed by RFC 896.  The idea
50 // behind Nagle is to reduce small packets on the network.  When Nagle is
51 // enabled, if a partial packet has been sent, the TCP stack will disallow
52 // further *partial* packets until an ACK has been received from the other
53 // side.  Good applications should always strive to send as much data as
54 // possible and avoid partial-packet sends.  However, in most real world
55 // applications, there are edge cases where this does not happen, and two
56 // partial packets may be sent back to back.  For a browser, it is NEVER
57 // a benefit to delay for an RTT before the second packet is sent.
58 //
59 // As a practical example in Chromium today, consider the case of a small
60 // POST.  I have verified this:
61 //     Client writes 649 bytes of header  (partial packet #1)
62 //     Client writes 50 bytes of POST data (partial packet #2)
63 // In the above example, with Nagle, a RTT delay is inserted between these
64 // two sends due to nagle.  RTTs can easily be 100ms or more.  The best
65 // fix is to make sure that for POSTing data, we write as much data as
66 // possible and minimize partial packets.  We will fix that.  But disabling
// Nagle also ensures we don't run into this delay in other edge cases.
68 // See also:
69 //    http://technet.microsoft.com/en-us/library/bb726981.aspx
70 bool DisableNagle(SOCKET socket, bool disable) {
71   BOOL val = disable ? TRUE : FALSE;
72   int rv = setsockopt(socket, IPPROTO_TCP, TCP_NODELAY,
73                       reinterpret_cast<const char*>(&val),
74                       sizeof(val));
75   DCHECK(!rv) << "Could not disable nagle";
76   return rv == 0;
77 }
78
79 // Enable TCP Keep-Alive to prevent NAT routers from timing out TCP
80 // connections. See http://crbug.com/27400 for details.
81 bool SetTCPKeepAlive(SOCKET socket, BOOL enable, int delay_secs) {
82   int delay = delay_secs * 1000;
83   struct tcp_keepalive keepalive_vals = {
84     enable ? 1 : 0,  // TCP keep-alive on.
85     delay,  // Delay seconds before sending first TCP keep-alive packet.
86     delay,  // Delay seconds between sending TCP keep-alive packets.
87   };
88   DWORD bytes_returned = 0xABAB;
89   int rv = WSAIoctl(socket, SIO_KEEPALIVE_VALS, &keepalive_vals,
90                     sizeof(keepalive_vals), NULL, 0,
91                     &bytes_returned, NULL, NULL);
92   DCHECK(!rv) << "Could not enable TCP Keep-Alive for socket: " << socket
93               << " [error: " << WSAGetLastError() << "].";
94
95   // Disregard any failure in disabling nagle or enabling TCP Keep-Alive.
96   return rv == 0;
97 }
98
99 int MapConnectError(int os_error) {
100   switch (os_error) {
101     // connect fails with WSAEACCES when Windows Firewall blocks the
102     // connection.
103     case WSAEACCES:
104       return ERR_NETWORK_ACCESS_DENIED;
105     case WSAETIMEDOUT:
106       return ERR_CONNECTION_TIMED_OUT;
107     default: {
108       int net_error = MapSystemError(os_error);
109       if (net_error == ERR_FAILED)
110         return ERR_CONNECTION_FAILED;  // More specific than ERR_FAILED.
111
112       // Give a more specific error when the user is offline.
113       if (net_error == ERR_ADDRESS_UNREACHABLE &&
114           NetworkChangeNotifier::IsOffline()) {
115         return ERR_INTERNET_DISCONNECTED;
116       }
117
118       return net_error;
119     }
120   }
121 }
122
123 }  // namespace
124
125 //-----------------------------------------------------------------------------
126
// TCP FastOpen is not supported on Windows, so the queries below always
// answer "no" and the enable hook does nothing.
// TODO(jri): Remove these along with the corresponding global variables.
bool IsTCPFastOpenSupported() {
  return false;
}

bool IsTCPFastOpenUserEnabled() {
  return false;
}

void CheckSupportAndMaybeEnableTCPFastOpen(bool user_enabled) {
}
132
// This class encapsulates all the state that has to be preserved as long as
// there is a network IO operation in progress. If the owner TCPSocketWin is
// destroyed while an operation is in progress, the Core is detached and it
// lives until the operation completes and the OS doesn't reference any resource
// declared on this class anymore.
//
// Lifetime is reference counted: WatchForRead()/WatchForWrite() each take an
// extra reference that the matching delegate's OnObjectSignaled() releases.
class TCPSocketWin::Core : public base::RefCounted<Core> {
 public:
  // |socket| is the owning TCPSocketWin; the constructor also creates the
  // event objects stored in the two OVERLAPPED structures.
  explicit Core(TCPSocketWin* socket);

  // Start watching for the end of a read or write operation.
  // Each call adds a reference to this object until the event is signaled.
  void WatchForRead();
  void WatchForWrite();

  // The TCPSocketWin is going away.
  // After this the delegates no longer call back into the socket.
  void Detach() { socket_ = NULL; }

  // The separate OVERLAPPED variables for asynchronous operation.
  // |read_overlapped_| is used for both Connect() and Read().
  // |write_overlapped_| is only used for Write();
  OVERLAPPED read_overlapped_;
  OVERLAPPED write_overlapped_;

  // The buffers used in Read() and Write().
  // The lengths are the |buf_len| values of the pending operation.
  scoped_refptr<IOBuffer> read_iobuffer_;
  scoped_refptr<IOBuffer> write_iobuffer_;
  int read_buffer_length_;
  int write_buffer_length_;

  // Set by TCPSocketWin::DoRead() once it has issued WSAEventSelect() with
  // FD_READ | FD_CLOSE for the socket, so that it is only done once.
  bool non_blocking_reads_initialized_;

 private:
  // RefCounted needs access to the private destructor.
  friend class base::RefCounted<Core>;

  // Receives the signal for |read_overlapped_.hEvent| and forwards it to the
  // socket as either a connect completion or a read notification.
  class ReadDelegate : public base::win::ObjectWatcher::Delegate {
   public:
    explicit ReadDelegate(Core* core) : core_(core) {}
    virtual ~ReadDelegate() {}

    // base::ObjectWatcher::Delegate methods:
    virtual void OnObjectSignaled(HANDLE object);

   private:
    // The Core that owns this delegate.
    Core* const core_;
  };

  // Receives the signal for |write_overlapped_.hEvent| and forwards it to the
  // socket as a write completion.
  class WriteDelegate : public base::win::ObjectWatcher::Delegate {
   public:
    explicit WriteDelegate(Core* core) : core_(core) {}
    virtual ~WriteDelegate() {}

    // base::ObjectWatcher::Delegate methods:
    virtual void OnObjectSignaled(HANDLE object);

   private:
    // The Core that owns this delegate.
    Core* const core_;
  };

  // Stops both watchers and closes both event objects.
  ~Core();

  // The socket that created this object.
  // NULL once Detach() has been called.
  TCPSocketWin* socket_;

  // |reader_| handles the signals from |read_watcher_|.
  ReadDelegate reader_;
  // |writer_| handles the signals from |write_watcher_|.
  WriteDelegate writer_;

  // |read_watcher_| watches for events from Connect() and Read().
  base::win::ObjectWatcher read_watcher_;
  // |write_watcher_| watches for events from Write();
  base::win::ObjectWatcher write_watcher_;

  DISALLOW_COPY_AND_ASSIGN(Core);
};
207
208 TCPSocketWin::Core::Core(TCPSocketWin* socket)
209     : read_buffer_length_(0),
210       write_buffer_length_(0),
211       non_blocking_reads_initialized_(false),
212       socket_(socket),
213       reader_(this),
214       writer_(this) {
215   memset(&read_overlapped_, 0, sizeof(read_overlapped_));
216   memset(&write_overlapped_, 0, sizeof(write_overlapped_));
217
218   read_overlapped_.hEvent = WSACreateEvent();
219   write_overlapped_.hEvent = WSACreateEvent();
220 }
221
222 TCPSocketWin::Core::~Core() {
223   // Make sure the message loop is not watching this object anymore.
224   read_watcher_.StopWatching();
225   write_watcher_.StopWatching();
226
227   WSACloseEvent(read_overlapped_.hEvent);
228   memset(&read_overlapped_, 0xaf, sizeof(read_overlapped_));
229   WSACloseEvent(write_overlapped_.hEvent);
230   memset(&write_overlapped_, 0xaf, sizeof(write_overlapped_));
231 }
232
233 void TCPSocketWin::Core::WatchForRead() {
234   // We grab an extra reference because there is an IO operation in progress.
235   // Balanced in ReadDelegate::OnObjectSignaled().
236   AddRef();
237   read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_);
238 }
239
240 void TCPSocketWin::Core::WatchForWrite() {
241   // We grab an extra reference because there is an IO operation in progress.
242   // Balanced in WriteDelegate::OnObjectSignaled().
243   AddRef();
244   write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_);
245 }
246
247 void TCPSocketWin::Core::ReadDelegate::OnObjectSignaled(HANDLE object) {
248   DCHECK_EQ(object, core_->read_overlapped_.hEvent);
249   if (core_->socket_) {
250     if (core_->socket_->waiting_connect_)
251       core_->socket_->DidCompleteConnect();
252     else
253       core_->socket_->DidSignalRead();
254   }
255
256   core_->Release();
257 }
258
259 void TCPSocketWin::Core::WriteDelegate::OnObjectSignaled(
260     HANDLE object) {
261   DCHECK_EQ(object, core_->write_overlapped_.hEvent);
262   if (core_->socket_)
263     core_->socket_->DidCompleteWrite();
264
265   core_->Release();
266 }
267
268 //-----------------------------------------------------------------------------
269
270 TCPSocketWin::TCPSocketWin(net::NetLog* net_log,
271                            const net::NetLog::Source& source)
272     : socket_(INVALID_SOCKET),
273       accept_event_(WSA_INVALID_EVENT),
274       accept_socket_(NULL),
275       accept_address_(NULL),
276       waiting_connect_(false),
277       waiting_read_(false),
278       waiting_write_(false),
279       connect_os_error_(0),
280       logging_multiple_connect_attempts_(false),
281       net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)) {
282   net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE,
283                       source.ToEventParametersCallback());
284   EnsureWinsockInit();
285 }
286
287 TCPSocketWin::~TCPSocketWin() {
288   Close();
289   net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE);
290 }
291
292 int TCPSocketWin::Open(AddressFamily family) {
293   DCHECK(CalledOnValidThread());
294   DCHECK_EQ(socket_, INVALID_SOCKET);
295
296   socket_ = CreatePlatformSocket(ConvertAddressFamily(family), SOCK_STREAM,
297                                  IPPROTO_TCP);
298   if (socket_ == INVALID_SOCKET) {
299     PLOG(ERROR) << "CreatePlatformSocket() returned an error";
300     return MapSystemError(WSAGetLastError());
301   }
302
303   if (SetNonBlocking(socket_)) {
304     int result = MapSystemError(WSAGetLastError());
305     Close();
306     return result;
307   }
308
309   return OK;
310 }
311
312 int TCPSocketWin::AdoptConnectedSocket(SOCKET socket,
313                                        const IPEndPoint& peer_address) {
314   DCHECK(CalledOnValidThread());
315   DCHECK_EQ(socket_, INVALID_SOCKET);
316   DCHECK(!core_);
317
318   socket_ = socket;
319
320   if (SetNonBlocking(socket_)) {
321     int result = MapSystemError(WSAGetLastError());
322     Close();
323     return result;
324   }
325
326   core_ = new Core(this);
327   peer_address_.reset(new IPEndPoint(peer_address));
328
329   return OK;
330 }
331
332 int TCPSocketWin::AdoptListenSocket(SOCKET socket) {
333   DCHECK(CalledOnValidThread());
334   DCHECK_EQ(socket_, INVALID_SOCKET);
335
336   socket_ = socket;
337
338   if (SetNonBlocking(socket_)) {
339     int result = MapSystemError(WSAGetLastError());
340     Close();
341     return result;
342   }
343
344   // |core_| is not needed for sockets that are used to accept connections.
345   // The operation here is more like Open but with an existing socket.
346
347   return OK;
348 }
349
350 int TCPSocketWin::Bind(const IPEndPoint& address) {
351   DCHECK(CalledOnValidThread());
352   DCHECK_NE(socket_, INVALID_SOCKET);
353
354   SockaddrStorage storage;
355   if (!address.ToSockAddr(storage.addr, &storage.addr_len))
356     return ERR_ADDRESS_INVALID;
357
358   int result = bind(socket_, storage.addr, storage.addr_len);
359   if (result < 0) {
360     PLOG(ERROR) << "bind() returned an error";
361     return MapSystemError(WSAGetLastError());
362   }
363
364   return OK;
365 }
366
367 int TCPSocketWin::Listen(int backlog) {
368   DCHECK(CalledOnValidThread());
369   DCHECK_GT(backlog, 0);
370   DCHECK_NE(socket_, INVALID_SOCKET);
371   DCHECK_EQ(accept_event_, WSA_INVALID_EVENT);
372
373   accept_event_ = WSACreateEvent();
374   if (accept_event_ == WSA_INVALID_EVENT) {
375     PLOG(ERROR) << "WSACreateEvent()";
376     return MapSystemError(WSAGetLastError());
377   }
378
379   int result = listen(socket_, backlog);
380   if (result < 0) {
381     PLOG(ERROR) << "listen() returned an error";
382     return MapSystemError(WSAGetLastError());
383   }
384
385   return OK;
386 }
387
388 int TCPSocketWin::Accept(scoped_ptr<TCPSocketWin>* socket,
389                          IPEndPoint* address,
390                          const CompletionCallback& callback) {
391   DCHECK(CalledOnValidThread());
392   DCHECK(socket);
393   DCHECK(address);
394   DCHECK(!callback.is_null());
395   DCHECK(accept_callback_.is_null());
396
397   net_log_.BeginEvent(NetLog::TYPE_TCP_ACCEPT);
398
399   int result = AcceptInternal(socket, address);
400
401   if (result == ERR_IO_PENDING) {
402     // Start watching.
403     WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
404     accept_watcher_.StartWatching(accept_event_, this);
405
406     accept_socket_ = socket;
407     accept_address_ = address;
408     accept_callback_ = callback;
409   }
410
411   return result;
412 }
413
414 int TCPSocketWin::Connect(const IPEndPoint& address,
415                           const CompletionCallback& callback) {
416   DCHECK(CalledOnValidThread());
417   DCHECK_NE(socket_, INVALID_SOCKET);
418   DCHECK(!waiting_connect_);
419
420   // |peer_address_| and |core_| will be non-NULL if Connect() has been called.
421   // Unless Close() is called to reset the internal state, a second call to
422   // Connect() is not allowed.
423   // Please note that we enforce this even if the previous Connect() has
424   // completed and failed. Although it is allowed to connect the same |socket_|
425   // again after a connection attempt failed on Windows, it results in
426   // unspecified behavior according to POSIX. Therefore, we make it behave in
427   // the same way as TCPSocketLibevent.
428   DCHECK(!peer_address_ && !core_);
429
430   if (!logging_multiple_connect_attempts_)
431     LogConnectBegin(AddressList(address));
432
433   peer_address_.reset(new IPEndPoint(address));
434
435   int rv = DoConnect();
436   if (rv == ERR_IO_PENDING) {
437     // Synchronous operation not supported.
438     DCHECK(!callback.is_null());
439     read_callback_ = callback;
440     waiting_connect_ = true;
441   } else {
442     DoConnectComplete(rv);
443   }
444
445   return rv;
446 }
447
448 bool TCPSocketWin::IsConnected() const {
449   DCHECK(CalledOnValidThread());
450
451   if (socket_ == INVALID_SOCKET || waiting_connect_)
452     return false;
453
454   if (waiting_read_)
455     return true;
456
457   // Check if connection is alive.
458   char c;
459   int rv = recv(socket_, &c, 1, MSG_PEEK);
460   if (rv == 0)
461     return false;
462   if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK)
463     return false;
464
465   return true;
466 }
467
468 bool TCPSocketWin::IsConnectedAndIdle() const {
469   DCHECK(CalledOnValidThread());
470
471   if (socket_ == INVALID_SOCKET || waiting_connect_)
472     return false;
473
474   if (waiting_read_)
475     return true;
476
477   // Check if connection is alive and we haven't received any data
478   // unexpectedly.
479   char c;
480   int rv = recv(socket_, &c, 1, MSG_PEEK);
481   if (rv >= 0)
482     return false;
483   if (WSAGetLastError() != WSAEWOULDBLOCK)
484     return false;
485
486   return true;
487 }
488
489 int TCPSocketWin::Read(IOBuffer* buf,
490                        int buf_len,
491                        const CompletionCallback& callback) {
492   DCHECK(CalledOnValidThread());
493   DCHECK_NE(socket_, INVALID_SOCKET);
494   DCHECK(!waiting_read_);
495   DCHECK(read_callback_.is_null());
496   DCHECK(!core_->read_iobuffer_);
497
498   return DoRead(buf, buf_len, callback);
499 }
500
501 int TCPSocketWin::Write(IOBuffer* buf,
502                         int buf_len,
503                         const CompletionCallback& callback) {
504   DCHECK(CalledOnValidThread());
505   DCHECK_NE(socket_, INVALID_SOCKET);
506   DCHECK(!waiting_write_);
507   DCHECK(write_callback_.is_null());
508   DCHECK_GT(buf_len, 0);
509   DCHECK(!core_->write_iobuffer_);
510
511   base::StatsCounter writes("tcp.writes");
512   writes.Increment();
513
514   WSABUF write_buffer;
515   write_buffer.len = buf_len;
516   write_buffer.buf = buf->data();
517
518   // TODO(wtc): Remove the assertion after enough testing.
519   AssertEventNotSignaled(core_->write_overlapped_.hEvent);
520   DWORD num;
521   int rv = WSASend(socket_, &write_buffer, 1, &num, 0,
522                    &core_->write_overlapped_, NULL);
523   if (rv == 0) {
524     if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) {
525       rv = static_cast<int>(num);
526       if (rv > buf_len || rv < 0) {
527         // It seems that some winsock interceptors report that more was written
528         // than was available. Treat this as an error.  http://crbug.com/27870
529         LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len
530                    << " bytes, but " << rv << " bytes reported.";
531         return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
532       }
533       base::StatsCounter write_bytes("tcp.write_bytes");
534       write_bytes.Add(rv);
535       net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, rv,
536                                     buf->data());
537       return rv;
538     }
539   } else {
540     int os_error = WSAGetLastError();
541     if (os_error != WSA_IO_PENDING) {
542       int net_error = MapSystemError(os_error);
543       net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
544                         CreateNetLogSocketErrorCallback(net_error, os_error));
545       return net_error;
546     }
547   }
548   waiting_write_ = true;
549   write_callback_ = callback;
550   core_->write_iobuffer_ = buf;
551   core_->write_buffer_length_ = buf_len;
552   core_->WatchForWrite();
553   return ERR_IO_PENDING;
554 }
555
556 int TCPSocketWin::GetLocalAddress(IPEndPoint* address) const {
557   DCHECK(CalledOnValidThread());
558   DCHECK(address);
559
560   SockaddrStorage storage;
561   if (getsockname(socket_, storage.addr, &storage.addr_len))
562     return MapSystemError(WSAGetLastError());
563   if (!address->FromSockAddr(storage.addr, storage.addr_len))
564     return ERR_ADDRESS_INVALID;
565
566   return OK;
567 }
568
569 int TCPSocketWin::GetPeerAddress(IPEndPoint* address) const {
570   DCHECK(CalledOnValidThread());
571   DCHECK(address);
572   if (!IsConnected())
573     return ERR_SOCKET_NOT_CONNECTED;
574   *address = *peer_address_;
575   return OK;
576 }
577
578 int TCPSocketWin::SetDefaultOptionsForServer() {
579   return SetExclusiveAddrUse();
580 }
581
582 void TCPSocketWin::SetDefaultOptionsForClient() {
583   // Increase the socket buffer sizes from the default sizes for WinXP.  In
584   // performance testing, there is substantial benefit by increasing from 8KB
585   // to 64KB.
586   // See also:
587   //    http://support.microsoft.com/kb/823764/EN-US
588   // On Vista, if we manually set these sizes, Vista turns off its receive
589   // window auto-tuning feature.
590   //    http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx
591   // Since Vista's auto-tune is better than any static value we can could set,
592   // only change these on pre-vista machines.
593   if (base::win::GetVersion() < base::win::VERSION_VISTA) {
594     const int32 kSocketBufferSize = 64 * 1024;
595     SetSocketReceiveBufferSize(socket_, kSocketBufferSize);
596     SetSocketSendBufferSize(socket_, kSocketBufferSize);
597   }
598
599   DisableNagle(socket_, true);
600   SetTCPKeepAlive(socket_, true, kTCPKeepAliveSeconds);
601 }
602
603 int TCPSocketWin::SetExclusiveAddrUse() {
604   // On Windows, a bound end point can be hijacked by another process by
605   // setting SO_REUSEADDR. Therefore a Windows-only option SO_EXCLUSIVEADDRUSE
606   // was introduced in Windows NT 4.0 SP4. If the socket that is bound to the
607   // end point has SO_EXCLUSIVEADDRUSE enabled, it is not possible for another
608   // socket to forcibly bind to the end point until the end point is unbound.
609   // It is recommend that all server applications must use SO_EXCLUSIVEADDRUSE.
610   // MSDN: http://goo.gl/M6fjQ.
611   //
612   // Unlike on *nix, on Windows a TCP server socket can always bind to an end
613   // point in TIME_WAIT state without setting SO_REUSEADDR, therefore it is not
614   // needed here.
615   //
616   // SO_EXCLUSIVEADDRUSE will prevent a TCP client socket from binding to an end
617   // point in TIME_WAIT status. It does not have this effect for a TCP server
618   // socket.
619
620   BOOL true_value = 1;
621   int rv = setsockopt(socket_, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
622                       reinterpret_cast<const char*>(&true_value),
623                       sizeof(true_value));
624   if (rv < 0)
625     return MapSystemError(errno);
626   return OK;
627 }
628
629 int TCPSocketWin::SetReceiveBufferSize(int32 size) {
630   DCHECK(CalledOnValidThread());
631   return SetSocketReceiveBufferSize(socket_, size);
632 }
633
634 int TCPSocketWin::SetSendBufferSize(int32 size) {
635   DCHECK(CalledOnValidThread());
636   return SetSocketSendBufferSize(socket_, size);
637 }
638
639 bool TCPSocketWin::SetKeepAlive(bool enable, int delay) {
640   return SetTCPKeepAlive(socket_, enable, delay);
641 }
642
643 bool TCPSocketWin::SetNoDelay(bool no_delay) {
644   return DisableNagle(socket_, no_delay);
645 }
646
647 void TCPSocketWin::Close() {
648   DCHECK(CalledOnValidThread());
649
650   if (socket_ != INVALID_SOCKET) {
651     // Only log the close event if there's actually a socket to close.
652     net_log_.AddEvent(NetLog::EventType::TYPE_SOCKET_CLOSED);
653
654     // Note: don't use CancelIo to cancel pending IO because it doesn't work
655     // when there is a Winsock layered service provider.
656
657     // In most socket implementations, closing a socket results in a graceful
658     // connection shutdown, but in Winsock we have to call shutdown explicitly.
659     // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure"
660     // at http://msdn.microsoft.com/en-us/library/ms738547.aspx
661     shutdown(socket_, SD_SEND);
662
663     // This cancels any pending IO.
664     if (closesocket(socket_) < 0)
665       PLOG(ERROR) << "closesocket";
666     socket_ = INVALID_SOCKET;
667   }
668
669   if (!accept_callback_.is_null()) {
670     accept_watcher_.StopWatching();
671     accept_socket_ = NULL;
672     accept_address_ = NULL;
673     accept_callback_.Reset();
674   }
675
676   if (accept_event_) {
677     WSACloseEvent(accept_event_);
678     accept_event_ = WSA_INVALID_EVENT;
679   }
680
681   if (core_) {
682     if (waiting_connect_) {
683       // We closed the socket, so this notification will never come.
684       // From MSDN' WSAEventSelect documentation:
685       // "Closing a socket with closesocket also cancels the association and
686       // selection of network events specified in WSAEventSelect for the
687       // socket".
688       core_->Release();
689     }
690     core_->Detach();
691     core_ = NULL;
692   }
693
694   waiting_connect_ = false;
695   waiting_read_ = false;
696   waiting_write_ = false;
697
698   read_callback_.Reset();
699   write_callback_.Reset();
700   peer_address_.reset();
701   connect_os_error_ = 0;
702 }
703
704 void TCPSocketWin::StartLoggingMultipleConnectAttempts(
705     const AddressList& addresses) {
706   if (!logging_multiple_connect_attempts_) {
707     logging_multiple_connect_attempts_ = true;
708     LogConnectBegin(addresses);
709   } else {
710     NOTREACHED();
711   }
712 }
713
714 void TCPSocketWin::EndLoggingMultipleConnectAttempts(int net_error) {
715   if (logging_multiple_connect_attempts_) {
716     LogConnectEnd(net_error);
717     logging_multiple_connect_attempts_ = false;
718   } else {
719     NOTREACHED();
720   }
721 }
722
723 int TCPSocketWin::AcceptInternal(scoped_ptr<TCPSocketWin>* socket,
724                                  IPEndPoint* address) {
725   SockaddrStorage storage;
726   int new_socket = accept(socket_, storage.addr, &storage.addr_len);
727   if (new_socket < 0) {
728     int net_error = MapSystemError(WSAGetLastError());
729     if (net_error != ERR_IO_PENDING)
730       net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
731     return net_error;
732   }
733
734   IPEndPoint ip_end_point;
735   if (!ip_end_point.FromSockAddr(storage.addr, storage.addr_len)) {
736     NOTREACHED();
737     if (closesocket(new_socket) < 0)
738       PLOG(ERROR) << "closesocket";
739     int net_error = ERR_ADDRESS_INVALID;
740     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
741     return net_error;
742   }
743   scoped_ptr<TCPSocketWin> tcp_socket(new TCPSocketWin(
744       net_log_.net_log(), net_log_.source()));
745   int adopt_result = tcp_socket->AdoptConnectedSocket(new_socket, ip_end_point);
746   if (adopt_result != OK) {
747     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, adopt_result);
748     return adopt_result;
749   }
750   *socket = tcp_socket.Pass();
751   *address = ip_end_point;
752   net_log_.EndEvent(NetLog::TYPE_TCP_ACCEPT,
753                     CreateNetLogIPEndPointCallback(&ip_end_point));
754   return OK;
755 }
756
757 void TCPSocketWin::OnObjectSignaled(HANDLE object) {
758   WSANETWORKEVENTS ev;
759   if (WSAEnumNetworkEvents(socket_, accept_event_, &ev) == SOCKET_ERROR) {
760     PLOG(ERROR) << "WSAEnumNetworkEvents()";
761     return;
762   }
763
764   if (ev.lNetworkEvents & FD_ACCEPT) {
765     int result = AcceptInternal(accept_socket_, accept_address_);
766     if (result != ERR_IO_PENDING) {
767       accept_socket_ = NULL;
768       accept_address_ = NULL;
769       base::ResetAndReturn(&accept_callback_).Run(result);
770     }
771   } else {
772     // This happens when a client opens a connection and closes it before we
773     // have a chance to accept it.
774     DCHECK(ev.lNetworkEvents == 0);
775
776     // Start watching the next FD_ACCEPT event.
777     WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
778     accept_watcher_.StartWatching(accept_event_, this);
779   }
780 }
781
782 int TCPSocketWin::DoConnect() {
783   DCHECK_EQ(connect_os_error_, 0);
784   DCHECK(!core_);
785
786   net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
787                       CreateNetLogIPEndPointCallback(peer_address_.get()));
788
789   core_ = new Core(this);
790   // WSAEventSelect sets the socket to non-blocking mode as a side effect.
791   // Our connect() and recv() calls require that the socket be non-blocking.
792   WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT);
793
794   SockaddrStorage storage;
795   if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len))
796     return ERR_ADDRESS_INVALID;
797   if (!connect(socket_, storage.addr, storage.addr_len)) {
798     // Connected without waiting!
799     //
800     // The MSDN page for connect says:
801     //   With a nonblocking socket, the connection attempt cannot be completed
802     //   immediately. In this case, connect will return SOCKET_ERROR, and
803     //   WSAGetLastError will return WSAEWOULDBLOCK.
804     // which implies that for a nonblocking socket, connect never returns 0.
805     // It's not documented whether the event object will be signaled or not
806     // if connect does return 0.  So the code below is essentially dead code
807     // and we don't know if it's correct.
808     NOTREACHED();
809
810     if (ResetEventIfSignaled(core_->read_overlapped_.hEvent))
811       return OK;
812   } else {
813     int os_error = WSAGetLastError();
814     if (os_error != WSAEWOULDBLOCK) {
815       LOG(ERROR) << "connect failed: " << os_error;
816       connect_os_error_ = os_error;
817       int rv = MapConnectError(os_error);
818       CHECK_NE(ERR_IO_PENDING, rv);
819       return rv;
820     }
821   }
822
823   core_->WatchForRead();
824   return ERR_IO_PENDING;
825 }
826
827 void TCPSocketWin::DoConnectComplete(int result) {
828   // Log the end of this attempt (and any OS error it threw).
829   int os_error = connect_os_error_;
830   connect_os_error_ = 0;
831   if (result != OK) {
832     net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
833                       NetLog::IntegerCallback("os_error", os_error));
834   } else {
835     net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT);
836   }
837
838   if (!logging_multiple_connect_attempts_)
839     LogConnectEnd(result);
840 }
841
842 void TCPSocketWin::LogConnectBegin(const AddressList& addresses) {
843   base::StatsCounter connects("tcp.connect");
844   connects.Increment();
845
846   net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT,
847                       addresses.CreateNetLogCallback());
848 }
849
850 void TCPSocketWin::LogConnectEnd(int net_error) {
851   if (net_error == OK)
852     UpdateConnectionTypeHistograms(CONNECTION_ANY);
853
854   if (net_error != OK) {
855     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error);
856     return;
857   }
858
859   struct sockaddr_storage source_address;
860   socklen_t addrlen = sizeof(source_address);
861   int rv = getsockname(
862       socket_, reinterpret_cast<struct sockaddr*>(&source_address), &addrlen);
863   if (rv != 0) {
864     LOG(ERROR) << "getsockname() [rv: " << rv
865                << "] error: " << WSAGetLastError();
866     NOTREACHED();
867     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv);
868     return;
869   }
870
871   net_log_.EndEvent(
872       NetLog::TYPE_TCP_CONNECT,
873       CreateNetLogSourceAddressCallback(
874           reinterpret_cast<const struct sockaddr*>(&source_address),
875           sizeof(source_address)));
876 }
877
878 int TCPSocketWin::DoRead(IOBuffer* buf, int buf_len,
879                          const CompletionCallback& callback) {
880   if (!core_->non_blocking_reads_initialized_) {
881     WSAEventSelect(socket_, core_->read_overlapped_.hEvent,
882                    FD_READ | FD_CLOSE);
883     core_->non_blocking_reads_initialized_ = true;
884   }
885   int rv = recv(socket_, buf->data(), buf_len, 0);
886   if (rv == SOCKET_ERROR) {
887     int os_error = WSAGetLastError();
888     if (os_error != WSAEWOULDBLOCK) {
889       int net_error = MapSystemError(os_error);
890       net_log_.AddEvent(
891           NetLog::TYPE_SOCKET_READ_ERROR,
892           CreateNetLogSocketErrorCallback(net_error, os_error));
893       return net_error;
894     }
895   } else {
896     base::StatsCounter read_bytes("tcp.read_bytes");
897     if (rv > 0)
898       read_bytes.Add(rv);
899     net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, rv,
900                                   buf->data());
901     return rv;
902   }
903
904   waiting_read_ = true;
905   read_callback_ = callback;
906   core_->read_iobuffer_ = buf;
907   core_->read_buffer_length_ = buf_len;
908   core_->WatchForRead();
909   return ERR_IO_PENDING;
910 }
911
912 void TCPSocketWin::DidCompleteConnect() {
913   DCHECK(waiting_connect_);
914   DCHECK(!read_callback_.is_null());
915   int result;
916
917   WSANETWORKEVENTS events;
918   int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
919                                 &events);
920   int os_error = 0;
921   if (rv == SOCKET_ERROR) {
922     NOTREACHED();
923     os_error = WSAGetLastError();
924     result = MapSystemError(os_error);
925   } else if (events.lNetworkEvents & FD_CONNECT) {
926     os_error = events.iErrorCode[FD_CONNECT_BIT];
927     result = MapConnectError(os_error);
928   } else {
929     NOTREACHED();
930     result = ERR_UNEXPECTED;
931   }
932
933   connect_os_error_ = os_error;
934   DoConnectComplete(result);
935   waiting_connect_ = false;
936
937   DCHECK_NE(result, ERR_IO_PENDING);
938   base::ResetAndReturn(&read_callback_).Run(result);
939 }
940
941 void TCPSocketWin::DidCompleteWrite() {
942   DCHECK(waiting_write_);
943   DCHECK(!write_callback_.is_null());
944
945   DWORD num_bytes, flags;
946   BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_,
947                                    &num_bytes, FALSE, &flags);
948   WSAResetEvent(core_->write_overlapped_.hEvent);
949   waiting_write_ = false;
950   int rv;
951   if (!ok) {
952     int os_error = WSAGetLastError();
953     rv = MapSystemError(os_error);
954     net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
955                       CreateNetLogSocketErrorCallback(rv, os_error));
956   } else {
957     rv = static_cast<int>(num_bytes);
958     if (rv > core_->write_buffer_length_ || rv < 0) {
959       // It seems that some winsock interceptors report that more was written
960       // than was available. Treat this as an error.  http://crbug.com/27870
961       LOG(ERROR) << "Detected broken LSP: Asked to write "
962                  << core_->write_buffer_length_ << " bytes, but " << rv
963                  << " bytes reported.";
964       rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
965     } else {
966       base::StatsCounter write_bytes("tcp.write_bytes");
967       write_bytes.Add(num_bytes);
968       net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes,
969                                     core_->write_iobuffer_->data());
970     }
971   }
972
973   core_->write_iobuffer_ = NULL;
974
975   DCHECK_NE(rv, ERR_IO_PENDING);
976   base::ResetAndReturn(&write_callback_).Run(rv);
977 }
978
979 void TCPSocketWin::DidSignalRead() {
980   DCHECK(waiting_read_);
981   DCHECK(!read_callback_.is_null());
982
983   int os_error = 0;
984   WSANETWORKEVENTS network_events;
985   int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
986                                 &network_events);
987   if (rv == SOCKET_ERROR) {
988     os_error = WSAGetLastError();
989     rv = MapSystemError(os_error);
990   } else if (network_events.lNetworkEvents) {
991     DCHECK_EQ(network_events.lNetworkEvents & ~(FD_READ | FD_CLOSE), 0);
992     // If network_events.lNetworkEvents is FD_CLOSE and
993     // network_events.iErrorCode[FD_CLOSE_BIT] is 0, it is a graceful
994     // connection closure. It is tempting to directly set rv to 0 in
995     // this case, but the MSDN pages for WSAEventSelect and
996     // WSAAsyncSelect recommend we still call DoRead():
997     //   FD_CLOSE should only be posted after all data is read from a
998     //   socket, but an application should check for remaining data upon
999     //   receipt of FD_CLOSE to avoid any possibility of losing data.
1000     //
1001     // If network_events.iErrorCode[FD_READ_BIT] or
1002     // network_events.iErrorCode[FD_CLOSE_BIT] is nonzero, still call
1003     // DoRead() because recv() reports a more accurate error code
1004     // (WSAECONNRESET vs. WSAECONNABORTED) when the connection was
1005     // reset.
1006     rv = DoRead(core_->read_iobuffer_, core_->read_buffer_length_,
1007                 read_callback_);
1008     if (rv == ERR_IO_PENDING)
1009       return;
1010   } else {
1011     // This may happen because Read() may succeed synchronously and
1012     // consume all the received data without resetting the event object.
1013     core_->WatchForRead();
1014     return;
1015   }
1016
1017   waiting_read_ = false;
1018   core_->read_iobuffer_ = NULL;
1019   core_->read_buffer_length_ = 0;
1020
1021   DCHECK_NE(rv, ERR_IO_PENDING);
1022   base::ResetAndReturn(&read_callback_).Run(rv);
1023 }
1024
1025 }  // namespace net