- add sources.
[platform/framework/web/crosswalk.git] / src / net / socket / tcp_socket_win.cc
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/socket/tcp_socket_win.h"
6
7 #include <mstcpip.h>
8
9 #include "base/callback_helpers.h"
10 #include "base/logging.h"
11 #include "base/metrics/stats_counters.h"
12 #include "base/win/windows_version.h"
13 #include "net/base/address_list.h"
14 #include "net/base/connection_type_histograms.h"
15 #include "net/base/io_buffer.h"
16 #include "net/base/ip_endpoint.h"
17 #include "net/base/net_errors.h"
18 #include "net/base/net_util.h"
19 #include "net/base/network_change_notifier.h"
20 #include "net/base/winsock_init.h"
21 #include "net/base/winsock_util.h"
22 #include "net/socket/socket_descriptor.h"
23 #include "net/socket/socket_net_log_params.h"
24
25 namespace net {
26
27 namespace {
28
// Keep-alive delay, in seconds, handed to SetTCPKeepAlive() when the default
// client options are applied.
const int kTCPKeepAliveSeconds = 45;
30
31 bool SetSocketReceiveBufferSize(SOCKET socket, int32 size) {
32   int rv = setsockopt(socket, SOL_SOCKET, SO_RCVBUF,
33                       reinterpret_cast<const char*>(&size), sizeof(size));
34   DCHECK(!rv) << "Could not set socket receive buffer size: " << GetLastError();
35   return rv == 0;
36 }
37
38 bool SetSocketSendBufferSize(SOCKET socket, int32 size) {
39   int rv = setsockopt(socket, SOL_SOCKET, SO_SNDBUF,
40                       reinterpret_cast<const char*>(&size), sizeof(size));
41   DCHECK(!rv) << "Could not set socket send buffer size: " << GetLastError();
42   return rv == 0;
43 }
44
45 // Disable Nagle.
46 // The Nagle implementation on windows is governed by RFC 896.  The idea
47 // behind Nagle is to reduce small packets on the network.  When Nagle is
48 // enabled, if a partial packet has been sent, the TCP stack will disallow
49 // further *partial* packets until an ACK has been received from the other
50 // side.  Good applications should always strive to send as much data as
51 // possible and avoid partial-packet sends.  However, in most real world
52 // applications, there are edge cases where this does not happen, and two
53 // partial packets may be sent back to back.  For a browser, it is NEVER
54 // a benefit to delay for an RTT before the second packet is sent.
55 //
56 // As a practical example in Chromium today, consider the case of a small
57 // POST.  I have verified this:
58 //     Client writes 649 bytes of header  (partial packet #1)
59 //     Client writes 50 bytes of POST data (partial packet #2)
60 // In the above example, with Nagle, a RTT delay is inserted between these
61 // two sends due to nagle.  RTTs can easily be 100ms or more.  The best
62 // fix is to make sure that for POSTing data, we write as much data as
63 // possible and minimize partial packets.  We will fix that.  But disabling
64 // Nagle also ensure we don't run into this delay in other edge cases.
65 // See also:
66 //    http://technet.microsoft.com/en-us/library/bb726981.aspx
67 bool DisableNagle(SOCKET socket, bool disable) {
68   BOOL val = disable ? TRUE : FALSE;
69   int rv = setsockopt(socket, IPPROTO_TCP, TCP_NODELAY,
70                       reinterpret_cast<const char*>(&val),
71                       sizeof(val));
72   DCHECK(!rv) << "Could not disable nagle";
73   return rv == 0;
74 }
75
76 // Enable TCP Keep-Alive to prevent NAT routers from timing out TCP
77 // connections. See http://crbug.com/27400 for details.
78 bool SetTCPKeepAlive(SOCKET socket, BOOL enable, int delay_secs) {
79   int delay = delay_secs * 1000;
80   struct tcp_keepalive keepalive_vals = {
81     enable ? 1 : 0,  // TCP keep-alive on.
82     delay,  // Delay seconds before sending first TCP keep-alive packet.
83     delay,  // Delay seconds between sending TCP keep-alive packets.
84   };
85   DWORD bytes_returned = 0xABAB;
86   int rv = WSAIoctl(socket, SIO_KEEPALIVE_VALS, &keepalive_vals,
87                     sizeof(keepalive_vals), NULL, 0,
88                     &bytes_returned, NULL, NULL);
89   DCHECK(!rv) << "Could not enable TCP Keep-Alive for socket: " << socket
90               << " [error: " << WSAGetLastError() << "].";
91
92   // Disregard any failure in disabling nagle or enabling TCP Keep-Alive.
93   return rv == 0;
94 }
95
96 int MapConnectError(int os_error) {
97   switch (os_error) {
98     // connect fails with WSAEACCES when Windows Firewall blocks the
99     // connection.
100     case WSAEACCES:
101       return ERR_NETWORK_ACCESS_DENIED;
102     case WSAETIMEDOUT:
103       return ERR_CONNECTION_TIMED_OUT;
104     default: {
105       int net_error = MapSystemError(os_error);
106       if (net_error == ERR_FAILED)
107         return ERR_CONNECTION_FAILED;  // More specific than ERR_FAILED.
108
109       // Give a more specific error when the user is offline.
110       if (net_error == ERR_ADDRESS_UNREACHABLE &&
111           NetworkChangeNotifier::IsOffline()) {
112         return ERR_INTERNET_DISCONNECTED;
113       }
114
115       return net_error;
116     }
117   }
118 }
119
120 }  // namespace
121
122 //-----------------------------------------------------------------------------
123
124 // This class encapsulates all the state that has to be preserved as long as
125 // there is a network IO operation in progress. If the owner TCPSocketWin is
126 // destroyed while an operation is in progress, the Core is detached and it
127 // lives until the operation completes and the OS doesn't reference any resource
128 // declared on this class anymore.
129 class TCPSocketWin::Core : public base::RefCounted<Core> {
130  public:
131   explicit Core(TCPSocketWin* socket);
132
133   // Start watching for the end of a read or write operation.
134   void WatchForRead();
135   void WatchForWrite();
136
137   // The TCPSocketWin is going away.
138   void Detach() { socket_ = NULL; }
139
140   // The separate OVERLAPPED variables for asynchronous operation.
141   // |read_overlapped_| is used for both Connect() and Read().
142   // |write_overlapped_| is only used for Write();
143   OVERLAPPED read_overlapped_;
144   OVERLAPPED write_overlapped_;
145
146   // The buffers used in Read() and Write().
147   scoped_refptr<IOBuffer> read_iobuffer_;
148   scoped_refptr<IOBuffer> write_iobuffer_;
149   int read_buffer_length_;
150   int write_buffer_length_;
151
152   bool non_blocking_reads_initialized_;
153
154  private:
155   friend class base::RefCounted<Core>;
156
157   class ReadDelegate : public base::win::ObjectWatcher::Delegate {
158    public:
159     explicit ReadDelegate(Core* core) : core_(core) {}
160     virtual ~ReadDelegate() {}
161
162     // base::ObjectWatcher::Delegate methods:
163     virtual void OnObjectSignaled(HANDLE object);
164
165    private:
166     Core* const core_;
167   };
168
169   class WriteDelegate : public base::win::ObjectWatcher::Delegate {
170    public:
171     explicit WriteDelegate(Core* core) : core_(core) {}
172     virtual ~WriteDelegate() {}
173
174     // base::ObjectWatcher::Delegate methods:
175     virtual void OnObjectSignaled(HANDLE object);
176
177    private:
178     Core* const core_;
179   };
180
181   ~Core();
182
183   // The socket that created this object.
184   TCPSocketWin* socket_;
185
186   // |reader_| handles the signals from |read_watcher_|.
187   ReadDelegate reader_;
188   // |writer_| handles the signals from |write_watcher_|.
189   WriteDelegate writer_;
190
191   // |read_watcher_| watches for events from Connect() and Read().
192   base::win::ObjectWatcher read_watcher_;
193   // |write_watcher_| watches for events from Write();
194   base::win::ObjectWatcher write_watcher_;
195
196   DISALLOW_COPY_AND_ASSIGN(Core);
197 };
198
199 TCPSocketWin::Core::Core(TCPSocketWin* socket)
200     : read_buffer_length_(0),
201       write_buffer_length_(0),
202       non_blocking_reads_initialized_(false),
203       socket_(socket),
204       reader_(this),
205       writer_(this) {
206   memset(&read_overlapped_, 0, sizeof(read_overlapped_));
207   memset(&write_overlapped_, 0, sizeof(write_overlapped_));
208
209   read_overlapped_.hEvent = WSACreateEvent();
210   write_overlapped_.hEvent = WSACreateEvent();
211 }
212
213 TCPSocketWin::Core::~Core() {
214   // Make sure the message loop is not watching this object anymore.
215   read_watcher_.StopWatching();
216   write_watcher_.StopWatching();
217
218   WSACloseEvent(read_overlapped_.hEvent);
219   memset(&read_overlapped_, 0xaf, sizeof(read_overlapped_));
220   WSACloseEvent(write_overlapped_.hEvent);
221   memset(&write_overlapped_, 0xaf, sizeof(write_overlapped_));
222 }
223
224 void TCPSocketWin::Core::WatchForRead() {
225   // We grab an extra reference because there is an IO operation in progress.
226   // Balanced in ReadDelegate::OnObjectSignaled().
227   AddRef();
228   read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_);
229 }
230
231 void TCPSocketWin::Core::WatchForWrite() {
232   // We grab an extra reference because there is an IO operation in progress.
233   // Balanced in WriteDelegate::OnObjectSignaled().
234   AddRef();
235   write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_);
236 }
237
238 void TCPSocketWin::Core::ReadDelegate::OnObjectSignaled(HANDLE object) {
239   DCHECK_EQ(object, core_->read_overlapped_.hEvent);
240   if (core_->socket_) {
241     if (core_->socket_->waiting_connect_)
242       core_->socket_->DidCompleteConnect();
243     else
244       core_->socket_->DidSignalRead();
245   }
246
247   core_->Release();
248 }
249
250 void TCPSocketWin::Core::WriteDelegate::OnObjectSignaled(
251     HANDLE object) {
252   DCHECK_EQ(object, core_->write_overlapped_.hEvent);
253   if (core_->socket_)
254     core_->socket_->DidCompleteWrite();
255
256   core_->Release();
257 }
258
259 //-----------------------------------------------------------------------------
260
261 TCPSocketWin::TCPSocketWin(net::NetLog* net_log,
262                            const net::NetLog::Source& source)
263     : socket_(INVALID_SOCKET),
264       accept_event_(WSA_INVALID_EVENT),
265       accept_socket_(NULL),
266       accept_address_(NULL),
267       waiting_connect_(false),
268       waiting_read_(false),
269       waiting_write_(false),
270       connect_os_error_(0),
271       logging_multiple_connect_attempts_(false),
272       net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)) {
273   net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE,
274                       source.ToEventParametersCallback());
275   EnsureWinsockInit();
276 }
277
278 TCPSocketWin::~TCPSocketWin() {
279   Close();
280   net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE);
281 }
282
283 int TCPSocketWin::Open(AddressFamily family) {
284   DCHECK(CalledOnValidThread());
285   DCHECK_EQ(socket_, INVALID_SOCKET);
286
287   socket_ = CreatePlatformSocket(ConvertAddressFamily(family), SOCK_STREAM,
288                                  IPPROTO_TCP);
289   if (socket_ == INVALID_SOCKET) {
290     PLOG(ERROR) << "CreatePlatformSocket() returned an error";
291     return MapSystemError(WSAGetLastError());
292   }
293
294   if (SetNonBlocking(socket_)) {
295     int result = MapSystemError(WSAGetLastError());
296     Close();
297     return result;
298   }
299
300   return OK;
301 }
302
303 int TCPSocketWin::AdoptConnectedSocket(SOCKET socket,
304                                        const IPEndPoint& peer_address) {
305   DCHECK(CalledOnValidThread());
306   DCHECK_EQ(socket_, INVALID_SOCKET);
307   DCHECK(!core_);
308
309   socket_ = socket;
310
311   if (SetNonBlocking(socket_)) {
312     int result = MapSystemError(WSAGetLastError());
313     Close();
314     return result;
315   }
316
317   core_ = new Core(this);
318   peer_address_.reset(new IPEndPoint(peer_address));
319
320   return OK;
321 }
322
323 int TCPSocketWin::Bind(const IPEndPoint& address) {
324   DCHECK(CalledOnValidThread());
325   DCHECK_NE(socket_, INVALID_SOCKET);
326
327   SockaddrStorage storage;
328   if (!address.ToSockAddr(storage.addr, &storage.addr_len))
329     return ERR_ADDRESS_INVALID;
330
331   int result = bind(socket_, storage.addr, storage.addr_len);
332   if (result < 0) {
333     PLOG(ERROR) << "bind() returned an error";
334     return MapSystemError(WSAGetLastError());
335   }
336
337   return OK;
338 }
339
340 int TCPSocketWin::Listen(int backlog) {
341   DCHECK(CalledOnValidThread());
342   DCHECK_GT(backlog, 0);
343   DCHECK_NE(socket_, INVALID_SOCKET);
344   DCHECK_EQ(accept_event_, WSA_INVALID_EVENT);
345
346   accept_event_ = WSACreateEvent();
347   if (accept_event_ == WSA_INVALID_EVENT) {
348     PLOG(ERROR) << "WSACreateEvent()";
349     return MapSystemError(WSAGetLastError());
350   }
351
352   int result = listen(socket_, backlog);
353   if (result < 0) {
354     PLOG(ERROR) << "listen() returned an error";
355     return MapSystemError(WSAGetLastError());
356   }
357
358   return OK;
359 }
360
361 int TCPSocketWin::Accept(scoped_ptr<TCPSocketWin>* socket,
362                          IPEndPoint* address,
363                          const CompletionCallback& callback) {
364   DCHECK(CalledOnValidThread());
365   DCHECK(socket);
366   DCHECK(address);
367   DCHECK(!callback.is_null());
368   DCHECK(accept_callback_.is_null());
369
370   net_log_.BeginEvent(NetLog::TYPE_TCP_ACCEPT);
371
372   int result = AcceptInternal(socket, address);
373
374   if (result == ERR_IO_PENDING) {
375     // Start watching.
376     WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
377     accept_watcher_.StartWatching(accept_event_, this);
378
379     accept_socket_ = socket;
380     accept_address_ = address;
381     accept_callback_ = callback;
382   }
383
384   return result;
385 }
386
387 int TCPSocketWin::Connect(const IPEndPoint& address,
388                           const CompletionCallback& callback) {
389   DCHECK(CalledOnValidThread());
390   DCHECK_NE(socket_, INVALID_SOCKET);
391   DCHECK(!waiting_connect_);
392
393   // |peer_address_| and |core_| will be non-NULL if Connect() has been called.
394   // Unless Close() is called to reset the internal state, a second call to
395   // Connect() is not allowed.
396   // Please note that we enforce this even if the previous Connect() has
397   // completed and failed. Although it is allowed to connect the same |socket_|
398   // again after a connection attempt failed on Windows, it results in
399   // unspecified behavior according to POSIX. Therefore, we make it behave in
400   // the same way as TCPSocketLibevent.
401   DCHECK(!peer_address_ && !core_);
402
403   if (!logging_multiple_connect_attempts_)
404     LogConnectBegin(AddressList(address));
405
406   peer_address_.reset(new IPEndPoint(address));
407
408   int rv = DoConnect();
409   if (rv == ERR_IO_PENDING) {
410     // Synchronous operation not supported.
411     DCHECK(!callback.is_null());
412     read_callback_ = callback;
413     waiting_connect_ = true;
414   } else {
415     DoConnectComplete(rv);
416   }
417
418   return rv;
419 }
420
421 bool TCPSocketWin::IsConnected() const {
422   DCHECK(CalledOnValidThread());
423
424   if (socket_ == INVALID_SOCKET || waiting_connect_)
425     return false;
426
427   if (waiting_read_)
428     return true;
429
430   // Check if connection is alive.
431   char c;
432   int rv = recv(socket_, &c, 1, MSG_PEEK);
433   if (rv == 0)
434     return false;
435   if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK)
436     return false;
437
438   return true;
439 }
440
441 bool TCPSocketWin::IsConnectedAndIdle() const {
442   DCHECK(CalledOnValidThread());
443
444   if (socket_ == INVALID_SOCKET || waiting_connect_)
445     return false;
446
447   if (waiting_read_)
448     return true;
449
450   // Check if connection is alive and we haven't received any data
451   // unexpectedly.
452   char c;
453   int rv = recv(socket_, &c, 1, MSG_PEEK);
454   if (rv >= 0)
455     return false;
456   if (WSAGetLastError() != WSAEWOULDBLOCK)
457     return false;
458
459   return true;
460 }
461
462 int TCPSocketWin::Read(IOBuffer* buf,
463                        int buf_len,
464                        const CompletionCallback& callback) {
465   DCHECK(CalledOnValidThread());
466   DCHECK_NE(socket_, INVALID_SOCKET);
467   DCHECK(!waiting_read_);
468   DCHECK(read_callback_.is_null());
469   DCHECK(!core_->read_iobuffer_);
470
471   return DoRead(buf, buf_len, callback);
472 }
473
474 int TCPSocketWin::Write(IOBuffer* buf,
475                         int buf_len,
476                         const CompletionCallback& callback) {
477   DCHECK(CalledOnValidThread());
478   DCHECK_NE(socket_, INVALID_SOCKET);
479   DCHECK(!waiting_write_);
480   DCHECK(write_callback_.is_null());
481   DCHECK_GT(buf_len, 0);
482   DCHECK(!core_->write_iobuffer_);
483
484   base::StatsCounter writes("tcp.writes");
485   writes.Increment();
486
487   WSABUF write_buffer;
488   write_buffer.len = buf_len;
489   write_buffer.buf = buf->data();
490
491   // TODO(wtc): Remove the assertion after enough testing.
492   AssertEventNotSignaled(core_->write_overlapped_.hEvent);
493   DWORD num;
494   int rv = WSASend(socket_, &write_buffer, 1, &num, 0,
495                    &core_->write_overlapped_, NULL);
496   if (rv == 0) {
497     if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) {
498       rv = static_cast<int>(num);
499       if (rv > buf_len || rv < 0) {
500         // It seems that some winsock interceptors report that more was written
501         // than was available. Treat this as an error.  http://crbug.com/27870
502         LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len
503                    << " bytes, but " << rv << " bytes reported.";
504         return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
505       }
506       base::StatsCounter write_bytes("tcp.write_bytes");
507       write_bytes.Add(rv);
508       net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, rv,
509                                     buf->data());
510       return rv;
511     }
512   } else {
513     int os_error = WSAGetLastError();
514     if (os_error != WSA_IO_PENDING) {
515       int net_error = MapSystemError(os_error);
516       net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
517                         CreateNetLogSocketErrorCallback(net_error, os_error));
518       return net_error;
519     }
520   }
521   waiting_write_ = true;
522   write_callback_ = callback;
523   core_->write_iobuffer_ = buf;
524   core_->write_buffer_length_ = buf_len;
525   core_->WatchForWrite();
526   return ERR_IO_PENDING;
527 }
528
529 int TCPSocketWin::GetLocalAddress(IPEndPoint* address) const {
530   DCHECK(CalledOnValidThread());
531   DCHECK(address);
532
533   SockaddrStorage storage;
534   if (getsockname(socket_, storage.addr, &storage.addr_len))
535     return MapSystemError(WSAGetLastError());
536   if (!address->FromSockAddr(storage.addr, storage.addr_len))
537     return ERR_ADDRESS_INVALID;
538
539   return OK;
540 }
541
542 int TCPSocketWin::GetPeerAddress(IPEndPoint* address) const {
543   DCHECK(CalledOnValidThread());
544   DCHECK(address);
545   if (!IsConnected())
546     return ERR_SOCKET_NOT_CONNECTED;
547   *address = *peer_address_;
548   return OK;
549 }
550
551 int TCPSocketWin::SetDefaultOptionsForServer() {
552   return SetExclusiveAddrUse();
553 }
554
555 void TCPSocketWin::SetDefaultOptionsForClient() {
556   // Increase the socket buffer sizes from the default sizes for WinXP.  In
557   // performance testing, there is substantial benefit by increasing from 8KB
558   // to 64KB.
559   // See also:
560   //    http://support.microsoft.com/kb/823764/EN-US
561   // On Vista, if we manually set these sizes, Vista turns off its receive
562   // window auto-tuning feature.
563   //    http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx
564   // Since Vista's auto-tune is better than any static value we can could set,
565   // only change these on pre-vista machines.
566   if (base::win::GetVersion() < base::win::VERSION_VISTA) {
567     const int32 kSocketBufferSize = 64 * 1024;
568     SetSocketReceiveBufferSize(socket_, kSocketBufferSize);
569     SetSocketSendBufferSize(socket_, kSocketBufferSize);
570   }
571
572   DisableNagle(socket_, true);
573   SetTCPKeepAlive(socket_, true, kTCPKeepAliveSeconds);
574 }
575
576 int TCPSocketWin::SetExclusiveAddrUse() {
577   // On Windows, a bound end point can be hijacked by another process by
578   // setting SO_REUSEADDR. Therefore a Windows-only option SO_EXCLUSIVEADDRUSE
579   // was introduced in Windows NT 4.0 SP4. If the socket that is bound to the
580   // end point has SO_EXCLUSIVEADDRUSE enabled, it is not possible for another
581   // socket to forcibly bind to the end point until the end point is unbound.
582   // It is recommend that all server applications must use SO_EXCLUSIVEADDRUSE.
583   // MSDN: http://goo.gl/M6fjQ.
584   //
585   // Unlike on *nix, on Windows a TCP server socket can always bind to an end
586   // point in TIME_WAIT state without setting SO_REUSEADDR, therefore it is not
587   // needed here.
588   //
589   // SO_EXCLUSIVEADDRUSE will prevent a TCP client socket from binding to an end
590   // point in TIME_WAIT status. It does not have this effect for a TCP server
591   // socket.
592
593   BOOL true_value = 1;
594   int rv = setsockopt(socket_, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
595                       reinterpret_cast<const char*>(&true_value),
596                       sizeof(true_value));
597   if (rv < 0)
598     return MapSystemError(errno);
599   return OK;
600 }
601
602 bool TCPSocketWin::SetReceiveBufferSize(int32 size) {
603   DCHECK(CalledOnValidThread());
604   return SetSocketReceiveBufferSize(socket_, size);
605 }
606
607 bool TCPSocketWin::SetSendBufferSize(int32 size) {
608   DCHECK(CalledOnValidThread());
609   return SetSocketSendBufferSize(socket_, size);
610 }
611
612 bool TCPSocketWin::SetKeepAlive(bool enable, int delay) {
613   return SetTCPKeepAlive(socket_, enable, delay);
614 }
615
616 bool TCPSocketWin::SetNoDelay(bool no_delay) {
617   return DisableNagle(socket_, no_delay);
618 }
619
620 void TCPSocketWin::Close() {
621   DCHECK(CalledOnValidThread());
622
623   if (socket_ != INVALID_SOCKET) {
624     // Note: don't use CancelIo to cancel pending IO because it doesn't work
625     // when there is a Winsock layered service provider.
626
627     // In most socket implementations, closing a socket results in a graceful
628     // connection shutdown, but in Winsock we have to call shutdown explicitly.
629     // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure"
630     // at http://msdn.microsoft.com/en-us/library/ms738547.aspx
631     shutdown(socket_, SD_SEND);
632
633     // This cancels any pending IO.
634     if (closesocket(socket_) < 0)
635       PLOG(ERROR) << "closesocket";
636     socket_ = INVALID_SOCKET;
637   }
638
639   if (accept_event_) {
640     WSACloseEvent(accept_event_);
641     accept_event_ = WSA_INVALID_EVENT;
642   }
643
644   if (!accept_callback_.is_null()) {
645     accept_watcher_.StopWatching();
646     accept_socket_ = NULL;
647     accept_address_ = NULL;
648     accept_callback_.Reset();
649   }
650
651   if (core_) {
652     if (waiting_connect_) {
653       // We closed the socket, so this notification will never come.
654       // From MSDN' WSAEventSelect documentation:
655       // "Closing a socket with closesocket also cancels the association and
656       // selection of network events specified in WSAEventSelect for the
657       // socket".
658       core_->Release();
659     }
660     core_->Detach();
661     core_ = NULL;
662   }
663
664   waiting_connect_ = false;
665   waiting_read_ = false;
666   waiting_write_ = false;
667
668   read_callback_.Reset();
669   write_callback_.Reset();
670   peer_address_.reset();
671   connect_os_error_ = 0;
672 }
673
674 bool TCPSocketWin::UsingTCPFastOpen() const {
675   // Not supported on windows.
676   return false;
677 }
678
679 void TCPSocketWin::StartLoggingMultipleConnectAttempts(
680     const AddressList& addresses) {
681   if (!logging_multiple_connect_attempts_) {
682     logging_multiple_connect_attempts_ = true;
683     LogConnectBegin(addresses);
684   } else {
685     NOTREACHED();
686   }
687 }
688
689 void TCPSocketWin::EndLoggingMultipleConnectAttempts(int net_error) {
690   if (logging_multiple_connect_attempts_) {
691     LogConnectEnd(net_error);
692     logging_multiple_connect_attempts_ = false;
693   } else {
694     NOTREACHED();
695   }
696 }
697
698 int TCPSocketWin::AcceptInternal(scoped_ptr<TCPSocketWin>* socket,
699                                  IPEndPoint* address) {
700   SockaddrStorage storage;
701   int new_socket = accept(socket_, storage.addr, &storage.addr_len);
702   if (new_socket < 0) {
703     int net_error = MapSystemError(WSAGetLastError());
704     if (net_error != ERR_IO_PENDING)
705       net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
706     return net_error;
707   }
708
709   IPEndPoint ip_end_point;
710   if (!ip_end_point.FromSockAddr(storage.addr, storage.addr_len)) {
711     NOTREACHED();
712     if (closesocket(new_socket) < 0)
713       PLOG(ERROR) << "closesocket";
714     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, ERR_FAILED);
715     return ERR_FAILED;
716   }
717   scoped_ptr<TCPSocketWin> tcp_socket(new TCPSocketWin(
718       net_log_.net_log(), net_log_.source()));
719   int adopt_result = tcp_socket->AdoptConnectedSocket(new_socket, ip_end_point);
720   if (adopt_result != OK) {
721     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, adopt_result);
722     return adopt_result;
723   }
724   *socket = tcp_socket.Pass();
725   *address = ip_end_point;
726   net_log_.EndEvent(NetLog::TYPE_TCP_ACCEPT,
727                     CreateNetLogIPEndPointCallback(&ip_end_point));
728   return OK;
729 }
730
731 void TCPSocketWin::OnObjectSignaled(HANDLE object) {
732   WSANETWORKEVENTS ev;
733   if (WSAEnumNetworkEvents(socket_, accept_event_, &ev) == SOCKET_ERROR) {
734     PLOG(ERROR) << "WSAEnumNetworkEvents()";
735     return;
736   }
737
738   if (ev.lNetworkEvents & FD_ACCEPT) {
739     int result = AcceptInternal(accept_socket_, accept_address_);
740     if (result != ERR_IO_PENDING) {
741       accept_socket_ = NULL;
742       accept_address_ = NULL;
743       base::ResetAndReturn(&accept_callback_).Run(result);
744     }
745   } else {
746     // This happens when a client opens a connection and closes it before we
747     // have a chance to accept it.
748     DCHECK(ev.lNetworkEvents == 0);
749
750     // Start watching the next FD_ACCEPT event.
751     WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
752     accept_watcher_.StartWatching(accept_event_, this);
753   }
754 }
755
756 int TCPSocketWin::DoConnect() {
757   DCHECK_EQ(connect_os_error_, 0);
758   DCHECK(!core_);
759
760   net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
761                       CreateNetLogIPEndPointCallback(peer_address_.get()));
762
763   core_ = new Core(this);
764   // WSAEventSelect sets the socket to non-blocking mode as a side effect.
765   // Our connect() and recv() calls require that the socket be non-blocking.
766   WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT);
767
768   SockaddrStorage storage;
769   if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len))
770     return ERR_INVALID_ARGUMENT;
771   if (!connect(socket_, storage.addr, storage.addr_len)) {
772     // Connected without waiting!
773     //
774     // The MSDN page for connect says:
775     //   With a nonblocking socket, the connection attempt cannot be completed
776     //   immediately. In this case, connect will return SOCKET_ERROR, and
777     //   WSAGetLastError will return WSAEWOULDBLOCK.
778     // which implies that for a nonblocking socket, connect never returns 0.
779     // It's not documented whether the event object will be signaled or not
780     // if connect does return 0.  So the code below is essentially dead code
781     // and we don't know if it's correct.
782     NOTREACHED();
783
784     if (ResetEventIfSignaled(core_->read_overlapped_.hEvent))
785       return OK;
786   } else {
787     int os_error = WSAGetLastError();
788     if (os_error != WSAEWOULDBLOCK) {
789       LOG(ERROR) << "connect failed: " << os_error;
790       connect_os_error_ = os_error;
791       int rv = MapConnectError(os_error);
792       CHECK_NE(ERR_IO_PENDING, rv);
793       return rv;
794     }
795   }
796
797   core_->WatchForRead();
798   return ERR_IO_PENDING;
799 }
800
801 void TCPSocketWin::DoConnectComplete(int result) {
802   // Log the end of this attempt (and any OS error it threw).
803   int os_error = connect_os_error_;
804   connect_os_error_ = 0;
805   if (result != OK) {
806     net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
807                       NetLog::IntegerCallback("os_error", os_error));
808   } else {
809     net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT);
810   }
811
812   if (!logging_multiple_connect_attempts_)
813     LogConnectEnd(result);
814 }
815
816 void TCPSocketWin::LogConnectBegin(const AddressList& addresses) {
817   base::StatsCounter connects("tcp.connect");
818   connects.Increment();
819
820   net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT,
821                       addresses.CreateNetLogCallback());
822 }
823
824 void TCPSocketWin::LogConnectEnd(int net_error) {
825   if (net_error == OK)
826     UpdateConnectionTypeHistograms(CONNECTION_ANY);
827
828   if (net_error != OK) {
829     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error);
830     return;
831   }
832
833   struct sockaddr_storage source_address;
834   socklen_t addrlen = sizeof(source_address);
835   int rv = getsockname(
836       socket_, reinterpret_cast<struct sockaddr*>(&source_address), &addrlen);
837   if (rv != 0) {
838     LOG(ERROR) << "getsockname() [rv: " << rv
839                << "] error: " << WSAGetLastError();
840     NOTREACHED();
841     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv);
842     return;
843   }
844
845   net_log_.EndEvent(
846       NetLog::TYPE_TCP_CONNECT,
847       CreateNetLogSourceAddressCallback(
848           reinterpret_cast<const struct sockaddr*>(&source_address),
849           sizeof(source_address)));
850 }
851
852 int TCPSocketWin::DoRead(IOBuffer* buf, int buf_len,
853                          const CompletionCallback& callback) {
854   if (!core_->non_blocking_reads_initialized_) {
855     WSAEventSelect(socket_, core_->read_overlapped_.hEvent,
856                    FD_READ | FD_CLOSE);
857     core_->non_blocking_reads_initialized_ = true;
858   }
859   int rv = recv(socket_, buf->data(), buf_len, 0);
860   if (rv == SOCKET_ERROR) {
861     int os_error = WSAGetLastError();
862     if (os_error != WSAEWOULDBLOCK) {
863       int net_error = MapSystemError(os_error);
864       net_log_.AddEvent(
865           NetLog::TYPE_SOCKET_READ_ERROR,
866           CreateNetLogSocketErrorCallback(net_error, os_error));
867       return net_error;
868     }
869   } else {
870     base::StatsCounter read_bytes("tcp.read_bytes");
871     if (rv > 0)
872       read_bytes.Add(rv);
873     net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, rv,
874                                   buf->data());
875     return rv;
876   }
877
878   waiting_read_ = true;
879   read_callback_ = callback;
880   core_->read_iobuffer_ = buf;
881   core_->read_buffer_length_ = buf_len;
882   core_->WatchForRead();
883   return ERR_IO_PENDING;
884 }
885
886 void TCPSocketWin::DidCompleteConnect() {
887   DCHECK(waiting_connect_);
888   DCHECK(!read_callback_.is_null());
889   int result;
890
891   WSANETWORKEVENTS events;
892   int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
893                                 &events);
894   int os_error = 0;
895   if (rv == SOCKET_ERROR) {
896     NOTREACHED();
897     os_error = WSAGetLastError();
898     result = MapSystemError(os_error);
899   } else if (events.lNetworkEvents & FD_CONNECT) {
900     os_error = events.iErrorCode[FD_CONNECT_BIT];
901     result = MapConnectError(os_error);
902   } else {
903     NOTREACHED();
904     result = ERR_UNEXPECTED;
905   }
906
907   connect_os_error_ = os_error;
908   DoConnectComplete(result);
909   waiting_connect_ = false;
910
911   DCHECK_NE(result, ERR_IO_PENDING);
912   base::ResetAndReturn(&read_callback_).Run(result);
913 }
914
915 void TCPSocketWin::DidCompleteWrite() {
916   DCHECK(waiting_write_);
917   DCHECK(!write_callback_.is_null());
918
919   DWORD num_bytes, flags;
920   BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_,
921                                    &num_bytes, FALSE, &flags);
922   WSAResetEvent(core_->write_overlapped_.hEvent);
923   waiting_write_ = false;
924   int rv;
925   if (!ok) {
926     int os_error = WSAGetLastError();
927     rv = MapSystemError(os_error);
928     net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
929                       CreateNetLogSocketErrorCallback(rv, os_error));
930   } else {
931     rv = static_cast<int>(num_bytes);
932     if (rv > core_->write_buffer_length_ || rv < 0) {
933       // It seems that some winsock interceptors report that more was written
934       // than was available. Treat this as an error.  http://crbug.com/27870
935       LOG(ERROR) << "Detected broken LSP: Asked to write "
936                  << core_->write_buffer_length_ << " bytes, but " << rv
937                  << " bytes reported.";
938       rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
939     } else {
940       base::StatsCounter write_bytes("tcp.write_bytes");
941       write_bytes.Add(num_bytes);
942       net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes,
943                                     core_->write_iobuffer_->data());
944     }
945   }
946
947   core_->write_iobuffer_ = NULL;
948
949   DCHECK_NE(rv, ERR_IO_PENDING);
950   base::ResetAndReturn(&write_callback_).Run(rv);
951 }
952
953 void TCPSocketWin::DidSignalRead() {
954   DCHECK(waiting_read_);
955   DCHECK(!read_callback_.is_null());
956
957   int os_error = 0;
958   WSANETWORKEVENTS network_events;
959   int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
960                                 &network_events);
961   if (rv == SOCKET_ERROR) {
962     os_error = WSAGetLastError();
963     rv = MapSystemError(os_error);
964   } else if (network_events.lNetworkEvents) {
965     DCHECK_EQ(network_events.lNetworkEvents & ~(FD_READ | FD_CLOSE), 0);
966     // If network_events.lNetworkEvents is FD_CLOSE and
967     // network_events.iErrorCode[FD_CLOSE_BIT] is 0, it is a graceful
968     // connection closure. It is tempting to directly set rv to 0 in
969     // this case, but the MSDN pages for WSAEventSelect and
970     // WSAAsyncSelect recommend we still call DoRead():
971     //   FD_CLOSE should only be posted after all data is read from a
972     //   socket, but an application should check for remaining data upon
973     //   receipt of FD_CLOSE to avoid any possibility of losing data.
974     //
975     // If network_events.iErrorCode[FD_READ_BIT] or
976     // network_events.iErrorCode[FD_CLOSE_BIT] is nonzero, still call
977     // DoRead() because recv() reports a more accurate error code
978     // (WSAECONNRESET vs. WSAECONNABORTED) when the connection was
979     // reset.
980     rv = DoRead(core_->read_iobuffer_, core_->read_buffer_length_,
981                 read_callback_);
982     if (rv == ERR_IO_PENDING)
983       return;
984   } else {
985     // This may happen because Read() may succeed synchronously and
986     // consume all the received data without resetting the event object.
987     core_->WatchForRead();
988     return;
989   }
990
991   waiting_read_ = false;
992   core_->read_iobuffer_ = NULL;
993   core_->read_buffer_length_ = 0;
994
995   DCHECK_NE(rv, ERR_IO_PENDING);
996   base::ResetAndReturn(&read_callback_).Run(rv);
997 }
998
999 }  // namespace net
1000