Objects/fileobject.c

   1 /* File object implementation */
   2
   3 #define PY_SSIZE_T_CLEAN
   4 #include "Python.h"
   5 #include "structmember.h"
   6
   7 #ifdef HAVE_SYS_TYPES_H
   8 #include <sys/types.h>
   9 #endif /* HAVE_SYS_TYPES_H */
  10
  11 #ifdef MS_WINDOWS
  12 #define fileno _fileno
  13 /* can simulate truncate with Win32 API functions; see file_truncate */
  14 #define HAVE_FTRUNCATE
  15 #define WIN32_LEAN_AND_MEAN
  16 #include <windows.h>
  17 #endif
  18
  19 #if defined(PYOS_OS2) && defined(PYCC_GCC)
  20 #include <io.h>
  21 #endif
  22
  23 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  24
  25 #ifdef HAVE_ERRNO_H
  26 #include <errno.h>
  27 #endif
  28
  29 #ifdef HAVE_GETC_UNLOCKED
  30 #define GETC(f) getc_unlocked(f)
  31 #define FLOCKFILE(f) flockfile(f)
  32 #define FUNLOCKFILE(f) funlockfile(f)
  33 #else
  34 #define GETC(f) getc(f)
  35 #define FLOCKFILE(f)
  36 #define FUNLOCKFILE(f)
  37 #endif
  38
  39 /* Bits in f_newlinetypes */
  40 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  41 #define NEWLINE_CR 1            /* \r newline seen */
  42 #define NEWLINE_LF 2            /* \n newline seen */
  43 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  44
  45 /*
  46  * These macros release the GIL while preventing the f_close() function being
  47  * called in the interval between them.  For that purpose, a running total of
  48  * the number of currently running unlocked code sections is kept in
  49  * the unlocked_count field of the PyFileObject. The close() method raises
  50  * an IOError if that field is non-zero.  See issue #815646, #595601.
  51  */
  52
  53 #define FILE_BEGIN_ALLOW_THREADS(fobj) \
  54 { \
  55     fobj->unlocked_count++; \
  56     Py_BEGIN_ALLOW_THREADS
  57
  58 #define FILE_END_ALLOW_THREADS(fobj) \
  59     Py_END_ALLOW_THREADS \
  60     fobj->unlocked_count--; \
  61     assert(fobj->unlocked_count >= 0); \
  62 }
  63
  64 #define FILE_ABORT_ALLOW_THREADS(fobj) \
  65     Py_BLOCK_THREADS \
  66     fobj->unlocked_count--; \
  67     assert(fobj->unlocked_count >= 0);
  68
  69 #ifdef __cplusplus
  70 extern "C" {
  71 #endif
  72
  73 FILE *
  74 PyFile_AsFile(PyObject *f)
  75 {
  76     if (f == NULL || !PyFile_Check(f))
  77         return NULL;
  78     else
  79         return ((PyFileObject *)f)->f_fp;
  80 }
  81
  82 void PyFile_IncUseCount(PyFileObject *fobj)
  83 {
  84     fobj->unlocked_count++;
  85 }
  86
  87 void PyFile_DecUseCount(PyFileObject *fobj)
  88 {
  89     fobj->unlocked_count--;
  90     assert(fobj->unlocked_count >= 0);
  91 }
  92
  93 PyObject *
  94 PyFile_Name(PyObject *f)
  95 {
  96     if (f == NULL || !PyFile_Check(f))
  97         return NULL;
  98     else
  99         return ((PyFileObject *)f)->f_name;
 100 }
 101
 102 /* This is a safe wrapper around PyObject_Print to print to the FILE
 103    of a PyFileObject. PyObject_Print releases the GIL but knows nothing
 104    about PyFileObject. */
 105 static int
 106 file_PyObject_Print(PyObject *op, PyFileObject *f, int flags)
 107 {
 108     int result;
 109     PyFile_IncUseCount(f);
 110     result = PyObject_Print(op, f->f_fp, flags);
 111     PyFile_DecUseCount(f);
 112     return result;
 113 }
 114
 115 /* On Unix, fopen will succeed for directories.
 116    In Python, there should be no file objects referring to
 117    directories, so we need a check.  */
 118
 119 static PyFileObject*
 120 dircheck(PyFileObject* f)
 121 {
 122 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
 123     struct stat buf;
 124     if (f->f_fp == NULL)
 125         return f;
 126     if (fstat(fileno(f->f_fp), &buf) == 0 &&
 127         S_ISDIR(buf.st_mode)) {
 128         char *msg = strerror(EISDIR);
 129         PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(isO)",
 130                                               EISDIR, msg, f->f_name);
 131         PyErr_SetObject(PyExc_IOError, exc);
 132         Py_XDECREF(exc);
 133         return NULL;
 134     }
 135 #endif
 136     return f;
 137 }
 138
 139
 140 static PyObject *
 141 fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
 142                  int (*close)(FILE *))
 143 {
 144     assert(name != NULL);
 145     assert(f != NULL);
 146     assert(PyFile_Check(f));
 147     assert(f->f_fp == NULL);
 148
 149     Py_DECREF(f->f_name);
 150     Py_DECREF(f->f_mode);
 151     Py_DECREF(f->f_encoding);
 152     Py_DECREF(f->f_errors);
 153
 154     Py_INCREF(name);
 155     f->f_name = name;
 156
 157     f->f_mode = PyString_FromString(mode);
 158
 159     f->f_close = close;
 160     f->f_softspace = 0;
 161     f->f_binary = strchr(mode,'b') != NULL;
 162     f->f_buf = NULL;
 163     f->f_univ_newline = (strchr(mode, 'U') != NULL);
 164     f->f_newlinetypes = NEWLINE_UNKNOWN;
 165     f->f_skipnextlf = 0;
 166     Py_INCREF(Py_None);
 167     f->f_encoding = Py_None;
 168     Py_INCREF(Py_None);
 169     f->f_errors = Py_None;
 170     f->readable = f->writable = 0;
 171     if (strchr(mode, 'r') != NULL || f->f_univ_newline)
 172         f->readable = 1;
 173     if (strchr(mode, 'w') != NULL || strchr(mode, 'a') != NULL)
 174         f->writable = 1;
 175     if (strchr(mode, '+') != NULL)
 176         f->readable = f->writable = 1;
 177
 178     if (f->f_mode == NULL)
 179         return NULL;
 180     f->f_fp = fp;
 181     f = dircheck(f);
 182     return (PyObject *) f;
 183 }
 184
 185 #if defined _MSC_VER && _MSC_VER >= 1400 && defined(__STDC_SECURE_LIB__)
 186 #define Py_VERIFY_WINNT
/* The CRT on windows compiled with Visual Studio 2005 and higher may
 * assert if given invalid mode strings.  This is all fine and well
 * in static languages like C where the mode string is typically hard
 * coded.  But in Python, where we pass in the mode string from the user,
 * we need to verify it manually first.
 */
 193 static int _PyVerify_Mode_WINNT(const char *mode)
 194 {
 195     /* See if mode string is valid on Windows to avoid hard assertions */
 196     /* remove leading spacese */
 197     int singles = 0;
 198     int pairs = 0;
 199     int encoding = 0;
 200     const char *s, *c;
 201
 202     while(*mode == ' ') /* strip initial spaces */
 203         ++mode;
 204     if (!strchr("rwa", *mode)) /* must start with one of these */
 205         return 0;
 206     while (*++mode) {
 207         if (*mode == ' ' || *mode == 'N') /* ignore spaces and N */
 208             continue;
 209         s = "+TD"; /* each of this can appear only once */
 210         c = strchr(s, *mode);
 211         if (c) {
 212             ptrdiff_t idx = s-c;
 213             if (singles & (1<<idx))
 214                 return 0;
 215             singles |= (1<<idx);
 216             continue;
 217         }
 218         s = "btcnSR"; /* only one of each letter in the pairs allowed */
 219         c = strchr(s, *mode);
 220         if (c) {
 221             ptrdiff_t idx = (s-c)/2;
 222             if (pairs & (1<<idx))
 223                 return 0;
 224             pairs |= (1<<idx);
 225             continue;
 226         }
 227         if (*mode == ',') {
 228             encoding = 1;
 229             break;
 230         }
 231         return 0; /* found an invalid char */
 232     }
 233
 234     if (encoding) {
 235         char *e[] = {"UTF-8", "UTF-16LE", "UNICODE"};
 236         while (*mode == ' ')
 237             ++mode;
 238         /* find 'ccs =' */
 239         if (strncmp(mode, "ccs", 3))
 240             return 0;
 241         mode += 3;
 242         while (*mode == ' ')
 243             ++mode;
 244         if (*mode != '=')
 245             return 0;
 246         while (*mode == ' ')
 247             ++mode;
 248         for(encoding = 0; encoding<_countof(e); ++encoding) {
 249             size_t l = strlen(e[encoding]);
 250             if (!strncmp(mode, e[encoding], l)) {
 251                 mode += l; /* found a valid encoding */
 252                 break;
 253             }
 254         }
 255         if (encoding == _countof(e))
 256             return 0;
 257     }
 258     /* skip trailing spaces */
 259     while (*mode == ' ')
 260         ++mode;
 261
 262     return *mode == '\0'; /* must be at the end of the string */
 263 }
 264 #endif
 265
 266 /* check for known incorrect mode strings - problem is, platforms are
 267    free to accept any mode characters they like and are supposed to
 268    ignore stuff they don't understand... write or append mode with
 269    universal newline support is expressly forbidden by PEP 278.
 270    Additionally, remove the 'U' from the mode string as platforms
 271    won't know what it is. Non-zero return signals an exception */
 272 int
 273 _PyFile_SanitizeMode(char *mode)
 274 {
 275     char *upos;
 276     size_t len = strlen(mode);
 277
 278     if (!len) {
 279         PyErr_SetString(PyExc_ValueError, "empty mode string");
 280         return -1;
 281     }
 282
 283     upos = strchr(mode, 'U');
 284     if (upos) {
 285         memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
 286
 287         if (mode[0] == 'w' || mode[0] == 'a') {
 288             PyErr_Format(PyExc_ValueError, "universal newline "
 289                          "mode can only be used with modes "
 290                          "starting with 'r'");
 291             return -1;
 292         }
 293
 294         if (mode[0] != 'r') {
 295             memmove(mode+1, mode, strlen(mode)+1);
 296             mode[0] = 'r';
 297         }
 298
 299         if (!strchr(mode, 'b')) {
 300             memmove(mode+2, mode+1, strlen(mode));
 301             mode[1] = 'b';
 302         }
 303     } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
 304         PyErr_Format(PyExc_ValueError, "mode string must begin with "
 305                     "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
 306         return -1;
 307     }
 308 #ifdef Py_VERIFY_WINNT
 309     /* additional checks on NT with visual studio 2005 and higher */
 310     if (!_PyVerify_Mode_WINNT(mode)) {
 311         PyErr_Format(PyExc_ValueError, "Invalid mode ('%.50s')", mode);
 312         return -1;
 313     }
 314 #endif
 315     return 0;
 316 }
 317
 318 static PyObject *
 319 open_the_file(PyFileObject *f, char *name, char *mode)
 320 {
 321     char *newmode;
 322     assert(f != NULL);
 323     assert(PyFile_Check(f));
 324 #ifdef MS_WINDOWS
 325     /* windows ignores the passed name in order to support Unicode */
 326     assert(f->f_name != NULL);
 327 #else
 328     assert(name != NULL);
 329 #endif
 330     assert(mode != NULL);
 331     assert(f->f_fp == NULL);
 332
 333     /* probably need to replace 'U' by 'rb' */
 334     newmode = PyMem_MALLOC(strlen(mode) + 3);
 335     if (!newmode) {
 336         PyErr_NoMemory();
 337         return NULL;
 338     }
 339     strcpy(newmode, mode);
 340
 341     if (_PyFile_SanitizeMode(newmode)) {
 342         f = NULL;
 343         goto cleanup;
 344     }
 345
 346     /* rexec.py can't stop a user from getting the file() constructor --
 347        all they have to do is get *any* file object f, and then do
 348        type(f).  Here we prevent them from doing damage with it. */
 349     if (PyEval_GetRestricted()) {
 350         PyErr_SetString(PyExc_IOError,
 351         "file() constructor not accessible in restricted mode");
 352         f = NULL;
 353         goto cleanup;
 354     }
 355     errno = 0;
 356
 357 #ifdef MS_WINDOWS
 358     if (PyUnicode_Check(f->f_name)) {
 359         PyObject *wmode;
 360         wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
 361         if (f->f_name && wmode) {
 362             FILE_BEGIN_ALLOW_THREADS(f)
 363             /* PyUnicode_AS_UNICODE OK without thread
 364                lock as it is a simple dereference. */
 365             f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
 366                               PyUnicode_AS_UNICODE(wmode));
 367             FILE_END_ALLOW_THREADS(f)
 368         }
 369         Py_XDECREF(wmode);
 370     }
 371 #endif
 372     if (NULL == f->f_fp && NULL != name) {
 373         FILE_BEGIN_ALLOW_THREADS(f)
 374         f->f_fp = fopen(name, newmode);
 375         FILE_END_ALLOW_THREADS(f)
 376     }
 377
 378     if (f->f_fp == NULL) {
 379 #if defined  _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
 380         /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
 381          * across all Windows flavors.  When it sets EINVAL varies
 382          * across Windows flavors, the exact conditions aren't
 383          * documented, and the answer lies in the OS's implementation
 384          * of Win32's CreateFile function (whose source is secret).
 385          * Seems the best we can do is map EINVAL to ENOENT.
 386          * Starting with Visual Studio .NET 2005, EINVAL is correctly
 387          * set by our CRT error handler (set in exceptions.c.)
 388          */
 389         if (errno == 0)         /* bad mode string */
 390             errno = EINVAL;
 391         else if (errno == EINVAL) /* unknown, but not a mode string */
 392             errno = ENOENT;
 393 #endif
 394         /* EINVAL is returned when an invalid filename or
 395          * an invalid mode is supplied. */
 396         if (errno == EINVAL) {
 397             PyObject *v;
 398             char message[100];
 399             PyOS_snprintf(message, 100,
 400                 "invalid mode ('%.50s') or filename", mode);
 401             v = Py_BuildValue("(isO)", errno, message, f->f_name);
 402             if (v != NULL) {
 403                 PyErr_SetObject(PyExc_IOError, v);
 404                 Py_DECREF(v);
 405             }
 406         }
 407         else
 408             PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
 409         f = NULL;
 410     }
 411     if (f != NULL)
 412         f = dircheck(f);
 413
 414 cleanup:
 415     PyMem_FREE(newmode);
 416
 417     return (PyObject *)f;
 418 }
 419
 420 static PyObject *
 421 close_the_file(PyFileObject *f)
 422 {
 423     int sts = 0;
 424     int (*local_close)(FILE *);
 425     FILE *local_fp = f->f_fp;
 426     char *local_setbuf = f->f_setbuf;
 427     if (local_fp != NULL) {
 428         local_close = f->f_close;
 429         if (local_close != NULL && f->unlocked_count > 0) {
 430             if (f->ob_refcnt > 0) {
 431                 PyErr_SetString(PyExc_IOError,
 432                     "close() called during concurrent "
 433                     "operation on the same file object.");
 434             } else {
 435                 /* This should not happen unless someone is
 436                  * carelessly playing with the PyFileObject
 437                  * struct fields and/or its associated FILE
 438                  * pointer. */
 439                 PyErr_SetString(PyExc_SystemError,
 440                     "PyFileObject locking error in "
 441                     "destructor (refcnt <= 0 at close).");
 442             }
 443             return NULL;
 444         }
 445         /* NULL out the FILE pointer before releasing the GIL, because
 446          * it will not be valid anymore after the close() function is
 447          * called. */
 448         f->f_fp = NULL;
 449         if (local_close != NULL) {
 450             /* Issue #9295: must temporarily reset f_setbuf so that another
 451                thread doesn't free it when running file_close() concurrently.
 452                Otherwise this close() will crash when flushing the buffer. */
 453             f->f_setbuf = NULL;
 454             Py_BEGIN_ALLOW_THREADS
 455             errno = 0;
 456             sts = (*local_close)(local_fp);
 457             Py_END_ALLOW_THREADS
 458             f->f_setbuf = local_setbuf;
 459             if (sts == EOF)
 460                 return PyErr_SetFromErrno(PyExc_IOError);
 461             if (sts != 0)
 462                 return PyInt_FromLong((long)sts);
 463         }
 464     }
 465     Py_RETURN_NONE;
 466 }
 467
 468 PyObject *
 469 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
 470 {
 471     PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
 472                                                          NULL, NULL);
 473     if (f != NULL) {
 474         PyObject *o_name = PyString_FromString(name);
 475         if (o_name == NULL)
 476             return NULL;
 477         if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
 478             Py_DECREF(f);
 479             f = NULL;
 480         }
 481         Py_DECREF(o_name);
 482     }
 483     return (PyObject *) f;
 484 }
 485
 486 PyObject *
 487 PyFile_FromString(char *name, char *mode)
 488 {
 489     extern int fclose(FILE *);
 490     PyFileObject *f;
 491
 492     f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
 493     if (f != NULL) {
 494         if (open_the_file(f, name, mode) == NULL) {
 495             Py_DECREF(f);
 496             f = NULL;
 497         }
 498     }
 499     return (PyObject *)f;
 500 }
 501
 502 void
 503 PyFile_SetBufSize(PyObject *f, int bufsize)
 504 {
 505     PyFileObject *file = (PyFileObject *)f;
 506     if (bufsize >= 0) {
 507         int type;
 508         switch (bufsize) {
 509         case 0:
 510             type = _IONBF;
 511             break;
 512 #ifdef HAVE_SETVBUF
 513         case 1:
 514             type = _IOLBF;
 515             bufsize = BUFSIZ;
 516             break;
 517 #endif
 518         default:
 519             type = _IOFBF;
 520 #ifndef HAVE_SETVBUF
 521             bufsize = BUFSIZ;
 522 #endif
 523             break;
 524         }
 525         fflush(file->f_fp);
 526         if (type == _IONBF) {
 527             PyMem_Free(file->f_setbuf);
 528             file->f_setbuf = NULL;
 529         } else {
 530             file->f_setbuf = (char *)PyMem_Realloc(file->f_setbuf,
 531                                                     bufsize);
 532         }
 533 #ifdef HAVE_SETVBUF
 534         setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
 535 #else /* !HAVE_SETVBUF */
 536         setbuf(file->f_fp, file->f_setbuf);
 537 #endif /* !HAVE_SETVBUF */
 538     }
 539 }
 540
 541 /* Set the encoding used to output Unicode strings.
 542    Return 1 on success, 0 on failure. */
 543
 544 int
 545 PyFile_SetEncoding(PyObject *f, const char *enc)
 546 {
 547     return PyFile_SetEncodingAndErrors(f, enc, NULL);
 548 }
 549
 550 int
 551 PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
 552 {
 553     PyFileObject *file = (PyFileObject*)f;
 554     PyObject *str, *oerrors;
 555
 556     assert(PyFile_Check(f));
 557     str = PyString_FromString(enc);
 558     if (!str)
 559         return 0;
 560     if (errors) {
 561         oerrors = PyString_FromString(errors);
 562         if (!oerrors) {
 563             Py_DECREF(str);
 564             return 0;
 565         }
 566     } else {
 567         oerrors = Py_None;
 568         Py_INCREF(Py_None);
 569     }
 570     Py_DECREF(file->f_encoding);
 571     file->f_encoding = str;
 572     Py_DECREF(file->f_errors);
 573     file->f_errors = oerrors;
 574     return 1;
 575 }
 576
 577 static PyObject *
 578 err_closed(void)
 579 {
 580     PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
 581     return NULL;
 582 }
 583
 584 static PyObject *
 585 err_mode(char *action)
 586 {
 587     PyErr_Format(PyExc_IOError, "File not open for %s", action);
 588     return NULL;
 589 }
 590
 591 /* Refuse regular file I/O if there's data in the iteration-buffer.
 592  * Mixing them would cause data to arrive out of order, as the read*
 593  * methods don't use the iteration buffer. */
 594 static PyObject *
 595 err_iterbuffered(void)
 596 {
 597     PyErr_SetString(PyExc_ValueError,
 598         "Mixing iteration and read methods would lose data");
 599     return NULL;
 600 }
 601
 602 static void drop_readahead(PyFileObject *);
 603
 604 /* Methods */
 605
 606 static void
 607 file_dealloc(PyFileObject *f)
 608 {
 609     PyObject *ret;
 610     if (f->weakreflist != NULL)
 611         PyObject_ClearWeakRefs((PyObject *) f);
 612     ret = close_the_file(f);
 613     if (!ret) {
 614         PySys_WriteStderr("close failed in file object destructor:\n");
 615         PyErr_Print();
 616     }
 617     else {
 618         Py_DECREF(ret);
 619     }
 620     PyMem_Free(f->f_setbuf);
 621     Py_XDECREF(f->f_name);
 622     Py_XDECREF(f->f_mode);
 623     Py_XDECREF(f->f_encoding);
 624     Py_XDECREF(f->f_errors);
 625     drop_readahead(f);
 626     Py_TYPE(f)->tp_free((PyObject *)f);
 627 }
 628
 629 static PyObject *
 630 file_repr(PyFileObject *f)
 631 {
 632     if (PyUnicode_Check(f->f_name)) {
 633 #ifdef Py_USING_UNICODE
 634         PyObject *ret = NULL;
 635         PyObject *name = PyUnicode_AsUnicodeEscapeString(f->f_name);
 636         const char *name_str = name ? PyString_AsString(name) : "?";
 637         ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
 638                            f->f_fp == NULL ? "closed" : "open",
 639                            name_str,
 640                            PyString_AsString(f->f_mode),
 641                            f);
 642         Py_XDECREF(name);
 643         return ret;
 644 #endif
 645     } else {
 646         return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
 647                            f->f_fp == NULL ? "closed" : "open",
 648                            PyString_AsString(f->f_name),
 649                            PyString_AsString(f->f_mode),
 650                            f);
 651     }
 652 }
 653
 654 static PyObject *
 655 file_close(PyFileObject *f)
 656 {
 657     PyObject *sts = close_the_file(f);
 658     if (sts) {
 659         PyMem_Free(f->f_setbuf);
 660         f->f_setbuf = NULL;
 661     }
 662     return sts;
 663 }
 664
 665
 666 /* Our very own off_t-like type, 64-bit if possible */
 667 #if !defined(HAVE_LARGEFILE_SUPPORT)
 668 typedef off_t Py_off_t;
 669 #elif SIZEOF_OFF_T >= 8
 670 typedef off_t Py_off_t;
 671 #elif SIZEOF_FPOS_T >= 8
 672 typedef fpos_t Py_off_t;
 673 #else
 674 #error "Large file support, but neither off_t nor fpos_t is large enough."
 675 #endif
 676
 677
 678 /* a portable fseek() function
 679    return 0 on success, non-zero on failure (with errno set) */
 680 static int
 681 _portable_fseek(FILE *fp, Py_off_t offset, int whence)
 682 {
 683 #if !defined(HAVE_LARGEFILE_SUPPORT)
 684     return fseek(fp, offset, whence);
 685 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
 686     return fseeko(fp, offset, whence);
 687 #elif defined(HAVE_FSEEK64)
 688     return fseek64(fp, offset, whence);
 689 #elif defined(__BEOS__)
 690     return _fseek(fp, offset, whence);
 691 #elif SIZEOF_FPOS_T >= 8
 692     /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
 693        and fgetpos() to implement fseek()*/
 694     fpos_t pos;
 695     switch (whence) {
 696     case SEEK_END:
 697 #ifdef MS_WINDOWS
 698         fflush(fp);
 699         if (_lseeki64(fileno(fp), 0, 2) == -1)
 700             return -1;
 701 #else
 702         if (fseek(fp, 0, SEEK_END) != 0)
 703             return -1;
 704 #endif
 705         /* fall through */
 706     case SEEK_CUR:
 707         if (fgetpos(fp, &pos) != 0)
 708             return -1;
 709         offset += pos;
 710         break;
 711     /* case SEEK_SET: break; */
 712     }
 713     return fsetpos(fp, &offset);
 714 #else
 715 #error "Large file support, but no way to fseek."
 716 #endif
 717 }
 718
 719
 720 /* a portable ftell() function
 721    Return -1 on failure with errno set appropriately, current file
 722    position on success */
 723 static Py_off_t
 724 _portable_ftell(FILE* fp)
 725 {
 726 #if !defined(HAVE_LARGEFILE_SUPPORT)
 727     return ftell(fp);
 728 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
 729     return ftello(fp);
 730 #elif defined(HAVE_FTELL64)
 731     return ftell64(fp);
 732 #elif SIZEOF_FPOS_T >= 8
 733     fpos_t pos;
 734     if (fgetpos(fp, &pos) != 0)
 735         return -1;
 736     return pos;
 737 #else
 738 #error "Large file support, but no way to ftell."
 739 #endif
 740 }
 741
 742
 743 static PyObject *
 744 file_seek(PyFileObject *f, PyObject *args)
 745 {
 746     int whence;
 747     int ret;
 748     Py_off_t offset;
 749     PyObject *offobj, *off_index;
 750
 751     if (f->f_fp == NULL)
 752         return err_closed();
 753     drop_readahead(f);
 754     whence = 0;
 755     if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
 756         return NULL;
 757     off_index = PyNumber_Index(offobj);
 758     if (!off_index) {
 759         if (!PyFloat_Check(offobj))
 760             return NULL;
 761         /* Deprecated in 2.6 */
 762         PyErr_Clear();
 763         if (PyErr_WarnEx(PyExc_DeprecationWarning,
 764                          "integer argument expected, got float",
 765                          1) < 0)
 766             return NULL;
 767         off_index = offobj;
 768         Py_INCREF(offobj);
 769     }
 770 #if !defined(HAVE_LARGEFILE_SUPPORT)
 771     offset = PyInt_AsLong(off_index);
 772 #else
 773     offset = PyLong_Check(off_index) ?
 774         PyLong_AsLongLong(off_index) : PyInt_AsLong(off_index);
 775 #endif
 776     Py_DECREF(off_index);
 777     if (PyErr_Occurred())
 778         return NULL;
 779
 780     FILE_BEGIN_ALLOW_THREADS(f)
 781     errno = 0;
 782     ret = _portable_fseek(f->f_fp, offset, whence);
 783     FILE_END_ALLOW_THREADS(f)
 784
 785     if (ret != 0) {
 786         PyErr_SetFromErrno(PyExc_IOError);
 787         clearerr(f->f_fp);
 788         return NULL;
 789     }
 790     f->f_skipnextlf = 0;
 791     Py_INCREF(Py_None);
 792     return Py_None;
 793 }
 794
 795
 796 #ifdef HAVE_FTRUNCATE
 797 static PyObject *
 798 file_truncate(PyFileObject *f, PyObject *args)
 799 {
 800     Py_off_t newsize;
 801     PyObject *newsizeobj = NULL;
 802     Py_off_t initialpos;
 803     int ret;
 804
 805     if (f->f_fp == NULL)
 806         return err_closed();
 807     if (!f->writable)
 808         return err_mode("writing");
 809     if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
 810         return NULL;
 811
 812     /* Get current file position.  If the file happens to be open for
 813      * update and the last operation was an input operation, C doesn't
 814      * define what the later fflush() will do, but we promise truncate()
 815      * won't change the current position (and fflush() *does* change it
 816      * then at least on Windows).  The easiest thing is to capture
 817      * current pos now and seek back to it at the end.
 818      */
 819     FILE_BEGIN_ALLOW_THREADS(f)
 820     errno = 0;
 821     initialpos = _portable_ftell(f->f_fp);
 822     FILE_END_ALLOW_THREADS(f)
 823     if (initialpos == -1)
 824         goto onioerror;
 825
 826     /* Set newsize to current postion if newsizeobj NULL, else to the
 827      * specified value.
 828      */
 829     if (newsizeobj != NULL) {
 830 #if !defined(HAVE_LARGEFILE_SUPPORT)
 831         newsize = PyInt_AsLong(newsizeobj);
 832 #else
 833         newsize = PyLong_Check(newsizeobj) ?
 834                         PyLong_AsLongLong(newsizeobj) :
 835                 PyInt_AsLong(newsizeobj);
 836 #endif
 837         if (PyErr_Occurred())
 838             return NULL;
 839     }
 840     else /* default to current position */
 841         newsize = initialpos;
 842
 843     /* Flush the stream.  We're mixing stream-level I/O with lower-level
 844      * I/O, and a flush may be necessary to synch both platform views
 845      * of the current file state.
 846      */
 847     FILE_BEGIN_ALLOW_THREADS(f)
 848     errno = 0;
 849     ret = fflush(f->f_fp);
 850     FILE_END_ALLOW_THREADS(f)
 851     if (ret != 0)
 852         goto onioerror;
 853
 854 #ifdef MS_WINDOWS
 855     /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
 856        so don't even try using it. */
 857     {
 858         HANDLE hFile;
 859
 860         /* Have to move current pos to desired endpoint on Windows. */
 861         FILE_BEGIN_ALLOW_THREADS(f)
 862         errno = 0;
 863         ret = _portable_fseek(f->f_fp, newsize, SEEK_SET) != 0;
 864         FILE_END_ALLOW_THREADS(f)
 865         if (ret)
 866             goto onioerror;
 867
 868         /* Truncate.  Note that this may grow the file! */
 869         FILE_BEGIN_ALLOW_THREADS(f)
 870         errno = 0;
 871         hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
 872         ret = hFile == (HANDLE)-1;
 873         if (ret == 0) {
 874             ret = SetEndOfFile(hFile) == 0;
 875             if (ret)
 876                 errno = EACCES;
 877         }
 878         FILE_END_ALLOW_THREADS(f)
 879         if (ret)
 880             goto onioerror;
 881     }
 882 #else
 883     FILE_BEGIN_ALLOW_THREADS(f)
 884     errno = 0;
 885     ret = ftruncate(fileno(f->f_fp), newsize);
 886     FILE_END_ALLOW_THREADS(f)
 887     if (ret != 0)
 888         goto onioerror;
 889 #endif /* !MS_WINDOWS */
 890
 891     /* Restore original file position. */
 892     FILE_BEGIN_ALLOW_THREADS(f)
 893     errno = 0;
 894     ret = _portable_fseek(f->f_fp, initialpos, SEEK_SET) != 0;
 895     FILE_END_ALLOW_THREADS(f)
 896     if (ret)
 897         goto onioerror;
 898
 899     Py_INCREF(Py_None);
 900     return Py_None;
 901
 902 onioerror:
 903     PyErr_SetFromErrno(PyExc_IOError);
 904     clearerr(f->f_fp);
 905     return NULL;
 906 }
 907 #endif /* HAVE_FTRUNCATE */
 908
 909 static PyObject *
 910 file_tell(PyFileObject *f)
 911 {
 912     Py_off_t pos;
 913
 914     if (f->f_fp == NULL)
 915         return err_closed();
 916     FILE_BEGIN_ALLOW_THREADS(f)
 917     errno = 0;
 918     pos = _portable_ftell(f->f_fp);
 919     FILE_END_ALLOW_THREADS(f)
 920
 921     if (pos == -1) {
 922         PyErr_SetFromErrno(PyExc_IOError);
 923         clearerr(f->f_fp);
 924         return NULL;
 925     }
 926     if (f->f_skipnextlf) {
 927         int c;
 928         c = GETC(f->f_fp);
 929         if (c == '\n') {
 930             f->f_newlinetypes |= NEWLINE_CRLF;
 931             pos++;
 932             f->f_skipnextlf = 0;
 933         } else if (c != EOF) ungetc(c, f->f_fp);
 934     }
 935 #if !defined(HAVE_LARGEFILE_SUPPORT)
 936     return PyInt_FromLong(pos);
 937 #else
 938     return PyLong_FromLongLong(pos);
 939 #endif
 940 }
 941
 942 static PyObject *
 943 file_fileno(PyFileObject *f)
 944 {
 945     if (f->f_fp == NULL)
 946         return err_closed();
 947     return PyInt_FromLong((long) fileno(f->f_fp));
 948 }
 949
 950 static PyObject *
 951 file_flush(PyFileObject *f)
 952 {
 953     int res;
 954
 955     if (f->f_fp == NULL)
 956         return err_closed();
 957     FILE_BEGIN_ALLOW_THREADS(f)
 958     errno = 0;
 959     res = fflush(f->f_fp);
 960     FILE_END_ALLOW_THREADS(f)
 961     if (res != 0) {
 962         PyErr_SetFromErrno(PyExc_IOError);
 963         clearerr(f->f_fp);
 964         return NULL;
 965     }
 966     Py_INCREF(Py_None);
 967     return Py_None;
 968 }
 969
 970 static PyObject *
 971 file_isatty(PyFileObject *f)
 972 {
 973     long res;
 974     if (f->f_fp == NULL)
 975         return err_closed();
 976     FILE_BEGIN_ALLOW_THREADS(f)
 977     res = isatty((int)fileno(f->f_fp));
 978     FILE_END_ALLOW_THREADS(f)
 979     return PyBool_FromLong(res);
 980 }
 981
 982
 983 #if BUFSIZ < 8192
 984 #define SMALLCHUNK 8192
 985 #else
 986 #define SMALLCHUNK BUFSIZ
 987 #endif
 988
 989 #if SIZEOF_INT < 4
 990 #define BIGCHUNK  (512 * 32)
 991 #else
 992 #define BIGCHUNK  (512 * 1024)
 993 #endif
 994
 995 static size_t
 996 new_buffersize(PyFileObject *f, size_t currentsize)
 997 {
         /* Suggest the next buffer size for a read-until-EOF loop.
          *
          * f           - file object whose stream (f->f_fp) is being read.
          * currentsize - size of the buffer allocated so far (file_read()
          *               passes 0 for its first allocation).
          *
          * With HAVE_FSTAT, when fstat() succeeds, the stream position can be
          * determined and it lies before the reported file size, the result
          * is currentsize plus the bytes remaining plus one.  In every other
          * case the size grows by SMALLCHUNK while currentsize is at most
          * SMALLCHUNK, doubles while it is at most BIGCHUNK, and grows by
          * BIGCHUNK after that.
          */
 998 #ifdef HAVE_FSTAT
 999     off_t pos, end;
1000     struct stat st;
1001     if (fstat(fileno(f->f_fp), &st) == 0) {
1002         end = st.st_size;
1003         /* The following is not a bug: we really need to call lseek()
1004            *and* ftell().  The reason is that some stdio libraries
1005            mistakenly flush their buffer when ftell() is called and
1006            the lseek() call it makes fails, thereby throwing away
1007            data that cannot be recovered in any way.  To avoid this,
1008            we first test lseek(), and only call ftell() if lseek()
1009            works.  We can't use the lseek() value either, because we
1010            need to take the amount of buffered data into account.
1011            (Yet another reason why stdio stinks. :-) */
1012         pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
1013         if (pos >= 0) {
1014             pos = ftell(f->f_fp);
1015         }
             /* A negative position means lseek() or ftell() failed; reset the
                stream's error state and fall through to the generic growth
                policy below. */
1016         if (pos < 0)
1017             clearerr(f->f_fp);
1018         if (end > pos && pos >= 0)
1019             return currentsize + end - pos + 1;
1020         /* Add 1 so if the file were to grow we'd notice. */
1021     }
1022 #endif
1023     if (currentsize > SMALLCHUNK) {
1024         /* Keep doubling until we reach BIGCHUNK;
1025            then keep adding BIGCHUNK. */
1026         if (currentsize <= BIGCHUNK)
1027             return currentsize + currentsize;
1028         else
1029             return currentsize + BIGCHUNK;
1030     }
1031     return currentsize + SMALLCHUNK;
1032 }
1033
1034 #if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
     /* BLOCKED_ERRNO(x) is true when the errno value x is EWOULDBLOCK or
        EAGAIN, testing whichever of the two the platform defines (both only
        when they are distinct values).  It is the constant 0 when neither
        is defined.  file_read() uses it to keep data that was already read
        instead of raising. */
1035 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
1036 #else
1037 #ifdef EWOULDBLOCK
1038 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
1039 #else
1040 #ifdef EAGAIN
1041 #define BLOCKED_ERRNO(x) ((x) == EAGAIN)
1042 #else
1043 #define BLOCKED_ERRNO(x) 0
1044 #endif
1045 #endif
1046 #endif
1047
1048 static PyObject *
1049 file_read(PyFileObject *f, PyObject *args)
1050 {
         /* Python method read([size]).
          *
          * args - argument tuple holding an optional C long.  A missing or
          *        negative value means "read until EOF"; otherwise at most
          *        that many bytes are read.
          *
          * Returns a new string object holding the bytes read (shrunk to the
          * actual count), or NULL after an error: closed file, file not
          * readable, data pending in the iteration buffer, bad arguments,
          * a size larger than PY_SSIZE_T_MAX (OverflowError), allocation
          * failure, or a stream error (IOError built from errno).
          */
1051     long bytesrequested = -1;
1052     size_t bytesread, buffersize, chunksize;
1053     PyObject *v;
1054
1055     if (f->f_fp == NULL)
1056         return err_closed();
1057     if (!f->readable)
1058         return err_mode("reading");
1059     /* refuse to mix with f.next() */
1060     if (f->f_buf != NULL &&
1061         (f->f_bufend - f->f_bufptr) > 0 &&
1062         f->f_buf[0] != '\0')
1063         return err_iterbuffered();
1064     if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
1065         return NULL;
         /* Unbounded read: let new_buffersize() pick the initial size. */
1066     if (bytesrequested < 0)
1067         buffersize = new_buffersize(f, (size_t)0);
1068     else
1069         buffersize = bytesrequested;
1070     if (buffersize > PY_SSIZE_T_MAX) {
1071         PyErr_SetString(PyExc_OverflowError,
1072     "requested number of bytes is more than a Python string can hold");
1073         return NULL;
1074     }
1075     v = PyString_FromStringAndSize((char *)NULL, buffersize);
1076     if (v == NULL)
1077         return NULL;
1078     bytesread = 0;
1079     for (;;) {
             /* errno is cleared before the read so that the value examined
                after a failure comes from this call. */
1080         FILE_BEGIN_ALLOW_THREADS(f)
1081         errno = 0;
1082         chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
1083                   buffersize - bytesread, f->f_fp, (PyObject *)f);
1084         FILE_END_ALLOW_THREADS(f)
1085         if (chunksize == 0) {
                 /* Nothing read and no stream error: treat as end of data. */
1086             if (!ferror(f->f_fp))
1087                 break;
1088             clearerr(f->f_fp);
1089             /* When in non-blocking mode, data shouldn't
1090              * be discarded if a blocking signal was
1091              * received. That will also happen if
1092              * chunksize != 0, but bytesread < buffersize. */
1093             if (bytesread > 0 && BLOCKED_ERRNO(errno))
1094                 break;
1095             PyErr_SetFromErrno(PyExc_IOError);
1096             Py_DECREF(v);
1097             return NULL;
1098         }
1099         bytesread += chunksize;
             /* A short read ends the loop, whether it came from EOF or from
                an error after partial data; the error flag is cleared. */
1100         if (bytesread < buffersize) {
1101             clearerr(f->f_fp);
1102             break;
1103         }
1104         if (bytesrequested < 0) {
1105             buffersize = new_buffersize(f, buffersize);
                 /* NOTE(review): no DECREF on this failure path; presumably
                    _PyString_Resize() disposes of v itself when it fails -
                    confirm against its contract. */
1106             if (_PyString_Resize(&v, buffersize) < 0)
1107                 return NULL;
1108         } else {
1109             /* Got what was requested. */
1110             break;
1111         }
1112     }
         /* Trim the string down to the bytes actually read. */
1113     if (bytesread != buffersize && _PyString_Resize(&v, bytesread))
1114         return NULL;
1115     return v;
1116 }
1117
1118 static PyObject *
1119 file_readinto(PyFileObject *f, PyObject *args)
1120 {
         /* Python method readinto(buffer): fill a writable buffer object
          * straight from the stream.
          *
          * args - argument tuple with one object parsed via the "w*" format.
          *
          * Reads until the buffer is full or a read returns no data.  The
          * result is a Python int with the number of bytes stored, or NULL
          * when the file is closed, not readable, has data pending in the
          * iteration buffer, the argument is rejected, or the stream reports
          * an error (IOError built from errno).  The buffer view is released
          * on every path taken after it was acquired.
          */
1121     char *dest;
1122     Py_ssize_t remaining, filled, got;
1123     Py_buffer pbuf;
1124
1125     if (!f->f_fp)
1126         return err_closed();
1127     if (!f->readable)
1128         return err_mode("reading");
1129     /* data left in the next() read-ahead buffer: direct reads not allowed */
1130     if (f->f_buf != NULL &&
1131         (f->f_bufend - f->f_bufptr) > 0 &&
1132         f->f_buf[0] != '\0')
1133         return err_iterbuffered();
1134     if (!PyArg_ParseTuple(args, "w*", &pbuf))
1135         return NULL;
1136     dest = pbuf.buf;
1137     remaining = pbuf.len;
1138     filled = 0;
1139     while (remaining > 0) {
1140         FILE_BEGIN_ALLOW_THREADS(f)
1141         errno = 0;
1142         got = Py_UniversalNewlineFread(dest+filled, remaining, f->f_fp,
1143                                        (PyObject *)f);
1144         FILE_END_ALLOW_THREADS(f)
1145         if (got == 0) {
1146             if (ferror(f->f_fp)) {
1147                 PyErr_SetFromErrno(PyExc_IOError);
1148                 clearerr(f->f_fp);
1149                 PyBuffer_Release(&pbuf);
1150                 return NULL;
1151             }
1152             break;
1153         }
1154         filled += got;
1155         remaining -= got;
1156     }
1157     PyBuffer_Release(&pbuf);
1158     return PyInt_FromSsize_t(filled);
1159 }
1160
1161 /**************************************************************************
1162 Routine to get next line using platform fgets().
1163
1164 Under MSVC 6:
1165
1166 + MS threadsafe getc is very slow (multiple layers of function calls before+
1167   after each character, to lock+unlock the stream).
1168 + The stream-locking functions are MS-internal -- can't access them from user
1169   code.
1170 + There's nothing Tim could find in the MS C or platform SDK libraries that
1171   can worm around this.
1172 + MS fgets locks/unlocks only once per line; it's the only hook we have.
1173
1174 So we use fgets for speed(!), despite that it's painful.
1175
1176 MS realloc is also slow.
1177
1178 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
1179 have):
1180     Linux               a wash
1181     Solaris             a wash
1182     Tru64 Unix          getline_via_fgets significantly faster
1183
1184 CAUTION:  The C std isn't clear about this:  in those cases where fgets
1185 writes something into the buffer, can it write into any position beyond the
1186 required trailing null byte?  MSVC 6 fgets does not, and no platform is (yet)
1187 known on which it does; and it would be a strange way to code fgets. Still,
1188 getline_via_fgets may not work correctly if it does.  The std test
1189 test_bufio.py should fail if platform fgets() routinely writes beyond the
1190 trailing null byte.  #define DONT_USE_FGETS_IN_GETLINE to disable this code.
1191 **************************************************************************/
1192
1193 /* Use this routine if told to, or by default on non-getc_unlocked()
1194  * platforms unless told not to.  Yikes!  Let's spell that out:
1195  * On a platform with getc_unlocked():
1196  *     By default, use getc_unlocked().
1197  *     If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
1198  * On a platform without getc_unlocked():
1199  *     By default, use fgets().
1200  *     If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
1201  */
1202 #if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
1203 #define USE_FGETS_IN_GETLINE
1204 #endif
1205
1206 #if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
1207 #undef USE_FGETS_IN_GETLINE
1208 #endif
1209
1210 #ifdef USE_FGETS_IN_GETLINE
1211 static PyObject*
1212 getline_via_fgets(PyFileObject *f, FILE *fp)
1213 {
     /* Read one line from fp with fgets() and return it as a new string
      * object, including the trailing newline when one was read.
      *
      * f  - file object, used only by the FILE_*_ALLOW_THREADS macros.
      * fp - stdio stream to read from.
      *
      * The target region is pre-filled with '\n' before each fgets() call;
      * afterwards the position of the first '\n' and of the '\0' that fgets()
      * wrote reveal how many bytes were actually stored.  Returns NULL when
      * PyErr_CheckSignals() reports an error, on allocation failure, or with
      * OverflowError when the line cannot fit in a string object.  When
      * fgets() returns NULL the bytes collected so far (possibly none) are
      * returned.
      */
1214 /* INITBUFSIZE is the maximum line length that lets us get away with the fast
1215  * no-realloc, one-fgets()-call path.  Boosting it isn't free, because we have
1216  * to fill this much of the buffer with a known value in order to figure out
1217  * how much of the buffer fgets() overwrites.  So if INITBUFSIZE is larger
1218  * than "most" lines, we waste time filling unused buffer slots.  100 is
1219  * surely adequate for most peoples' email archives, chewing over source code,
1220  * etc -- "regular old text files".
1221  * MAXBUFSIZE is the maximum line length that lets us get away with the less
1222  * fast (but still zippy) no-realloc, two-fgets()-call path.  See above for
1223  * cautions about boosting that.  300 was chosen because the worst real-life
1224  * text-crunching job reported on Python-Dev was a mail-log crawler where over
1225  * half the lines were 254 chars.
1226  */
1227 #define INITBUFSIZE 100
1228 #define MAXBUFSIZE 300
1229     char* p;            /* temp */
1230     char buf[MAXBUFSIZE];
1231     PyObject* v;        /* the string object result */
1232     char* pvfree;       /* address of next free slot */
1233     char* pvend;    /* address one beyond last free slot */
1234     size_t nfree;       /* # of free buffer slots; pvend-pvfree */
1235     size_t total_v_size;  /* total # of slots in buffer */
1236     size_t increment;           /* amount to increment the buffer */
1237     size_t prev_v_size;
1238
1239     /* Optimize for normal case:  avoid _PyString_Resize if at all
1240      * possible via first reading into stack buffer "buf".
1241      */
1242     total_v_size = INITBUFSIZE;         /* start small and pray */
1243     pvfree = buf;
         /* At most two passes: first with INITBUFSIZE bytes of buf, then with
          * the rest of buf up to MAXBUFSIZE. */
1244     for (;;) {
1245         FILE_BEGIN_ALLOW_THREADS(f)
1246         pvend = buf + total_v_size;
1247         nfree = pvend - pvfree;
1248         memset(pvfree, '\n', nfree);
1249         assert(nfree < INT_MAX); /* Should be at most MAXBUFSIZE */
1250         p = fgets(pvfree, (int)nfree, fp);
1251         FILE_END_ALLOW_THREADS(f)
1252
1253         if (p == NULL) {
                 /* fgets() stored nothing on this pass: hand back whatever
                  * an earlier pass left in buf. */
1254             clearerr(fp);
1255             if (PyErr_CheckSignals())
1256                 return NULL;
1257             v = PyString_FromStringAndSize(buf, pvfree - buf);
1258             return v;
1259         }
1260         /* fgets read *something* */
1261         p = memchr(pvfree, '\n', nfree);
1262         if (p != NULL) {
1263             /* Did the \n come from fgets or from us?
1264              * Since fgets stops at the first \n, and then writes
1265              * \0, if it's from fgets a \0 must be next.  But if
1266              * that's so, it could not have come from us, since
1267              * the \n's we filled the buffer with have only more
1268              * \n's to the right.
1269              */
1270             if (p+1 < pvend && *(p+1) == '\0') {
1271                 /* It's from fgets:  we win!  In particular,
1272                  * we haven't done any mallocs yet, and can
1273                  * build the final result on the first try.
1274                  */
1275                 ++p;                    /* include \n from fgets */
1276             }
1277             else {
1278                 /* Must be from us:  fgets didn't fill the
1279                  * buffer and didn't find a newline, so it
1280                  * must be the last and newline-free line of
1281                  * the file.
1282                  */
1283                 assert(p > pvfree && *(p-1) == '\0');
1284                 --p;                    /* don't include \0 from fgets */
1285             }
1286             v = PyString_FromStringAndSize(buf, p - buf);
1287             return v;
1288         }
1289         /* yuck:  fgets overwrote all the newlines, i.e. the entire
1290          * buffer.  So this line isn't over yet, or maybe it is but
1291          * we're exactly at EOF.  If we haven't already, try using the
1292          * rest of the stack buffer.
1293          */
1294         assert(*(pvend-1) == '\0');
1295         if (pvfree == buf) {
1296             pvfree = pvend - 1;                 /* overwrite trailing null */
1297             total_v_size = MAXBUFSIZE;
1298         }
1299         else
1300             break;
1301     }
1302
1303     /* The stack buffer isn't big enough; malloc a string object and read
1304      * into its buffer.
1305      */
1306     total_v_size = MAXBUFSIZE << 1;
1307     v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
1308     if (v == NULL)
1309         return v;
1310     /* copy over everything except the last null byte */
1311     memcpy(BUF(v), buf, MAXBUFSIZE-1);
1312     pvfree = BUF(v) + MAXBUFSIZE - 1;
1313
1314     /* Keep reading stuff into v; if it ever ends successfully, break
1315      * after setting p one beyond the end of the line.  The code here is
1316      * very much like the code above, except reads into v's buffer; see
1317      * the code above for detailed comments about the logic.
1318      */
1319     for (;;) {
1320         FILE_BEGIN_ALLOW_THREADS(f)
1321         pvend = BUF(v) + total_v_size;
1322         nfree = pvend - pvfree;
1323         memset(pvfree, '\n', nfree);
1324         assert(nfree < INT_MAX);
1325         p = fgets(pvfree, (int)nfree, fp);
1326         FILE_END_ALLOW_THREADS(f)
1327
1328         if (p == NULL) {
1329             clearerr(fp);
1330             if (PyErr_CheckSignals()) {
1331                 Py_DECREF(v);
1332                 return NULL;
1333             }
                 /* Nothing new was stored: the line ends where this pass
                  * started. */
1334             p = pvfree;
1335             break;
1336         }
1337         p = memchr(pvfree, '\n', nfree);
1338         if (p != NULL) {
1339             if (p+1 < pvend && *(p+1) == '\0') {
1340                 /* \n came from fgets */
1341                 ++p;
1342                 break;
1343             }
1344             /* \n came from us; last line of file, no newline */
1345             assert(p > pvfree && *(p-1) == '\0');
1346             --p;
1347             break;
1348         }
1349         /* expand buffer and try again */
1350         assert(*(pvend-1) == '\0');
1351         increment = total_v_size >> 2;          /* mild exponential growth */
1352         prev_v_size = total_v_size;
1353         total_v_size += increment;
1354         /* check for overflow */
1355         if (total_v_size <= prev_v_size ||
1356             total_v_size > PY_SSIZE_T_MAX) {
1357             PyErr_SetString(PyExc_OverflowError,
1358                 "line is longer than a Python string can hold");
1359             Py_DECREF(v);
1360             return NULL;
1361         }
             /* NOTE(review): the size is cast to int here (and for fgets()
              * above) although the overflow check compares against
              * PY_SSIZE_T_MAX; where int is narrower than Py_ssize_t the cast
              * could truncate for very long lines, and the INT_MAX assert is
              * only active in debug builds - confirm. */
1362         if (_PyString_Resize(&v, (int)total_v_size) < 0)
1363             return NULL;
1364         /* overwrite the trailing null byte */
1365         pvfree = BUF(v) + (prev_v_size - 1);
1366     }
         /* Shrink v so that it ends exactly at p. */
1367     if (BUF(v) + total_v_size != p && _PyString_Resize(&v, p - BUF(v)))
1368         return NULL;
1369     return v;
1370 #undef INITBUFSIZE
1371 #undef MAXBUFSIZE
1372 }
1373 #endif  /* ifdef USE_FGETS_IN_GETLINE */
1374
1375 /* Internal routine to get a line.
1376    Size argument interpretation:
1377    > 0: max length;
1378    <= 0: read arbitrary line
1379 */
1380
1381 static PyObject *
1382 get_line(PyFileObject *f, int n)
1383 {
         /* Read one line from f's stream and return it as a new string.
          *
          * f - file object; its f_fp stream is read, and its f_newlinetypes
          *     and f_skipnextlf fields are updated after each read pass.
          * n - maximum number of bytes to return when positive; zero or
          *     negative means no limit.
          *
          * In universal-newline mode a '\r' is stored as '\n' and a '\n'
          * directly following it is dropped, while the kinds of line endings
          * seen are accumulated in the NEWLINE_* bits.  When
          * USE_FGETS_IN_GETLINE is defined, unlimited reads without universal
          * newlines are delegated to getline_via_fgets().
          *
          * Returns NULL on allocation failure, on a stream error (IOError
          * from errno), when PyErr_CheckSignals() reports an error at EOF, or
          * with OverflowError when the line cannot fit in a string object.
          */
1384     FILE *fp = f->f_fp;
1385     int c;
1386     char *buf, *end;
1387     size_t total_v_size;        /* total # of slots in buffer */
1388     size_t used_v_size;         /* # used slots in buffer */
1389     size_t increment;       /* amount to increment the buffer */
1390     PyObject *v;
         /* Local copies of the newline state: the read loop below runs between
            the FILE_*_ALLOW_THREADS macros and writes the values back once it
            is done. */
1391     int newlinetypes = f->f_newlinetypes;
1392     int skipnextlf = f->f_skipnextlf;
1393     int univ_newline = f->f_univ_newline;
1394
1395 #if defined(USE_FGETS_IN_GETLINE)
1396     if (n <= 0 && !univ_newline )
1397         return getline_via_fgets(f, fp);
1398 #endif
         /* With a limit the buffer is exactly n bytes; otherwise start at 100
            and grow as needed. */
1399     total_v_size = n > 0 ? n : 100;
1400     v = PyString_FromStringAndSize((char *)NULL, total_v_size);
1401     if (v == NULL)
1402         return NULL;
1403     buf = BUF(v);
1404     end = buf + total_v_size;
1405
1406     for (;;) {
1407         FILE_BEGIN_ALLOW_THREADS(f)
1408         FLOCKFILE(fp);
1409         if (univ_newline) {
1410             c = 'x'; /* Shut up gcc warning */
1411             while ( buf != end && (c = GETC(fp)) != EOF ) {
1412                 if (skipnextlf ) {
1413                     skipnextlf = 0;
1414                     if (c == '\n') {
1415                         /* Seeing a \n here with
1416                          * skipnextlf true means we
1417                          * saw a \r before.
1418                          */
1419                         newlinetypes |= NEWLINE_CRLF;
1420                         c = GETC(fp);
1421                         if (c == EOF) break;
1422                     } else {
1423                         newlinetypes |= NEWLINE_CR;
1424                     }
1425                 }
1426                 if (c == '\r') {
1427                     skipnextlf = 1;
1428                     c = '\n';
1429                 } else if ( c == '\n')
1430                     newlinetypes |= NEWLINE_LF;
1431                 *buf++ = c;
1432                 if (c == '\n') break;
1433             }
                 /* EOF right after a '\r': that '\r' was a line ending of its
                    own. */
1434             if ( c == EOF && skipnextlf )
1435                 newlinetypes |= NEWLINE_CR;
1436         } else /* If not universal newlines use the normal loop */
1437         while ((c = GETC(fp)) != EOF &&
1438                (*buf++ = c) != '\n' &&
1439             buf != end)
1440             ;
1441         FUNLOCKFILE(fp);
1442         FILE_END_ALLOW_THREADS(f)
1443         f->f_newlinetypes = newlinetypes;
1444         f->f_skipnextlf = skipnextlf;
1445         if (c == '\n')
1446             break;
1447         if (c == EOF) {
1448             if (ferror(fp)) {
1449                 PyErr_SetFromErrno(PyExc_IOError);
1450                 clearerr(fp);
1451                 Py_DECREF(v);
1452                 return NULL;
1453             }
1454             clearerr(fp);
1455             if (PyErr_CheckSignals()) {
1456                 Py_DECREF(v);
1457                 return NULL;
1458             }
1459             break;
1460         }
1461         /* Must be because buf == end */
1462         if (n > 0)
1463             break;
1464         used_v_size = total_v_size;
1465         increment = total_v_size >> 2; /* mild exponential growth */
1466         total_v_size += increment;
1467         if (total_v_size > PY_SSIZE_T_MAX) {
1468             PyErr_SetString(PyExc_OverflowError,
1469                 "line is longer than a Python string can hold");
1470             Py_DECREF(v);
1471             return NULL;
1472         }
1473         if (_PyString_Resize(&v, total_v_size) < 0)
1474             return NULL;
             /* The resize may have moved the string: recompute both pointers
                from the new base. */
1475         buf = BUF(v) + used_v_size;
1476         end = BUF(v) + total_v_size;
1477     }
1478
         /* Trim the string to the bytes actually stored. */
1479     used_v_size = buf - BUF(v);
1480     if (used_v_size != total_v_size && _PyString_Resize(&v, used_v_size))
1481         return NULL;
1482     return v;
1483 }
1484
1485 /* External C interface */
1486
1487 PyObject *
1488 PyFile_GetLine(PyObject *f, int n)
1489 {
1490     PyObject *result;
1491
1492     if (f == NULL) {
1493         PyErr_BadInternalCall();
1494         return NULL;
1495     }
1496
1497     if (PyFile_Check(f)) {
1498         PyFileObject *fo = (PyFileObject *)f;
1499         if (fo->f_fp == NULL)
1500             return err_closed();
1501         if (!fo->readable)
1502             return err_mode("reading");
1503         /* refuse to mix with f.next() */
1504         if (fo->f_buf != NULL &&
1505             (fo->f_bufend - fo->f_bufptr) > 0 &&
1506             fo->f_buf[0] != '\0')
1507             return err_iterbuffered();
1508         result = get_line(fo, n);
1509     }
1510     else {
1511         PyObject *reader;
1512         PyObject *args;
1513
1514         reader = PyObject_GetAttrString(f, "readline");
1515         if (reader == NULL)
1516             return NULL;
1517         if (n <= 0)
1518             args = PyTuple_New(0);
1519         else
1520             args = Py_BuildValue("(i)", n);
1521         if (args == NULL) {
1522             Py_DECREF(reader);
1523             return NULL;
1524         }
1525         result = PyEval_CallObject(reader, args);
1526         Py_DECREF(reader);
1527         Py_DECREF(args);
1528         if (result != NULL && !PyString_Check(result) &&
1529             !PyUnicode_Check(result)) {
1530             Py_DECREF(result);
1531             result = NULL;
1532             PyErr_SetString(PyExc_TypeError,
1533                        "object.readline() returned non-string");
1534         }
1535     }
1536
1537     if (n < 0 && result != NULL && PyString_Check(result)) {
1538         char *s = PyString_AS_STRING(result);
1539         Py_ssize_t len = PyString_GET_SIZE(result);
1540         if (len == 0) {
1541             Py_DECREF(result);
1542             result = NULL;
1543             PyErr_SetString(PyExc_EOFError,
1544                             "EOF when reading a line");
1545         }
1546         else if (s[len-1] == '\n') {
1547             if (result->ob_refcnt == 1) {
1548                 if (_PyString_Resize(&result, len-1))
1549                     return NULL;
1550             }
1551             else {
1552                 PyObject *v;
1553                 v = PyString_FromStringAndSize(s, len-1);
1554                 Py_DECREF(result);
1555                 result = v;
1556             }
1557         }
1558     }
1559 #ifdef Py_USING_UNICODE
1560     if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1561         Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1562         Py_ssize_t len = PyUnicode_GET_SIZE(result);
1563         if (len == 0) {
1564             Py_DECREF(result);
1565             result = NULL;
1566             PyErr_SetString(PyExc_EOFError,
1567                             "EOF when reading a line");
1568         }
1569         else if (s[len-1] == '\n') {
1570             if (result->ob_refcnt == 1)
1571                 PyUnicode_Resize(&result, len-1);
1572             else {
1573                 PyObject *v;
1574                 v = PyUnicode_FromUnicode(s, len-1);
1575                 Py_DECREF(result);
1576                 result = v;
1577             }
1578         }
1579     }
1580 #endif
1581     return result;
1582 }
1583
1584 /* Python method */
1585
1586 static PyObject *
1587 file_readline(PyFileObject *f, PyObject *args)
1588 {
         /* Python method readline([size]).
          *
          * args - argument tuple holding an optional C int.  Zero yields an
          *        empty string without touching the stream; a negative or
          *        missing value asks for a whole line; a positive value caps
          *        the length of the returned line.
          *
          * The actual reading is done by get_line().  The usual refusals
          * apply first: closed file, file not readable, data pending in the
          * iteration buffer.
          */
1589     int limit = -1;
1590
1591     if (!f->f_fp)
1592         return err_closed();
1593     if (!f->readable)
1594         return err_mode("reading");
1595     /* data left in the next() read-ahead buffer: direct reads not allowed */
1596     if (f->f_buf != NULL &&
1597         (f->f_bufend - f->f_bufptr) > 0 &&
1598         f->f_buf[0] != '\0')
1599         return err_iterbuffered();
1600     if (!PyArg_ParseTuple(args, "|i:readline", &limit))
1601         return NULL;
1602     if (limit == 0)
1603         return PyString_FromString("");
1604     if (limit < 0)
1605         limit = 0;      /* get_line() reads an unlimited line for 0 */
1606     return get_line(f, limit);
1607 }
1608
1609 static PyObject *
1610 file_readlines(PyFileObject *f, PyObject *args)
1611 {
         /* Python method readlines([sizehint]): return a list of the lines
          * read from the file.
          *
          * args - argument tuple holding an optional C long.  When positive,
          *        reading stops once at least that many bytes have been
          *        consumed and the line in progress has been completed;
          *        otherwise reading continues until no more data arrives.
          *
          * Data is read in chunks, first into an on-stack buffer of SMALLCHUNK
          * bytes.  When a single line does not fit, a string object that
          * doubles in size on each retry takes over as the buffer.  Returns
          * the list, or NULL after an error, in which case the partially
          * built list is released.
          */
1612     long sizehint = 0;
1613     PyObject *list = NULL;
1614     PyObject *line;
1615     char small_buffer[SMALLCHUNK];
1616     char *buffer = small_buffer;
1617     size_t buffersize = SMALLCHUNK;
1618     PyObject *big_buffer = NULL;
1619     size_t nfilled = 0;
1620     size_t nread;
1621     size_t totalread = 0;
1622     char *p, *q, *end;
1623     int err;
1624     int shortread = 0;
1625
1626     if (f->f_fp == NULL)
1627         return err_closed();
1628     if (!f->readable)
1629         return err_mode("reading");
1630     /* refuse to mix with f.next() */
1631     if (f->f_buf != NULL &&
1632         (f->f_bufend - f->f_bufptr) > 0 &&
1633         f->f_buf[0] != '\0')
1634         return err_iterbuffered();
1635     if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1636         return NULL;
1637     if ((list = PyList_New(0)) == NULL)
1638         return NULL;
1639     for (;;) {
             /* After a short read no further read is attempted: the next pass
                behaves as if zero bytes had arrived. */
1640         if (shortread)
1641             nread = 0;
1642         else {
1643             FILE_BEGIN_ALLOW_THREADS(f)
1644             errno = 0;
1645             nread = Py_UniversalNewlineFread(buffer+nfilled,
1646                 buffersize-nfilled, f->f_fp, (PyObject *)f);
1647             FILE_END_ALLOW_THREADS(f)
1648             shortread = (nread < buffersize-nfilled);
1649         }
1650         if (nread == 0) {
                 /* Clearing sizehint keeps the code after the loop from
                    calling get_line() to finish the last line. */
1651             sizehint = 0;
1652             if (!ferror(f->f_fp))
1653                 break;
1654             PyErr_SetFromErrno(PyExc_IOError);
1655             clearerr(f->f_fp);
1656             goto error;
1657         }
1658         totalread += nread;
1659         p = (char *)memchr(buffer+nfilled, '\n', nread);
1660         if (p == NULL) {
1661             /* Need a larger buffer to fit this line */
1662             nfilled += nread;
1663             buffersize *= 2;
1664             if (buffersize > PY_SSIZE_T_MAX) {
1665                 PyErr_SetString(PyExc_OverflowError,
1666                 "line is longer than a Python string can hold");
1667                 goto error;
1668             }
1669             if (big_buffer == NULL) {
1670                 /* Create the big buffer */
1671                 big_buffer = PyString_FromStringAndSize(
1672                     NULL, buffersize);
1673                 if (big_buffer == NULL)
1674                     goto error;
1675                 buffer = PyString_AS_STRING(big_buffer);
1676                 memcpy(buffer, small_buffer, nfilled);
1677             }
1678             else {
1679                 /* Grow the big buffer */
1680                 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1681                     goto error;
1682                 buffer = PyString_AS_STRING(big_buffer);
1683             }
1684             continue;
1685         }
             /* At least one newline is present: q walks over the start of
                each line, p over the newline that ends it. */
1686         end = buffer+nfilled+nread;
1687         q = buffer;
1688         do {
1689             /* Process complete lines */
1690             p++;
1691             line = PyString_FromStringAndSize(q, p-q);
1692             if (line == NULL)
1693                 goto error;
1694             err = PyList_Append(list, line);
1695             Py_DECREF(line);
1696             if (err != 0)
1697                 goto error;
1698             q = p;
1699             p = (char *)memchr(q, '\n', end-q);
1700         } while (p != NULL);
1701         /* Move the remaining incomplete line to the start */
1702         nfilled = end-q;
1703         memmove(buffer, q, nfilled);
1704         if (sizehint > 0)
1705             if (totalread >= (size_t)sizehint)
1706                 break;
1707     }
1708     if (nfilled != 0) {
1709         /* Partial last line */
1710         line = PyString_FromStringAndSize(buffer, nfilled);
1711         if (line == NULL)
1712             goto error;
1713         if (sizehint > 0) {
1714             /* Need to complete the last line */
1715             PyObject *rest = get_line(f, 0);
1716             if (rest == NULL) {
1717                 Py_DECREF(line);
1718                 goto error;
1719             }
1720             PyString_Concat(&line, rest);
1721             Py_DECREF(rest);
1722             if (line == NULL)
1723                 goto error;
1724         }
1725         err = PyList_Append(list, line);
1726         Py_DECREF(line);
1727         if (err != 0)
1728             goto error;
1729     }
1730
     /* Shared exit: big_buffer is released on both the success and the error
        path; on error, list has already been cleared so NULL is returned. */
1731 cleanup:
1732     Py_XDECREF(big_buffer);
1733     return list;
1734
1735 error:
1736     Py_CLEAR(list);
1737     goto cleanup;
1738 }
1739
1740 static PyObject *
1741 file_write(PyFileObject *f, PyObject *args)
1742 {
         /* Python method write(data): write data to the stream with fwrite().
          *
          * args - argument tuple with one object.  For a file flagged
          *        f_binary it is parsed with the "s*" buffer format.
          *        Otherwise a str is written as is, a unicode object is
          *        first encoded (f_encoding or the default encoding, f_errors
          *        or "strict"), and any other object must provide a character
          *        buffer.
          *
          * Resets f_softspace to 0.  Returns None when every byte was
          * written, or NULL: closed file, file not writable, unusable
          * argument, encoding failure, or a short write (IOError from
          * errno).
          */
1743     Py_buffer pbuf;
1744     const char *s;
1745     Py_ssize_t n, n2;
1746     PyObject *encoded = NULL;
1747
1748     if (f->f_fp == NULL)
1749         return err_closed();
1750     if (!f->writable)
1751         return err_mode("writing");
1752     if (f->f_binary) {
1753         if (!PyArg_ParseTuple(args, "s*", &pbuf))
1754             return NULL;
1755         s = pbuf.buf;
1756         n = pbuf.len;
1757     }
1758     else {
1759         const char *encoding, *errors;
1760         PyObject *text;
1761         if (!PyArg_ParseTuple(args, "O", &text))
1762             return NULL;
1763
1764         if (PyString_Check(text)) {
1765             s = PyString_AS_STRING(text);
1766             n = PyString_GET_SIZE(text);
1767         } else if (PyUnicode_Check(text)) {
1768             if (f->f_encoding != Py_None)
1769                 encoding = PyString_AS_STRING(f->f_encoding);
1770             else
1771                 encoding = PyUnicode_GetDefaultEncoding();
1772             if (f->f_errors != Py_None)
1773                 errors = PyString_AS_STRING(f->f_errors);
1774             else
1775                 errors = "strict";
                 /* s points into the encoded string, which is kept alive
                    until after the fwrite() below. */
1776             encoded = PyUnicode_AsEncodedString(text, encoding, errors);
1777             if (encoded == NULL)
1778                 return NULL;
1779             s = PyString_AS_STRING(encoded);
1780             n = PyString_GET_SIZE(encoded);
1781         } else {
1782             if (PyObject_AsCharBuffer(text, &s, &n))
1783                 return NULL;
1784         }
1785     }
1786     f->f_softspace = 0;
1787     FILE_BEGIN_ALLOW_THREADS(f)
1788     errno = 0;
1789     n2 = fwrite(s, 1, n, f->f_fp);
1790     FILE_END_ALLOW_THREADS(f)
         /* Release whichever resource backed s: the encoded temporary and/or
            the buffer view taken in binary mode. */
1791     Py_XDECREF(encoded);
1792     if (f->f_binary)
1793         PyBuffer_Release(&pbuf);
1794     if (n2 != n) {
1795         PyErr_SetFromErrno(PyExc_IOError);
1796         clearerr(f->f_fp);
1797         return NULL;
1798     }
1799     Py_INCREF(Py_None);
1800     return Py_None;
1801 }
1802
1803 static PyObject *
1804 file_writelines(PyFileObject *f, PyObject *seq)
1805 {
1806 #define CHUNKSIZE 1000
1807     PyObject *list, *line;
1808     PyObject *it;       /* iter(seq) */
1809     PyObject *result;
1810     int index, islist;
1811     Py_ssize_t i, j, nwritten, len;
1812
1813     assert(seq != NULL);
1814     if (f->f_fp == NULL)
1815         return err_closed();
1816     if (!f->writable)
1817         return err_mode("writing");
1818
1819     result = NULL;
1820     list = NULL;
1821     islist = PyList_Check(seq);
1822     if  (islist)
1823         it = NULL;
1824     else {
1825         it = PyObject_GetIter(seq);
1826         if (it == NULL) {
1827             PyErr_SetString(PyExc_TypeError,
1828                 "writelines() requires an iterable argument");
1829             return NULL;
1830         }
1831         /* From here on, fail by going to error, to reclaim "it". */
1832         list = PyList_New(CHUNKSIZE);
1833         if (list == NULL)
1834             goto error;
1835     }
1836
1837     /* Strategy: slurp CHUNKSIZE lines into a private list,
1838        checking that they are all strings, then write that list
1839        without holding the interpreter lock, then come back for more. */
1840     for (index = 0; ; index += CHUNKSIZE) {
1841         if (islist) {
1842             Py_XDECREF(list);
1843             list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1844             if (list == NULL)
1845                 goto error;
1846             j = PyList_GET_SIZE(list);
1847         }
1848         else {
1849             for (j = 0; j < CHUNKSIZE; j++) {
1850                 line = PyIter_Next(it);
1851                 if (line == NULL) {
1852                     if (PyErr_Occurred())
1853                         goto error;
1854                     break;
1855                 }
1856                 PyList_SetItem(list, j, line);
1857             }
1858             /* The iterator might have closed the file on us. */
1859             if (f->f_fp == NULL) {
1860                 err_closed();
1861                 goto error;
1862             }
1863         }
1864         if (j == 0)
1865             break;
1866
1867         /* Check that all entries are indeed strings. If not,
1868            apply the same rules as for file.write() and
1869            convert the results to strings. This is slow, but
1870            seems to be the only way since all conversion APIs
1871            could potentially execute Python code. */
1872         for (i = 0; i < j; i++) {
1873             PyObject *v = PyList_GET_ITEM(list, i);
1874             if (!PyString_Check(v)) {
1875                 const char *buffer;
1876                 if (((f->f_binary &&
1877                       PyObject_AsReadBuffer(v,
1878                           (const void**)&buffer,
1879                                         &len)) ||
1880                      PyObject_AsCharBuffer(v,
1881                                            &buffer,
1882                                            &len))) {
1883                     PyErr_SetString(PyExc_TypeError,
1884             "writelines() argument must be a sequence of strings");
1885                             goto error;
1886                 }
1887                 line = PyString_FromStringAndSize(buffer,
1888                                                   len);
1889                 if (line == NULL)
1890                     goto error;
1891                 Py_DECREF(v);
1892                 PyList_SET_ITEM(list, i, line);
1893             }
1894         }
1895
1896         /* Since we are releasing the global lock, the
1897            following code may *not* execute Python code. */
1898         f->f_softspace = 0;
1899         FILE_BEGIN_ALLOW_THREADS(f)
1900         errno = 0;
1901         for (i = 0; i < j; i++) {
1902             line = PyList_GET_ITEM(list, i);
1903             len = PyString_GET_SIZE(line);
1904             nwritten = fwrite(PyString_AS_STRING(line),
1905                               1, len, f->f_fp);
1906             if (nwritten != len) {
1907                 FILE_ABORT_ALLOW_THREADS(f)
1908                 PyErr_SetFromErrno(PyExc_IOError);
1909                 clearerr(f->f_fp);
1910                 goto error;
1911             }
1912         }
1913         FILE_END_ALLOW_THREADS(f)
1914
1915         if (j < CHUNKSIZE)
1916             break;
1917     }
1918
1919     Py_INCREF(Py_None);
1920     result = Py_None;
1921   error:
1922     Py_XDECREF(list);
1923     Py_XDECREF(it);
1924     return result;
1925 #undef CHUNKSIZE
1926 }
1927
1928 static PyObject *
1929 file_self(PyFileObject *f)
1930 {
1931     if (f->f_fp == NULL)
1932         return err_closed();
1933     Py_INCREF(f);
1934     return (PyObject *)f;
1935 }
1936
1937 static PyObject *
1938 file_xreadlines(PyFileObject *f)
1939 {
1940     if (PyErr_WarnPy3k("f.xreadlines() not supported in 3.x, "
1941                        "try 'for line in f' instead", 1) < 0)
1942            return NULL;
1943     return file_self(f);
1944 }
1945
1946 static PyObject *
1947 file_exit(PyObject *f, PyObject *args)
1948 {
1949     PyObject *ret = PyObject_CallMethod(f, "close", NULL);
1950     if (!ret)
1951         /* If error occurred, pass through */
1952         return NULL;
1953     Py_DECREF(ret);
1954     /* We cannot return the result of close since a true
1955      * value will be interpreted as "yes, swallow the
1956      * exception if one was raised inside the with block". */
1957     Py_RETURN_NONE;
1958 }
1959
/* Docstrings for the file methods; each one is referenced by name from
   the file_methods table. */

PyDoc_STRVAR(readline_doc,
"readline([size]) -> next line from the file, as a string.\n"
"\n"
"Retain newline.  A non-negative size argument limits the maximum\n"
"number of bytes to return (an incomplete line may be returned then).\n"
"Return an empty string at EOF.");

PyDoc_STRVAR(read_doc,
"read([size]) -> read at most size bytes, returned as a string.\n"
"\n"
"If the size argument is negative or omitted, read until EOF is reached.\n"
"Notice that when in non-blocking mode, less data than what was requested\n"
"may be returned, even if no size parameter was given.");

PyDoc_STRVAR(write_doc,
"write(str) -> None.  Write string str to file.\n"
"\n"
"Note that due to buffering, flush() or close() may be needed before\n"
"the file on disk reflects the data written.");
1979
1980 PyDoc_STRVAR(fileno_doc,
1981 "fileno() -> integer \"file descriptor\".\n"
1982 "\n"
1983 "This is needed for lower-level file interfaces, such os.read().");
1984
/* Docstrings for the remaining file methods.  truncate_doc only exists
   when HAVE_FTRUNCATE is defined, matching the conditional "truncate"
   entry in file_methods. */

PyDoc_STRVAR(seek_doc,
"seek(offset[, whence]) -> None.  Move to new file position.\n"
"\n"
"Argument offset is a byte count.  Optional argument whence defaults to\n"
"0 (offset from start of file, offset should be >= 0); other values are 1\n"
"(move relative to current position, positive or negative), and 2 (move\n"
"relative to end of file, usually negative, although many platforms allow\n"
"seeking beyond the end of a file).  If the file is opened in text mode,\n"
"only offsets returned by tell() are legal.  Use of other offsets causes\n"
"undefined behavior."
"\n"
"Note that not all file objects are seekable.");

#ifdef HAVE_FTRUNCATE
PyDoc_STRVAR(truncate_doc,
"truncate([size]) -> None.  Truncate the file to at most size bytes.\n"
"\n"
"Size defaults to the current file position, as returned by tell().");
#endif

PyDoc_STRVAR(tell_doc,
"tell() -> current file position, an integer (may be a long integer).");

PyDoc_STRVAR(readinto_doc,
"readinto() -> Undocumented.  Don't use this; it may go away.");

PyDoc_STRVAR(readlines_doc,
"readlines([size]) -> list of strings, each a line from the file.\n"
"\n"
"Call readline() repeatedly and return a list of the lines so read.\n"
"The optional size argument, if given, is an approximate bound on the\n"
"total number of bytes in the lines returned.");

PyDoc_STRVAR(xreadlines_doc,
"xreadlines() -> returns self.\n"
"\n"
"For backward compatibility. File objects now include the performance\n"
"optimizations previously implemented in the xreadlines module.");

PyDoc_STRVAR(writelines_doc,
"writelines(sequence_of_strings) -> None.  Write the strings to the file.\n"
"\n"
"Note that newlines are not added.  The sequence can be any iterable object\n"
"producing strings. This is equivalent to calling write() for each string.");

PyDoc_STRVAR(flush_doc,
"flush() -> None.  Flush the internal I/O buffer.");

PyDoc_STRVAR(close_doc,
"close() -> None or (perhaps) an integer.  Close the file.\n"
"\n"
"Sets data attribute .closed to True.  A closed file cannot be used for\n"
"further I/O operations.  close() may be called more than once without\n"
"error.  Some kinds of file objects (for example, opened by popen())\n"
"may return an exit status upon closing.");

PyDoc_STRVAR(isatty_doc,
"isatty() -> true or false.  True if the file is connected to a tty device.");

PyDoc_STRVAR(enter_doc,
             "__enter__() -> self.");

PyDoc_STRVAR(exit_doc,
             "__exit__(*excinfo) -> None.  Closes the file.");
2049
/* Method table for file objects, installed through PyFile_Type's
   tp_methods slot.  Each entry is {Python name, C implementation,
   calling-convention flag, docstring}; the list ends with a NULL
   sentinel.  "truncate" is only present when HAVE_FTRUNCATE is defined.
   Note that __enter__ shares its implementation (file_self) with the
   type's tp_iter slot. */
static PyMethodDef file_methods[] = {
    {"readline",  (PyCFunction)file_readline, METH_VARARGS, readline_doc},
    {"read",      (PyCFunction)file_read,     METH_VARARGS, read_doc},
    {"write",     (PyCFunction)file_write,    METH_VARARGS, write_doc},
    {"fileno",    (PyCFunction)file_fileno,   METH_NOARGS,  fileno_doc},
    {"seek",      (PyCFunction)file_seek,     METH_VARARGS, seek_doc},
#ifdef HAVE_FTRUNCATE
    {"truncate",  (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
#endif
    {"tell",      (PyCFunction)file_tell,     METH_NOARGS,  tell_doc},
    {"readinto",  (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
    {"readlines", (PyCFunction)file_readlines, METH_VARARGS, readlines_doc},
    {"xreadlines",(PyCFunction)file_xreadlines, METH_NOARGS, xreadlines_doc},
    {"writelines",(PyCFunction)file_writelines, METH_O,     writelines_doc},
    {"flush",     (PyCFunction)file_flush,    METH_NOARGS,  flush_doc},
    {"close",     (PyCFunction)file_close,    METH_NOARGS,  close_doc},
    {"isatty",    (PyCFunction)file_isatty,   METH_NOARGS,  isatty_doc},
    {"__enter__", (PyCFunction)file_self,     METH_NOARGS,  enter_doc},
    {"__exit__",  (PyCFunction)file_exit,     METH_VARARGS, exit_doc},
    {NULL,            NULL}             /* sentinel */
};
2071
/* Byte offset of field x inside PyFileObject. */
#define OFF(x) offsetof(PyFileObject, x)

/* Read-only (RO) object-valued attributes exposed directly from
   PyFileObject fields; installed through PyFile_Type's tp_members slot.
   Each entry is {name, type code, field offset, flags, docstring}. */
static PyMemberDef file_memberlist[] = {
    {"mode",            T_OBJECT,       OFF(f_mode),    RO,
     "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
    {"name",            T_OBJECT,       OFF(f_name),    RO,
     "file name"},
    {"encoding",        T_OBJECT,       OFF(f_encoding),        RO,
     "file encoding"},
    {"errors",          T_OBJECT,       OFF(f_errors),  RO,
     "Unicode error handler"},
    /* getattr(f, "closed") is implemented without this table */
    {NULL}      /* Sentinel */
};
2086
2087 static PyObject *
2088 get_closed(PyFileObject *f, void *closure)
2089 {
2090     return PyBool_FromLong((long)(f->f_fp == 0));
2091 }
2092 static PyObject *
2093 get_newlines(PyFileObject *f, void *closure)
2094 {
2095     switch (f->f_newlinetypes) {
2096     case NEWLINE_UNKNOWN:
2097         Py_INCREF(Py_None);
2098         return Py_None;
2099     case NEWLINE_CR:
2100         return PyString_FromString("\r");
2101     case NEWLINE_LF:
2102         return PyString_FromString("\n");
2103     case NEWLINE_CR|NEWLINE_LF:
2104         return Py_BuildValue("(ss)", "\r", "\n");
2105     case NEWLINE_CRLF:
2106         return PyString_FromString("\r\n");
2107     case NEWLINE_CR|NEWLINE_CRLF:
2108         return Py_BuildValue("(ss)", "\r", "\r\n");
2109     case NEWLINE_LF|NEWLINE_CRLF:
2110         return Py_BuildValue("(ss)", "\n", "\r\n");
2111     case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
2112         return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
2113     default:
2114         PyErr_Format(PyExc_SystemError,
2115                      "Unknown newlines value 0x%x\n",
2116                      f->f_newlinetypes);
2117         return NULL;
2118     }
2119 }
2120
2121 static PyObject *
2122 get_softspace(PyFileObject *f, void *closure)
2123 {
2124     if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2125         return NULL;
2126     return PyInt_FromLong(f->f_softspace);
2127 }
2128
2129 static int
2130 set_softspace(PyFileObject *f, PyObject *value)
2131 {
2132     int new;
2133     if (PyErr_WarnPy3k("file.softspace not supported in 3.x", 1) < 0)
2134         return -1;
2135
2136     if (value == NULL) {
2137         PyErr_SetString(PyExc_TypeError,
2138                         "can't delete softspace attribute");
2139         return -1;
2140     }
2141
2142     new = PyInt_AsLong(value);
2143     if (new == -1 && PyErr_Occurred())
2144         return -1;
2145     f->f_softspace = new;
2146     return 0;
2147 }
2148
/* Computed attributes of file objects, installed through PyFile_Type's
   tp_getset slot.  Each entry is {name, getter, setter, docstring}; a
   NULL setter makes the attribute read-only.  Only "softspace" is
   writable.  The list ends with a zeroed sentinel entry. */
static PyGetSetDef file_getsetlist[] = {
    {"closed", (getter)get_closed, NULL, "True if the file is closed"},
    {"newlines", (getter)get_newlines, NULL,
     "end-of-line convention used in this file"},
    {"softspace", (getter)get_softspace, (setter)set_softspace,
     "flag indicating that a space needs to be printed; used by print"},
    {0},
};
2157
2158 static void
2159 drop_readahead(PyFileObject *f)
2160 {
2161     if (f->f_buf != NULL) {
2162         PyMem_Free(f->f_buf);
2163         f->f_buf = NULL;
2164     }
2165 }
2166
2167 /* Make sure that file has a readahead buffer with at least one byte
2168    (unless at EOF) and no more than bufsize.  Returns negative value on
2169    error, will set MemoryError if bufsize bytes cannot be allocated. */
2170 static int
2171 readahead(PyFileObject *f, int bufsize)
2172 {
2173     Py_ssize_t chunksize;
2174
2175     if (f->f_buf != NULL) {
2176         if( (f->f_bufend - f->f_bufptr) >= 1)
2177             return 0;
2178         else
2179             drop_readahead(f);
2180     }
2181     if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) {
2182         PyErr_NoMemory();
2183         return -1;
2184     }
2185     FILE_BEGIN_ALLOW_THREADS(f)
2186     errno = 0;
2187     chunksize = Py_UniversalNewlineFread(
2188         f->f_buf, bufsize, f->f_fp, (PyObject *)f);
2189     FILE_END_ALLOW_THREADS(f)
2190     if (chunksize == 0) {
2191         if (ferror(f->f_fp)) {
2192             PyErr_SetFromErrno(PyExc_IOError);
2193             clearerr(f->f_fp);
2194             drop_readahead(f);
2195             return -1;
2196         }
2197     }
2198     f->f_bufptr = f->f_buf;
2199     f->f_bufend = f->f_buf + chunksize;
2200     return 0;
2201 }
2202
2203 /* Used by file_iternext.  The returned string will start with 'skip'
2204    uninitialized bytes followed by the remainder of the line. Don't be
2205    horrified by the recursive call: maximum recursion depth is limited by
2206    logarithmic buffer growth to about 50 even when reading a 1gb line. */
2207
2208 static PyStringObject *
2209 readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
2210 {
2211     PyStringObject* s;
2212     char *bufptr;
2213     char *buf;
2214     Py_ssize_t len;
2215
2216     if (f->f_buf == NULL)
2217         if (readahead(f, bufsize) < 0)
2218             return NULL;
2219
2220     len = f->f_bufend - f->f_bufptr;
2221     if (len == 0)
2222         return (PyStringObject *)
2223             PyString_FromStringAndSize(NULL, skip);
2224     bufptr = (char *)memchr(f->f_bufptr, '\n', len);
2225     if (bufptr != NULL) {
2226         bufptr++;                               /* Count the '\n' */
2227         len = bufptr - f->f_bufptr;
2228         s = (PyStringObject *)
2229             PyString_FromStringAndSize(NULL, skip+len);
2230         if (s == NULL)
2231             return NULL;
2232         memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
2233         f->f_bufptr = bufptr;
2234         if (bufptr == f->f_bufend)
2235             drop_readahead(f);
2236     } else {
2237         bufptr = f->f_bufptr;
2238         buf = f->f_buf;
2239         f->f_buf = NULL;                /* Force new readahead buffer */
2240         assert(skip+len < INT_MAX);
2241         s = readahead_get_line_skip(
2242             f, (int)(skip+len), bufsize + (bufsize>>2) );
2243         if (s == NULL) {
2244             PyMem_Free(buf);
2245             return NULL;
2246         }
2247         memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
2248         PyMem_Free(buf);
2249     }
2250     return s;
2251 }
2252
2253 /* A larger buffer size may actually decrease performance. */
2254 #define READAHEAD_BUFSIZE 8192
2255
2256 static PyObject *
2257 file_iternext(PyFileObject *f)
2258 {
2259     PyStringObject* l;
2260
2261     if (f->f_fp == NULL)
2262         return err_closed();
2263     if (!f->readable)
2264         return err_mode("reading");
2265
2266     l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
2267     if (l == NULL || PyString_GET_SIZE(l) == 0) {
2268         Py_XDECREF(l);
2269         return NULL;
2270     }
2271     return (PyObject *)l;
2272 }
2273
2274
2275 static PyObject *
2276 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2277 {
2278     PyObject *self;
2279     static PyObject *not_yet_string;
2280
2281     assert(type != NULL && type->tp_alloc != NULL);
2282
2283     if (not_yet_string == NULL) {
2284         not_yet_string = PyString_InternFromString("<uninitialized file>");
2285         if (not_yet_string == NULL)
2286             return NULL;
2287     }
2288
2289     self = type->tp_alloc(type, 0);
2290     if (self != NULL) {
2291         /* Always fill in the name and mode, so that nobody else
2292            needs to special-case NULLs there. */
2293         Py_INCREF(not_yet_string);
2294         ((PyFileObject *)self)->f_name = not_yet_string;
2295         Py_INCREF(not_yet_string);
2296         ((PyFileObject *)self)->f_mode = not_yet_string;
2297         Py_INCREF(Py_None);
2298         ((PyFileObject *)self)->f_encoding = Py_None;
2299         Py_INCREF(Py_None);
2300         ((PyFileObject *)self)->f_errors = Py_None;
2301         ((PyFileObject *)self)->weakreflist = NULL;
2302         ((PyFileObject *)self)->unlocked_count = 0;
2303     }
2304     return self;
2305 }
2306
/* tp_init for file objects: open (or re-open) the file described by the
   arguments name, mode (default "r") and buffering (default -1).

   If the object already has an open FILE*, it is closed first.  On
   MS_WINDOWS the name is first tried with the "U" format; if that parse
   fails, the error is dropped and the generic path below is used.

   Returns 0 on success and -1 on failure. */
static int
file_init(PyObject *self, PyObject *args, PyObject *kwds)
{
    PyFileObject *foself = (PyFileObject *)self;
    int ret = 0;
    static char *kwlist[] = {"name", "mode", "buffering", 0};
    char *name = NULL;          /* encoded name from the "et" parse; freed at Done */
    char *mode = "r";
    int bufsize = -1;
    int wideargument = 0;       /* set once the "U" parse has accepted the name */
#ifdef MS_WINDOWS
    PyObject *po;
#endif

    assert(PyFile_Check(self));
    if (foself->f_fp != NULL) {
        /* Have to close the existing file first. */
        PyObject *closeresult = file_close(foself);
        if (closeresult == NULL)
            return -1;
        Py_DECREF(closeresult);
    }

#ifdef MS_WINDOWS
    if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
                                    kwlist, &po, &mode, &bufsize)) {
        wideargument = 1;
        if (fill_file_fields(foself, NULL, po, mode,
                             fclose) == NULL)
            goto Error;
    } else {
        /* Drop the argument parsing error as narrow
           strings are also valid. */
        PyErr_Clear();
    }
#endif

    if (!wideargument) {
        PyObject *o_name;

        /* First parse: obtain the name as a C string encoded with
           Py_FileSystemDefaultEncoding.  Nothing needs freeing yet if
           this fails, hence the plain return. */
        if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
                                         Py_FileSystemDefaultEncoding,
                                         &name,
                                         &mode, &bufsize))
            return -1;

        /* We parse again to get the name as a PyObject */
        if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
                                         kwlist, &o_name, &mode,
                                         &bufsize))
            goto Error;

        if (fill_file_fields(foself, NULL, o_name, mode,
                             fclose) == NULL)
            goto Error;
    }
    /* NOTE(review): on the wide-argument path 'name' is still NULL here;
       presumably open_the_file() then uses the name stored by
       fill_file_fields() -- confirm against open_the_file(). */
    if (open_the_file(foself, name, mode) == NULL)
        goto Error;
    foself->f_setbuf = NULL;
    PyFile_SetBufSize(self, bufsize);
    goto Done;

Error:
    ret = -1;
    /* fall through */
Done:
    PyMem_Free(name); /* free the encoded string */
    return ret;
}
2376
/* Docstring of the file type (PyFile_Type's tp_doc slot).  It is built
   from two adjacent PyDoc_STR() pieces. */
PyDoc_VAR(file_doc) =
PyDoc_STR(
"file(name[, mode[, buffering]]) -> file object\n"
"\n"
"Open a file.  The mode can be 'r', 'w' or 'a' for reading (default),\n"
"writing or appending.  The file will be created if it doesn't exist\n"
"when opened for writing or appending; it will be truncated when\n"
"opened for writing.  Add a 'b' to the mode for binary files.\n"
"Add a '+' to the mode to allow simultaneous reading and writing.\n"
"If the buffering argument is given, 0 means unbuffered, 1 means line\n"
"buffered, and larger numbers specify the buffer size.  The preferred way\n"
"to open a file is with the builtin open() function.\n"
)
PyDoc_STR(
"Add a 'U' to mode to open the file for input with universal newline\n"
"support.  Any line ending in the input file will be seen as a '\\n'\n"
"in Python.  Also, a file so opened gains the attribute 'newlines';\n"
"the value for this attribute is one of None (no newline read yet),\n"
"'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
"\n"
"'U' cannot be combined with 'w' or '+' mode.\n"
);
2399
/* Type object for "file".  The type can be subclassed
   (Py_TPFLAGS_BASETYPE) and supports weak references through the
   weakreflist field.  A file object is its own iterator: tp_iter is
   file_self and tp_iternext is file_iternext. */
PyTypeObject PyFile_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "file",
    sizeof(PyFileObject),
    0,
    (destructor)file_dealloc,                   /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    (reprfunc)file_repr,                        /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    /* softspace is writable:  we must supply tp_setattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
    file_doc,                                   /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    offsetof(PyFileObject, weakreflist),        /* tp_weaklistoffset */
    (getiterfunc)file_self,                     /* tp_iter */
    (iternextfunc)file_iternext,                /* tp_iternext */
    file_methods,                               /* tp_methods */
    file_memberlist,                            /* tp_members */
    file_getsetlist,                            /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    file_init,                                  /* tp_init */
    PyType_GenericAlloc,                        /* tp_alloc */
    file_new,                                   /* tp_new */
    PyObject_Del,                           /* tp_free */
};
2442
2443 /* Interface for the 'soft space' between print items. */
2444
2445 int
2446 PyFile_SoftSpace(PyObject *f, int newflag)
2447 {
2448     long oldflag = 0;
2449     if (f == NULL) {
2450         /* Do nothing */
2451     }
2452     else if (PyFile_Check(f)) {
2453         oldflag = ((PyFileObject *)f)->f_softspace;
2454         ((PyFileObject *)f)->f_softspace = newflag;
2455     }
2456     else {
2457         PyObject *v;
2458         v = PyObject_GetAttrString(f, "softspace");
2459         if (v == NULL)
2460             PyErr_Clear();
2461         else {
2462             if (PyInt_Check(v))
2463                 oldflag = PyInt_AsLong(v);
2464             assert(oldflag < INT_MAX);
2465             Py_DECREF(v);
2466         }
2467         v = PyInt_FromLong((long)newflag);
2468         if (v == NULL)
2469             PyErr_Clear();
2470         else {
2471             if (PyObject_SetAttrString(f, "softspace", v) != 0)
2472                 PyErr_Clear();
2473             Py_DECREF(v);
2474         }
2475     }
2476     return (int)oldflag;
2477 }
2478
2479 /* Interfaces to write objects/strings to file-like objects */
2480
2481 int
2482 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2483 {
2484     PyObject *writer, *value, *args, *result;
2485     if (f == NULL) {
2486         PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2487         return -1;
2488     }
2489     else if (PyFile_Check(f)) {
2490         PyFileObject *fobj = (PyFileObject *) f;
2491 #ifdef Py_USING_UNICODE
2492         PyObject *enc = fobj->f_encoding;
2493         int result;
2494 #endif
2495         if (fobj->f_fp == NULL) {
2496             err_closed();
2497             return -1;
2498         }
2499 #ifdef Py_USING_UNICODE
2500         if ((flags & Py_PRINT_RAW) &&
2501             PyUnicode_Check(v) && enc != Py_None) {
2502             char *cenc = PyString_AS_STRING(enc);
2503             char *errors = fobj->f_errors == Py_None ?
2504               "strict" : PyString_AS_STRING(fobj->f_errors);
2505             value = PyUnicode_AsEncodedString(v, cenc, errors);
2506             if (value == NULL)
2507                 return -1;
2508         } else {
2509             value = v;
2510             Py_INCREF(value);
2511         }
2512         result = file_PyObject_Print(value, fobj, flags);
2513         Py_DECREF(value);
2514         return result;
2515 #else
2516         return file_PyObject_Print(v, fobj, flags);
2517 #endif
2518     }
2519     writer = PyObject_GetAttrString(f, "write");
2520     if (writer == NULL)
2521         return -1;
2522     if (flags & Py_PRINT_RAW) {
2523         if (PyUnicode_Check(v)) {
2524             value = v;
2525             Py_INCREF(value);
2526         } else
2527             value = PyObject_Str(v);
2528     }
2529     else
2530         value = PyObject_Repr(v);
2531     if (value == NULL) {
2532         Py_DECREF(writer);
2533         return -1;
2534     }
2535     args = PyTuple_Pack(1, value);
2536     if (args == NULL) {
2537         Py_DECREF(value);
2538         Py_DECREF(writer);
2539         return -1;
2540     }
2541     result = PyEval_CallObject(writer, args);
2542     Py_DECREF(args);
2543     Py_DECREF(value);
2544     Py_DECREF(writer);
2545     if (result == NULL)
2546         return -1;
2547     Py_DECREF(result);
2548     return 0;
2549 }
2550
2551 int
2552 PyFile_WriteString(const char *s, PyObject *f)
2553 {
2554
2555     if (f == NULL) {
2556         /* Should be caused by a pre-existing error */
2557         if (!PyErr_Occurred())
2558             PyErr_SetString(PyExc_SystemError,
2559                             "null file for PyFile_WriteString");
2560         return -1;
2561     }
2562     else if (PyFile_Check(f)) {
2563         PyFileObject *fobj = (PyFileObject *) f;
2564         FILE *fp = PyFile_AsFile(f);
2565         if (fp == NULL) {
2566             err_closed();
2567             return -1;
2568         }
2569         FILE_BEGIN_ALLOW_THREADS(fobj)
2570         fputs(s, fp);
2571         FILE_END_ALLOW_THREADS(fobj)
2572         return 0;
2573     }
2574     else if (!PyErr_Occurred()) {
2575         PyObject *v = PyString_FromString(s);
2576         int err;
2577         if (v == NULL)
2578             return -1;
2579         err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2580         Py_DECREF(v);
2581         return err;
2582     }
2583     else
2584         return -1;
2585 }
2586
2587 /* Try to get a file-descriptor from a Python object.  If the object
2588    is an integer or long integer, its value is returned.  If not, the
2589    object's fileno() method is called if it exists; the method must return
2590    an integer or long integer, which is returned as the file descriptor value.
2591    -1 is returned on failure.
2592 */
2593
2594 int PyObject_AsFileDescriptor(PyObject *o)
2595 {
2596     int fd;
2597     PyObject *meth;
2598
2599     if (PyInt_Check(o)) {
2600         fd = PyInt_AsLong(o);
2601     }
2602     else if (PyLong_Check(o)) {
2603         fd = PyLong_AsLong(o);
2604     }
2605     else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2606     {
2607         PyObject *fno = PyEval_CallObject(meth, NULL);
2608         Py_DECREF(meth);
2609         if (fno == NULL)
2610             return -1;
2611
2612         if (PyInt_Check(fno)) {
2613             fd = PyInt_AsLong(fno);
2614             Py_DECREF(fno);
2615         }
2616         else if (PyLong_Check(fno)) {
2617             fd = PyLong_AsLong(fno);
2618             Py_DECREF(fno);
2619         }
2620         else {
2621             PyErr_SetString(PyExc_TypeError,
2622                             "fileno() returned a non-integer");
2623             Py_DECREF(fno);
2624             return -1;
2625         }
2626     }
2627     else {
2628         PyErr_SetString(PyExc_TypeError,
2629                         "argument must be an int, or have a fileno() method.");
2630         return -1;
2631     }
2632
2633     if (fd < 0) {
2634         PyErr_Format(PyExc_ValueError,
2635                      "file descriptor cannot be a negative integer (%i)",
2636                      fd);
2637         return -1;
2638     }
2639     return fd;
2640 }
2641
2642 /* From here on we need access to the real fgets and fread */
2643 #undef fgets
2644 #undef fread
2645
/*
** Py_UniversalNewlineFgets is an fgets variation that understands
** all of \r, \n and \r\n conventions.
** The stream should be opened in binary mode.
** If fobj is NULL the routine always does newline conversion, and
** it may peek one char ahead to gobble the second char in \r\n.
** If fobj is non-NULL it must be a PyFileObject. In this case there
** is no readahead but instead a flag is used to skip a following
** \n on the next read. Also, if the file is not in universal-newline
** mode (f_univ_newline is false) the whole conversion is skipped and
** plain fgets() is used. Finally, the routine keeps track of
** the different types of newlines seen.
** Note that we need no error handling: fgets() treats error and eof
** identically.
**
** At most n-1 characters are stored in buf, followed by a '\0'.
** Returns buf, or NULL if no character was stored.  If fobj is
** non-NULL but not a file object, errno is set to ENXIO and NULL is
** returned.
*/
char *
Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
{
    char *p = buf;              /* next free position in buf */
    int c;
    int newlinetypes = 0;       /* NEWLINE_* bits seen so far */
    int skipnextlf = 0;         /* previous char was \r: drop a following \n */
    int univ_newline = 1;

    if (fobj) {
        if (!PyFile_Check(fobj)) {
            errno = ENXIO;              /* What can you do... */
            return NULL;
        }
        univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
        if ( !univ_newline )
            return fgets(buf, n, stream);
        /* Resume from the state saved by the previous call. */
        newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
        skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
    }
    FLOCKFILE(stream);
    c = 'x'; /* Shut up gcc warning */
    while (--n > 0 && (c = GETC(stream)) != EOF ) {
        if (skipnextlf ) {
            skipnextlf = 0;
            if (c == '\n') {
                /* Seeing a \n here with skipnextlf true
                ** means we saw a \r before.
                */
                newlinetypes |= NEWLINE_CRLF;
                /* The \n was already delivered when the \r was
                ** translated, so fetch the character after it.
                */
                c = GETC(stream);
                if (c == EOF) break;
            } else {
                /*
                ** Note that c == EOF also brings us here,
                ** so we're okay if the last char in the file
                ** is a CR.
                */
                newlinetypes |= NEWLINE_CR;
            }
        }
        if (c == '\r') {
            /* A \r is translated into a \n, and we skip
            ** an adjacent \n, if any. We don't set the
            ** newlinetypes flag until we've seen the next char.
            */
            skipnextlf = 1;
            c = '\n';
        } else if ( c == '\n') {
            newlinetypes |= NEWLINE_LF;
        }
        *p++ = c;
        if (c == '\n') break;   /* end of line: stop like fgets() */
    }
    /* A \r directly followed by EOF is a lone CR newline. */
    if ( c == EOF && skipnextlf )
        newlinetypes |= NEWLINE_CR;
    FUNLOCKFILE(stream);
    *p = '\0';
    if (fobj) {
        /* Save the state for the next call on this file object. */
        ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
        ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
    } else if ( skipnextlf ) {
        /* If we have no file object we cannot save the
        ** skipnextlf flag. We have to readahead, which
        ** will cause a pause if we're reading from an
        ** interactive stream, but that is very unlikely
        ** unless we're doing something silly like
        ** execfile("/dev/tty").
        */
        c = GETC(stream);
        if ( c != '\n' )
            ungetc(c, stream);
    }
    if (p == buf)
        return NULL;
    return buf;
}
2737
2738 /*
2739 ** Py_UniversalNewlineFread is an fread variation that understands
2740 ** all of \r, \n and \r\n conventions.
2741 ** The stream should be opened in binary mode.
2742 ** fobj must be a PyFileObject. In this case there
2743 ** is no readahead but in stead a flag is used to skip a following
2744 ** \n on the next read. Also, if the file is open in binary mode
2745 ** the whole conversion is skipped. Finally, the routine keeps track of
2746 ** the different types of newlines seen.
2747 */
2748 size_t
2749 Py_UniversalNewlineFread(char *buf, size_t n,
2750                          FILE *stream, PyObject *fobj)
2751 {
2752     char *dst = buf;
2753     PyFileObject *f = (PyFileObject *)fobj;
2754     int newlinetypes, skipnextlf;
2755
2756     assert(buf != NULL);
2757     assert(stream != NULL);
2758
2759     if (!fobj || !PyFile_Check(fobj)) {
2760         errno = ENXIO;          /* What can you do... */
2761         return 0;
2762     }
2763     if (!f->f_univ_newline)
2764         return fread(buf, 1, n, stream);
2765     newlinetypes = f->f_newlinetypes;
2766     skipnextlf = f->f_skipnextlf;
2767     /* Invariant:  n is the number of bytes remaining to be filled
2768      * in the buffer.
2769      */
2770     while (n) {
2771         size_t nread;
2772         int shortread;
2773         char *src = dst;
2774
2775         nread = fread(dst, 1, n, stream);
2776         assert(nread <= n);
2777         if (nread == 0)
2778             break;
2779
2780         n -= nread; /* assuming 1 byte out for each in; will adjust */
2781         shortread = n != 0;             /* true iff EOF or error */
2782         while (nread--) {
2783             char c = *src++;
2784             if (c == '\r') {
2785                 /* Save as LF and set flag to skip next LF. */
2786                 *dst++ = '\n';
2787                 skipnextlf = 1;
2788             }
2789             else if (skipnextlf && c == '\n') {
2790                 /* Skip LF, and remember we saw CR LF. */
2791                 skipnextlf = 0;
2792                 newlinetypes |= NEWLINE_CRLF;
2793                 ++n;
2794             }
2795             else {
2796                 /* Normal char to be stored in buffer.  Also
2797                  * update the newlinetypes flag if either this
2798                  * is an LF or the previous char was a CR.
2799                  */
2800                 if (c == '\n')
2801                     newlinetypes |= NEWLINE_LF;
2802                 else if (skipnextlf)
2803                     newlinetypes |= NEWLINE_CR;
2804                 *dst++ = c;
2805                 skipnextlf = 0;
2806             }
2807         }
2808         if (shortread) {
2809             /* If this is EOF, update type flags. */
2810             if (skipnextlf && feof(stream))
2811                 newlinetypes |= NEWLINE_CR;
2812             break;
2813         }
2814     }
2815     f->f_newlinetypes = newlinetypes;
2816     f->f_skipnextlf = skipnextlf;
2817     return dst - buf;
2818 }
2819
2820 #ifdef __cplusplus
2821 }
2822 #endif