lib/_stream_transform.js

   1 // a transform stream is a readable/writable stream where you do
   2 // something with the data.  Sometimes it's called a "filter",
   3 // but that's not a great name for it, since that implies a thing where
   4 // some bits pass through, and others are simply ignored.  (That would
   5 // be a valid example of a transform, of course.)
   6 //
   7 // While the output is causally related to the input, it's not
   8 // necessarily a symmetric or synchronous transformation.  For example,
   9 // a zlib stream might take multiple plain-text writes(), and then
  10 // emit a single compressed chunk some time in the future.
  11 //
  12 // Here's how this works:
  13 //
  14 // The Transform stream has all the aspects of the readable and writable
  15 // stream classes.  When you write(chunk), that calls _write(chunk,cb)
  16 // internally, and returns false if there's a lot of pending writes
  17 // buffered up.  When you call read(), that calls _read(n) until
  18 // there's enough pending readable data buffered up.
  19 //
  20 // In a transform stream, the written data is placed in a buffer.  When
  21 // _read(n) is called, it transforms the queued up data, calling the
  22 // buffered _write cb's as it consumes chunks.  If consuming a single
  23 // written chunk would result in multiple output chunks, then the first
  24 // outputted bit calls the readcb, and subsequent chunks just go into
  25 // the read buffer, and will cause it to emit 'readable' if necessary.
  26 //
  27 // This way, back-pressure is actually determined by the reading side,
  28 // since _read has to be called to start processing a new chunk.  However,
  29 // a pathological inflate type of transform can cause excessive buffering
  30 // here.  For example, imagine a stream where every byte of input is
  31 // interpreted as an integer from 0-255, and then results in that many
  32 // bytes of output.  Writing the 4 bytes {ff,ff,ff,ff} would result in
  33 // 1kb of data being output.  In this case, you could write a very small
  34 // amount of input, and end up with a very large amount of output.  In
  35 // such a pathological inflating mechanism, there'd be no way to tell
  36 // the system to stop doing the transform.  A single 4MB write could
  37 // cause the system to run out of memory.
  38 //
  39 // However, even in such a pathological case, only a single written chunk
  40 // would be consumed, and then the rest would wait (un-transformed) until
  41 // the results of the previous transformed chunk were consumed.
  42
  43 'use strict';
  44
  45 module.exports = Transform;
  46
  47 const Duplex = require('_stream_duplex');
  48 const util = require('util');
  49 util.inherits(Transform, Duplex);
  50
  51
  // Per-stream bookkeeping object stored on a Transform as `_transformState`.
  //
  // Fields:
  //   afterTransform - the callback handed to _transform(); it closes over
  //                    `stream` so it can be invoked without a receiver.
  //   needTransform  - set by _read() when it was asked for data but had no
  //                    buffered chunk to work on; cleared again by push().
  //   transforming   - true from the moment _read() starts a _transform()
  //                    call until afterTransform() runs.
  //   writecb        - callback of the chunk buffered by _write(), or null.
  //   writechunk     - the chunk buffered by _write(), or null.
  //   writeencoding  - encoding of the chunk buffered by _write(), or null.
  function TransformState(stream) {
    this.afterTransform = function(er, data) {
      return afterTransform(stream, er, data);
    };

    this.needTransform = false;
    this.transforming = false;
    this.writecb = null;
    this.writechunk = null;
    // _write() assigns this for every chunk; declare it here as well so that
    // every field of the state object exists from construction onwards.
    this.writeencoding = null;
  }
  62
  // Runs when a _transform() implementation signals that it has finished
  // with the current chunk.
  //
  //   stream - the Transform whose chunk was being processed
  //   er     - error reported by _transform(), forwarded to the write callback
  //   data   - optional output; pushed to the readable side unless it is
  //            null or undefined
  //
  // Returns the result of emit('error') when there is no pending write
  // callback, otherwise undefined.
  function afterTransform(stream, er, data) {
    var ts = stream._transformState;
    ts.transforming = false;

    var cb = ts.writecb;

    // writecb is cleared just below every time a chunk completes, so finding
    // none here means this callback ran without a write pending.
    if (!cb)
      return stream.emit('error', new Error('no writecb in Transform class'));

    ts.writechunk = null;
    ts.writecb = null;

    if (data !== null && data !== undefined)
      stream.push(data);

    // No second check needed: the guard above already returned when cb was
    // missing.  The output is pushed before the writer is released.
    cb(er);

    // Keep the pipeline moving: if the reader is waiting, or the read buffer
    // is below its high-water mark, ask for the next chunk right away.
    var rs = stream._readableState;
    rs.reading = false;
    if (rs.needReadable || rs.length < rs.highWaterMark) {
      stream._read(rs.highWaterMark);
    }
  }
  87
  88
  // Transform constructor; may be called with or without `new`.
  //
  // `options` is passed through to Duplex unchanged.  Two optional,
  // function-valued keys are additionally honoured here:
  //   options.transform - installed as this._transform
  //   options.flush     - installed as this._flush
  function Transform(options) {
    if (!(this instanceof Transform))
      return new Transform(options);

    Duplex.call(this, options);

    this._transformState = new TransformState(this);

    const stream = this;
    const readableState = this._readableState;

    // Ask for a 'readable' event as soon as some data has been transformed.
    readableState.needReadable = true;

    // _read is implemented by this class and everything Readable expects
    // before the first _read call is already in place, so drop the sync
    // guard flag.
    readableState.sync = false;

    if (options) {
      if (typeof options.transform === 'function')
        this._transform = options.transform;

      if (typeof options.flush === 'function')
        this._flush = options.flush;
    }

    // When the writable side finishes, flush out anything remaining: give
    // _flush (when one exists) a chance to run, then hand over to done().
    this.once('prefinish', function() {
      if (typeof this._flush !== 'function') {
        done(stream);
        return;
      }

      this._flush(function(er) {
        done(stream, er);
      });
    });
  }
 125
 // push() override: output is being produced, so clear the "waiting for a
 // transform" flag before delegating to Duplex's push.  Returns whatever
 // Duplex.prototype.push returns.
 Transform.prototype.push = function(chunk, encoding) {
   const state = this._transformState;
   state.needTransform = false;

   return Duplex.prototype.push.call(this, chunk, encoding);
 };
 130
 // The extension point: implementation classes replace this method (or
 // supply `options.transform` to the constructor).
 //
 //   chunk    - the input chunk that was written
 //   encoding - the encoding that accompanied that chunk
 //   cb       - completion callback, cb(err, data)
 //
 // Call `push(newChunk)` zero or more times to hand transformed output to
 // the readable side; a second argument to cb that is neither null nor
 // undefined is pushed as well.
 //
 // Call `cb(err)` once this chunk has been dealt with.  Passing an error
 // puts the hurt on the whole operation, and never calling cb() means no
 // further chunk will ever be delivered.
 //
 // This default always throws.
 Transform.prototype._transform = function(chunk, encoding, cb) {
   const notImplemented = new Error('not implemented');
   throw notImplemented;
 };
 144
 // Writable-side hook.  The chunk is not transformed here: it is parked in
 // the transform state together with its encoding and callback, and _read()
 // is what actually feeds it to _transform().
 Transform.prototype._write = function(chunk, encoding, cb) {
   const state = this._transformState;
   state.writecb = cb;
   state.writechunk = chunk;
   state.writeencoding = encoding;

   // A transform is already in flight; afterTransform() decides what
   // happens next once it completes.
   if (state.transforming)
     return;

   // Kick off processing straight away when the readable side has asked for
   // data, or still has room below its high-water mark.
   const readableState = this._readableState;
   if (state.needTransform ||
       readableState.needReadable ||
       readableState.length < readableState.highWaterMark) {
     this._read(readableState.highWaterMark);
   }
 };
 158
 // Readable-side hook.  The argument is ignored; being called at all means
 // the readable side wants more data, and _transform does the real work.
 Transform.prototype._read = function(n) {
   const state = this._transformState;

   const chunkReady = state.writechunk !== null &&
                      state.writecb &&
                      !state.transforming;

   if (!chunkReady) {
     // Nothing to work on yet (or a transform is still running): record the
     // request so that data arriving later gets processed, now that it has
     // been asked for.
     state.needTransform = true;
     return;
   }

   state.transforming = true;
   this._transform(state.writechunk, state.writeencoding, state.afterTransform);
 };
 174
 175
 // Final step after the 'prefinish' handling: ends the readable side.
 //
 //   stream - the Transform being finished
 //   er     - optional error; when truthy it is emitted as 'error' and
 //            nothing else happens
 //
 // Returns the result of emit('error', er) or of push(null).  Throws when
 // writes are still buffered or a transform is still in flight.
 function done(stream, er) {
   if (er) {
     return stream.emit('error', er);
   }

   // An empty write buffer means that nothing more will ever be provided,
   // so anything else at this point is a usage error.
   const writableState = stream._writableState;
   const transformState = stream._transformState;

   let problem = null;
   if (writableState.length) {
     problem = 'calling transform done when ws.length != 0';
   } else if (transformState.transforming) {
     problem = 'calling transform done when still transforming';
   }

   if (problem !== null) {
     throw new Error(problem);
   }

   return stream.push(null);
 }