2 var assert = require('assert'),
3 Stream = require('stream'),
4 inherits = require('util').inherits;
8 * This filter consumes a stream of characters and emits one string per line.
// LineSplitter: stream filter that re-chunks incoming text so that every
// complete line is emitted as its own 'data' event.
// NOTE(review): this listing is not contiguous (the embedded original line
// numbers skip), so the declarations of `self` and `buffer`, any return
// values and the closing braces are not visible here.
10 function LineSplitter() {
// write(data): prepend the unterminated remainder kept from the previous
// call, then split on any of the line-break forms \r\n, \n\r, \n or \r.
17 this.write = function(data) {
18 var lines = (buffer + data).split(/\r\n|\n\r|\n|\r/);
// Every element except the last one was followed by a line break, so it is
// a complete line and can be emitted right away.
19 for (var i = 0; i < lines.length - 1; i++) {
20 self.emit('data', lines[i]);
// The last element is the text after the final line break (possibly empty);
// keep it until more data arrives.
22 buffer = lines[lines.length - 1];
// end(data): run an optional final chunk through write(), then emit what is
// still held in the buffer.
26 this.end = function(data) {
27 this.write(data || '');
// NOTE(review): original line 28 is not shown; this emit may be guarded by a
// condition — confirm before relying on an empty last line being emitted.
29 self.emit('data', buffer);
// Set up prototype inheritance from Stream (util.inherits).
34 inherits(LineSplitter, Stream);
38 * This filter consumes lines and emits paragraph objects.
// ParagraphParser: stream filter that consumes lines and emits paragraph
// objects through 'data' events.
// NOTE(review): this listing is not contiguous (the embedded original line
// numbers skip). The start of the `var` list (including `self` and
// `paragraph`), the bodies of write()/end() and most closing braces and
// branch keywords are not visible here.
40 function ParagraphParser() {
// Block-level state: whether the current block is a license block and whether
// a C-style comment has been detected inside it.
42 block_is_license_block = false,
43 block_has_c_style_comment,
// Paragraph-level state, (re)initialised by resetParagraph().
44 is_first_line_in_paragraph,
45 paragraph_line_indent,
// Stream entry points. Their bodies are not shown in this excerpt
// (presumably write() hands each line to parseLine() — confirm).
53 this.write = function(data) {
58 this.end = function(data) {
// resetParagraph: marks the next line as the first of a paragraph and clears
// the recorded indentation; -1 is the "not set" sentinel tested in parseLine.
66 function resetParagraph() {
67 is_first_line_in_paragraph = true;
68 paragraph_line_indent = -1;
// Property of an object literal whose surrounding lines are not shown
// (presumably the new paragraph object): captures the current license-block
// flag at the time the object is created.
72 in_license_block: block_is_license_block,
// resetBlock: starts a new block with the given license-block flag and clears
// the C-style comment detection.
77 function resetBlock(is_license_block) {
78 block_is_license_block = is_license_block;
79 block_has_c_style_comment = false;
// flushParagraph: emits the current paragraph, but only when it holds at
// least one line or a list marker.
83 function flushParagraph() {
84 if (paragraph.lines.length || paragraph.li) {
85 self.emit('data', paragraph);
// parseLine: classifies a single input line (block separator, blank line,
// in-block separator, or content), strips comment decoration, and appends
// content to the current paragraph.
90 function parseLine(line) {
91 // Strip trailing whitespace
92 line = line.replace(/\s*$/, '');
94 // Detect block separator
// A block separator is a line made up solely of three or more '=' or '"'
// characters (surrounding whitespace allowed); it toggles the license-block
// flag.
95 if (/^\s*(=|"){3,}\s*$/.test(line)) {
97 resetBlock(!block_is_license_block);
101 // Strip comments around block
102 if (block_is_license_block) {
// Detection is sticky: once a line starting with "/*" has been seen, the flag
// is not re-tested on following lines.
103 if (!block_has_c_style_comment)
104 block_has_c_style_comment = /^\s*(\/\*)/.test(line);
105 if (block_has_c_style_comment) {
// Drop one leading comment decoration (" */", "/* " or " * ") while keeping
// the lazily captured leading whitespace ($1).
107 line = line.replace(/^(\s*?)(?:\s?\*\/|\/\*\s|\s\*\s?)/, '$1');
// Remove two leading whitespace characters.
// NOTE(review): original line 108 is not shown, so it is unclear whether this
// statement is an alternative branch of the replace above.
109 line = line.replace(/^\s{2}/, '');
// A "*/" in `prev` ends C-style comment stripping.
// NOTE(review): `prev` is assigned on a line not shown — presumably the line
// text before the decoration was stripped; confirm.
110 if (/\*\//.test(prev))
111 block_has_c_style_comment = false;
113 // Strip C++ and perl style comments.
// Removes a leading "//" or "#" plus at most one following whitespace
// character, keeping the indentation in front of it.
114 line = line.replace(/^(\s*)(?:\/\/\s?|#\s?)/, '$1');
118 // Detect blank line (paragraph separator)
119 if (!/\S/.test(line)) {
124 // Detect separator "lines" within a block. These mark a paragraph break
125 // and are stripped from the output.
// Such a line consists solely of five or more '=', '*' or '-' characters.
126 if (/^\s*[=*\-]{5,}\s*$/.test(line)) {
131 // Find out indentation level and the start of a bulleted or numbered list;
// result[1] is the leading whitespace, result[2] the optional list marker
// (digits followed by '.', or '*', or '-'); result[0] additionally covers the
// whitespace after the marker. The pattern can match the empty string, so
// exec() always returns a result here.
132 var result = /^(\s*)(\d+\.|\*|-)?\s*/.exec(line);
134 // The number of characters that will be stripped from the beginning of the line.
136 var line_strip_length = result[0].length;
137 // The indentation size that will be used to detect indentation jumps.
// The matched prefix length is rounded down to an even number.
139 var line_indent = Math.floor(result[0].length / 2) * 2;
140 // The indentation level that will be exported
// One level per two characters of leading whitespace.
141 var level = Math.floor(result[1].length / 2);
142 // The list indicator that precedes the actual content, if any.
143 var line_li = result[2];
145 // Flush the paragraph when there is a li or an indentation jump
146 if (line_li || (line_indent != paragraph_line_indent &&
147 paragraph_line_indent != -1)) {
// NOTE(review): original line 148 is not shown (presumably the flush itself);
// the list marker is then recorded on the paragraph.
149 paragraph.li = line_li;
152 // Set the paragraph indent that we use to detect indentation jumps. When
153 // we just detected a list indicator, wait
154 // for the next line to arrive before setting this.
// NOTE(review): in the visible code paragraph_line_indent is only ever
// assigned -1 (resetParagraph) and here, yet this guard requires it to
// already differ from -1. Unless a hidden line assigns it, this branch can
// never run and indentation jumps are never detected — confirm the intended
// condition.
155 if (!line_li && paragraph_line_indent != -1) {
156 paragraph_line_indent = line_indent;
159 // Set the output indent level if it has not been set yet.
160 if (paragraph.level === undefined)
161 paragraph.level = level;
163 // Strip leading whitespace and li.
164 line = line.slice(line_strip_length);
// Append the cleaned line to the current paragraph.
167 paragraph.lines.push(line);
169 is_first_line_in_paragraph = false;
// Set up prototype inheritance from Stream (util.inherits).
172 inherits(ParagraphParser, Stream);
176 * This filter consumes paragraph objects and emits modified paragraph objects.
177 * The lines within the paragraph are unwrapped where appropriate. It also
178 * replaces multiple consecutive whitespace characters by a single one.
// Unwrapper: stream filter that receives paragraph objects, joins lines that
// look like ordinary word wrapping, normalises whitespace, and re-emits the
// same paragraph object (its `lines` array is modified in place).
// NOTE(review): this listing is not contiguous (the embedded original line
// numbers skip); the declarations of `self`, `break_after`, `i` and `line`,
// the body of end() and the closing braces are not visible here.
180 function Unwrapper() {
184 this.writable = true;
186 this.write = function(paragraph) {
187 var lines = paragraph.lines,
// Pass 1: decide after which lines the existing line break must be kept.
191 for (i = 0; i < lines.length - 1; i++) {
194 // When a line is really short, the line was probably kept separate for a reason.
// NOTE(review): `line` is assigned on a line not shown — presumably lines[i].
196 if (line.length < 50) {
197 // If the first word on the next line really didn't fit after the line,
198 // it probably was just ordinary wrapping after all.
// The first word is whatever precedes the first whitespace character.
199 var next_first_word_length = lines[i + 1].replace(/\s.*$/, '').length;
// The word would have fitted within 60 characters, so the break is treated as
// deliberate and kept.
200 if (line.length + next_first_word_length < 60) {
201 break_after[i] = true;
// Pass 2: merge every line that is not marked with its successor. splice()
// removes the successor from the array and it is appended after one space.
// NOTE(review): the loop header has no increment; how `i` advances (and
// whether break_after stays aligned after a splice) is on lines not shown.
206 for (i = 0; i < lines.length - 1; ) {
207 if (!break_after[i]) {
208 lines[i] += ' ' + lines.splice(i + 1, 1)[0];
// Pass 3: whitespace normalisation of every remaining line.
214 for (i = 0; i < lines.length; i++) {
215 // Replace multiple whitespace characters by a single one, and strip
216 // trailing whitespace.
217 lines[i] = lines[i].replace(/\s+/g, ' ').replace(/\s+$/, '');
// Pass the updated paragraph on to the next filter.
220 self.emit('data', paragraph);
// end(): body not visible in this excerpt.
223 this.end = function(data) {
// Set up prototype inheritance from Stream (util.inherits).
229 inherits(Unwrapper, Stream);
233 * This filter generates an rtf document from a stream of paragraph objects.
// RtfGenerator: stream filter that receives paragraph objects and emits RTF
// source text as 'data' events. The document header is emitted lazily, before
// the first paragraph.
// NOTE(review): this listing is not contiguous (the embedded original line
// numbers skip); the declaration of `self` and `rtf`, several conditionals in
// write(), the callback bodies in rtfEscape() and the closing braces are not
// visible here.
235 function RtfGenerator() {
// Tracks whether the header has been emitted yet.
237 did_write_anything = false;
240 this.writable = true;
// write(paragraph): renders one paragraph. Reads paragraph.li,
// paragraph.level, paragraph.in_license_block and paragraph.lines.
242 this.write = function(paragraph) {
243 if (!did_write_anything) {
245 did_write_anything = true;
// List items are rendered one level deeper than their own level.
248 var li = paragraph.li,
249 level = paragraph.level + (li ? 1 : 0),
250 lic = paragraph.in_license_block;
// Paragraph formatting control words.
253 rtf += '\\sa150\\sl300\\slmult1';
// Left indent and tab stop are both 240 units per level.
// NOTE(review): the conditions guarding these two statements are on lines
// not shown.
255 rtf += '\\li' + (level * 240);
257 rtf += '\\tx' + (level) * 240;
// List marker followed by a tab.
// NOTE(review): presumably only executed for list items; the guard is not
// visible.
265 rtf += ' ' + li + '\\tab';
// Escape every line and join the lines with explicit RTF line breaks.
267 rtf += paragraph.lines.map(rtfEscape).join('\\line ');
272 self.emit('data', rtf);
// end(): acts only if something was written; the guarded statement is not
// visible (presumably the footer is emitted — confirm).
275 this.end = function(data) {
278 if (did_write_anything)
// toHex: converts `number` (truncated to a 32-bit integer by ~~) to a
// hexadecimal string and pads it until it is at least `length` characters
// long. The padding statement is not visible (presumably a leading '0').
283 function toHex(number, length) {
284 var hex = (~~number).toString(16);
285 while (hex.length < length)
// rtfEscape: rewrites characters that cannot appear literally in the RTF
// output. The start of the expression (original line 291) is not visible.
290 function rtfEscape(string) {
// Backslash and braces; the replacement callback body is not visible.
292 .replace(/[\\\{\}]/g, function(m) {
// Tab characters; the replacement callback body is not visible.
295 .replace(/\t/g, function() {
// Control characters and 0x7f-0xff become \'hh with a two-digit hex code.
298 .replace(/[\x00-\x1f\x7f-\xff]/g, function(m) {
299 return '\\\'' + toHex(m.charCodeAt(0), 2);
// U+FEFF (byte order mark) is removed.
301 .replace(/\ufeff/g, '')
// Code units U+0100-U+FFFF become \u escapes followed by a '?' fallback.
// NOTE(review): the RTF specification defines the argument of \u as a
// decimal (signed 16-bit) number, but a hexadecimal string is emitted here —
// verify against the spec and fix if confirmed.
302 .replace(/[\u0100-\uffff]/g, function(m) {
303 return '\\u' + toHex(m.charCodeAt(0), 4) + '?';
// emitHeader: emits the document prologue: RTF version 1, ANSI with code page
// 1252, \uc1, a font table containing Tahoma, \fs20, and a generator tag.
307 function emitHeader() {
308 self.emit('data', '{\\rtf1\\ansi\\ansicpg1252\\uc1\\deff0\\deflang1033' +
309 '{\\fonttbl{\\f0\\fswiss\\fcharset0 Tahoma;}}\\fs20\n' +
310 '{\\*\\generator txt2rtf 0.0.1;}\n');
// emitFooter: emits the brace that closes the group opened by the header.
313 function emitFooter() {
314 self.emit('data', '}');
// Set up prototype inheritance from Stream (util.inherits).
317 inherits(RtfGenerator, Stream);
// Script entry point: creates one instance of each filter and connects them
// between standard input and standard output.
320 var stdin = process.stdin,
321 stdout = process.stdout,
322 line_splitter = new LineSplitter(),
323 paragraph_parser = new ParagraphParser(),
324 unwrapper = new Unwrapper(),
325 rtf_generator = new RtfGenerator();
// Have stdin deliver decoded UTF-8 strings instead of raw buffers.
327 stdin.setEncoding('utf-8');
// Pipeline: stdin -> lines -> paragraphs -> unwrapped paragraphs -> RTF -> stdout.
330 stdin.pipe(line_splitter);
331 line_splitter.pipe(paragraph_parser);
332 paragraph_parser.pipe(unwrapper);
333 unwrapper.pipe(rtf_generator);
334 rtf_generator.pipe(stdout);