Upstream version 9.38.198.0
[platform/framework/web/crosswalk.git] / src / third_party / icu / source / tools / toolutil / pkg_genc.c
1 /******************************************************************************
2  *   Copyright (C) 2009-2013, International Business Machines
3  *   Corporation and others.  All Rights Reserved.
4  *******************************************************************************
5  */
6 #include "unicode/utypes.h"
7
8 #if U_PLATFORM_HAS_WIN32_API
9 #   define VC_EXTRALEAN
10 #   define WIN32_LEAN_AND_MEAN
11 #   define NOUSER
12 #   define NOSERVICE
13 #   define NOIME
14 #   define NOMCX
15 #include <windows.h>
16 #include <time.h>
17 #   ifdef __GNUC__
18 #       define WINDOWS_WITH_GNUC
19 #   endif
20 #endif
21
22 #if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
23 #   define U_ELF
24 #endif
25
26 #ifdef U_ELF
27 #   include <elf.h>
28 #   if defined(ELFCLASS64)
29 #       define U_ELF64
30 #   endif
31     /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32 #   ifndef EM_X86_64
33 #       define EM_X86_64 62
34 #   endif
35 #   define ICU_ENTRY_OFFSET 0
36 #endif
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include "unicode/putil.h"
41 #include "cmemory.h"
42 #include "cstring.h"
43 #include "filestrm.h"
44 #include "toolutil.h"
45 #include "unicode/uclean.h"
46 #include "uoptions.h"
47 #include "pkg_genc.h"
48
49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
50
51 #define HEX_0X 0 /*  0x1234 */
52 #define HEX_0H 1 /*  01234h */
53
54 /* prototypes --------------------------------------------------------------- */
55 static void
56 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
57
58 static uint32_t
59 write8(FileStream *out, uint8_t byte, uint32_t column);
60
61 static uint32_t
62 write32(FileStream *out, uint32_t byte, uint32_t column);
63
64 #if U_PLATFORM == U_PF_OS400
65 static uint32_t
66 write8str(FileStream *out, uint8_t byte, uint32_t column);
67 #endif
68 /* -------------------------------------------------------------------------- */
69
70 /*
71 Creating Template Files for New Platforms
72
73 Let the cc compiler help you get started.
74 Compile this program
75     const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
76 with the -S option to produce assembly output.
77
78 For example, this will generate array.s:
79 gcc -S array.c
80
81 This will produce a .s file that may look like this:
82
83     .file   "array.c"
84     .version        "01.01"
85 gcc2_compiled.:
86     .globl x
87     .section        .rodata
88     .align 4
89     .type    x,@object
90     .size    x,20
91 x:
92     .long   1
93     .long   2
94     .long   -559038737
95     .long   -1
96     .long   16
97     .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
98
99 which gives a starting point that will compile, and can be transformed
100 to become the template, generally with some consulting of as docs and
101 some experimentation.
102
103 If you want ICU to automatically use this assembly, you should
104 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
105 where the name is the compiler or platform that you used in this
106 assemblyHeader data structure.
107 */
108 static const struct AssemblyType {
109     const char *name;
110     const char *header;
111     const char *beginLine;
112     const char *footer;
113     int8_t      hexType; /* HEX_0X or HEX_0h */
114 } assemblyHeader[] = {
115     // For gcc assemblers, the meaning of .align changes depending on the
116     // hardware, so we use .balign 16 which always means 16 bytes.
117     // https://sourceware.org/binutils/docs/as/Pseudo-Ops.html
118     {"gcc",
119         ".globl %s\n"
120         "\t.section .note.GNU-stack,\"\",%%progbits\n"
121         "\t.section .rodata\n"
122         "\t.balign 16\n"
123         /* The 3 lines below are added for Chrome. */
124         "#ifdef U_HIDE_DATA_SYMBOL\n"
125         "\t.hidden %s\n"
126         "#endif\n"
127         "\t.type %s,%%object\n"
128         "%s:\n\n",
129
130         ".long ","",HEX_0X
131     },
132     {"gcc-darwin",
133         /*"\t.section __TEXT,__text,regular,pure_instructions\n"
134         "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
135         ".globl _%s\n"
136         /* The 3 lines below are added for Chrome. */
137         "#ifdef U_HIDE_DATA_SYMBOL\n"
138         "\t.private_extern _%s\n"
139         "#endif\n"
140         "\t.data\n"
141         "\t.const\n"
142         "\t.balign 16\n"
143         "_%s:\n\n",
144
145         ".long ","",HEX_0X
146     },
147     {"gcc-cygwin",
148         ".globl _%s\n"
149         "\t.section .rodata\n"
150         "\t.balign 16\n"
151         "_%s:\n\n",
152
153         ".long ","",HEX_0X
154     },
155     {"gcc-mingw64",
156         ".globl %s\n"
157         "\t.section .rodata\n"
158         "\t.balign 16\n"
159         "%s:\n\n",
160
161         ".long ","",HEX_0X
162     },
163 // 16 bytes alignment.
164 // http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf
165     {"sun",
166         "\t.section \".rodata\"\n"
167         "\t.align   16\n"
168         ".globl     %s\n"
169         "%s:\n",
170
171         ".word ","",HEX_0X
172     },
173 // 16 bytes alignment for sun-x86.
174 // http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html
175     {"sun-x86",
176         "Drodata.rodata:\n"
177         "\t.type   Drodata.rodata,@object\n"
178         "\t.size   Drodata.rodata,0\n"
179         "\t.globl  %s\n"
180         "\t.align  16\n" 
181         "%s:\n",
182
183         ".4byte ","",HEX_0X
184     },
185 // 1<<4 bit alignment for aix.
186 // http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm
187     {"xlc",
188         ".globl %s{RO}\n"
189         "\t.toc\n"
190         "%s:\n"
191         "\t.csect %s{RO}, 4\n",
192
193         ".long ","",HEX_0X
194     },
195     {"aCC-ia64",
196         "\t.file   \"%s.s\"\n"
197         "\t.type   %s,@object\n"
198         "\t.global %s\n"
199         "\t.secalias .abe$0.rodata, \".rodata\"\n"
200         "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
201         "\t.align  16\n"
202         "%s::\t",
203
204         "data4 ","",HEX_0X
205     },
206     {"aCC-parisc",
207         "\t.SPACE  $TEXT$\n"
208         "\t.SUBSPA $LIT$\n"
209         "%s\n"
210         "\t.EXPORT %s\n"
211         "\t.ALIGN  16\n",
212
213         ".WORD ","",HEX_0X
214     },
215 // align 16 bytes
216 //  http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx
217     { "masm",
218       "\tTITLE %s\n"
219       "; generated by genccode\n"
220       ".386\n"
221       ".model flat\n"
222       "\tPUBLIC _%s\n"
223       "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
224       "\tALIGN 16\n"
225       "_%s\tLABEL DWORD\n",
226       "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
227     }
228 };
229
230 static int32_t assemblyHeaderIndex = -1;
231 static int32_t hexType = HEX_0X;
232
233 U_CAPI UBool U_EXPORT2
234 checkAssemblyHeaderName(const char* optAssembly) {
235     int32_t idx;
236     assemblyHeaderIndex = -1;
237     for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
238         if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
239             assemblyHeaderIndex = idx;
240             hexType = assemblyHeader[idx].hexType; /* set the hex type */
241             return TRUE;
242         }
243     }
244
245     return FALSE;
246 }
247
248
249 U_CAPI void U_EXPORT2
250 printAssemblyHeadersToStdErr(void) {
251     int32_t idx;
252     fprintf(stderr, "%s", assemblyHeader[0].name);
253     for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
254         fprintf(stderr, ", %s", assemblyHeader[idx].name);
255     }
256     fprintf(stderr,
257         ")\n");
258 }
259
260 U_CAPI void U_EXPORT2
261 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
262     uint32_t column = MAX_COLUMN;
263     char entry[64];
264     uint32_t buffer[1024];
265     char *bufferStr = (char *)buffer;
266     FileStream *in, *out;
267     size_t i, length;
268
269     in=T_FileStream_open(filename, "rb");
270     if(in==NULL) {
271         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
272         exit(U_FILE_ACCESS_ERROR);
273     }
274
275     getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename);
276     out=T_FileStream_open(bufferStr, "w");
277     if(out==NULL) {
278         fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
279         exit(U_FILE_ACCESS_ERROR);
280     }
281
282     if (outFilePath != NULL) {
283         uprv_strcpy(outFilePath, bufferStr);
284     }
285
286 #ifdef WINDOWS_WITH_GNUC
287     /* Need to fix the file seperator character when using MinGW. */
288     swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
289 #endif
290
291     if(optEntryPoint != NULL) {
292         uprv_strcpy(entry, optEntryPoint);
293         uprv_strcat(entry, "_dat");
294     }
295
296     /* turn dashes or dots in the entry name into underscores */
297     length=uprv_strlen(entry);
298     for(i=0; i<length; ++i) {
299         if(entry[i]=='-' || entry[i]=='.') {
300             entry[i]='_';
301         }
302     }
303
304     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
305         entry, entry, entry, entry,
306         entry, entry, entry, entry);
307     T_FileStream_writeLine(out, bufferStr);
308     T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
309
310     for(;;) {
311         length=T_FileStream_read(in, buffer, sizeof(buffer));
312         if(length==0) {
313             break;
314         }
315         if (length != sizeof(buffer)) {
316             /* pad with extra 0's when at the end of the file */
317             for(i=0; i < (length % sizeof(uint32_t)); ++i) {
318                 buffer[length+i] = 0;
319             }
320         }
321         for(i=0; i<(length/sizeof(buffer[0])); i++) {
322             column = write32(out, buffer[i], column);
323         }
324     }
325
326     T_FileStream_writeLine(out, "\n");
327
328     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
329         entry, entry, entry, entry,
330         entry, entry, entry, entry);
331     T_FileStream_writeLine(out, bufferStr);
332
333     if(T_FileStream_error(in)) {
334         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
335         exit(U_FILE_ACCESS_ERROR);
336     }
337
338     if(T_FileStream_error(out)) {
339         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
340         exit(U_FILE_ACCESS_ERROR);
341     }
342
343     T_FileStream_close(out);
344     T_FileStream_close(in);
345 }
346
347 U_CAPI void U_EXPORT2
348 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
349     uint32_t column = MAX_COLUMN;
350     char buffer[4096], entry[64];
351     FileStream *in, *out;
352     size_t i, length;
353
354     in=T_FileStream_open(filename, "rb");
355     if(in==NULL) {
356         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
357         exit(U_FILE_ACCESS_ERROR);
358     }
359
360     if(optName != NULL) { /* prepend  'icudt28_' */
361       strcpy(entry, optName);
362       strcat(entry, "_");
363     } else {
364       entry[0] = 0;
365     }
366
367     getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
368     if (outFilePath != NULL) {
369         uprv_strcpy(outFilePath, buffer);
370     }
371     out=T_FileStream_open(buffer, "w");
372     if(out==NULL) {
373         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
374         exit(U_FILE_ACCESS_ERROR);
375     }
376
377     /* turn dashes or dots in the entry name into underscores */
378     length=uprv_strlen(entry);
379     for(i=0; i<length; ++i) {
380         if(entry[i]=='-' || entry[i]=='.') {
381             entry[i]='_';
382         }
383     }
384
385 #if U_PLATFORM == U_PF_OS400
386     /*
387     TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
388
389     This is here because this platform can't currently put
390     const data into the read-only pages of an object or
391     shared library (service program). Only strings are allowed in read-only
392     pages, so we use char * strings to store the data.
393
394     In order to prevent the beginning of the data from ever matching the
395     magic numbers we must still use the initial double.
396     [grhoten 4/24/2003]
397     */
398     sprintf(buffer,
399         "#ifndef IN_GENERATED_CCODE\n"
400         "#define IN_GENERATED_CCODE\n"
401         "#define U_DISABLE_RENAMING 1\n"
402         "#include \"unicode/umachine.h\"\n"
403         "#endif\n"
404         "U_CDECL_BEGIN\n"
405         "const struct {\n"
406         "    double bogus;\n"
407         "    const char *bytes; \n"
408         "} %s={ 0.0, \n",
409         entry);
410     T_FileStream_writeLine(out, buffer);
411
412     for(;;) {
413         length=T_FileStream_read(in, buffer, sizeof(buffer));
414         if(length==0) {
415             break;
416         }
417         for(i=0; i<length; ++i) {
418             column = write8str(out, (uint8_t)buffer[i], column);
419         }
420     }
421
422     T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
423 #else
424     /* Function renaming shouldn't be done in data */
425     sprintf(buffer,
426         "#ifndef IN_GENERATED_CCODE\n"
427         "#define IN_GENERATED_CCODE\n"
428         "#define U_DISABLE_RENAMING 1\n"
429         "#include \"unicode/umachine.h\"\n"
430         "#endif\n"
431         "U_CDECL_BEGIN\n"
432         "const struct {\n"
433         "    double bogus;\n"
434         "    uint8_t bytes[%ld]; \n"
435         "} %s={ 0.0, {\n",
436         (long)T_FileStream_size(in), entry);
437     T_FileStream_writeLine(out, buffer);
438
439     for(;;) {
440         length=T_FileStream_read(in, buffer, sizeof(buffer));
441         if(length==0) {
442             break;
443         }
444         for(i=0; i<length; ++i) {
445             column = write8(out, (uint8_t)buffer[i], column);
446         }
447     }
448
449     T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
450 #endif
451
452     if(T_FileStream_error(in)) {
453         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
454         exit(U_FILE_ACCESS_ERROR);
455     }
456
457     if(T_FileStream_error(out)) {
458         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
459         exit(U_FILE_ACCESS_ERROR);
460     }
461
462     T_FileStream_close(out);
463     T_FileStream_close(in);
464 }
465
466 static uint32_t
467 write32(FileStream *out, uint32_t bitField, uint32_t column) {
468     int32_t i;
469     char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
470     char *s = bitFieldStr;
471     uint8_t *ptrIdx = (uint8_t *)&bitField;
472     static const char hexToStr[16] = {
473         '0','1','2','3',
474         '4','5','6','7',
475         '8','9','A','B',
476         'C','D','E','F'
477     };
478
479     /* write the value, possibly with comma and newline */
480     if(column==MAX_COLUMN) {
481         /* first byte */
482         column=1;
483     } else if(column<32) {
484         *(s++)=',';
485         ++column;
486     } else {
487         *(s++)='\n';
488         uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
489         s+=uprv_strlen(s);
490         column=1;
491     }
492
493     if (bitField < 10) {
494         /* It's a small number. Don't waste the space for 0x */
495         *(s++)=hexToStr[bitField];
496     }
497     else {
498         int seenNonZero = 0; /* This is used to remove leading zeros */
499
500         if(hexType==HEX_0X) {
501          *(s++)='0';
502          *(s++)='x';
503         } else if(hexType==HEX_0H) {
504          *(s++)='0';
505         }
506
507         /* This creates a 32-bit field */
508 #if U_IS_BIG_ENDIAN
509         for (i = 0; i < sizeof(uint32_t); i++)
510 #else
511         for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
512 #endif
513         {
514             uint8_t value = ptrIdx[i];
515             if (value || seenNonZero) {
516                 *(s++)=hexToStr[value>>4];
517                 *(s++)=hexToStr[value&0xF];
518                 seenNonZero = 1;
519             }
520         }
521         if(hexType==HEX_0H) {
522          *(s++)='h';
523         }
524     }
525
526     *(s++)=0;
527     T_FileStream_writeLine(out, bitFieldStr);
528     return column;
529 }
530
531 static uint32_t
532 write8(FileStream *out, uint8_t byte, uint32_t column) {
533     char s[4];
534     int i=0;
535
536     /* convert the byte value to a string */
537     if(byte>=100) {
538         s[i++]=(char)('0'+byte/100);
539         byte%=100;
540     }
541     if(i>0 || byte>=10) {
542         s[i++]=(char)('0'+byte/10);
543         byte%=10;
544     }
545     s[i++]=(char)('0'+byte);
546     s[i]=0;
547
548     /* write the value, possibly with comma and newline */
549     if(column==MAX_COLUMN) {
550         /* first byte */
551         column=1;
552     } else if(column<16) {
553         T_FileStream_writeLine(out, ",");
554         ++column;
555     } else {
556         T_FileStream_writeLine(out, ",\n");
557         column=1;
558     }
559     T_FileStream_writeLine(out, s);
560     return column;
561 }
562
563 #if U_PLATFORM == U_PF_OS400
564 static uint32_t
565 write8str(FileStream *out, uint8_t byte, uint32_t column) {
566     char s[8];
567
568     if (byte > 7)
569         sprintf(s, "\\x%X", byte);
570     else
571         sprintf(s, "\\%X", byte);
572
573     /* write the value, possibly with comma and newline */
574     if(column==MAX_COLUMN) {
575         /* first byte */
576         column=1;
577         T_FileStream_writeLine(out, "\"");
578     } else if(column<24) {
579         ++column;
580     } else {
581         T_FileStream_writeLine(out, "\"\n\"");
582         column=1;
583     }
584     T_FileStream_writeLine(out, s);
585     return column;
586 }
587 #endif
588
589 static void
590 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
591     const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
592
593     /* copy path */
594     if(destdir!=NULL && *destdir!=0) {
595         do {
596             *outFilename++=*destdir++;
597         } while(*destdir!=0);
598         if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
599             *outFilename++=U_FILE_SEP_CHAR;
600         }
601         inFilename=basename;
602     } else {
603         while(inFilename<basename) {
604             *outFilename++=*inFilename++;
605         }
606     }
607
608     if(suffix==NULL) {
609         /* the filename does not have a suffix */
610         uprv_strcpy(entryName, inFilename);
611         if(optFilename != NULL) {
612           uprv_strcpy(outFilename, optFilename);
613         } else {
614           uprv_strcpy(outFilename, inFilename);
615         }
616         uprv_strcat(outFilename, newSuffix);
617     } else {
618         char *saveOutFilename = outFilename;
619         /* copy basename */
620         while(inFilename<suffix) {
621             if(*inFilename=='-') {
622                 /* iSeries cannot have '-' in the .o objects. */
623                 *outFilename++=*entryName++='_';
624                 inFilename++;
625             }
626             else {
627                 *outFilename++=*entryName++=*inFilename++;
628             }
629         }
630
631         /* replace '.' by '_' */
632         *outFilename++=*entryName++='_';
633         ++inFilename;
634
635         /* copy suffix */
636         while(*inFilename!=0) {
637             *outFilename++=*entryName++=*inFilename++;
638         }
639
640         *entryName=0;
641
642         if(optFilename != NULL) {
643             uprv_strcpy(saveOutFilename, optFilename);
644             uprv_strcat(saveOutFilename, newSuffix);
645         } else {
646             /* add ".c" */
647             uprv_strcpy(outFilename, newSuffix);
648         }
649     }
650 }
651
652 #ifdef CAN_GENERATE_OBJECTS
653 static void
654 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
655     union {
656         char        bytes[2048];
657 #ifdef U_ELF
658         Elf32_Ehdr  header32;
659         /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
660 #elif U_PLATFORM_HAS_WIN32_API
661         IMAGE_FILE_HEADER header;
662 #endif
663     } buffer;
664
665     const char *filename;
666     FileStream *in;
667     int32_t length;
668
669 #ifdef U_ELF
670
671 #elif U_PLATFORM_HAS_WIN32_API
672     const IMAGE_FILE_HEADER *pHeader;
673 #else
674 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
675 #endif
676
677     if(optMatchArch != NULL) {
678         filename=optMatchArch;
679     } else {
680         /* set defaults */
681 #ifdef U_ELF
682         /* set EM_386 because elf.h does not provide better defaults */
683         *pCPU=EM_386;
684         *pBits=32;
685         *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
686 #elif U_PLATFORM_HAS_WIN32_API
687 /* _M_IA64 should be defined in windows.h */
688 #   if defined(_M_IA64)
689         *pCPU=IMAGE_FILE_MACHINE_IA64;
690 #   elif defined(_M_AMD64)
691         *pCPU=IMAGE_FILE_MACHINE_AMD64;
692 #   else
693         *pCPU=IMAGE_FILE_MACHINE_I386;
694 #   endif
695         *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
696         *pIsBigEndian=FALSE;
697 #else
698 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
699 #endif
700         return;
701     }
702
703     in=T_FileStream_open(filename, "rb");
704     if(in==NULL) {
705         fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
706         exit(U_FILE_ACCESS_ERROR);
707     }
708     length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
709
710 #ifdef U_ELF
711     if(length<sizeof(Elf32_Ehdr)) {
712         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
713         exit(U_UNSUPPORTED_ERROR);
714     }
715     if(
716         buffer.header32.e_ident[0]!=ELFMAG0 ||
717         buffer.header32.e_ident[1]!=ELFMAG1 ||
718         buffer.header32.e_ident[2]!=ELFMAG2 ||
719         buffer.header32.e_ident[3]!=ELFMAG3 ||
720         buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
721     ) {
722         fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
723         exit(U_UNSUPPORTED_ERROR);
724     }
725
726     *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
727 #ifdef U_ELF64
728     if(*pBits!=32 && *pBits!=64) {
729         fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
730         exit(U_UNSUPPORTED_ERROR);
731     }
732 #else
733     if(*pBits!=32) {
734         fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
735         exit(U_UNSUPPORTED_ERROR);
736     }
737 #endif
738
739     *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
740     if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
741         fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
742         exit(U_UNSUPPORTED_ERROR);
743     }
744     /* TODO: Support byte swapping */
745
746     *pCPU=buffer.header32.e_machine;
747 #elif U_PLATFORM_HAS_WIN32_API
748     if(length<sizeof(IMAGE_FILE_HEADER)) {
749         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
750         exit(U_UNSUPPORTED_ERROR);
751     }
752     /* TODO: Use buffer.header.  Keep aliasing legal.  */
753     pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
754     *pCPU=pHeader->Machine;
755     /*
756      * The number of bits is implicit with the Machine value.
757      * *pBits is ignored in the calling code, so this need not be precise.
758      */
759     *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
760     /* Windows always runs on little-endian CPUs. */
761     *pIsBigEndian=FALSE;
762 #else
763 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
764 #endif
765
766     T_FileStream_close(in);
767 }
768
769 U_CAPI void U_EXPORT2
770 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
771     /* common variables */
772     char buffer[4096], entry[40]={ 0 };
773     FileStream *in, *out;
774     const char *newSuffix;
775     int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
776
777     uint16_t cpu, bits;
778     UBool makeBigEndian;
779
780     /* platform-specific variables and initialization code */
781 #ifdef U_ELF
782     /* 32-bit Elf file header */
783     static Elf32_Ehdr header32={
784         {
785             /* e_ident[] */
786             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
787             ELFCLASS32,
788             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
789             EV_CURRENT /* EI_VERSION */
790         },
791         ET_REL,
792         EM_386,
793         EV_CURRENT, /* e_version */
794         0, /* e_entry */
795         0, /* e_phoff */
796         (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
797         0, /* e_flags */
798         (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
799         0, /* e_phentsize */
800         0, /* e_phnum */
801         (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
802         5, /* e_shnum */
803         2 /* e_shstrndx */
804     };
805
806     /* 32-bit Elf section header table */
807     static Elf32_Shdr sectionHeaders32[5]={
808         { /* SHN_UNDEF */
809             0
810         },
811         { /* .symtab */
812             1, /* sh_name */
813             SHT_SYMTAB,
814             0, /* sh_flags */
815             0, /* sh_addr */
816             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
817             (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
818             3, /* sh_link=sect hdr index of .strtab */
819             1, /* sh_info=One greater than the symbol table index of the last
820                 * local symbol (with STB_LOCAL). */
821             4, /* sh_addralign */
822             (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
823         },
824         { /* .shstrtab */
825             9, /* sh_name */
826             SHT_STRTAB,
827             0, /* sh_flags */
828             0, /* sh_addr */
829             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
830             40, /* sh_size */
831             0, /* sh_link */
832             0, /* sh_info */
833             1, /* sh_addralign */
834             0 /* sh_entsize */
835         },
836         { /* .strtab */
837             19, /* sh_name */
838             SHT_STRTAB,
839             0, /* sh_flags */
840             0, /* sh_addr */
841             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
842             (Elf32_Word)sizeof(entry), /* sh_size */
843             0, /* sh_link */
844             0, /* sh_info */
845             1, /* sh_addralign */
846             0 /* sh_entsize */
847         },
848         { /* .rodata */
849             27, /* sh_name */
850             SHT_PROGBITS,
851             SHF_ALLOC, /* sh_flags */
852             0, /* sh_addr */
853             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
854             0, /* sh_size */
855             0, /* sh_link */
856             0, /* sh_info */
857             16, /* sh_addralign */
858             0 /* sh_entsize */
859         }
860     };
861
862     /* symbol table */
863     static Elf32_Sym symbols32[2]={
864         { /* STN_UNDEF */
865             0
866         },
867         { /* data entry point */
868             1, /* st_name */
869             0, /* st_value */
870             0, /* st_size */
871             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
872             0, /* st_other */
873             4 /* st_shndx=index of related section table entry */
874         }
875     };
876
877     /* section header string table, with decimal string offsets */
878     static const char sectionStrings[40]=
879         /*  0 */ "\0"
880         /*  1 */ ".symtab\0"
881         /*  9 */ ".shstrtab\0"
882         /* 19 */ ".strtab\0"
883         /* 27 */ ".rodata\0"
884         /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
885         /* 40: padded to multiple of 8 bytes */
886
887     /*
888      * Use entry[] for the string table which will contain only the
889      * entry point name.
890      * entry[0] must be 0 (NUL)
891      * The entry point name can be up to 38 characters long (sizeof(entry)-2).
892      */
893
894     /* 16-align .rodata in the .o file, just in case */
895     static const char padding[16]={ 0 };
896     int32_t paddingSize;
897
898 #ifdef U_ELF64
899     /* 64-bit Elf file header */
900     static Elf64_Ehdr header64={
901         {
902             /* e_ident[] */
903             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
904             ELFCLASS64,
905             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
906             EV_CURRENT /* EI_VERSION */
907         },
908         ET_REL,
909         EM_X86_64,
910         EV_CURRENT, /* e_version */
911         0, /* e_entry */
912         0, /* e_phoff */
913         (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
914         0, /* e_flags */
915         (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
916         0, /* e_phentsize */
917         0, /* e_phnum */
918         (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
919         5, /* e_shnum */
920         2 /* e_shstrndx */
921     };
922
923     /* 64-bit Elf section header table */
924     static Elf64_Shdr sectionHeaders64[5]={
925         { /* SHN_UNDEF */
926             0
927         },
928         { /* .symtab */
929             1, /* sh_name */
930             SHT_SYMTAB,
931             0, /* sh_flags */
932             0, /* sh_addr */
933             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
934             (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
935             3, /* sh_link=sect hdr index of .strtab */
936             1, /* sh_info=One greater than the symbol table index of the last
937                 * local symbol (with STB_LOCAL). */
938             4, /* sh_addralign */
939             (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
940         },
941         { /* .shstrtab */
942             9, /* sh_name */
943             SHT_STRTAB,
944             0, /* sh_flags */
945             0, /* sh_addr */
946             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
947             40, /* sh_size */
948             0, /* sh_link */
949             0, /* sh_info */
950             1, /* sh_addralign */
951             0 /* sh_entsize */
952         },
953         { /* .strtab */
954             19, /* sh_name */
955             SHT_STRTAB,
956             0, /* sh_flags */
957             0, /* sh_addr */
958             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
959             (Elf64_Xword)sizeof(entry), /* sh_size */
960             0, /* sh_link */
961             0, /* sh_info */
962             1, /* sh_addralign */
963             0 /* sh_entsize */
964         },
965         { /* .rodata */
966             27, /* sh_name */
967             SHT_PROGBITS,
968             SHF_ALLOC, /* sh_flags */
969             0, /* sh_addr */
970             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
971             0, /* sh_size */
972             0, /* sh_link */
973             0, /* sh_info */
974             16, /* sh_addralign */
975             0 /* sh_entsize */
976         }
977     };
978
979     /*
980      * 64-bit symbol table
981      * careful: different order of items compared with Elf32_sym!
982      */
983     static Elf64_Sym symbols64[2]={
984         { /* STN_UNDEF */
985             0
986         },
987         { /* data entry point */
988             1, /* st_name */
989             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
990             0, /* st_other */
991             4, /* st_shndx=index of related section table entry */
992             0, /* st_value */
993             0 /* st_size */
994         }
995     };
996
997 #endif /* U_ELF64 */
998
999     /* entry[] has a leading NUL */
1000     entryOffset=1;
1001
1002     /* in the common code, count entryLength from after the NUL */
1003     entryLengthOffset=1;
1004
1005     newSuffix=".o";
1006
1007 #elif U_PLATFORM_HAS_WIN32_API
1008     struct {
1009         IMAGE_FILE_HEADER fileHeader;
1010         IMAGE_SECTION_HEADER sections[2];
1011         char linkerOptions[100];
1012     } objHeader;
1013     IMAGE_SYMBOL symbols[1];
1014     struct {
1015         DWORD sizeofLongNames;
1016         char longNames[100];
1017     } symbolNames;
1018
1019     /*
1020      * entry sometimes has a leading '_';
1021      * it is overwritten if entryOffset==0, depending on the target platform
1022      * (see the check for cpu below)
1023      */
1024     entry[0]='_';
1025
1026     newSuffix=".obj";
1027 #else
1028 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
1029 #endif
1030
1031     /* deal with options, files and the entry point name */
1032     getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1033     printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1034 #if U_PLATFORM_HAS_WIN32_API
1035     if(cpu==IMAGE_FILE_MACHINE_I386) {
1036         entryOffset=1;
1037     }
1038 #endif
1039
1040     in=T_FileStream_open(filename, "rb");
1041     if(in==NULL) {
1042         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1043         exit(U_FILE_ACCESS_ERROR);
1044     }
1045     size=T_FileStream_size(in);
1046
1047     getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1048     if (outFilePath != NULL) {
1049         uprv_strcpy(outFilePath, buffer);
1050     }
1051
1052     if(optEntryPoint != NULL) {
1053         uprv_strcpy(entry+entryOffset, optEntryPoint);
1054         uprv_strcat(entry+entryOffset, "_dat");
1055     }
1056     /* turn dashes in the entry name into underscores */
1057     entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1058     for(i=0; i<entryLength; ++i) {
1059         if(entry[entryLengthOffset+i]=='-') {
1060             entry[entryLengthOffset+i]='_';
1061         }
1062     }
1063
1064     /* open the output file */
1065     out=T_FileStream_open(buffer, "wb");
1066     if(out==NULL) {
1067         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1068         exit(U_FILE_ACCESS_ERROR);
1069     }
1070
1071 #ifdef U_ELF
1072     if(bits==32) {
1073         header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1074         header32.e_machine=cpu;
1075
1076         /* 16-align .rodata in the .o file, just in case */
1077         paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1078         if(paddingSize!=0) {
1079                 paddingSize=0x10-paddingSize;
1080                 sectionHeaders32[4].sh_offset+=paddingSize;
1081         }
1082
1083         sectionHeaders32[4].sh_size=(Elf32_Word)size;
1084
1085         symbols32[1].st_size=(Elf32_Word)size;
1086
1087         /* write .o headers */
1088         T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1089         T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1090         T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1091     } else /* bits==64 */ {
1092 #ifdef U_ELF64
1093         header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1094         header64.e_machine=cpu;
1095
1096         /* 16-align .rodata in the .o file, just in case */
1097         paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1098         if(paddingSize!=0) {
1099                 paddingSize=0x10-paddingSize;
1100                 sectionHeaders64[4].sh_offset+=paddingSize;
1101         }
1102
1103         sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1104
1105         symbols64[1].st_size=(Elf64_Xword)size;
1106
1107         /* write .o headers */
1108         T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1109         T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1110         T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1111 #endif
1112     }
1113
1114     T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1115     T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1116     if(paddingSize!=0) {
1117         T_FileStream_write(out, padding, paddingSize);
1118     }
1119 #elif U_PLATFORM_HAS_WIN32_API
1120     /* populate the .obj headers */
1121     uprv_memset(&objHeader, 0, sizeof(objHeader));
1122     uprv_memset(&symbols, 0, sizeof(symbols));
1123     uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1124
1125     /* write the linker export directive */
1126     uprv_strcpy(objHeader.linkerOptions, "-export:");
1127     length=8;
1128     uprv_strcpy(objHeader.linkerOptions+length, entry);
1129     length+=entryLength;
1130     uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1131     length+=6;
1132
1133     /* set the file header */
1134     objHeader.fileHeader.Machine=cpu;
1135     objHeader.fileHeader.NumberOfSections=2;
1136     objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1137     objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1138     objHeader.fileHeader.NumberOfSymbols=1;
1139
1140     /* set the section for the linker options */
1141     uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1142     objHeader.sections[0].SizeOfRawData=length;
1143     objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1144     objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1145
1146     /* set the data section */
1147     uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1148     objHeader.sections[1].SizeOfRawData=size;
1149     objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1150     objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1151
1152     /* set the symbol table */
1153     if(entryLength<=8) {
1154         uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1155         symbolNames.sizeofLongNames=4;
1156     } else {
1157         symbols[0].N.Name.Short=0;
1158         symbols[0].N.Name.Long=4;
1159         symbolNames.sizeofLongNames=4+entryLength+1;
1160         uprv_strcpy(symbolNames.longNames, entry);
1161     }
1162     symbols[0].SectionNumber=2;
1163     symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1164
1165     /* write the file header and the linker options section */
1166     T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1167 #else
1168 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
1169 #endif
1170
1171     /* copy the data file into section 2 */
1172     for(;;) {
1173         length=T_FileStream_read(in, buffer, sizeof(buffer));
1174         if(length==0) {
1175             break;
1176         }
1177         T_FileStream_write(out, buffer, (int32_t)length);
1178     }
1179
1180 #if U_PLATFORM_HAS_WIN32_API
1181     /* write the symbol table */
1182     T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1183     T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1184 #endif
1185
1186     if(T_FileStream_error(in)) {
1187         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1188         exit(U_FILE_ACCESS_ERROR);
1189     }
1190
1191     if(T_FileStream_error(out)) {
1192         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1193         exit(U_FILE_ACCESS_ERROR);
1194     }
1195
1196     T_FileStream_close(out);
1197     T_FileStream_close(in);
1198 }
1199 #endif