2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
4 * Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
6 * The contents of this file are subject to the terms of either the GNU Lesser
7 * General Public License Version 2.1 only ("LGPL") or the Common Development and
8 * Distribution License ("CDDL")(collectively, the "License"). You may not use this
9 * file except in compliance with the License. You can obtain a copy of the CDDL at
10 * http://www.opensource.org/licenses/cddl1.php and a copy of the LGPLv2.1 at
11 * http://www.opensource.org/licenses/lgpl-license.php. See the License for the
12 * specific language governing permissions and limitations under the License. When
13 * distributing the software, include this License Header Notice in each file and
14 * include the full text of the License in the License file as well as the
17 * NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND DISTRIBUTION LICENSE
19 * For Covered Software in this distribution, this License shall be governed by the
20 * laws of the State of California (excluding conflict-of-law provisions).
21 * Any litigation relating to this License shall be subject to the jurisdiction of
22 * the Federal Courts of the Northern District of California and the state courts
23 * of the State of California, with venue lying in Santa Clara County, California.
27 * If you wish your version of this file to be governed by only the CDDL or only
28 * the LGPL Version 2.1, indicate your decision by adding "[Contributor]" elects to
29 * include this software in this distribution under the [CDDL or LGPL Version 2.1]
30 * license." If you don't indicate a single choice of license, a recipient has the
31 * option to distribute your version of this file under either the CDDL or the LGPL
32 * Version 2.1, or to extend the choice of license to its licensees as provided
33 * above. However, if you add LGPL Version 2.1 code and therefore, elected the LGPL
34 * Version 2 license, then the option applies only if the new code is made subject
35 * to such option by the copyright holder.
54 #include "../sim_fmerge.h"
56 #include "idngram_merge.h"
// Long-option table passed to getopt_long() in getParameters(). Each entry is
// {name, has_arg, flag, val}: "--NMax" requires an argument (has_arg == 1) and,
// because flag is 0, is reported to the caller as the short option 'n'.
// NOTE(review): only part of the table is visible in this view.
58 static struct option long_options[] =
60 { "NMax", 1, 0, 'n' },
66 // static int paraMax = 0;
// Path of the merged output file; stays NULL until getParameters() stores a
// strdup'd copy of the option argument in it.
67 static char* output = NULL;
68 // static char* swapfile = NULL;
// Help text written to stdout: synopsis, description, the -n / -o options and
// a worked example.
// NOTE(review): the enclosing function header is not visible in this view.
// NOTE(review): the user-facing text has typos ("finale", "sumed", "This
// program merge") — left untouched here because it is runtime output.
73 printf("Usage:\n\tidngram_merge options idngramfile[ idngramfile...]\n");
74 printf("\nDescription:\n");
76 " This program merge multiple idngram file, each of them are sorted [id1,...,idN,freq] array, into one idngram file. For those id1..idN which appear in more than one files, only one item appear in the final file, and its freq are sumed.\n");
77 printf("\nOptions:\n");
78 printf("\t -n N # N-gram\n");
79 printf("\t -o outputfile # finale merged idngram file\n");
80 printf("\nExample:\n");
81 printf(" Following example merge 2 id3gram files into a large one:\n");
83 "\tidngram_merge -n3 -o all.id3gram first.id3gram second.id3gram\n\n");
// Parses the command line with getopt_long() using the short-option string
// "n:o:" (both options take an argument) and the file-level long_options
// table. The visible statements store the numeric value of an option argument
// in the file-level N and a heap copy of an option argument in the file-level
// `output`.
//   argc, argv - the arguments received by main(), passed through unchanged.
// NOTE(review): parts of this function are not visible in this view; the
// comments describe only the statements shown.
87 getParameters(int argc, char* const argv[])
92 getopt_long(argc, argv, "n:o:", long_options,
93 &option_index)) != -1) {
// atoi() only reads its argument, so optarg is parsed directly. The previous
// atoi(strdup(optarg)) leaked the duplicated string each time it ran.
96 N = atoi(optarg);
// optarg points into argv; keep a private copy that main() later passes to
// fopen().
99 output = strdup(optarg);
// N must be in 1..3 and an output path must have been supplied; otherwise this
// branch is taken (its body is not visible in this view).
106 if (N < 1 || N > 3 || output == NULL) {
// Program entry point: parses the options, opens the output file and every
// input idngram file named on the command line, runs the merge for the
// selected N, then closes the input files.
// NOTE(review): parts of this function are not visible in this view; the
// comments describe only the statements shown.
113 main(int argc, char* argv[])
115 getParameters(argc, argv);
// "wb+" opens the output for binary read/write, creating or truncating it.
// NOTE(review): no NULL check on `out` is visible here — confirm one exists
// before it is handed to the merge.
116 FILE *out = fopen(output, "wb+");
117 std::vector<FILE* > idngram_files;
// getopt_long() leaves optind at the first non-option argument; if nothing
// remains, no input files were given.
119 if (optind >= argc) ShowUsage();
// Every remaining argument is treated as an input file path.
120 while (optind < argc) {
121 printf("Open %s:...", argv[optind]);
122 FILE *fp = fopen(argv[optind++], "rb");
// Keep the handle; all inputs are closed together after the merge.
127 idngram_files.push_back(fp);
130 printf("Merging...");
// One call per supported N (1..3, the range checked in getParameters()); the
// template argument has to be a compile-time constant, hence three separate
// calls. NOTE(review): the statements selecting among them are not visible.
133 ProcessingIdngramMerge<1>(out, idngram_files);
136 ProcessingIdngramMerge<2>(out, idngram_files);
139 ProcessingIdngramMerge<3>(out, idngram_files);
// Release every input file handle.
// NOTE(review): a matching fclose(out) is not visible here — confirm the
// output stream is closed/flushed before exit.
144 for (size_t i = 0; i < idngram_files.size(); i++)
145 fclose(idngram_files[i]);