1 /* ÆþÎϤΥ»¥Ã¥È¤ò´ÉÍý¤¹¤ë¥³¡¼¥É
3 * Copyright (C) 2006 HANAOKA Toshiyuki
4 * Copyright (C) 2006-2007 TABATA Yusuke
6 * Special Thanks: Google Summer of Code Program 2006
13 #include "input_set.h"
15 #define HASH_SIZE 1024
20 struct int_map_node *next;
26 struct int_map_node *hash_head;
29 struct int_map_node **array;
34 struct input_line *lines;
35 struct input_line *buckets[HASH_SIZE];
37 struct int_map *feature_freq;
41 line_hash(const int *ar, int nr)
45 for (i = 0; i < nr; i++) {
48 return (h % HASH_SIZE);
51 static struct input_line *
52 find_same_line(struct input_set *is, int *features, int nr)
54 struct input_line *il;
55 int h = line_hash(features, nr);
56 for (il = is->buckets[h]; il; il = il->next_in_hash) {
58 if (il->nr_features != nr) {
61 for (i = 0; i < nr; i++) {
62 if (il->features[i] != features[i]) {
73 static struct input_line *
74 add_line(struct input_set *is, int *features, int nr)
77 struct input_line *il;
78 il = malloc(sizeof(struct input_line));
80 il->features = malloc(sizeof(int) * nr);
81 for (i = 0; i < nr; i++) {
82 il->features[i] = features[i];
85 il->negative_weight = 0;
87 il->next_line = is->lines;
90 h = line_hash(features, nr);
91 il->next_in_hash = is->buckets[h];
97 add_feature_count(struct int_map *im, int nr, int *features, int weight)
100 for (i = 0; i < nr; i++) {
102 int v = int_map_peek(im, f);
103 int_map_set(im, f, v + weight);
107 /* input_set¤ËÆþÎϤò°ì¤Ä²Ã¤¨¤ë */
109 input_set_set_features(struct input_set *is, int *features,
112 struct input_line *il;
113 int abs_weight = abs(weight);
116 il = find_same_line(is, features, nr);
118 il = add_line(is, features, nr);
122 il->weight += weight;
124 il->negative_weight += abs_weight;
127 add_feature_count(is->feature_freq, nr, features, abs_weight);
131 input_set_create(void)
134 struct input_set *is;
135 is = malloc(sizeof(struct input_set));
138 for (i = 0; i < HASH_SIZE; i++) {
139 is->buckets[i] = NULL;
142 is->feature_freq = int_map_new();
148 input_set_get_input_line(struct input_set *is)
154 input_set_filter(struct input_set *is,
155 double pos, double neg)
157 struct input_set *new_is = input_set_create();
158 struct input_line *il;
159 for (il = is->lines; il; il = il->next_line) {
160 if (il->weight > pos ||
161 il->negative_weight > neg) {
162 input_set_set_features(new_is, il->features,
163 il->nr_features, il->weight);
164 input_set_set_features(new_is, il->features,
165 il->nr_features, -il->negative_weight);
172 input_set_output_feature_freq(FILE *fp, struct input_set *is)
175 struct int_map *im = is->feature_freq;
176 fprintf(fp, "0 %d\n", im->array_size);
178 for (i = 0; i < im->array_size; i++) {
180 fprintf(fp, "0 0\n");
182 fprintf(fp, "%d %d\n", im->array[i]->key,
192 struct int_map *im = malloc(sizeof(struct int_map));
194 im->hash_head = malloc(sizeof(struct int_map_node) * HASH_SIZE);
195 for (i = 0; i < HASH_SIZE; i++) {
196 im->hash_head[i].next = NULL;
204 return idx % HASH_SIZE;
207 static struct int_map_node *
208 find_int_map_node(struct int_map *im, int idx)
210 struct int_map_node *node;
211 int h = node_index(idx);
212 for (node = im->hash_head[h].next; node; node = node->next) {
213 if (node->key == idx) {
221 int_map_peek(struct int_map *im, int idx)
223 struct int_map_node *node = find_int_map_node(im, idx);
231 int_map_set(struct int_map *im, int idx, int val)
233 struct int_map_node *node = find_int_map_node(im, idx);
240 node = malloc(sizeof(struct int_map_node));
244 node->next = im->hash_head[h].next;
245 im->hash_head[h].next = node;
251 int_map_flatten(struct int_map *im)
254 struct int_map_node *node;
257 im->array_size = im->nr * 2;
258 im->array = malloc(sizeof(struct int_map_node *) *
260 for (i = 0; i < im->array_size; i++) {
263 /* Í×ÁǤòÃÖ¤¤¤Æ¤¤¤¯ */
264 for (i = 0; i < HASH_SIZE; i++) {
265 for (node = im->hash_head[i].next; node; node = node->next) {
284 printf(" max_collision=%d\n", max_n);