Fix FSF address (Tobias Mueller, #470445)
[platform/upstream/evolution-data-server.git] / camel / providers / imap / camel-imap-search.c
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /* camel-imap-search.c: IMAP folder search */
3
4 /*
5  *  Authors:
6  *    Dan Winship <danw@ximian.com>
7  *    Michael Zucchi <notzed@ximian.com>
8  *
9  *  Copyright 2000, 2001, 2002 Ximian, Inc.
10  *
11  * This program is free software; you can redistribute it and/or
12  * modify it under the terms of version 2 of the GNU Lesser General Public
13  * License as published by the Free Software Foundation.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this program; if not, write to the
22  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23  * Boston, MA 02110-1301, USA.
24  *
25  */
26
27 #ifdef HAVE_CONFIG_H
28 #include <config.h>
29 #endif
30
31 #include <string.h>
32
33 #include <libedataserver/md5-utils.h>   /* md5 hash building */
34
35 #include "camel-mime-utils.h"   /* base64 encoding */
36 #include "camel-search-private.h"
37 #include "camel-seekable-stream.h"
38
39 #include "camel-imap-command.h"
40 #include "camel-imap-folder.h"
41 #include "camel-imap-search.h"
42 #include "camel-imap-store.h"
43 #include "camel-imap-summary.h"
44 #include "camel-imap-utils.h"
45
46 #define d(x)
47
48 #ifdef G_OS_WIN32
49 /* The strtok() in Microsoft's C library is MT-safe (but still uses
50  * only one buffer pointer per thread, but for the use of strtok_r()
51  * here that's enough).
52  */
53 #define strtok_r(s,sep,lasts) (*(lasts)=strtok((s),(sep)))
54 #endif
55
56 /*
57   File is:
58    BODY  (as in body search)
59    Last uid when search performed
60    termcount: number of search terms
61    matchcount: number of matches
62    term0, term1 ...
63    match0, match1, match2, ...
64 */
65
66 /* size of in-memory cache */
67 #define MATCH_CACHE_SIZE (32)
68
69 /* Also takes care of 'endianness' file magic */
70 #define MATCH_MARK (('B' << 24) | ('O' << 16) | ('D' << 8) | 'Y')
71
72 /* on-disk header, in native endianness format, matches follow */
73 struct _match_header {
74         guint32 mark;
75         guint32 validity;       /* uidvalidity for this folder */
76         guint32 lastuid;
77         guint32 termcount;
78         guint32 matchcount;
79 };
80
81 /* in-memory record */
82 struct _match_record {
83         struct _match_record *next;
84         struct _match_record *prev;
85
86         char hash[17];
87
88         guint32 lastuid;
89         guint32 validity;
90
91         unsigned int termcount;
92         char **terms;
93         GArray *matches;
94 };
95
96
97 static void free_match(CamelImapSearch *is, struct _match_record *mr);
98 static ESExpResult *imap_body_contains (struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolderSearch *s);
99
100 static CamelFolderSearchClass *imap_search_parent_class;
101
102 static void
103 camel_imap_search_class_init (CamelImapSearchClass *camel_imap_search_class)
104 {
105         /* virtual method overload */
106         CamelFolderSearchClass *camel_folder_search_class =
107                 CAMEL_FOLDER_SEARCH_CLASS (camel_imap_search_class);
108
109         imap_search_parent_class = (CamelFolderSearchClass *)camel_type_get_global_classfuncs (camel_folder_search_get_type ());
110         
111         /* virtual method overload */
112         camel_folder_search_class->body_contains = imap_body_contains;
113 }
114
115 static void
116 camel_imap_search_init(CamelImapSearch *is)
117 {
118         e_dlist_init(&is->matches);
119         is->matches_hash = g_hash_table_new(g_str_hash, g_str_equal);
120         is->matches_count = 0;
121         is->lastuid = 0;
122 }
123
124 static void
125 camel_imap_search_finalise(CamelImapSearch *is)
126 {
127         struct _match_record *mr;
128
129         while ( (mr = (struct _match_record *)e_dlist_remtail(&is->matches)) )
130                 free_match(is, mr);
131         g_hash_table_destroy(is->matches_hash);
132         if (is->cache)
133                 camel_object_unref((CamelObject *)is->cache);
134 }
135
136 CamelType
137 camel_imap_search_get_type (void)
138 {
139         static CamelType camel_imap_search_type = CAMEL_INVALID_TYPE;
140         
141         if (camel_imap_search_type == CAMEL_INVALID_TYPE) {
142                 camel_imap_search_type = camel_type_register (
143                         CAMEL_FOLDER_SEARCH_TYPE, "CamelImapSearch",
144                         sizeof (CamelImapSearch),
145                         sizeof (CamelImapSearchClass),
146                         (CamelObjectClassInitFunc) camel_imap_search_class_init, NULL,
147                         (CamelObjectInitFunc) camel_imap_search_init,
148                         (CamelObjectFinalizeFunc) camel_imap_search_finalise);
149         }
150
151         return camel_imap_search_type;
152 }
153
154 /**
155  * camel_imap_search_new:
156  *
157  * Return value: A new CamelImapSearch widget.
158  **/
159 CamelFolderSearch *
160 camel_imap_search_new (const char *cachedir)
161 {
162         CamelFolderSearch *new = CAMEL_FOLDER_SEARCH (camel_object_new (camel_imap_search_get_type ()));
163         CamelImapSearch *is = (CamelImapSearch *)new;
164
165         camel_folder_search_construct (new);
166
167         is->cache = camel_data_cache_new(cachedir, 0, NULL);
168         if (is->cache) {
169                 /* Expire entries after 14 days of inactivity */
170                 camel_data_cache_set_expire_access(is->cache, 60*60*24*14);
171         }
172
173         return new;
174 }
175
176
177 static void
178 hash_match(char hash[17], int argc, struct _ESExpResult **argv)
179 {
180         MD5Context ctx;
181         unsigned char digest[16];
182         unsigned int state = 0, save = 0;
183         int i;
184
185         md5_init(&ctx);
186         for (i=0;i<argc;i++) {
187                 if (argv[i]->type == ESEXP_RES_STRING)
188                         md5_update(&ctx, argv[i]->value.string, strlen(argv[i]->value.string));
189         }
190         md5_final(&ctx, digest);
191
192         camel_base64_encode_close(digest, 12, FALSE, hash, &state, &save);
193
194         for (i=0;i<16;i++) {
195                 if (hash[i] == '+')
196                         hash[i] = ',';
197                 if (hash[i] == '/')
198                         hash[i] = '_';
199         }
200
201         hash[16] = 0;
202 }
203
204 static int
205 save_match(CamelImapSearch *is, struct _match_record *mr)
206 {
207         guint32 mark = MATCH_MARK;
208         int ret = 0;
209         struct _match_header header;
210         CamelStream *stream;
211
212         /* since its a cache, doesn't matter if it doesn't save, at least we have the in-memory cache
213            for this session */
214         if (is->cache == NULL)
215                 return -1;
216
217         stream = camel_data_cache_add(is->cache, "search/body-contains", mr->hash, NULL);
218         if (stream == NULL)
219                 return -1;
220
221         d(printf("Saving search cache entry to '%s': %s\n", mr->hash, mr->terms[0]));
222         
223         /* we write the whole thing, then re-write the header magic, saves fancy sync code */
224         memcpy(&header.mark, "    ", 4);
225         header.termcount = 0;
226         header.matchcount = mr->matches->len;
227         header.lastuid = mr->lastuid;
228         header.validity = mr->validity;
229
230         if (camel_stream_write(stream, (char *)&header, sizeof(header)) != sizeof(header)
231             || camel_stream_write(stream, mr->matches->data, mr->matches->len*sizeof(guint32)) != mr->matches->len*sizeof(guint32)
232             || camel_seekable_stream_seek((CamelSeekableStream *)stream, 0, CAMEL_STREAM_SET) == -1
233             || camel_stream_write(stream, (char *)&mark, sizeof(mark)) != sizeof(mark)) {
234                 d(printf(" saving failed, removing cache entry\n"));
235                 camel_data_cache_remove(is->cache, "search/body-contains", mr->hash, NULL);
236                 ret = -1;
237         }
238
239         camel_object_unref((CamelObject *)stream);
240         return ret;
241 }
242
243 static void
244 free_match(CamelImapSearch *is, struct _match_record *mr)
245 {
246         int i;
247
248         for (i=0;i<mr->termcount;i++)
249                 g_free(mr->terms[i]);
250         g_free(mr->terms);
251         g_array_free(mr->matches, TRUE);
252         g_free(mr);
253 }
254
255 static struct _match_record *
256 load_match(CamelImapSearch *is, char hash[17], int argc, struct _ESExpResult **argv)
257 {
258         struct _match_record *mr;
259         CamelStream *stream = NULL;
260         struct _match_header header;
261         int i;
262
263         mr = g_malloc0(sizeof(*mr));
264         mr->matches = g_array_new(0, 0, sizeof(guint32));
265         g_assert(strlen(hash) == 16);
266         strcpy(mr->hash, hash);
267         mr->terms = g_malloc0(sizeof(mr->terms[0]) * argc);
268         for (i=0;i<argc;i++) {
269                 if (argv[i]->type == ESEXP_RES_STRING) {
270                         mr->termcount++;
271                         mr->terms[i] = g_strdup(argv[i]->value.string);
272                 }
273         }
274
275         d(printf("Loading search cache entry to '%s': %s\n", mr->hash, mr->terms[0]));
276
277         memset(&header, 0, sizeof(header));
278         if (is->cache)
279                 stream = camel_data_cache_get(is->cache, "search/body-contains", mr->hash, NULL);
280         if (stream != NULL) {
281                 /* 'cause i'm gonna be lazy, i'm going to have the termcount == 0 for now,
282                    and not load or save them since i can't think of a nice way to do it, the hash
283                    should be sufficient to key it */
284                 /* This check should also handle endianness changes, we just throw away
285                    the data (its only a cache) */
286                 if (camel_stream_read(stream, (char *)&header, sizeof(header)) == sizeof(header)
287                     && header.validity == is->validity
288                     && header.mark == MATCH_MARK
289                     && header.termcount == 0) {
290                         d(printf(" found %d matches\n", header.matchcount));
291                         g_array_set_size(mr->matches, header.matchcount);
292                         camel_stream_read(stream, mr->matches->data, sizeof(guint32)*header.matchcount);
293                 } else {
294                         d(printf(" file format invalid/validity changed\n"));
295                         memset(&header, 0, sizeof(header));
296                 }
297                 camel_object_unref((CamelObject *)stream);
298         } else {
299                 d(printf(" no cache entry found\n"));
300         }
301
302         mr->validity = header.validity;
303         if (mr->validity != is->validity)
304                 mr->lastuid = 0;
305         else
306                 mr->lastuid = header.lastuid;
307
308         return mr;
309 }
310
311 static int
312 sync_match(CamelImapSearch *is, struct _match_record *mr)
313 {
314         char *p, *result, *lasts = NULL;
315         CamelImapResponse *response = NULL;
316         guint32 uid;
317         CamelFolder *folder = ((CamelFolderSearch *)is)->folder;
318         CamelImapStore *store = (CamelImapStore *)folder->parent_store;
319         struct _camel_search_words *words;
320         GString *search;
321         int i;
322         
323         if (mr->lastuid >= is->lastuid && mr->validity == is->validity)
324                 return 0;
325         
326         d(printf ("updating match record for uid's %d:%d\n", mr->lastuid+1, is->lastuid));
327         
328         /* TODO: Handle multiple search terms */
329         
330         /* This handles multiple search words within a single term */
331         words = camel_search_words_split (mr->terms[0]);
332         search = g_string_new ("");
333         g_string_append_printf (search, "UID %d:%d", mr->lastuid + 1, is->lastuid);
334         for (i = 0; i < words->len; i++) {
335                 char *w = words->words[i]->word, c;
336                 
337                 g_string_append_printf (search, " BODY \"");
338                 while ((c = *w++)) {
339                         if (c == '\\' || c == '"')
340                                 g_string_append_c (search, '\\');
341                         g_string_append_c (search, c);
342                 }
343                 g_string_append_c (search, '"');
344         }
345         camel_search_words_free (words);
346         
347         /* We only try search using utf8 if its non us-ascii text? */
348         if ((words->type & CAMEL_SEARCH_WORD_8BIT) &&  (store->capabilities & IMAP_CAPABILITY_utf8_search)) {
349                 response = camel_imap_command (store, folder, NULL,
350                                                "UID SEARCH CHARSET UTF-8 %s", search->str);
351                 /* We can't actually tell if we got a NO response, so assume always */
352                 if (response == NULL)
353                         store->capabilities &= ~IMAP_CAPABILITY_utf8_search;
354         }
355         if (response == NULL)
356                 response = camel_imap_command (store, folder, NULL,
357                                                "UID SEARCH %s", search->str);
358         g_string_free(search, TRUE);
359
360         if (!response)
361                 return -1;
362         result = camel_imap_response_extract (store, response, "SEARCH", NULL);
363         if (!result)
364                 return -1;
365         
366         p = result + sizeof ("* SEARCH");
367         for (p = strtok_r (p, " ", &lasts); p; p = strtok_r (NULL, " ", &lasts)) {
368                 uid = strtoul(p, NULL, 10);
369                 g_array_append_vals(mr->matches, &uid, 1);
370         }
371         g_free(result);
372
373         mr->validity = is->validity;
374         mr->lastuid = is->lastuid;
375         save_match(is, mr);
376
377         return 0;
378 }
379
380 static struct _match_record *
381 get_match(CamelImapSearch *is, int argc, struct _ESExpResult **argv)
382 {
383         char hash[17];
384         struct _match_record *mr;
385
386         hash_match(hash, argc, argv);
387
388         mr = g_hash_table_lookup(is->matches_hash, hash);
389         if (mr == NULL) {
390                 while (is->matches_count >= MATCH_CACHE_SIZE) {
391                         mr = (struct _match_record *)e_dlist_remtail(&is->matches);
392                         if (mr) {
393                                 printf("expiring match '%s' (%s)\n", mr->hash, mr->terms[0]);
394                                 g_hash_table_remove(is->matches_hash, mr->hash);
395                                 free_match(is, mr);
396                                 is->matches_count--;
397                         } else {
398                                 is->matches_count = 0;
399                         }
400                 }
401                 mr = load_match(is, hash, argc, argv);
402                 g_hash_table_insert(is->matches_hash, mr->hash, mr);
403                 is->matches_count++;
404         } else {
405                 e_dlist_remove((EDListNode *)mr);
406         }
407
408         e_dlist_addhead(&is->matches, (EDListNode *)mr);
409
410         /* what about offline mode? */
411         /* We could cache those results too, or should we cache them elsewhere? */
412         sync_match(is, mr);
413
414         return mr;
415 }
416
417 static ESExpResult *
418 imap_body_contains (struct _ESExp *f, int argc, struct _ESExpResult **argv, CamelFolderSearch *s)
419 {
420         CamelImapStore *store = CAMEL_IMAP_STORE (s->folder->parent_store);
421         CamelImapSearch *is = (CamelImapSearch *)s;
422         char *uid;
423         ESExpResult *r;
424         CamelMessageInfo *info; 
425         GHashTable *uid_hash = NULL;
426         GPtrArray *array;
427         int i, j;
428         struct _match_record *mr;
429         guint32 uidn, *uidp;
430
431         d(printf("Performing body search '%s'\n", argv[0]->value.string));
432
433         /* TODO: Cache offline searches too? */
434
435         /* If offline, search using the parent class, which can handle this manually */
436         if (!camel_disco_store_check_online (CAMEL_DISCO_STORE (store), NULL))
437                 return imap_search_parent_class->body_contains(f, argc, argv, s);
438
439         /* optimise the match "" case - match everything */
440         if (argc == 1 && argv[0]->value.string[0] == '\0') {
441                 if (s->current) {
442                         r = e_sexp_result_new(f, ESEXP_RES_BOOL);
443                         r->value.bool = TRUE;
444                 } else {
445                         r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR);
446                         r->value.ptrarray = g_ptr_array_new ();
447                         for (i = 0; i < s->summary->len; i++) {
448                                 info = g_ptr_array_index(s->summary, i);
449                                 g_ptr_array_add(r->value.ptrarray, (char *)camel_message_info_uid(info));
450                         }
451                 }
452         } else if (argc == 0 || s->summary->len == 0) {
453                 /* nothing to match case, do nothing (should be handled higher up?) */
454                 if (s->current) {
455                         r = e_sexp_result_new(f, ESEXP_RES_BOOL);
456                         r->value.bool = FALSE;
457                 } else {
458                         r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR);
459                         r->value.ptrarray = g_ptr_array_new ();
460                 }
461         } else {
462                 int truth = FALSE;
463
464                 /* setup lastuid/validity for synchronising */
465                 info = g_ptr_array_index(s->summary, s->summary->len-1);
466                 is->lastuid = strtoul(camel_message_info_uid(info), NULL, 10);
467                 is->validity = ((CamelImapSummary *)(s->folder->summary))->validity;
468
469                 mr = get_match(is, argc, argv);
470
471                 if (s->current) {
472                         uidn = strtoul(camel_message_info_uid(s->current), NULL, 10);
473                         uidp = (guint32 *)mr->matches->data;
474                         j = mr->matches->len;
475                         for (i=0;i<j && !truth;i++)
476                                 truth = *uidp++ == uidn;
477                         r = e_sexp_result_new(f, ESEXP_RES_BOOL);
478                         r->value.bool = truth;
479                 } else {
480                         r = e_sexp_result_new(f, ESEXP_RES_ARRAY_PTR);
481                         array = r->value.ptrarray = g_ptr_array_new();
482
483                         /* We use a hash to map the uid numbers to uid strings as required by the search api */
484                         /* We use the summary's strings so we dont need to alloc more */
485                         uid_hash = g_hash_table_new(NULL, NULL);
486                         for (i = 0; i < s->summary->len; i++) {
487                                 info = s->summary->pdata[i];
488                                 uid = (char *)camel_message_info_uid(info);
489                                 uidn = strtoul(uid, NULL, 10);
490                                 g_hash_table_insert(uid_hash, GUINT_TO_POINTER(uidn), uid);
491                         }
492
493                         uidp = (guint32 *)mr->matches->data;
494                         j = mr->matches->len;
495                         for (i=0;i<j && !truth;i++) {
496                                 uid = g_hash_table_lookup(uid_hash, GUINT_TO_POINTER(*uidp++));
497                                 if (uid)
498                                         g_ptr_array_add(array, uid);
499                         }
500
501                         g_hash_table_destroy(uid_hash);
502                 }
503         }
504
505         return r;
506 }