1 <script language="PHP">
2 require_once "search_config.php";
// Writes the closing part of the search form. $value is interpolated
// unescaped into the value attribute of the query text field, so callers
// must pass text that is safe inside an HTML attribute.
// NOTE(review): this extract omits lines (braces and whatever brings
// $config and $translator into scope) - confirm against the full file.
4 function end_form($value)
// the search field markup is only written while the index is not disabled
8 if ($config['DISABLE_INDEX'] == false)
10 echo " <input type=\"text\" id=\"MSearchField\" name=\"query\" value=\"$value\" size=\"20\" accesskey=\"S\" onfocus=\"searchBox.OnSearchFieldFocus(true)\" onblur=\"searchBox.OnSearchFieldFocus(false)\"/>\n </form>\n </div><div class=\"right\"></div>\n </div>\n </li>\n </ul>\n </div>\n</div>\n";
// with the tree view enabled the translated split bar markup is emitted
12 if ($config['GENERATE_TREEVIEW'])
14 echo $translator['split_bar'];
// NOTE(review): several lines are missing before this statement, so the
// closing body/html markup may belong to a separate definition - confirm.
20 echo "</body></html>";
// Returns the search_results_title entry of the $translator table.
// NOTE(review): how $translator is brought into scope is not visible in
// this extract.
23 function search_results()
26 return $translator['search_results_title'];
// Builds the message describing how many matches were found for $num hits.
// NOTE(review): the return statement and the declaration of $translator
// are not visible in this extract.
29 function matches_text($num)
// counts 0, 1 and 2 select their own template; every larger count uses
// template 2
32 $string = $translator['search_results'][($num>2)?2:$num];
33 // The eval is used so that translator strings can contain $num.
// NOTE(review): eval() executes the template as PHP source. This is only
// safe while the translation table is trusted, and a template containing
// an unescaped double quote breaks the generated statement.
34 eval("\$result = \"$string\";");
// Returns the search_matches entry of the $translator table.
// NOTE(review): how $translator is brought into scope is not visible in
// this extract.
38 function report_matches()
41 return $translator['search_matches'];
// Reads four bytes from $file at its current position and returns them as
// a big-endian 32-bit integer (the first byte is the most significant).
// Bytes that cannot be read because the end of the file was reached count
// as zero, which is what the earlier byte-by-byte implementation produced.
function readInt($file)
{
  $bytes = fread($file, 4);
  if ($bytes === false) $bytes = "";
  // pad short reads so unpack() always receives exactly four bytes
  $bytes = str_pad($bytes, 4, "\0");
  $parts = unpack("N", $bytes);
  return $parts[1];
}
// Reads bytes from $file up to (and consuming) the next NUL byte and
// returns them as a string without the terminator.
// Reading also stops at the end of the file; an empty string is returned
// when nothing could be read.
function readString($file)
{
  $result = "";
  // fgetc() yields false at end of file; test for it explicitly instead of
  // relying on ord(false) evaluating to 0
  while (($c = fgetc($file)) !== false && $c !== "\0")
  {
    $result .= $c;
  }
  return $result;
}
// Reads the next four bytes of $file and returns them as a string; the
// caller compares the result with the index file signature.
// Fewer than four bytes are returned when the file ends early, and an
// empty string when nothing could be read.
function readHeader($file)
{
  $header = fread($file, 4);
  if ($header === false) $header = "";
  return $header;
}
// Maps a search word to the number of its slot in the index table of the
// search file, or -1 when the word cannot be used for a lookup.
// NOTE(review): the assignments of $hi and $lo and the final return
// statement are missing from this extract - confirm against the full file.
65 function computeIndex($word)
67 // Simple hashing that allows for substring search
// words shorter than two characters cannot be hashed
68 if (strlen($word)<2) return -1;
69 // high char of the index
71 if ($hi==0) return -1;
72 // low char of the index
74 if ($lo==0) return -1;
// Looks up $word in the opened index $file and appends one entry per
// matching index word to $statsList, which is passed by reference.
// The visible code first collects index words that begin with $word, then
// reads the per-document frequencies for each of them, and finally derives
// a rank for every document of every collected word.
// NOTE(review): many lines (braces, loop headers, initialisation of the
// counters and of $multiplier) are missing from this extract; the comments
// below describe the visible statements only.
79 function search($file,$word,&$statsList)
81 $index = computeIndex($word);
82 if ($index!=-1) // found a valid index
84 fseek($file,$index*4+4); // 4 bytes per entry, skip header
// the table slot holds a value that is zero when no word uses this hash key
85 $index = readInt($file);
86 if ($index) // found words matching the hash key
// entries added by this call start at this offset in $statsList
88 $start=sizeof($statsList);
91 $w = readString($file);
94 $statIdx = readInt($file);
// prefix comparison: $w matches when it begins with $word
95 if ($word==substr($w,0,strlen($word)))
96 { // found word that matches (as substring)
97 $statsList[$count++]=array(
101 "full"=>strlen($w)==strlen($word),
105 $w = readString($file);
// second pass: load document statistics for every entry added above
110 for ($count=$start;$count<sizeof($statsList);$count++)
112 $statInfo = &$statsList[$count];
114 // whole word matches have a double weight
115 if ($statInfo["full"]) $multiplier=2;
116 fseek($file,$statInfo["index"]);
117 $numDocs = readInt($file);
119 // read docs info + occurrence frequency of the word
120 for ($i=0;$i<$numDocs;$i++)
123 $freq=readInt($file);
124 $docInfo[$i]=array("idx" => $idx,
// the lowest bit of $freq flags a high priority document
129 if ($freq&1) // word occurs in high priority doc
132 $totalFreqHi+=$freq*$multiplier;
134 else // word occurs in low priority doc
136 $totalFreqLo+=$freq*$multiplier;
139 // read name and url info for the doc
140 for ($i=0;$i<$numDocs;$i++)
142 fseek($file,$docInfo[$i]["idx"]);
143 $docInfo[$i]["name"]=readString($file);
144 $docInfo[$i]["url"]=readString($file);
146 $statInfo["docs"]=$docInfo;
// NOTE(review): $totalHi is not assigned in any visible line - confirm it
// is initialised and updated in the omitted ones.
148 $totalFreq=($totalHi+1)*$totalFreqLo + $totalFreqHi;
// third pass: turn the stored frequencies into ranks relative to $totalFreq
149 for ($count=$start;$count<sizeof($statsList);$count++)
151 $statInfo = &$statsList[$count];
153 // whole word matches have a double weight
154 if ($statInfo["full"]) $multiplier=2;
155 for ($i=0;$i<sizeof($statInfo["docs"]);$i++)
157 $docInfo = &$statInfo["docs"];
158 // compute frequency rank of the word in each doc
159 $freq=$docInfo[$i]["freq"];
// documents flagged hi get $totalFreqLo added to the numerator; the float
// cast applies to the numerator before the division
160 if ($docInfo[$i]["hi"])
162 $statInfo["docs"][$i]["rank"]=
163 (float)($freq*$multiplier+$totalFreqLo)/$totalFreq;
167 $statInfo["docs"][$i]["rank"]=
168 (float)($freq*$multiplier)/$totalFreq;
// Merges the per-word results in $results into $docs, which is passed by
// reference and keyed by $key. A document that already has an entry gets
// the new rank added to it; otherwise a fresh entry is created that stores
// $key as its url. Each contributing word is appended to the words list of
// the entry.
// NOTE(review): the assignments of $key and $rank and the remaining array
// fields are not visible in this extract.
177 function combine_results($results,&$docs)
179 foreach ($results as $wordInfo)
181 $docsList = &$wordInfo["docs"];
182 foreach ($docsList as $di)
186 if (isset($docs[$key]))
188 $docs[$key]["rank"]+=$rank;
192 $docs[$key] = array("url"=>$key,
197 $docs[$key]["words"][] = array(
198 "word"=>$wordInfo["word"],
199 "match"=>$wordInfo["match"],
// Returns the entries of $docs that pass the required-word and
// forbidden-word filters, keeping their original keys.
// NOTE(review): each() was deprecated in PHP 7.2 and removed in PHP 8.0;
// the outer loop needs to become a foreach to run on current versions.
// NOTE(review): parts of the required-word check are missing from this
// extract, so its exact rule cannot be confirmed from the visible lines.
207 function filter_results($docs,&$requiredWords,&$forbiddenWords)
209 $filteredDocs=array();
210 while (list ($key, $val) = each ($docs))
212 $words = &$docs[$key]["words"];
213 $copy=1; // copy entry by default
214 if (sizeof($requiredWords)>0)
216 foreach ($requiredWords as $reqWord)
219 foreach ($words as $wordInfo)
221 $found = $wordInfo["word"]==$reqWord;
226 $copy=0; // document contains none of the required words
231 if (sizeof($forbiddenWords)>0)
233 foreach ($words as $wordInfo)
235 if (in_array($wordInfo["word"],$forbiddenWords))
237 $copy=0; // document contains a forbidden word
242 if ($copy) $filteredDocs[$key]=$docs[$key];
244 return $filteredDocs;
// Comparison callback for usort(): orders result entries by their rank
// field, highest rank first.
// Returns 0 for equal ranks, -1 when $a ranks higher than $b, 1 otherwise.
function compare_rank($a,$b)
{
  if ($a["rank"] == $b["rank"])
  {
    return 0;
  }
  return ($a["rank"]>$b["rank"]) ? -1 : 1;
}
// Sorts result entries with the compare_rank callback; the sorted list is
// delivered through $sorted, which is passed by reference.
// NOTE(review): the statement that fills $sorted from $docs is not visible
// in this extract - confirm against the full file.
256 function sort_results($docs,&$sorted)
// usort() reindexes the array, so the original keys are not kept
259 usort($sorted,"compare_rank");
// Prints the result section of the page: a header with the translated
// title, then a table holding the match summary and, for each entry of
// $docs, a link to the document and the list of matched words with their
// frequencies.
// NOTE(review): the url, name, word and match values are echoed without
// HTML escaping - confirm the index content is trusted or escape them.
// NOTE(review): table row markup, the branch on $numDocs and the update of
// $num are among the lines missing from this extract.
263 function report_results(&$docs)
265 echo "<div class=\"header\">";
266 echo " <div class=\"headertitle\">\n";
267 echo " <h1>".search_results()."</h1>\n";
270 echo "<div class=\"searchresults\">\n";
271 echo "<table cellspacing=\"2\">\n";
272 $numDocs = sizeof($docs);
// summary text for the case without any match
276 echo " <td colspan=\"2\">".matches_text(0)."</td>\n";
// summary text for the actual number of matching documents
282 echo " <td colspan=\"2\">".matches_text($numDocs);
287 foreach ($docs as $doc)
290 echo " <td align=\"right\">$num.</td>";
291 echo "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
293 echo " <td></td><td class=\"tiny\">".report_matches()." ";
294 foreach ($doc["words"] as $wordInfo)
296 $word = $wordInfo["word"];
// the searched part is printed in bold, followed by the remainder of the
// matched index word
297 $matchRight = substr($wordInfo["match"],strlen($word));
298 echo "<b>$word</b>$matchRight(".$wordInfo["freq"].") ";
// Runs a search for the space separated words in $query against the index
// file search/search.idx. A leading + marks a word as required and a
// leading - marks it as forbidden. The script is terminated with an error
// message when the index cannot be opened or has the wrong header.
// NOTE(review): the initialisation of $results, $docs and $sorted, the
// advance to the next token and the return statement are missing from this
// extract - confirm against the full file.
309 function run_query($query)
// NOTE(review): strcmp() compares the version strings byte by byte, so a
// version such as 10.0.0 would sort before 4.1.0; version_compare() is the
// usual tool for this check.
311 if(strcmp('4.1.0', phpversion()) > 0)
313 die("Error: PHP version 4.1.0 or above required!");
315 if (!($file=fopen("search/search.idx","rb")))
317 die("Error: Search index file could NOT be opened!");
319 if (readHeader($file)!="DOXS")
321 die("Error: Header of index file is invalid!");
324 $requiredWords = array();
325 $forbiddenWords = array();
326 $foundWords = array();
327 $word=strtok($query," ");
328 while ($word) // for each word in the search query
// NOTE(review): the curly brace string offset syntax was deprecated in
// PHP 7.4 and removed in PHP 8.0; square brackets are the replacement.
// A word consisting only of + or - is empty after substr(), so the next
// offset access reads past the end of the string.
330 if (($word{0}=='+')) { $word=substr($word,1); $requiredWords[]=$word; }
331 if (($word{0}=='-')) { $word=substr($word,1); $forbiddenWords[]=$word; }
// each distinct word is searched only once, in lower case
332 if (!in_array($word,$foundWords))
335 search($file,strtolower($word),$results);
341 combine_results($results,$docs);
342 // filter out documents with forbidden word or that do not contain
344 $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
345 // sort the results based on rank
347 sort_results($filteredDocs,$sorted);
// Request handling: take the query from the URL parameters, run the search
// and print the form followed by the results.
// NOTE(review): $query is only assigned when the query parameter is
// present, and the lines enclosing these statements are missing from this
// extract - confirm a default exists for requests without it.
354 if (array_key_exists("query", $_GET))
356 $query=$_GET["query"];
358 $sorted = run_query($query);
359 // Now output the HTML stuff...
// before the query is echoed back into the form, every byte other than
// ASCII letters, digits, dash, underscore, dot and 0x80-0xFF is replaced
// by a space
361 end_form(preg_replace("/[^a-zA-Z0-9\-\_\.\x80-\xFF]/i", " ", $query ));
362 // report results to the user
363 report_results($sorted);