Changeset 1414
- Timestamp:
- 10/13/07 18:47:04 (11 months ago)
- Files:
-
- nebula/trunk/ChangeLog (added)
- nebula/trunk/src/Makefile.am (modified) (1 diff)
- nebula/trunk/src/cluster.c (modified) (1 diff)
- nebula/trunk/src/cluster.h (modified) (2 diffs)
- nebula/trunk/src/nebula.c (modified) (7 diffs)
- nebula/trunk/src/nebula.h (modified) (1 diff)
- nebula/trunk/src/signals.c (modified) (1 diff)
- nebula/trunk/src/spamsum.c (modified) (3 diffs)
- nebula/trunk/src/spamsum.h (modified) (1 diff)
- nebula/trunk/src/trie.c (modified) (3 diffs)
- nebula/trunk/src/trie.h (modified) (1 diff)
- nebula/trunk/src/util.c (modified) (2 diffs)
- nebula/trunk/src/util.h (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
nebula/trunk/src/Makefile.am
r1375 r1414 9 9 util.c util.h \ 10 10 trie.c trie.h \ 11 hashlist.c hashlist.h \ 11 hashq.c hashq.h \ 12 12 ngram.c ngram.h \ 13 13 cluster.c cluster.h \ nebula/trunk/src/cluster.c
r1377 r1414 20 20 21 21 #include <stdio.h> 22 #include <string.h> 22 23 23 24 #include "cluster.h" 25 #include "hashq.h" 24 26 #include "nebula.h" 25 27 #include "trie.h" 26 28 27 29 28 u_int16_t add_entry_to_cluster(cluster *cl, hash_list *entry) { 29 if ((cl->entries = (hash_list **) realloc(cl->entries, (cl->cnt+1) * sizeof(hash_list *))) == NULL) { 30 /* create new cluster */ 31 inline cluster *cluster_new(void) { 32 cluster *new; 33 34 /* create new cluster */ 35 if ((new = calloc(1, sizeof(cluster))) == NULL) { 30 36 fprintf(stderr, "Error - Unable to allocate memory: %m.\n"); 31 37 exit(EXIT_FAILURE); 32 38 } 33 cl->entries[cl->cnt++] = entry;34 39 35 return(cl->cnt); 40 /* create element queue */ 41 if ((new->hq = calloc(1, sizeof(hashq))) == NULL) { 42 fprintf(stderr, "Error - Unable to allocate memory: %m.\n"); 43 exit(EXIT_FAILURE); 44 } 45 46 return(new); 36 47 } 37 48 38 49 39 cluster *extend_cluster(hash_list *md5sum1, hash_list *md5sum2) { 40 cluster *cl, *cl_new; 50 /* delete cluster */ 51 void cluster_free(cluster *cl, u_char list_files, void(*cbfn)(hashq *hq, u_char flag)) { 52 if (!cl) return; 41 53 42 if ( md5sum1->cl == NULL) {43 cl = NULL;54 if (cbfn) cbfn(cl->hq, list_files); 55 free(cl); 44 56 45 /* create new cluster */ 46 if ((cl_new = calloc(1, sizeof(cluster))) == NULL) { 47 fprintf(stderr, "Error - Unable to allocate memory: %m.\n"); 48 exit(EXIT_FAILURE); 49 } 50 md5sum1->cl = cl_new; 51 md5sum2->cl = cl_new; 52 53 add_entry_to_cluster(cl_new, md5sum1); 54 add_entry_to_cluster(cl_new, md5sum2); 55 56 /* prepend new cluster to list */ 57 cl_new->next = cluster_list; 58 cluster_list = cl_new; 59 60 num_of_clusters++; 61 62 return(cl_new); 63 } 64 65 /* add element to existing cluster */ 66 md5sum2->cl = md5sum1->cl; 67 add_entry_to_cluster(md5sum1->cl, md5sum2); 68 69 return(md5sum1->cl); 57 return; 70 58 } 71 59 72 60 73 void clusterlist_delete(cluster *list) { 74 int i; 75 cluster *cl; 61 /* free a cluster's hash queue and its hashes 
*/ 62 void cluster_hashq_free(hashq *hq, u_char list_files) { 63 hashq_free(hq, list_files, hash_free); 64 return; 65 } 66 67 68 cluster *add_entry_to_cluster(cluster *cl, hash *entry) { 69 qelem *new; 70 71 if (cl->hq->size < clusterq_max) { 72 // set cluster pointer in hash struct 73 entry->cl = cl; 74 75 // add hash to queue 76 if ((new = hashq_prepend(cl->hq, entry)) == NULL) { 77 fprintf(stderr, "Error - Could not add entry to cluster.\n"); 78 exit(EXIT_FAILURE); 79 } 80 } else { 81 if (verbose) fprintf(stderr, "Warning - Could not add hash to cluster queue: Maximum size reached.\n"); 82 return(NULL); 83 } 84 85 return(cl); 86 } 87 88 89 cluster *create_cluster(hash *l1, hash *l2) { 90 cluster *cl = cluster_new(); 91 92 add_entry_to_cluster(cl, l1); 93 add_entry_to_cluster(cl, l2); 94 95 /* prepend new cluster to list */ 96 cl->next = cluster_list; 97 if(cluster_list) cluster_list->prev = cl; 98 cluster_list = cl; 99 100 num_of_clusters++; 101 102 return(cl); 103 } 104 105 106 cluster *clusters_merge(cluster *dst, cluster *new) { 107 cluster *cl1, *cl2; 108 qelem *entry; 109 110 cl1 = dst; 111 cl2 = new; 112 113 // join lists 114 cl1->hq->tail->next = cl2->hq->head; 115 cl1->hq->tail = cl2->hq->tail; 116 cl1->hq->size += cl2->hq->size; 117 118 // let elements of cl2 point to cl1 119 for (entry = cl2->hq->head; entry; entry = entry->next) ((hash*)entry->data)->cl = cl1; 120 cl2->hq->head = cl2->hq->tail = NULL; 121 122 // unlink and free cluster 2 123 if(cl2->prev) cl2->prev->next = cl2->next; 124 if(cl2->next) cl2->next->prev = cl2->prev; 125 126 // check whether cl2 is the list head 127 if (cluster_list == cl2) cluster_list = cl2->next; 128 129 130 // hashq_free(cl2->hq, 0, NULL); 131 free(cl2->hq); 132 cluster_free(cl2, 0, NULL); 133 num_of_clusters--; 134 135 return(dst); 136 } 137 138 139 void clusterlist_delete(cluster *list, u_char list_files) { 140 cluster *cl; 76 141 77 142 while(list) { 78 143 cl = list; 79 printf("Cluster has %u entries.\n", 
cl->cnt);80 if (list_files)81 for (i=0; i<cl->cnt; printf(" %s\n", cl->entries[i++]->filename));82 83 free(list->entries);84 144 list = cl->next; 85 free(cl); 145 printf("Cluster has %u entries.\n", cl->hq->size); 146 cluster_free(cl, list_files, cluster_hashq_free); 86 147 } 87 148 } nebula/trunk/src/cluster.h
r1375 r1414 28 28 #include <stdlib.h> 29 29 30 #include "hashlist.h" 30 #include "hashq.h" 31 31 #include "trie.h" 32 32 … … 36 36 typedef struct cluster { 37 37 u_int16_t cnt; 38 struct cluster *prev; 38 39 struct cluster *next; 39 hash_list **entries; 40 hashq *hq; 40 41 } cluster; 41 42 42 43 43 cluster *extend_cluster(hash_list *md5sum1, hash_list *md5sum2); 44 void clusterlist_delete(cluster *list); 44 inline cluster *create_hashlist(hash *hl1, hash *hl2); 45 cluster *add_entry_to_cluster(cluster *cl, hash *entry); 46 cluster *create_cluster(hash *l1, hash *l2); 47 cluster *clusters_merge(cluster *dst, cluster *new); 48 void clusterlist_delete(cluster *list, u_char list_files); 45 49 46 50 #endif nebula/trunk/src/nebula.c
r1375 r1414 28 28 #include <sys/stat.h> 29 29 #include <sys/types.h> 30 #include <sys/queue.h> 30 31 #include <string.h> 31 32 #include <unistd.h> 32 33 33 34 #include "cluster.h" 34 #include "hashlist.h"35 35 #include "md5.h" 36 36 #include "nebula.h" … … 48 48 49 49 int main(int argc, char *argv[]) { 50 int i ;50 int i, qsize, show_progress; 51 51 u_char *content, *tmpbuf; 52 52 FILE *md5sum_file, *spamsum_file; 53 char option, *curfile; 54 struct dirent *dir_entry; 55 struct stat statbuf; 56 hash *tmp_hash; 57 qelem *cur_hq, *tmp_hq; 58 cluster *cl; 59 double score; 53 60 DIR *dirp; 54 61 trie_node *t; 55 62 bstring bstr; 56 char option, *curfile; 57 struct dirent *dir_entry; 58 struct stat statbuf; 59 ss_match spamsum_match; 60 hash_list *md5sum1, *md5sum2, *spamsum1, *spamsum2; 61 cluster *cl; 63 62 64 63 65 spamsum_file = NULL; … … 65 67 dirp = NULL; 66 68 67 spamsum_list = NULL; 68 md5sum_list = NULL; 69 outlierq = NULL; 69 70 cluster_list = NULL; 70 71 71 72 content = NULL; 72 verbose = 0;73 list_files = 0;74 73 num_of_files = 0; 75 74 num_of_clusters = 0; 76 cluster_radius = 95.0;77 75 i = 0; 78 79 memset(&spamsum_trie, 0, sizeof(trie_node)); 76 qsize = 0; 77 total_files = 0; 78 79 /* default values for parameters */ 80 verbose = 0; // don't be verbose 81 show_progress = 0; // don't show progress dots 82 list_files = 0; // don't list cluster objects 83 cluster_radius = 95.0; // 95% similarity as cluster criteria 84 outlierq_max = 500000; 85 clusterq_max = 500000; 86 80 87 memset(&md5sum_trie, 0, sizeof(trie_node)); 81 memset(&spamsum_match, 0, sizeof(ss_match));82 88 83 89 // process args 84 while((option = getopt(argc, argv, " lvd:r:h?")) > 0) {90 while((option = getopt(argc, argv, "c:q:plvd:r:h?")) > 0) { 85 91 switch(option) { 92 case 'c': 93 clusterq_max = atoi(optarg); 94 if (clusterq_max < 2) { 95 fprintf(stderr, "Error - Maximum cluster size must be at least 2.\n"); 96 exit(EXIT_FAILURE); 97 } 98 break; 86 99 case 'd': 87 100 if ((dirp = 
opendir(optarg)) == NULL) { … … 104 117 } 105 118 break; 119 case 'q': 120 outlierq_max = atoi(optarg); 121 if (outlierq_max < 1) { 122 fprintf(stderr, "Error - Outlier queue size must be a non-negative value.\n"); 123 exit(EXIT_FAILURE); 124 } 125 break; 126 case 'p': 127 show_progress = 1; 128 break; 106 129 case 'v': 107 130 verbose = 1; … … 117 140 set_signal_handlers(); 118 141 142 /* initialize outlier queue */ 143 outlierq = hashq_new(); 144 119 145 if (!dirp) { 120 146 if ((argc - optind) < 1) usage(argv[0], EXIT_FAILURE); … … 122 148 } 123 149 150 if (dirp) { 151 while ((dir_entry = readdir(dirp)) != NULL) if (dir_entry->d_type == 8) total_files++; 152 rewinddir(dirp); 153 printf("processing %u files.\n", (unsigned int) total_files); 154 } 155 156 if (!verbose && show_progress) { 157 printf("files processed: "); 158 fflush(stdout); 159 } 160 124 161 // process files 125 162 for (;;) { 163 // get next file content 126 164 if (dirp) { 127 165 if ((dir_entry = readdir(dirp)) == NULL) break; … … 138 176 } 139 177 140 if (verbose) printf(" file %s:\n", curfile);141 142 178 bstr = bstr_map(curfile); 143 179 num_of_files++; 144 180 145 md5sum1 = NULL; 146 md5sum2 = NULL; 147 spamsum1 = NULL; 148 spamsum2 = NULL; 149 cl = NULL; 150 151 // build md5sum trie 181 if (verbose) printf(" processing file %s.\n", curfile); 182 else if (show_progress) { 183 printf("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b"); 184 if (num_of_files < total_files) 185 printf("%05.2f%% (%04d clusters)", num_of_files/total_files*100, num_of_clusters); 186 else 187 printf("100.00%% (%04d clusters)\n", num_of_clusters); 188 fflush(stdout); 189 } 190 191 192 // calculate md5sum 152 193 if ((tmpbuf = (u_char *) mem_md5sum(bstr.data, bstr.len)) == NULL) { 153 194 fprintf(stderr, "Error - Unable to allocate memory: %m.\n"); 154 195 exit(EXIT_FAILURE); 155 196 } 197 156 198 if ((t = trie_find_memstr(&md5sum_trie, tmpbuf, strlen((char *)tmpbuf))) != NULL) { 199 157 200 // md5sum is already in trie 
158 201 free(tmpbuf); 159 ((hash _list*)t->data)->cnt++;160 if (verbose) printf(" md5sum is %s (%u instances)\n", ((hash _list*)t->data)->hash, ((hash_list*)t->data)->cnt);202 ((hash*)t->data)->cnt++; 203 if (verbose) printf(" md5sum is %s (%u instances)\n", ((hash*)t->data)->md5sum, ((hash*)t->data)->cnt); 161 204 if (verbose) printf(" absolute match found.\n"); 162 163 /* increase cluster counter */164 // if (((hash_list*)t->data)->cl) ((hash_list*)t->data)->cl->cnt++;165 205 } else { 166 // insert md5sum into trie206 // md5sum not in trie, create new element 167 207 t = trie_memins(&md5sum_trie, tmpbuf, strlen((char *)tmpbuf), NULL); 168 md5sum_list = t->data = hashlist_new_entry(md5sum_list); 169 md5sum2 = (hash_list*)t->data; 170 171 ((hash_list*)t->data)->hashlen = strlen((char *)tmpbuf); 172 ((hash_list*)t->data)->hash = (char *) tmpbuf; 173 ((hash_list*)t->data)->cnt++; 174 if ((((hash_list*)t->data)->filename = strdup(curfile)) == NULL) { 208 if ((t->data = calloc(1, sizeof(hash))) == NULL) { 175 209 fprintf(stderr, "Error - Unable to allocate memory: %m.\n"); 176 } 177 if (verbose) printf(" md5sum is %s (%u instances)\n", md5sum2->hash, md5sum2->cnt); 178 179 180 // build spamsum trie 181 if ((tmpbuf = (u_char *) spamsum(bstr.data, bstr.len, 0)) == NULL) { 210 exit(EXIT_FAILURE); 211 } 212 ((hash*)t->data)->hashlen = 32; 213 ((hash*)t->data)->md5sum = (char *) tmpbuf; 214 ((hash*)t->data)->cnt++; 215 // set filename 216 if ((((hash*)t->data)->filename = strdup(curfile)) == NULL) { 217 fprintf(stderr, "Error - Unable to allocate memory: %m.\n"); 218 exit(EXIT_FAILURE); 219 } 220 // set spamsum hash 221 if ((((hash*)t->data)->spamsum = spamsum(bstr.data, bstr.len, 0)) == NULL) { 182 222 fprintf(stderr, "Error - Unable to allocate memoory: %m.\n"); 183 223 exit(EXIT_FAILURE); 184 224 } 185 if (((t = trie_find_memstr(&spamsum_trie, tmpbuf, strlen((char *)tmpbuf))) != NULL) && (t->data)) { 186 //spamsum is already in trie 187 free(tmpbuf); 188 
spamsum_match.score = 100.0; 189 spamsum_match.entry = (hash_list*)t->data; 190 spamsum2 = spamsum_match.entry; 191 } else { 192 // find best match 193 spamsum_match = spamsum_match_list(spamsum_list, (char *) tmpbuf, 100); 194 195 // insert spamsum 196 t = trie_memins(&spamsum_trie, tmpbuf, strlen((char *)tmpbuf), NULL); 197 spamsum_list = t->data = hashlist_new_entry(spamsum_list); 198 spamsum2 = (hash_list*)t->data; 199 spamsum2->neighbour_hash = md5sum2; 200 201 ((hash_list*)t->data)->hashlen = strlen((char *)tmpbuf); 202 ((hash_list*)t->data)->hash = (char *) tmpbuf; 203 if ((((hash_list*)t->data)->filename = strdup(curfile)) == NULL) { 204 fprintf(stderr, "Error - Unable to allocate memory: %m.\n"); 205 } 206 } 207 if (verbose) printf(" highest spamsum match score is %.0f.\n", spamsum_match.score); 208 md5sum2->neighbour_hash = spamsum2; 209 spamsum1 = spamsum_match.entry; 210 211 if (spamsum1) { 212 md5sum1 = spamsum1->neighbour_hash; 213 214 if (spamsum_match.score >= cluster_radius) { 215 if ((cl = extend_cluster(md5sum1, md5sum2)) == NULL) { 216 fprintf(stderr, "Error - Unable to extend cluster.\n"); 225 if (verbose) printf(" md5sum is %s (%u instances)\n", ((hash*)t->data)->md5sum, ((hash*)t->data)->cnt); 226 227 // connect all clusters within range 228 for (cl = cluster_list; cl; cl = cl->next) { 229 for (cur_hq = cl->hq->head; cur_hq; cur_hq = cur_hq->next) 230 if ((score = spamsum_match(((hash*)t->data)->spamsum, ((hash*)cur_hq->data)->spamsum)) >= cluster_radius) { 231 if (!((hash*)t->data)->cl) { 232 add_entry_to_cluster(cl, (hash*)t->data); 233 break; 234 } else { 235 if (cl != ((hash*)t->data)->cl) { 236 clusters_merge(cl, ((hash*)t->data)->cl); 237 } 238 break; 239 } 240 } 241 } 242 243 // connect all outliers within range 244 for (cur_hq = outlierq->head; cur_hq; cur_hq = cur_hq->next) { 245 if ((score = spamsum_match(((hash*)t->data)->spamsum, ((hash*)cur_hq->data)->spamsum)) >= cluster_radius) { 246 // unlink match from outlier list 247 tmp_hq 
= cur_hq; 248 cur_hq = cur_hq->next; 249 if ((tmp_hash = hashq_unlink(outlierq, tmp_hq)) == NULL) { 250 fprintf(stderr, "Error - Unable to unlink outlier from queue.\n"); 217 251 exit(EXIT_FAILURE); 218 252 } 219 if (verbose) printf(" cluster has now %u elements.\n", cl->cnt); 220 } 221 } 222 223 ((hash_list*)t->data)->cnt++; 224 if (verbose) printf(" spamsum is %s (%u instances)\n", ((hash_list*)t->data)->hash, ((hash_list*)t->data)->cnt); 225 } 226 253 if (((hash*)t->data)->cl) { 254 // add other outliers to cluster 255 if (add_entry_to_cluster(((hash*)t->data)->cl, tmp_hash) == NULL) 256 hash_free(tmp_hash, 0); 257 } else { 258 // create new cluster of two outliers 259 create_cluster(((hash*)t->data), tmp_hash); 260 } 261 } 262 if (!cur_hq) break; 263 } 264 265 266 if (verbose) printf(" spamsum is %s (%u instances)\n", ((hash*)t->data)->spamsum, ((hash*)t->data)->cnt); 267 268 if (!((hash*)t->data)->cl) { 269 // insert outlier into queue 270 if (outlierq->size >= outlierq_max) { 271 tmp_hash = hashq_unlink(outlierq, outlierq->tail); 272 trie_del_memstr(&md5sum_trie, (u_char *) tmp_hash->md5sum, strlen(tmp_hash->md5sum)); 273 trie_del_memstr(&spamsum_trie, (u_char *) tmp_hash->spamsum, strlen(tmp_hash->spamsum)); 274 hash_free(tmp_hash, 0); 275 } 276 hashq_ins(outlierq, t->data, outlierq_max); 277 } 278 } 227 279 bstr_unmap(bstr); 228 280 if (verbose) printf("\n"); nebula/trunk/src/nebula.h
r1375 r1414 27 27 28 28 #include <sys/types.h> 29 #include <sys/queue.h> 29 30 30 31 #include "cluster.h" 32 #include "hashq.h" 31 33 32 u_char verbose, list_files; 33 u_int16_t num_of_files, num_of_clusters; 34 double cluster_radius; 35 trie_node spamsum_trie, md5sum_trie; 36 cluster *cluster_list; 34 u_char verbose, list_files; 35 int clusterq_max, outlierq_max; 36 u_int16_t num_of_clusters; 37 float num_of_files, total_files; 38 double cluster_radius; 39 trie_node spamsum_trie, md5sum_trie; 40 cluster *cluster_list; 41 hashq *outlierq; 37 42 38 43 #endif nebula/trunk/src/signals.c
r1376 r1414 30 30 31 31 void handle_signal(int sig) { 32 printf("Premature termination forced .\n");32 printf("Premature termination forced (signal %d caught).\n", sig); 33 33 cleanup(); 34 34 exit(EXIT_SUCCESS); nebula/trunk/src/spamsum.c
r1373 r1414 27 27 #include <stdio.h> 28 28 29 #include "hashlist.h"30 29 #include "spamsum.h" 31 30 … … 284 283 285 284 286 /* return the maximum match for a linked list containing a list of spamsums */287 ss_match spamsum_match_list(hash_list *list, const char *sum, u_int32_t threshold) {288 ss_match rv;289 u_int32_t score, best;290 291 score = 0;292 best = 0;293 memset(&rv, 0, sizeof(ss_match));294 295 while (list) {296 score = spamsum_match(sum, list->hash);297 if (score >= best) {298 best = score;299 rv.score = best;300 rv.entry = list;301 if (best >= threshold) break;302 }303 list = list->next;304 }305 306 return(rv);307 }308 309 310 285 /* return the maximum match for a file containing a list of spamsums */ 311 286 u_int32_t spamsum_match_db(const char *fname, const char *sum, u_int32_t threshold) { … … 413 388 100 being a excellent match. */ 414 389 score = 100 - score; 415 416 /* when the blocksize is small we don't want to exaggerate the match size */417 if (score > block_size/MIN_BLOCKSIZE * MIN(len1, len2)) {418 score = block_size/MIN_BLOCKSIZE * MIN(len1, len2);419 }420 390 421 391 return score; nebula/trunk/src/spamsum.h
r1373 r1414 26 26 #endif 27 27 28 #include "hashlist.h" 29 30 typedef struct ss_match { 31 double score; 32 hash_list *entry; 33 } ss_match; 28 #include "hashq.h" 34 29 35 30 char *spamsum(const u_char *in, size_t length, u_int32_t bsize); 36 31 u_int32_t spamsum_match(const char *str1, const char *str2); 37 ss_match spamsum_match_list(hash_list *list, const char *sum, u_int32_t threshold);38 32 u_int32_t spamsum_match_db(const char *fname, const char *sum, u_int32_t threshold); 39 33 nebula/trunk/src/trie.c
r1376 r1414 30 30 31 31 32 void print_trie(const trie t, int depth) { 33 int i, j; 34 for(i=0; i<t->childlist_len; i++) { 35 if (t->childlist_len > 1) for (j=0; j<depth; j++) printf(" "); 36 putchar(t->childlist[i].key); 37 print_trie(&t->childlist[i], depth+1); 38 if (t->childlist_len > 1) putchar('\n'); 39 } 40 return; 41 } 42 43 32 44 /* find and return trie node for key */ 33 45 trie trie_find_node(const trie t, const u_int16_t size, const u_char key) { … … 35 47 u_int16_t high, low; 36 48 37 if (t == NULL ) return(NULL);49 if (t == NULL || size == 0) return(NULL); 38 50 39 51 for (low=0, high=(size-1); high-low>0; ) { … … 55 67 56 68 node = t; 57 for (k=0; k<n && node != NULL; k++) 69 for (k=0; k<n && node != NULL; k++) { 58 70 node = trie_find_node(node->childlist, node->childlist_len, data[k]); 71 if (node);// printf("%d: node is '%c'\n", k, node->key); 72 else break; 73 } 59 74 60 75 return(node); 76 } 77 78 79 /* delete a path from a trie */ 80 int trie_del_memstr(trie t, const u_char* data, u_int16_t n) { 81 u_int16_t i = 0; 82 u_int16_t high, low; 83 trie node = t; 84 85 if (!t || !data) return(0); 86 if (!t->childlist) return(1); 87 88 i = 0; 89 for (low=0, high=(node->childlist_len-1); high-low>0; ) { 90 i = (high+low)/2; 91 if (*data <= node->childlist[i].key) high = i; 92 else{ 93 i = (high+low+1)/2; 94 low = i; 95 } 96 } 97 98 if (node->childlist[i].key == data[0]) { 99 /* check subtrie recursively */ 100 if (!trie_del_memstr(&node->childlist[i], &data[1], n-1)) return(0); 101 102 /* if subpath was deleted, delete node from childlist */ 103 node->childlist_len--; 104 if (node->childlist_len) { 105 if (node->childlist_len > i) memmove(&(node->childlist[i]), &(node->childlist[i+1]), (node->childlist_len-i) * sizeof(trie_node)); 106 if ((node->childlist = realloc(node->childlist, node->childlist_len * sizeof(trie_node))) == NULL) { 107 fprintf(stderr, "Error - Unable to reallocate memory: %m.\n"); 108 exit(EXIT_FAILURE); 109 } 110 return(0); 111 } else 
{ 112 free(node->childlist); 113 node->childlist = NULL; 114 return(1); 115 } 116 } 117 118 return(0); 61 119 } 62 120 nebula/trunk/src/trie.h
r1373 r1414 37 37 38 38 39 void print_trie(const trie t, int depth); 40 39 41 trie trie_find_node(const trie t, const u_int16_t size, const u_char key); 40 42 trie trie_find_memstr(trie t, const u_char* data, u_int16_t n); 43 int trie_del_memstr(trie t, const u_char* data, u_int16_t n); 41 44 trie trie_insert_node(trie parent, const u_char key); 42 45 trie trie_memins(trie t, const u_char *data, ssize_t n, void(*cbfn)(trie t)); nebula/trunk/src/util.c
r1375 r1414 28 28 29 29 #include "cluster.h" 30 #include "hashq.h" 30 31 #include "nebula.h" 31 32 #include "trie.h" … … 75 76 void cleanup(void) { 76 77 // free data structures 77 printf("%u files form %u clustes.\n-----------------------\n", num_of_files, num_of_clusters); 78 clusterlist_delete(cluster_list); 78 printf("%u files form %u clustes.\n-----------------------\n", (unsigned int) num_of_files, num_of_clusters); 79 79 80 hashlist_delete(spamsum_list); 81 hashlist_delete(md5sum_list); 80 hashq_free(outlierq, 0, hash_free); 81 clusterlist_delete(cluster_list, list_files); 82 trie_delete(md5sum_trie.childlist, md5sum_trie.childlist_len, NULL); 82 83 trie_delete(spamsum_trie.childlist, spamsum_trie.childlist_len, NULL); 83 trie_delete(md5sum_trie.childlist, md5sum_trie.childlist_len, NULL);84 84 85 85 return; nebula/trunk/src/util.h
r1375 r1414 27 27 28 28 typedef struct bstring { 29 u_int16_t len; 29 u_int32_t len; 30 30 u_char *data; 31 31 } bstring;
