nemea-common  1.6.3
prefix_tree.h
Go to the documentation of this file.
1 
7 /*
8  * Copyright (C) 2014 CESNET
9  *
10  * LICENSE TERMS
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  * notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  * notice, this list of conditions and the following disclaimer in
19  * the documentation and/or other materials provided with the
20  * distribution.
21  * 3. Neither the name of the Company nor the names of its contributors
22  * may be used to endorse or promote products derived from this
23  * software without specific prior written permission.
24  *
25  * ALTERNATIVELY, provided that this notice is retained in full, this
26  * product may be distributed under the terms of the GNU General Public
27  * License (GPL) version 2 or later, in which case the provisions
28  * of the GPL apply INSTEAD OF those given above.
29  *
30  * This software is provided ``as is'', and any express or implied
31  * warranties, including, but not limited to, the implied warranties of
32  * merchantability and fitness for a particular purpose are disclaimed.
33  * In no event shall the company or contributors be liable for any
34  * direct, indirect, incidental, special, exemplary, or consequential
35  * damages (including, but not limited to, procurement of substitute
36  * goods or services; loss of use, data, or profits; or business
37  * interruption) however caused and on any theory of liability, whether
38  * in contract, strict liability, or tort (including negligence or
39  * otherwise) arising in any way out of the use of this software, even
40  * if advised of the possibility of such damage.
41  *
42  */
43 #ifndef _PREFIX_TREE_
44 #define _PREFIX_TREE_
45 
46 
47 
48 #include <stdio.h>
49 #include <stdint.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include <getopt.h>
53 #include <time.h>
54 #include <inttypes.h>
55 #include <sys/types.h>
56 #include <sys/stat.h>
57 //#include "tunnel_detection_dns_structs.h"
58 
59 
60 
65 #define COUNT_OF_LETTERS_IN_DOMAIN 256 /*< Max count of letter in domain. */
66 #define MAX_SIZE_OF_DOMAIN 256 /*< Max length of domain */
67 #define MAX_SIZE_OF_DEGREE 5 /*< Max size of stored information */
68 #define ADD_TO_LIST_FROM_COUNT_OF_SEARCH 20 /*< Default number of histogram size. */
69 #define ADD_TO_LIST_FROM_COUNT_OF_DIFFERENT_SUBDOMAINS 10 /*< Default number of histogram size. */
70 #define MAX_COUNT_TO_BE_IN_JUST_ONE_SEARCHER 10 /*< Default number of histogram size. */
71 #define PREFIX 1
72 #define SUFFIX 0
73 /* /} */
74 
78 typedef enum {
82 
86 typedef enum {
90 
93 
99  unsigned char length; /*< length of stored string */
100  unsigned int count_of_string; /*< count of different substrings */
101  unsigned char count_of_children;
102  char * string; /*< stored string in reverse way (end of string is on postion 0)*/
103  prefix_tree_inner_node_t * parent; /*< Pointer to parent */
104  prefix_tree_domain_t * parent_is_domain; /*< Pointer to parent, if NULL parent is inner node, else parent is domain name node*/
105  prefix_tree_inner_node_t ** child; /*< Pointer to descendants*/
106  prefix_tree_domain_t * domain; /*< if this string is domain, this variable conatin poiter to domain name structure*/
107  void * value; /*< pointer to user value */
108 };
109 
114 typedef struct node_domain_extension_t {
115  prefix_tree_domain_t * most_used_domain_less; /*< position in linked list, pointer to next domain, which has lower count of seatching*/
116  prefix_tree_domain_t * most_used_domain_more; /*< position in linked list, pointer to previous domain, which has higher count of seatching*/
117  prefix_tree_domain_t * most_subdomains_less; /*< position in linked list, pointer to next domain, which has lower count of descendants*/
118  prefix_tree_domain_t * most_subdomains_more; /*< position in linked list, pointer to previous domain, which has higher count of descendants*/
120 
126  unsigned char exception; /*< 1 for exception in detection, 0 for classic domain */
127  unsigned char degree; /*< degree of domain */
128  unsigned int count_of_insert; /*< count of searching this domain name */
129  unsigned int count_of_different_subdomains; /*< count of descendants - subdomains */
130  prefix_tree_inner_node_t * parent; /*< pointer to parent (last character of domain) */
131  prefix_tree_domain_t * parent_domain; /*< pointer to parent (domain name) */
132  prefix_tree_inner_node_t *child; /*< pointer to descendats */
133  void * value; /*< pointer to user value - specified by init function */
134  node_domain_extension_t * domain_extension; /*< pointer to structure with linked list of domains*/
135 };
136 
142 typedef struct tree_domain_extension_t {
143  prefix_tree_domain_t * list_of_most_used_domains; /*< list of most searched domains*/
144  prefix_tree_domain_t * list_of_most_used_domains_end; /*< list of most searched domains - pointer to end of the list*/
145  prefix_tree_domain_t * list_of_most_unused_domains; /*< list of most searched domains end - most unsearched domains*/
146  prefix_tree_domain_t ** list_of_most_subdomains; /*< list of domains, which has most subdomains, it is 2D linked list. 1.D is degree of domain name, 2. is linked list*/
147  prefix_tree_domain_t ** list_of_most_subdomains_end; /*< list of domains, which has most subdomains, it is 2D linked list. Pointers on the end of lists*/
149 
155 typedef struct prefix_tree_t {
156  prefix_tree_inner_node_t * root; /*< Pointer to root node. */
157  unsigned int size_of_value; /*< size of value stored for every domain node */
158  int domain_separator; /*< separator in text, which creares domain node */
159  unsigned char prefix_suffix; /*< prefix or suffix tree */
160  unsigned int count_of_domain_searched_just_ones; /*< Count of domain, witch were searcehd just ones */
161  unsigned int count_of_inserting; /*< Count of inserting and searching (all domains) */
162  unsigned int count_of_inserting_for_just_ones; /*< Count of inserting, but for percent value with variable count_of_domain_searched_just_ones*/
163  unsigned int count_of_different_domains; /*< Count of unique domains*/
164  tree_domain_extension_t * domain_extension; /*< Lists of most used domains etc.*/
166 } prefix_tree_t;
167 
168 
175 int prefix_tree_map_character_to_number(unsigned char letter);
176 
186 prefix_tree_t * prefix_tree_initialize(unsigned char prefix_suffix, unsigned int size_of_value, int domain_separator, int domain_extension, relaxation_after_delete relaxation);
194 void prefix_tree_decrease_counters_deleted_inner_node(prefix_tree_inner_node_t * node, int deleted_strings, int deleted_domains);
195 
203 
212 
221 
228 
236 
246 
255 
263 
271 
281 int prefix_tree_count_to_domain_separator(const char *string, int length, int domain_separator, char prefix);
282 
293 prefix_tree_domain_t * prefix_tree_add_new_item(prefix_tree_inner_node_t * node ,prefix_tree_domain_t * domain ,const char *string, int length, prefix_tree_t * tree);
294 
295 
305 
314 char * prefix_tree_read_string(prefix_tree_t * tree, prefix_tree_domain_t * domain, char * string);
315 
316 
317 
326 char * prefix_tree_read_inner_node(prefix_tree_t * tree, prefix_tree_inner_node_t * node, char * string);
327 
339 prefix_tree_domain_t * prefix_tree_add_domain_recursive_prefix(prefix_tree_inner_node_t * node, prefix_tree_domain_t * domain_parent, const char *string, int length, prefix_tree_t * tree);
340 
341 
354 
363 prefix_tree_domain_t * prefix_tree_insert(prefix_tree_t * tree, const char *string, int length);
364 
373 prefix_tree_domain_t * prefix_tree_search(prefix_tree_t * tree, const char *string, int length);
374 
383 prefix_tree_domain_t * prefix_tree_add_string_exception(prefix_tree_t * tree, const char *string, int length);
384 
393 int prefix_tree_is_string_in_exception(prefix_tree_t * tree, const char *string, int length);
394 
403 
404 
412 
413 
414 
415 
416 
417 
418 #endif /* _PREFIX_TREE_ */
unsigned int count_of_domain_searched_just_ones
Definition: prefix_tree.h:160
Structure - Prefix tree main structure Structure used to keep information about prefix tree...
Definition: prefix_tree.h:155
prefix_tree_inner_node_t ** child
Definition: prefix_tree.h:105
prefix_tree_domain_t * list_of_most_used_domains
Definition: prefix_tree.h:143
prefix_tree_inner_node_t * prefix_tree_most_substring(prefix_tree_inner_node_t *node)
Returns inner node with most of different strings Function returns pointer to inner node...
domain_extension
Definition: prefix_tree.h:78
Structure - domain extension Structure keeps pointers in linked list to other domain nodes...
Definition: prefix_tree.h:114
prefix_tree_inner_node_t * child
Definition: prefix_tree.h:132
prefix_tree_domain_t * prefix_tree_add_domain_recursive_prefix(prefix_tree_inner_node_t *node, prefix_tree_domain_t *domain_parent, const char *string, int length, prefix_tree_t *tree)
Add domain recursive (prefix tree) Function adds domain to the prefix tree. This function is called f...
unsigned int count_of_string
Definition: prefix_tree.h:100
double prefix_tree_most_used_domain_percent_of_subdomains(prefix_tree_t *tree, int depth)
Statistic function percent od subdomains in certain depth Function returns percent of subdomains in m...
void prefix_tree_decrease_counters_deleted_inner_node(prefix_tree_inner_node_t *node, int deleted_strings, int deleted_domains)
Decrease counters in prefix tree, if deleting a node. Function that goes from the node to the root an...
prefix_tree_domain_t * list_of_most_unused_domains
Definition: prefix_tree.h:145
Structure - Prefix tree - inner node structure Structure used to keep information about inner node of...
Definition: prefix_tree.h:98
relaxation_after_delete relaxation
Definition: prefix_tree.h:165
prefix_tree_inner_node_t * join_nodes(prefix_tree_inner_node_t *node)
Function joins two nodes into one. If parent has just one child, this function join them into one nod...
void prefix_tree_destroy(prefix_tree_t *tree)
Destroy function for prefix tree Function destroy prefix tree and all item inside.
prefix_tree_domain_t * prefix_tree_new_domain(prefix_tree_inner_node_t *node, prefix_tree_domain_t *domain_parent, prefix_tree_t *tree)
Create domain node structure Function Create domian and connects it to the tree.
int prefix_tree_destroy_recursive(prefix_tree_t *tree, prefix_tree_inner_node_t *node)
Destroy all items in prefix tree Function destroy recursively destroies all nodes.
prefix_tree_domain_t * most_subdomains_more
Definition: prefix_tree.h:118
prefix_tree_inner_node_t * prefix_tree_new_node(prefix_tree_inner_node_t *parent, int map_number)
Create inner node structure Function Create inned node and connects it to the tree.
node_domain_extension_t * domain_extension
Definition: prefix_tree.h:134
prefix_tree_t * prefix_tree_initialize(unsigned char prefix_suffix, unsigned int size_of_value, int domain_separator, int domain_extension, relaxation_after_delete relaxation)
Init function for prefix tree Function that incialize prefix tree.
int prefix_tree_map_character_to_number(unsigned char letter)
Map character to index Function maps character to index in descendants.
prefix_tree_inner_node_t * prefix_tree_add_children_array(prefix_tree_inner_node_t *parent)
Alloc memory for descendats in inner node Function allocs memory for descendats in inner node...
unsigned int count_of_inserting
Definition: prefix_tree.h:161
prefix_tree_inner_node_t * parent
Definition: prefix_tree.h:130
char * prefix_tree_read_inner_node(prefix_tree_t *tree, prefix_tree_inner_node_t *node, char *string)
Read string from inner node Function return string stored in given inner node.
prefix_tree_domain_t * most_used_domain_more
Definition: prefix_tree.h:116
unsigned int count_of_inserting_for_just_ones
Definition: prefix_tree.h:162
unsigned char length
Definition: prefix_tree.h:99
void prefix_tree_delete_inner_node(prefix_tree_t *tree, prefix_tree_inner_node_t *node)
Function which removes inner node and his descendats. Function will erase specified node and all his ...
prefix_tree_domain_t * most_subdomains_less
Definition: prefix_tree.h:117
tree_domain_extension_t * domain_extension
Definition: prefix_tree.h:164
prefix_tree_domain_t * parent_is_domain
Definition: prefix_tree.h:104
Structure - Prefix tree - domain name structure Structure used to keep information about domain names...
Definition: prefix_tree.h:125
prefix_tree_domain_t * domain
Definition: prefix_tree.h:106
prefix_tree_domain_t * prefix_tree_search(prefix_tree_t *tree, const char *string, int length)
Seacrh domain in prefix tree Function adds domain to the prefix tree.
int prefix_tree_count_to_domain_separator(const char *string, int length, int domain_separator, char prefix)
Count length of string to dot Function counts length of string to dot.
prefix_tree_domain_t ** list_of_most_subdomains_end
Definition: prefix_tree.h:147
Structure - Domain names extension Structure used to keep lists of most searched domains, subdomains and least searched subdomains information about prexix tree.
Definition: prefix_tree.h:142
prefix_tree_inner_node_t * prefix_tree_new_node_parent_is_domain(prefix_tree_domain_t *domain)
Create descendant of domain Function creates descendant of domain, (domain has other subdomains)...
prefix_tree_inner_node_t * parent
Definition: prefix_tree.h:103
unsigned int size_of_value
Definition: prefix_tree.h:157
prefix_tree_domain_t * prefix_tree_add_new_item(prefix_tree_inner_node_t *node, prefix_tree_domain_t *domain, const char *string, int length, prefix_tree_t *tree)
Add new item to prefix tree Function add new item to prefix tree (place where to add new domain has t...
relaxation_after_delete
Definition: prefix_tree.h:86
prefix_tree_domain_t ** list_of_most_subdomains
Definition: prefix_tree.h:146
prefix_tree_inner_node_t * prefix_tree_split_node_into_two(prefix_tree_inner_node_t *node, int index)
Split node into two nodes Function splits node into two nodes, on the given position. This function is needed when inserting new node, which has coomon part of string with some node.
prefix_tree_domain_t * most_used_domain_less
Definition: prefix_tree.h:115
unsigned int count_of_insert
Definition: prefix_tree.h:128
prefix_tree_domain_t * prefix_tree_add_string_exception(prefix_tree_t *tree, const char *string, int length)
Add domain to prefix tree and set it to the exception state Function adds domain to the prefix tree a...
void prefix_tree_recursive_plus_domain(prefix_tree_domain_t *domain_parent, prefix_tree_t *tree)
Recursive change info about parent doimains Function actualize information in parent domains...
struct node_domain_extension_t node_domain_extension_t
Structure - domain extension Structure keeps pointers in linked list to other domain nodes...
struct prefix_tree_t prefix_tree_t
Structure - Prefix tree main structure Structure used to keep information about prefix tree...
prefix_tree_domain_t * parent_domain
Definition: prefix_tree.h:131
unsigned char exception
Definition: prefix_tree.h:126
unsigned char degree
Definition: prefix_tree.h:127
prefix_tree_domain_t * prefix_tree_insert(prefix_tree_t *tree, const char *string, int length)
Add domain to prefix tree Function adds domain to the prefix tree.
unsigned int count_of_different_subdomains
Definition: prefix_tree.h:129
prefix_tree_domain_t * prefix_tree_add_domain_recursive_suffix(prefix_tree_inner_node_t *node, prefix_tree_domain_t *domain_parent, const char *string, int length, prefix_tree_t *tree)
Add domain recursive (suffix tree) Function adds domain to the prefix tree. This function is called f...
int prefix_tree_is_string_in_exception(prefix_tree_t *tree, const char *string, int length)
Test domain if is in exception state Function tests domain if is in exception state.
struct tree_domain_extension_t tree_domain_extension_t
Structure - Domain names extension Structure used to keep lists of most searched domains, subdomains and least searched subdomains information about prexix tree.
prefix_tree_domain_t * list_of_most_used_domains_end
Definition: prefix_tree.h:144
unsigned int count_of_different_domains
Definition: prefix_tree.h:163
unsigned char prefix_suffix
Definition: prefix_tree.h:159
int domain_separator
Definition: prefix_tree.h:158
unsigned char count_of_children
Definition: prefix_tree.h:101
prefix_tree_inner_node_t * root
Definition: prefix_tree.h:156
char * prefix_tree_read_string(prefix_tree_t *tree, prefix_tree_domain_t *domain, char *string)
Read domain from tree Function return string with the domain name.