Wireshark  4.3.0
The Wireshark network protocol analyzer
tvbparse.h
Go to the documentation of this file.
1 
14 /*
15  The intention behind this is to ease the writing of dissectors that have to
16  parse text without the need of writing into buffers.
17 
18  It was originally written to avoid using lex and yacc for the xml dissector.
19 
20  The parser is able to look for wanted elements; these can be:
21 
22  simple tokens:
23  - a char out of a string of needles
24  - a char not belonging to a string of needles
25  - a sequence of chars that belong to a set of chars
26  - a sequence of chars that do not belong to a set of chars
27  - a string
28  - a caseless string
29  - all the characters up to a certain wanted element (included or excluded)
30 
31  composed elements:
32  - one of a given group of wanted elements
33  - a sequence of wanted elements
34  - some (at least one) instances of a wanted element
35 
36  Once a wanted element is successfully extracted, by either tvbparse_get or
37  tvbparse_find, the parser will invoke a given callback
38  before, and another one after, the callbacks of all of its component
39  subelements are called.
40 
41  If tvbparse_get or tvbparse_find fail to extract the wanted element the
42  subelements callbacks are not going to be invoked.
43 
44  The wanted elements are instantiated once by the proto_register_xxx function.
45 
46  The parser is instantiated for every packet and it maintains its state.
47 
48  The element's data is destroyed before the next packet is dissected.
49  */
50 
51 #ifndef _TVB_PARSE_H_
52 #define _TVB_PARSE_H_
53 
54 #include <epan/tvbuff.h>
55 #include <glib.h>
56 #include "ws_symbol_export.h"
57 
58 typedef struct _tvbparse_elem_t tvbparse_elem_t;
60 typedef struct _tvbparse_t tvbparse_t;
61 
62 
63 /*
64  * a callback function to be called before or after an element has been
65  * successfully extracted.
66  *
67  * Note that if the token belongs to a composed token the callbacks of the
68  * components won't be called unless the composed token is successfully
69  * extracted.
70  *
71  * tvbparse_data: the private data of the parser
72  * wanted_data: the private data of the wanted element
73  * elem: the extracted element
74  */
75 typedef void (*tvbparse_action_t)(void* tvbparse_data, const void* wanted_data, struct _tvbparse_elem_t* elem);
76 
77 typedef int (*tvbparse_condition_t)
78 (tvbparse_t*, const int,
79  const tvbparse_wanted_t*,
80  tvbparse_elem_t**);
81 
82 
83 typedef enum {
84  TP_UNTIL_INCLUDE, /* last elem is included, its span is spent by the parser */
85  TP_UNTIL_SPEND, /* last elem is not included, but its span is spent by the parser */
86  TP_UNTIL_LEAVE /* last elem is not included, nor is its span spent by the parser */
87 } until_mode_t;
88 
89 
91  int id;
92  tvbparse_condition_t condition;
93 
94  union {
95  const gchar* str;
96  struct _tvbparse_wanted_t** handle;
97  struct {
98  union {
99  gint64 i;
100  guint64 u;
101  gdouble f;
102  } value;
103  gboolean (*comp)(void*,const void*);
104  void* (*extract)(tvbuff_t*,guint);
105  } number;
106  enum ftenum ftenum;
107  struct {
108  until_mode_t mode;
109  const tvbparse_wanted_t* subelem;
110  } until;
111  struct {
112  wmem_map_t* table;
113  struct _tvbparse_wanted_t* key;
114  struct _tvbparse_wanted_t* other;
115  } hash;
116  GPtrArray* elems;
117  const tvbparse_wanted_t* subelem;
118  void* p;
119  } control;
120 
121  int len;
122 
123  guint min;
124  guint max;
125 
126  const void* data;
127 
128  tvbparse_action_t before;
129  tvbparse_action_t after;
130 };
131 
132 /* an instance of a per packet parser */
133 struct _tvbparse_t {
134  wmem_allocator_t* scope; /* NOTE(review): presumably the memory scope/pool given to tvbparse_init -- confirm */
135  tvbuff_t* tvb; /* NOTE(review): presumably the tvb being parsed, as given to tvbparse_init -- confirm */
136  int offset; /* NOTE(review): presumably the current parsing offset into tvb -- confirm in tvbparse.c */
137  int end_offset; /* NOTE(review): presumably the offset at which parsing stops -- confirm in tvbparse.c */
138  void* data; /* NOTE(review): presumably the private data handed to the action callbacks as tvbparse_data -- confirm */
139  const tvbparse_wanted_t* ignore; /* NOTE(review): presumably the wanted token type to be ignored (see tvbparse_init) -- confirm */
140  int recursion_depth; /* NOTE(review): looks like a nesting-depth counter -- confirm how it is bounded in tvbparse.c */
141 };
142 
143 
144 /* a matching token returned by either tvbparse_get or tvbparse_find */
146  int id;
147 
148  tvbparse_t* parser;
149  tvbuff_t* tvb;
150  int offset;
151  int len;
152 
153  void* data;
154 
155  struct _tvbparse_elem_t* sub;
156 
157  struct _tvbparse_elem_t* next;
158  struct _tvbparse_elem_t* last;
159 
160  const tvbparse_wanted_t* wanted;
161 };
162 
163 
164 /*
165  * definition of wanted token types
166  *
167  * the following functions define the tokens we will be able to look for in a tvb
168  * common parameters are:
169  *
170  * id: an arbitrary id that will be copied to the eventual token (don't use 0)
171  * private_data: persistent data to be passed to the callback action (wanted_data)
172  * before_cb: a callback function to be called before those of the subelements
173  * after_cb: a callback function to be called after those of the subelements
174  */
175 
176 
177 /*
178  * a char element.
179  *
180  * When looked for it returns a simple element one character long if the char
181  * at the current offset matches one of the needles.
182  */
183 WS_DLL_PUBLIC
184 tvbparse_wanted_t* tvbparse_char(const int id,
185  const gchar* needles,
186  const void* private_data,
187  tvbparse_action_t before_cb,
188  tvbparse_action_t after_cb);
189 
190 /*
191  * a not_char element.
192  *
193  * When looked for it returns a simple element one character long if the char
194  * at the current offset does not match any of the needles.
195  */
196 WS_DLL_PUBLIC
197 tvbparse_wanted_t* tvbparse_not_char(const int id,
198  const gchar* needle,
199  const void* private_data,
200  tvbparse_action_t before_cb,
201  tvbparse_action_t after_cb);
202 
203 /*
204  * a chars element
205  *
206  * When looked for it returns a simple element one or more characters long if
207  * one or more char(s) starting from the current offset match one of the needles.
208  * An element will be returned if at least min_len chars are given (1 if it's 0)
209  * It will get at most max_len chars or as much as it can if max_len is 0.
210  */
211 WS_DLL_PUBLIC
212 tvbparse_wanted_t* tvbparse_chars(const int id,
213  const guint min_len,
214  const guint max_len,
215  const gchar* needles,
216  const void* private_data,
217  tvbparse_action_t before_cb,
218  tvbparse_action_t after_cb);
219 
220 /*
221  * a not_chars element
222  *
223  * When looked for it returns a simple element one or more characters long if
224  * one or more char(s) starting from the current offset do not match one of the
225  * needles.
226  * An element will be returned if at least min_len chars are given (1 if it's 0)
227  * It will get at most max_len chars or as much as it can if max_len is 0.
228  */
229 WS_DLL_PUBLIC
230 tvbparse_wanted_t* tvbparse_not_chars(const int id,
231  const guint min_len,
232  const guint max_len,
233  const gchar* needles,
234  const void* private_data,
235  tvbparse_action_t before_cb,
236  tvbparse_action_t after_cb);
237 
238 /*
239  * a string element
240  *
241  * When looked for it returns a simple element if we have the given string at
242  * the current offset
243  */
244 WS_DLL_PUBLIC
245 tvbparse_wanted_t* tvbparse_string(const int id,
246  const gchar* string,
247  const void* private_data,
248  tvbparse_action_t before_cb,
249  tvbparse_action_t after_cb);
250 
251 /*
252  * casestring
253  *
254  * When looked for it returns a simple element if we have a matching string at
255  * the current offset
256  */
257 WS_DLL_PUBLIC
258 tvbparse_wanted_t* tvbparse_casestring(const int id,
259  const gchar* str,
260  const void* data,
261  tvbparse_action_t before_cb,
262  tvbparse_action_t after_cb);
263 
264 /*
265  * until
266  *
267  * When looked for it returns a simple element containing all the characters
268  * found until the first match of the ending element if the ending element is
269  * found.
270  *
271  * When looking for until elements it calls tvbparse_find so it can be very slow.
272  *
273  * It won't have a subelement, the ending's callbacks won't get called.
274  */
275 
276 /*
277  * until_mode values determine how the terminating element and the current offset
278  * of the parser are handled
279  */
280 WS_DLL_PUBLIC
281 tvbparse_wanted_t* tvbparse_until(const int id,
282  const void* private_data,
283  tvbparse_action_t before_cb,
284  tvbparse_action_t after_cb,
285  const tvbparse_wanted_t* ending,
286  until_mode_t until_mode);
287 
288 /*
289  * one_of
290  *
291  * When looked for it will try to match to the given candidates and return a
292  * composed element whose subelement is the first match.
293  *
294  * The list of candidates is terminated with a NULL
295  *
296  */
297 WS_DLL_PUBLIC
298 tvbparse_wanted_t* tvbparse_set_oneof(const int id,
299  const void* private_data,
300  tvbparse_action_t before_cb,
301  tvbparse_action_t after_cb,
302  ...);
303 
304 /*
305  * hashed
306  */
307 WS_DLL_PUBLIC
308 tvbparse_wanted_t* tvbparse_hashed(const int id,
309  const void* data,
310  tvbparse_action_t before_cb,
311  tvbparse_action_t after_cb,
312  tvbparse_wanted_t* key,
313  tvbparse_wanted_t* other,
314  ...);
315 
316 WS_DLL_PUBLIC
317 void tvbparse_hashed_add(tvbparse_wanted_t* w, ...);
318 
319 /*
320  * sequence
321  *
322  * When looked for it will try to match in order all the given candidates. If
323  * every candidate is found in the given order it will return a composed
324  * element whose subelements are the matched elements.
325  *
326  * The list of candidates is terminated with a NULL.
327  *
328  */
329 WS_DLL_PUBLIC
330 tvbparse_wanted_t* tvbparse_set_seq(const int id,
331  const void* private_data,
332  tvbparse_action_t before_cb,
333  tvbparse_action_t after_cb,
334  ...);
335 
336 /*
337  * some
338  *
339  * When looked for it will try to match the given candidate at least min times
340  * and at most max times. If the given candidate is matched at least min times
341  * a composed element is returned.
342  *
343  */
344 WS_DLL_PUBLIC
345 tvbparse_wanted_t* tvbparse_some(const int id,
346  const guint min,
347  const guint max,
348  const void* private_data,
349  tvbparse_action_t before_cb,
350  tvbparse_action_t after_cb,
351  const tvbparse_wanted_t* wanted);
352 
353 #define tvbparse_one_or_more(id, private_data, before_cb, after_cb, wanted)\
354  tvbparse_some(id, 1, G_MAXINT, private_data, before_cb, after_cb, wanted)
355 
356 
357 /*
358  * handle
359  *
360  * this is a pointer to a pointer to a wanted element (that might not have
361  * been initialized yet) so that recursive structures can be defined
362  */
363 WS_DLL_PUBLIC
364 tvbparse_wanted_t* tvbparse_handle(tvbparse_wanted_t** handle);
365 
366 /* quoted
367  * this is a composed candidate, that will try to match a quoted string
368  * (included the quotes) including into it every escaped quote.
369  *
370  * C strings are matched with tvbparse_quoted(-1,NULL,NULL,NULL,'"','\\')
371  */
372 WS_DLL_PUBLIC
373 tvbparse_wanted_t* tvbparse_quoted(const int id,
374  const void* data,
375  tvbparse_action_t before_cb,
376  tvbparse_action_t after_cb,
377  const char quote,
378  const char escape);
379 
380 /*
381  * a helper callback for quoted strings that will shrink the token to contain
382  * only the string and not the quotes
383  */
384 WS_DLL_PUBLIC
385 void tvbparse_shrink_token_cb(void* tvbparse_data,
386  const void* wanted_data,
387  tvbparse_elem_t* tok);
388 
389 
390 
391 
392 /* initialize the parser (at every packet)
393  * scope: memory scope/pool
394  * tvb: what are we parsing?
395  * offset: from where
396  * len: for how many bytes
397  * private_data: will be passed to the action callbacks
398  * ignore: a wanted token type to be ignored (the associated cb WILL be called when it matches)
399  */
400 WS_DLL_PUBLIC
401 tvbparse_t* tvbparse_init(wmem_allocator_t *scope,
402  tvbuff_t* tvb,
403  const int offset,
404  int len,
405  void* private_data,
406  const tvbparse_wanted_t* ignore);
407 
408 /* reset the parser */
409 WS_DLL_PUBLIC
410 gboolean tvbparse_reset(tvbparse_t* tt, const int offset, int len);
411 
412 WS_DLL_PUBLIC
413 guint tvbparse_curr_offset(tvbparse_t* tt);
414 guint tvbparse_len_left(tvbparse_t* tt);
415 
416 
417 
418 /*
419  * This will look for the wanted token at the current offset or after any given
420  * number of ignored tokens returning FALSE if there's no match or TRUE if there
421  * is a match.
422  * The parser will be left in its original state and no callbacks will be called.
423  */
424 WS_DLL_PUBLIC
425 gboolean tvbparse_peek(tvbparse_t* tt,
426  const tvbparse_wanted_t* wanted);
427 
428 /*
429  * This will look for the wanted token at the current offset or after any given
430  * number of ignored tokens returning NULL if there's no match.
431  * if there is a match it will set the offset of the current parser after
432  * the end of the token
433  */
434 WS_DLL_PUBLIC
435 tvbparse_elem_t* tvbparse_get(tvbparse_t* tt,
436  const tvbparse_wanted_t* wanted);
437 
438 /*
439  * Like tvbparse_get but this will look for a wanted token even beyond the
440  * current offset.
441  * This function is slow.
442  */
443 WS_DLL_PUBLIC
444 tvbparse_elem_t* tvbparse_find(tvbparse_t* tt,
445  const tvbparse_wanted_t* wanted);
446 
447 
448 WS_DLL_PUBLIC
449 void tvbparse_tree_add_elem(proto_tree* tree, tvbparse_elem_t* curr);
450 
451 #endif
Definition: proto.h:897
Definition: tvbparse.h:145
Definition: tvbparse.h:133
Definition: tvbparse.h:90
Definition: wmem_allocator.h:27
Definition: wmem_map.c:44
Definition: tvbuff-int.h:35