1"""The `suricata_check.utils.regex` module contains regular expressions for matching various parts of rules."""
2
3import importlib.util
4import logging
5from collections.abc import Iterable, Sequence
6from functools import lru_cache
7
8import idstools.rule
9
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)

# Import the fastest regex provider available.
# The third-party `regex` module is used when it can be found; otherwise the
# standard-library `re` module is used. Either way the provider is bound to
# `_regex_provider` so the rest of the module does not care which one it got.
if importlib.util.find_spec("regex") is not None:
    _logger.info("Detected regex module as installed, using it.")
    import regex as _regex_provider
else:
    _logger.warning(
        """Did not detect regex module as installed, using re instead.
To increase suricata-check processing speed, consider installing the regex module \
by running `pip install suricata-check[performance]`.""",
    )
    import re as _regex_provider
23
# Value passed as `maxsize` to every `lru_cache`-decorated helper in this module.
LRU_CACHE_SIZE = 10
25
# Names of address variables (written as `$NAME` in a rule header).
ADDRESS_GROUPS = (
    "HOME_NET",
    "EXTERNAL_NET",
    "HTTP_SERVERS",
    "SMTP_SERVERS",
    "SQL_SERVERS",
    "DNS_SERVERS",
    "TELNET_SERVERS",
    "AIM_SERVERS",
    "DC_SERVERS",
    "DNP3_SERVER",
    "DNP3_CLIENT",
    "MODBUS_CLIENT",
    "MODBUS_SERVER",
    "ENIP_CLIENT",
    "ENIP_SERVER",
)

# Names of port variables (written as `$NAME` in a rule header).
PORT_GROUPS = (
    "HTTP_PORTS",
    "SHELLCODE_PORTS",
    "ORACLE_PORTS",
    "SSH_PORTS",
    "DNP3_PORTS",
    "MODBUS_PORTS",
    "FILE_DATA_PORTS",
    "FTP_PORTS",
    "GENEVE_PORTS",
    "VXLAN_PORTS",
    "TEREDO_PORTS",
)

# Every known variable name: address groups first, then port groups.
ALL_VARIABLES = (*ADDRESS_GROUPS, *PORT_GROUPS)
60
# Known classification names, i.e. values for the `classtype` rule option.
# NOTE(review): presumably mirrors Suricata's classification.config plus
# ruleset-specific additions - confirm against upstream when updating.
CLASSTYPES = (
    "not-suspicious",
    "unknown",
    "bad-unknown",
    "attempted-recon",
    "successful-recon-limited",
    "successful-recon-largescale",
    "attempted-dos",
    "successful-dos",
    "attempted-user",
    "unsuccessful-user",
    "successful-user",
    "attempted-admin",
    "successful-admin",
    # NEW CLASSIFICATIONS
    "rpc-portmap-decode",
    "shellcode-detect",
    "string-detect",
    "suspicious-filename-detect",
    "suspicious-login",
    "system-call-detect",
    "tcp-connection",
    "trojan-activity",
    "unusual-client-port-connection",
    "network-scan",
    "denial-of-service",
    "non-standard-protocol",
    "protocol-command-decode",
    "web-application-activity",
    "web-application-attack",
    "misc-activity",
    "misc-attack",
    "icmp-event",
    "inappropriate-content",
    "policy-violation",
    "default-login-attempt",
    # Update
    "targeted-activity",
    "exploit-kit",
    "external-ip-check",
    "domain-c2",
    "pup-activity",
    "credential-theft",
    "social-engineering",
    "coin-mining",
    "command-and-control",
)
108
# Rule options grouped as non-functional.
NON_FUNCTIONAL_KEYWORDS = (
    "classtype",
    "gid",
    "metadata",
    "msg",
    "priority",
    "reference",
    "rev",
    "sid",
    "target",
)

# Rule options grouped as flow-related.
FLOW_KEYWORDS = (
    "flow",
    "flow.age",
    "flowint",
)

# Rule options grouped as stream-related.
STREAM_KEYWORDS = ("stream_size",)

# Sorted, de-duplicated combination of the flow and stream keywords.
FLOW_STREAM_KEYWORDS: Sequence[str] = tuple(
    sorted({*FLOW_KEYWORDS, *STREAM_KEYWORDS}),
)
132
# Maps each underscore-style buffer keyword to its dotted counterpart.
STICKY_BUFFER_NAMING = {
    "dce_iface": "dce.iface",
    "dce_opnum": "dce.opnum",
    "dce_stub_data": "dce.stub_data",
    "dns_query": "dns.query",
    "file_data": "file.data",
    "http_accept": "http.accept",
    "http_accept_enc": "http.accept_enc",
    "http_accept_lang": "http.accept_lang",
    "http_client_body": "http.request_body",
    "http_connection": "http.connection",
    "http_content_len": "http.content_len",
    "http_content_type": "http.content_type",
    "http_cookie": "http.cookie",
    "http_header": "http.header",
    "http_header_names": "http.header_names",
    "http_host": "http.host",
    "http_method": "http.method",
    "http_protocol": "http.protocol",
    "http_raw_header": "http.header.raw",
    "http_raw_host": "http.host.raw",
    "http_raw_uri": "http.uri.raw",
    "http_referer": "http.referer",
    "http_request_line": "http.request_line",
    "http_response_line": "http.response_line",
    "http_server_body": "http.response_body",
    "http_start": "http.start",
    "http_stat_code": "http.stat_code",
    "http_stat_msg": "http.stat_msg",
    "http_uri": "http.uri",
    "http_user_agent": "http.user_agent",
    "ja3_hash": "ja3.hash",
    "tls_cert_fingerprint": "tls.cert_fingerprint",
    "tls_cert_issuer": "tls.cert_issuer",
    "tls_cert_serial": "tls.cert_serial",
    "tls_cert_subject": "tls.cert_subject",
    "tls_sni": "tls.sni",
}

# Buffer keyword grouped separately as base64-related.
BASE64_BUFFER_KEYWORDS = ("base64_data",)

# Buffer keywords that have no entry in STICKY_BUFFER_NAMING.
OTHER_BUFFERS = (
    "http.location",
    "http.request_header",
    "http.response_header",
    "http.server",
    "ja3s.hash",
    "tls.certs",
    "tls.version",
)

# Sanity check: a buffer is listed either in the naming table or in OTHER_BUFFERS, never both.
assert set(OTHER_BUFFERS).isdisjoint(
    {*STICKY_BUFFER_NAMING, *STICKY_BUFFER_NAMING.values()}
)

# Sorted, de-duplicated collection of every buffer keyword defined above.
BUFFER_KEYWORDS: Sequence[str] = tuple(
    sorted(
        {
            *STICKY_BUFFER_NAMING,
            *STICKY_BUFFER_NAMING.values(),
            *BASE64_BUFFER_KEYWORDS,
            *OTHER_BUFFERS,
        },
    ),
)
196
# Keywords grouped as size checks.
SIZE_KEYWORDS = (
    "bsize",
    "dsize",
)

# Keywords grouped as buffer transformations.
TRANSFORMATION_KEYWORDS = (
    "compress_whitespace",
    "dotprefix",
    "header_lowercase",
    "pcrexform",
    "strip_pseudo_headers",
    "strip_whitespace",
    "to_lowercase",
    "to_md5",
    "to_sha1",
    "to_sha256",
    "to_uppercase",
    "url_decode",
    "xor",
)

# Transformation keyword kept separately as base64-related.
BASE64_TRANSFORMATION_KEYWORDS = ("base64_decode",)

# Sorted, de-duplicated combination of both transformation groups.
ALL_TRANSFORMATION_KEYWORDS: Sequence[str] = tuple(
    sorted({*TRANSFORMATION_KEYWORDS, *BASE64_TRANSFORMATION_KEYWORDS}),
)

# Keywords grouped as content matches.
CONTENT_KEYWORDS = ("content", "pcre")

# Keywords grouped as pointer movement.
POINTER_MOVEMENT_KEYWORDS = (
    "depth",
    "distance",
    "offset",
    "pkt_data",
    "within",
)

# Modifier keyword grouped as a compatibility modifier.
COMPATIBILITY_MODIFIER_KEYWORDS = ("rawbytes",)

# Remaining modifier keywords.
MODIFIER_KEYWORDS = ("nocase",)

# Sorted, de-duplicated combination of both modifier groups.
ALL_MODIFIER_KEYWORDS: Sequence[str] = tuple(
    sorted({*COMPATIBILITY_MODIFIER_KEYWORDS, *MODIFIER_KEYWORDS}),
)

# Keywords grouped as match-location constraints.
MATCH_LOCATION_KEYWORDS = (
    "endswith",
    "startswith",
)

# Payload keywords that fit none of the groups above.
OTHER_PAYLOAD_KEYWORDS = (
    "byte_extract",
    "byte_jump",
    "byte_test",
    "isdataat",
)
253
# Keywords grouped as IP-specific.
IP_SPECIFIC_KEYWORDS = (
    "ip_proto",
    "ttl",
)

# Keywords grouped as TCP-specific.
TCP_SPECIFIC_KEYWORDS = (
    "ack",
    "flags",  # This is a duplicate of tcp.flags
    "seq",
    "tcp.flags",
    "tcp.hdr",
)

# Keywords grouped as UDP-specific.
UDP_SPECIFIC_KEYWORDS = ("udp.hdr",)

# Keywords grouped as ICMP-specific.
# NOTE(review): `fragbits` is documented by Suricata as an IP header keyword -
# confirm whether it is listed here deliberately.
ICMP_SPECIFIC_KEYWORDS = (
    "fragbits",
    "icode",
    "icmp_id",
    "icmp_seq",
    "itype",
)
276
# Keywords grouped as HTTP-specific, in both dotted and underscore spellings.
# Each keyword is listed exactly once. `http_client_body` is included because
# STICKY_BUFFER_NAMING maps it to `http.request_body`, which is listed here too.
HTTP_SPECIFIC_KEYWORDS = (
    "file.data",
    "file_data",
    "http.accept",
    "http.accept_enc",
    "http.accept_lang",
    "http.connection",
    "http.content_len",
    "http.content_type",
    "http.cookie",
    "http.header",
    "http.header_names",
    "http.header.raw",
    "http.host",
    "http.host.raw",
    "http.location",
    "http.method",
    "http.protocol",
    "http.referer",
    "http.request_body",
    "http.request_header",
    "http.request_line",
    "http.response_body",
    "http.response_header",
    "http.response_line",
    "http.server",
    "http.start",
    "http.stat_code",
    "http.stat_msg",
    "http.uri",
    "http.uri.raw",
    "http.user_agent",
    "http_accept",
    "http_accept_enc",
    "http_accept_lang",
    "http_client_body",
    "http_connection",
    "http_content_len",
    "http_content_type",
    "http_cookie",
    "http_header",
    "http_header_names",
    "http_host",
    "http_location",
    "http_method",
    "http_protocol",
    "http_raw_header",
    "http_raw_host",
    "http_raw_uri",
    "http_referer",
    "http_request_line",
    "http_response_line",
    "http_server_body",
    "http_start",
    "http_stat_code",
    "http_stat_msg",
    "http_uri",
    "http_user_agent",
    "urilen",
)
339
# Keywords grouped as DNS-specific.
DNS_SPECIFIC_KEYWORDS = (
    "dns.opcode",
    "dns.query",
    "dns_query",
)

# Keywords grouped as TLS-specific.
TLS_SPECIFIC_KEYWORDS = (
    "ssl_version",
    "ssl_state",
    "tls.cert_fingerprint",
    "tls.cert_issuer",
    "tls.cert_serial",
    "tls.cert_subject",
    "tls.certs",
    "tls.sni",
    "tls.version",
    "tls_cert_fingerprint",
    "tls_cert_issuer",
    "tls_cert_serial",
    "tls_cert_subject",
    "tls_sni",
)

# Keywords grouped as SSH-specific.
SSH_SPECIFIC_KEYWORDS = ("ssh_proto",)

# Keywords grouped as JA3/JA4 fingerprinting.
JA3_JA4_KEYWORDS = (
    "ja3.hash",
    "ja3_hash",
    "ja3.string",
    "ja3s.hash",
)

# Keywords grouped as DCERPC-specific.
DCERPC_SPECIFIC_KEYWORDS = (
    "dce.iface",
    "dce.opnum",
    "dce.stub_data",
    "dce_iface",
    "dce_opnum",
    "dce_stub_data",
)

# Keywords grouped as FTP-related.
FTP_KEYWORDS = ("ftpbounce", "ftpdata_command")

# Keywords grouped as application-layer.
APP_LAYER_KEYWORDS = (
    "app-layer-event",
    "app-layer-protocol",
)

# Sorted, de-duplicated combination of every protocol-specific group.
PROTOCOL_SPECIFIC_KEYWORDS = tuple(
    sorted(
        {
            *IP_SPECIFIC_KEYWORDS,
            *TCP_SPECIFIC_KEYWORDS,
            *UDP_SPECIFIC_KEYWORDS,
            *ICMP_SPECIFIC_KEYWORDS,
            *HTTP_SPECIFIC_KEYWORDS,
            *DNS_SPECIFIC_KEYWORDS,
            *TLS_SPECIFIC_KEYWORDS,
            *SSH_SPECIFIC_KEYWORDS,
            *DCERPC_SPECIFIC_KEYWORDS,
            *JA3_JA4_KEYWORDS,
            *FTP_KEYWORDS,
            *APP_LAYER_KEYWORDS,
        },
    ),
)
408
# Detection option grouped as performance-related.
PERFORMANCE_DETECTION_OPTIONS = ("fast_pattern",)

# Keywords grouped as Lua scripting.
LUA_KEYWORDS = ("lua", "luajit")

# Sorted, de-duplicated combination of every detection-related keyword group.
ALL_DETECTION_KEYWORDS: Sequence[str] = tuple(
    sorted(
        {
            *BUFFER_KEYWORDS,
            *SIZE_KEYWORDS,
            *ALL_TRANSFORMATION_KEYWORDS,
            *CONTENT_KEYWORDS,
            *POINTER_MOVEMENT_KEYWORDS,
            *ALL_MODIFIER_KEYWORDS,
            *MATCH_LOCATION_KEYWORDS,
            *OTHER_PAYLOAD_KEYWORDS,
            *PROTOCOL_SPECIFIC_KEYWORDS,
            *PERFORMANCE_DETECTION_OPTIONS,
            *LUA_KEYWORDS,
        },
    ),
)

# Keywords grouped as thresholding.
THRESHOLD_KEYWORDS = (
    "detection_filter",
    "threshold",
)

# Keywords grouped as stateful.
STATEFUL_KEYWORDS = ("flowbits", "flowint", "xbits")

# Keywords that fit none of the groups above.
OTHER_KEYWORDS = ("noalert", "tag")

# Sorted, de-duplicated combination of every keyword group in this module.
ALL_KEYWORDS = tuple(
    sorted(
        {
            *NON_FUNCTIONAL_KEYWORDS,
            *FLOW_KEYWORDS,
            *STREAM_KEYWORDS,
            *ALL_DETECTION_KEYWORDS,
            *THRESHOLD_KEYWORDS,
            *STATEFUL_KEYWORDS,
            *OTHER_KEYWORDS,
        },
    ),
)
457
# Metadata keys grouped as date-valued.
METADATA_DATE_KEYWORDS = (
    "created_at",
    "reviewed_at",
    "updated_at",
)

# Metadata keys grouped as not date-valued.
METADATA_NON_DATE_KEYWORDS = (
    "affected_product",
    "attack_target",
    "confidence",
    "cve",
    "deprecation_reason",
    "deployment",
    "former_category",
    "former_sid",
    "impact_flag",
    "malware_family",
    "mitre_tactic_id",
    "mitre_tactic_name",
    "mitre_technique_id",
    "mitre_technique_name",
    "performance_impact",
    "policy",
    "ruleset",
    "signature_severity",
    "tag",
    "tls_state",
    "first_seen",
    "confidence_level",
)

# Sorted, de-duplicated combination of both metadata key groups.
ALL_METADATA_KEYWORDS = tuple(
    sorted({*METADATA_DATE_KEYWORDS, *METADATA_NON_DATE_KEYWORDS}),
)
492
# Matches any string that contains a dotted quad of digit runs (e.g. 10.0.0.1).
IP_ADDRESS_REGEX = _regex_provider.compile(r"^.*\d+\.\d+\.\d+\.\d+.*$")

# An optionally negated bracketed group such as `[a, b]` or `![a, b]`.
# Capture 1 is the `!` (if present); capture 2 is the text between the brackets.
_GROUP_REGEX = _regex_provider.compile(r"^(!)?\[(.*)\]$")
# An optionally negated variable reference such as `$HOME_NET` or `!$HOME_NET`.
# Capture 1 is the bare variable name. Digits are accepted because variable
# names listed in ADDRESS_GROUPS / PORT_GROUPS contain them (e.g. DNP3_SERVER).
_VARIABLE_GROUP_REGEX = _regex_provider.compile(r"^!?\$([A-Z0-9_]+)$")

# The pieces of a rule header; they are concatenated into HEADER_REGEX below.
_ACTION_REGEX = _regex_provider.compile(
    r"(alert|pass|drop|reject|rejectsrc|rejectdst|rejectboth)",
)
# NOTE(review): only the digits 0-3 are accepted in a protocol name - confirm
# this is intended for protocol names that contain other digits.
_PROTOCOL_REGEX = _regex_provider.compile(r"[a-z0-3\-]+")
_ADDR_REGEX = _regex_provider.compile(r"[a-zA-Z0-9\$_\!\[\],\s/\.]+")
_PORT_REGEX = _regex_provider.compile(r"[a-zA-Z0-9\$_\!\[\],\s:]+")
_DIRECTION_REGEX = _regex_provider.compile(r"(\->|<>)")
# Header layout: action, protocol, address, port, direction, address, port.
HEADER_REGEX = _regex_provider.compile(
    rf"{_ACTION_REGEX.pattern}\s*{_PROTOCOL_REGEX.pattern}\s*{_ADDR_REGEX.pattern}\s*{_PORT_REGEX.pattern}\s*{_DIRECTION_REGEX.pattern}\s*{_ADDR_REGEX.pattern}\s*{_PORT_REGEX.pattern}",
)
# A single option: a keyword, an optional `:`-introduced value part, and a closing `;`.
_OPTION_REGEX = _regex_provider.compile(
    r"[a-z\-\._]+\s*(:(\s*([0-9]+|.+)\s*\,?\s*)+)?;"
)
# The parenthesised rule body: zero or more options.
_BODY_REGEX = _regex_provider.compile(rf"\((\s*{_OPTION_REGEX.pattern}\s*)*\)")
# A whole rule line: optional leading `#`, header, body, optional trailing `#` comment.
_RULE_REGEX = _regex_provider.compile(
    rf"^(\s*#)?\s*{HEADER_REGEX.pattern}\s*{_BODY_REGEX.pattern}\s*(#.*)?$",
)
515
516
[docs]
def get_regex_provider():  # noqa: ANN201
    """Returns the module this file uses for regular expressions.

    That is the `regex` module when it was detected as installed at import time,
    and the standard-library `re` module otherwise.
    """
    return _regex_provider
524
525
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __escape_regex(s: str) -> str:
    # Prefix each regex metacharacter with a backslash, in one pass over `s`.
    # Covered characters: \ . ^ $ * + ? { } [ ] | ( )
    # A per-character table cannot re-escape the backslashes it inserts.
    escape_table = {ord(char): "\\" + char for char in "\\.^$*+?{}[]|()"}
    return s.translate(escape_table)
548
549
[docs]
def get_options_regex(options: Iterable[str]) -> _regex_provider.Pattern:
    """Returns a compiled pattern that matches any one of the provided options."""
    # A sorted tuple is hashable and order-independent, so it can key the cached builder.
    ordered_options = sorted(options)
    return __get_options_regex(tuple(ordered_options))
553
554
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __get_options_regex(options: Sequence[str]) -> _regex_provider.Pattern:
    # Build one capturing alternation of the escaped options, e.g. (a|b\.c).
    # `options` must be hashable (a tuple) because the result is cached on it.
    alternation = "|".join(map(__escape_regex, options))
    return _regex_provider.compile(f"({alternation})")
560
561
def __is_group(entry: str) -> bool:
    # True when `entry` is an optionally negated bracketed group, e.g. `[a, b]` or `![a]`.
    return _GROUP_REGEX.match(entry) is not None
567
568
[docs]
def __split_top_level(content: str) -> list[str]:
    # Split `content` on commas that are not enclosed in square brackets, so a
    # nested group such as `[b, c]` stays in one piece. For input without
    # brackets this gives the same result as `content.split(",")`.
    parts: list[str] = []
    current: list[str] = []
    depth = 0
    for char in content:
        if char == "," and depth == 0:
            parts.append("".join(current))
            current = []
            continue
        if char == "[":
            depth += 1
        elif char == "]":
            # Clamp at zero so a stray closing bracket cannot hide later commas.
            depth = max(depth - 1, 0)
        current.append(char)
    parts.append("".join(current))
    return parts


def get_rule_group_entries(group: str) -> Sequence[str]:
    """Returns a list of entries in a group.

    A value that is not a bracketed group is returned as a single-entry list
    (stripped of surrounding whitespace). Nested groups are flattened, including
    nested groups that contain several comma-separated entries themselves.
    When a group is negated, every entry it yields is prefixed with `!`.
    """
    stripped_group = group.strip()

    match = _GROUP_REGEX.match(stripped_group)
    if match is None:
        return [stripped_group]

    prefix = "!" if match.group(1) == "!" else ""

    entries: list[str] = []
    for entry in __split_top_level(match.group(2)):
        # The recursive call strips the entry, returns it unchanged if it is a
        # plain value and expands it if it is itself a (possibly negated) group.
        entries.extend(get_rule_group_entries(entry))

    return [prefix + entry for entry in entries]
592
593
[docs]
def get_variable_groups(value: str) -> Sequence[str]:
    """Returns a list of variable groups such as $HTTP_SERVERS in a variable.

    The names are returned without the leading `$` (and without any `!`).
    Each call returns a new list, so callers may modify the result freely.
    """
    # The helper's result is memoised by lru_cache; handing out that very object
    # would let one caller's mutation change what later callers receive.
    return list(__get_variable_groups(value))
597
598
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __get_variable_groups(value: str) -> Sequence[str]:
    # Keep the captured name of every entry that is a `$VARIABLE` reference;
    # entries that do not match the variable pattern are dropped.
    return [
        found.group(1)
        for item in get_rule_group_entries(value)
        if (found := _VARIABLE_GROUP_REGEX.match(item)) is not None
    ]
609
610
[docs]
def get_rule_body(rule: idstools.rule.Rule) -> str:
    """Returns the parenthesised body found in the raw text of a rule.

    Raises a `RuntimeError` when no body can be found.
    """
    return __get_rule_body(rule)
614
615
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __get_rule_body(rule: idstools.rule.Rule) -> str:
    # `rule` must be hashable because lru_cache uses it as the cache key.
    body = _BODY_REGEX.search(rule["raw"])
    if body is not None:
        return body.group(0)

    # No body found: log at critical level and raise with the same message.
    msg = f"Could not extract rule body from rule: {rule['raw']}"
    _logger.critical(msg)
    raise RuntimeError(msg)
626
627
[docs]
def is_valid_rule(rule: idstools.rule.Rule) -> bool:
    """Checks whether the raw text of a rule matches the expected rule syntax."""
    return _RULE_REGEX.match(rule["raw"]) is not None