1"""The `suricata_check.utils.regex` module contains regular expressions for matching various parts of rules."""
2
3import logging
4from collections.abc import Iterable, Sequence
5from functools import lru_cache
6
7from suricata_check.utils.regex_provider import Pattern
8from suricata_check.utils.regex_provider import (
9 get_regex_provider as _get_regex_provider,
10)
11from suricata_check.utils.rule import Rule
12
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)
# Regex backend returned by `get_regex_provider`; every pattern in this module
# is compiled through it.
_regex_provider = _get_regex_provider()

# `maxsize` handed to each `lru_cache` decorator used in this module.
LRU_CACHE_SIZE = 10
17
# Names of address variables, listed without the leading "$".
ADDRESS_GROUPS = (
    "HOME_NET",
    "EXTERNAL_NET",
    "HTTP_SERVERS",
    "SMTP_SERVERS",
    "SQL_SERVERS",
    "DNS_SERVERS",
    "TELNET_SERVERS",
    "AIM_SERVERS",
    "DC_SERVERS",
    "DNP3_SERVER",
    "DNP3_CLIENT",
    "MODBUS_CLIENT",
    "MODBUS_SERVER",
    "ENIP_CLIENT",
    "ENIP_SERVER",
)


# Names of port variables, listed without the leading "$".
PORT_GROUPS = (
    "HTTP_PORTS",
    "SHELLCODE_PORTS",
    "ORACLE_PORTS",
    "SSH_PORTS",
    "DNP3_PORTS",
    "MODBUS_PORTS",
    "FILE_DATA_PORTS",
    "FTP_PORTS",
    "GENEVE_PORTS",
    "VXLAN_PORTS",
    "TEREDO_PORTS",
)

# Address variables followed by port variables, in definition order.
ALL_VARIABLES = ADDRESS_GROUPS + PORT_GROUPS
52
# Classification names; presumably the values accepted by the `classtype`
# rule option (verify against the consumers of this tuple).
CLASSTYPES = (
    "not-suspicious",
    "unknown",
    "bad-unknown",
    "attempted-recon",
    "successful-recon-limited",
    "successful-recon-largescale",
    "attempted-dos",
    "successful-dos",
    "attempted-user",
    "unsuccessful-user",
    "successful-user",
    "attempted-admin",
    "successful-admin",
    # NEW CLASSIFICATIONS
    "rpc-portmap-decode",
    "shellcode-detect",
    "string-detect",
    "suspicious-filename-detect",
    "suspicious-login",
    "system-call-detect",
    "tcp-connection",
    "trojan-activity",
    "unusual-client-port-connection",
    "network-scan",
    "denial-of-service",
    "non-standard-protocol",
    "protocol-command-decode",
    "web-application-activity",
    "web-application-attack",
    "misc-activity",
    "misc-attack",
    "icmp-event",
    "inappropriate-content",
    "policy-violation",
    "default-login-attempt",
    # Update
    "targeted-activity",
    "exploit-kit",
    "external-ip-check",
    "domain-c2",
    "pup-activity",
    "credential-theft",
    "social-engineering",
    "coin-mining",
    "command-and-control",
)
100
# Options grouped as "non-functional": rule identity and descriptive options
# such as `sid`, `rev` and `msg`.
NON_FUNCTIONAL_KEYWORDS = (
    "classtype",
    "gid",
    "metadata",
    "msg",
    "priority",
    "reference",
    "rev",
    "sid",
    "target",
)

# Flow-related options.
FLOW_KEYWORDS = ("flow", "flow.age", "flowint")

# Stream-related options.
STREAM_KEYWORDS = ("stream_size",)

# Sorted, de-duplicated combination of the flow and stream options.
FLOW_STREAM_KEYWORDS: Sequence[str] = tuple(
    sorted({*FLOW_KEYWORDS, *STREAM_KEYWORDS}),
)
124
# Maps each underscore-style buffer keyword to its dotted counterpart.
STICKY_BUFFER_NAMING = {
    "dce_iface": "dce.iface",
    "dce_opnum": "dce.opnum",
    "dce_stub_data": "dce.stub_data",
    "dns_query": "dns.query",
    "file_data": "file.data",
    "http_accept": "http.accept",
    "http_accept_enc": "http.accept_enc",
    "http_accept_lang": "http.accept_lang",
    "http_client_body": "http.request_body",
    "http_connection": "http.connection",
    "http_content_len": "http.content_len",
    "http_content_type": "http.content_type",
    "http_cookie": "http.cookie",
    "http_header": "http.header",
    "http_header_names": "http.header_names",
    "http_host": "http.host",
    "http_method": "http.method",
    "http_protocol": "http.protocol",
    "http_raw_header": "http.header.raw",
    "http_raw_host": "http.host.raw",
    "http_raw_uri": "http.uri.raw",
    "http_referer": "http.referer",
    "http_request_line": "http.request_line",
    "http_response_line": "http.response_line",
    "http_server_body": "http.response_body",
    "http_start": "http.start",
    "http_stat_code": "http.stat_code",
    "http_stat_msg": "http.stat_msg",
    "http_uri": "http.uri",
    "http_user_agent": "http.user_agent",
    "ja3_hash": "ja3.hash",
    "tls_cert_fingerprint": "tls.cert_fingerprint",
    "tls_cert_issuer": "tls.cert_issuer",
    "tls_cert_serial": "tls.cert_serial",
    "tls_cert_subject": "tls.cert_subject",
    "tls_sni": "tls.sni",
}

# Buffer keyword for base64-decoded data.
BASE64_BUFFER_KEYWORDS = ("base64_data",)

# Buffers that have no entry (as key or value) in STICKY_BUFFER_NAMING.
OTHER_BUFFERS = (
    "http.location",
    "http.request_header",
    "http.response_header",
    "http.server",
    "ja3s.hash",
    "tls.certs",
    "tls.version",
)

# Sanity check: OTHER_BUFFERS must not repeat any name from the naming table.
assert set(OTHER_BUFFERS).isdisjoint(
    {*STICKY_BUFFER_NAMING.keys(), *STICKY_BUFFER_NAMING.values()},
)

# Every buffer keyword known to this module, sorted and de-duplicated.
BUFFER_KEYWORDS: Sequence[str] = tuple(
    sorted(
        {
            *STICKY_BUFFER_NAMING.keys(),
            *STICKY_BUFFER_NAMING.values(),
            *BASE64_BUFFER_KEYWORDS,
            *OTHER_BUFFERS,
        },
    ),
)
188
# Size-related options.
SIZE_KEYWORDS = ("bsize", "dsize")

# Transformation options, excluding the base64 one listed separately below.
TRANSFORMATION_KEYWORDS = (
    "compress_whitespace",
    "dotprefix",
    "header_lowercase",
    "pcrexform",
    "strip_pseudo_headers",
    "strip_whitespace",
    "to_lowercase",
    "to_md5",
    "to_sha1",
    "to_sha256",
    "to_uppercase",
    "url_decode",
    "xor",
)

BASE64_TRANSFORMATION_KEYWORDS = ("base64_decode",)

# Sorted, de-duplicated combination of both transformation tuples.
ALL_TRANSFORMATION_KEYWORDS: Sequence[str] = tuple(
    sorted({*TRANSFORMATION_KEYWORDS, *BASE64_TRANSFORMATION_KEYWORDS}),
)

# Options that carry the pattern being matched.
CONTENT_KEYWORDS = ("content", "pcre")

POINTER_MOVEMENT_KEYWORDS = (
    "depth",
    "distance",
    "offset",
    "pkt_data",
    "within",
)

COMPATIBILITY_MODIFIER_KEYWORDS = ("rawbytes",)

MODIFIER_KEYWORDS = ("nocase",)

# Sorted, de-duplicated combination of both modifier tuples.
ALL_MODIFIER_KEYWORDS: Sequence[str] = tuple(
    sorted({*COMPATIBILITY_MODIFIER_KEYWORDS, *MODIFIER_KEYWORDS}),
)

MATCH_LOCATION_KEYWORDS = ("endswith", "startswith")

OTHER_PAYLOAD_KEYWORDS = (
    "byte_extract",
    "byte_jump",
    "byte_test",
    "isdataat",
)
245
# Options grouped under the IP protocol.
IP_SPECIFIC_KEYWORDS = (
    "ip_proto",
    "ttl",
)

# Options grouped under TCP.
TCP_SPECIFIC_KEYWORDS = (
    "ack",
    "flags",  # This is a duplicate of tcp.flags
    "seq",
    "tcp.flags",
    "tcp.hdr",
)

# Options grouped under UDP.
UDP_SPECIFIC_KEYWORDS = ("udp.hdr",)

# Options grouped under ICMP.
# NOTE(review): Suricata's documentation lists `fragbits` with the IP header
# keywords rather than ICMP — confirm whether its placement here is intended.
ICMP_SPECIFIC_KEYWORDS = (
    "fragbits",
    "icode",
    "icmp_id",
    "icmp_seq",
    "itype",
)
268
# Options grouped under HTTP, in both dotted and underscore spellings.
# Each keyword appears exactly once; the previously repeated entries
# (`http.content_len`, `http.stat_code`, `http_content_len`) were removed
# while keeping the original ordering.
# NOTE(review): `http_client_body` (mapped to `http.request_body` in
# STICKY_BUFFER_NAMING) is not listed here although `http_server_body` is —
# confirm whether that omission is intentional.
HTTP_SPECIFIC_KEYWORDS = (
    "file.data",
    "file_data",
    "http.accept",
    "http.accept_enc",
    "http.accept_lang",
    "http.connection",
    "http.content_len",
    "http.content_type",
    "http.cookie",
    "http.header",
    "http.header_names",
    "http.header.raw",
    "http.host",
    "http.host.raw",
    "http.location",
    "http.method",
    "http.protocol",
    "http.referer",
    "http.request_body",
    "http.request_header",
    "http.request_line",
    "http.response_body",
    "http.response_header",
    "http.response_line",
    "http.server",
    "http.start",
    "http.stat_code",
    "http.stat_msg",
    "http.uri",
    "http.uri.raw",
    "http.user_agent",
    "http_accept",
    "http_accept_enc",
    "http_accept_lang",
    "http_connection",
    "http_content_len",
    "http_content_type",
    "http_cookie",
    "http_header",
    "http_header_names",
    "http_host",
    "http_location",
    "http_method",
    "http_protocol",
    "http_raw_header",
    "http_raw_host",
    "http_raw_uri",
    "http_referer",
    "http_request_line",
    "http_response_line",
    "http_server_body",
    "http_start",
    "http_stat_code",
    "http_stat_msg",
    "http_uri",
    "http_user_agent",
    "urilen",
)
331
# Options grouped under DNS.
DNS_SPECIFIC_KEYWORDS = (
    "dns.opcode",
    "dns.query",
    "dns_query",
)

# Options grouped under TLS/SSL, in both dotted and underscore spellings.
TLS_SPECIFIC_KEYWORDS = (
    "ssl_version",
    "ssl_state",
    "tls.cert_fingerprint",
    "tls.cert_issuer",
    "tls.cert_serial",
    "tls.cert_subject",
    "tls.certs",
    "tls.sni",
    "tls.version",
    "tls_cert_fingerprint",
    "tls_cert_issuer",
    "tls_cert_serial",
    "tls_cert_subject",
    "tls_sni",
)

# Options grouped under SSH.
SSH_SPECIFIC_KEYWORDS = ("ssh_proto",)

# JA3 fingerprint options; despite the name, no `ja4` entries are listed.
JA3_JA4_KEYWORDS = (
    "ja3.hash",
    "ja3_hash",
    "ja3.string",
    "ja3s.hash",
)

# Options grouped under DCE/RPC, in both dotted and underscore spellings.
DCERPC_SPECIFIC_KEYWORDS = (
    "dce.iface",
    "dce.opnum",
    "dce.stub_data",
    "dce_iface",
    "dce_opnum",
    "dce_stub_data",
)

# Options grouped under FTP.
FTP_KEYWORDS = ("ftpbounce", "ftpdata_command")

# Application-layer options.
APP_LAYER_KEYWORDS = (
    "app-layer-event",
    "app-layer-protocol",
)
379
# Sorted, de-duplicated combination of all per-protocol keyword tuples.
PROTOCOL_SPECIFIC_KEYWORDS = tuple(
    sorted(
        {
            *IP_SPECIFIC_KEYWORDS,
            *TCP_SPECIFIC_KEYWORDS,
            *UDP_SPECIFIC_KEYWORDS,
            *ICMP_SPECIFIC_KEYWORDS,
            *HTTP_SPECIFIC_KEYWORDS,
            *DNS_SPECIFIC_KEYWORDS,
            *TLS_SPECIFIC_KEYWORDS,
            *SSH_SPECIFIC_KEYWORDS,
            *DCERPC_SPECIFIC_KEYWORDS,
            *JA3_JA4_KEYWORDS,
            *FTP_KEYWORDS,
            *APP_LAYER_KEYWORDS,
        },
    ),
)
400
PERFORMANCE_DETECTION_OPTIONS = ("fast_pattern",)

# Lua scripting options.
LUA_KEYWORDS = ("lua", "luajit")

# Sorted, de-duplicated combination of every detection-related keyword tuple.
ALL_DETECTION_KEYWORDS: Sequence[str] = tuple(
    sorted(
        {
            *BUFFER_KEYWORDS,
            *SIZE_KEYWORDS,
            *ALL_TRANSFORMATION_KEYWORDS,
            *CONTENT_KEYWORDS,
            *POINTER_MOVEMENT_KEYWORDS,
            *ALL_MODIFIER_KEYWORDS,
            *MATCH_LOCATION_KEYWORDS,
            *OTHER_PAYLOAD_KEYWORDS,
            *PROTOCOL_SPECIFIC_KEYWORDS,
            *PERFORMANCE_DETECTION_OPTIONS,
            *LUA_KEYWORDS,
        },
    ),
)
424
# Thresholding options.
THRESHOLD_KEYWORDS = ("detection_filter", "threshold")

# Options that keep state; `flowint` is also listed in FLOW_KEYWORDS.
STATEFUL_KEYWORDS = ("flowbits", "flowint", "xbits")

OTHER_KEYWORDS = ("noalert", "tag")

# Sorted, de-duplicated combination of every keyword tuple above.
ALL_KEYWORDS = tuple(
    sorted(
        {
            *NON_FUNCTIONAL_KEYWORDS,
            *FLOW_KEYWORDS,
            *STREAM_KEYWORDS,
            *ALL_DETECTION_KEYWORDS,
            *THRESHOLD_KEYWORDS,
            *STATEFUL_KEYWORDS,
            *OTHER_KEYWORDS,
        },
    ),
)
449
# Metadata keys whose names end in `_at`.
METADATA_DATE_KEYWORDS = ("created_at", "reviewed_at", "updated_at")

# Remaining metadata keys.
METADATA_NON_DATE_KEYWORDS = (
    "affected_product",
    "attack_target",
    "confidence",
    "cve",
    "deprecation_reason",
    "deployment",
    "former_category",
    "former_sid",
    "impact_flag",
    "malware_family",
    "mitre_tactic_id",
    "mitre_tactic_name",
    "mitre_technique_id",
    "mitre_technique_name",
    "performance_impact",
    "policy",
    "ruleset",
    "signature_severity",
    "tag",
    "tls_state",
    "first_seen",
    "confidence_level",
)

# Sorted, de-duplicated combination of both metadata tuples.
ALL_METADATA_KEYWORDS = tuple(
    sorted({*METADATA_DATE_KEYWORDS, *METADATA_NON_DATE_KEYWORDS}),
)
484
# Loose check for four dot-separated digit runs anywhere in the value.
IP_ADDRESS_REGEX = _regex_provider.compile(r"^.*\d+\.\d+\.\d+\.\d+.*$")

# Optionally negated bracketed group: group 1 is the "!" (if present), group 2
# is the text between the outermost brackets.
_GROUP_REGEX = _regex_provider.compile(r"^(!)?\[(.*)\]$")
# Optionally negated variable reference such as `$HOME_NET`: group 1 is the
# variable name without the "$".
_VARIABLE_GROUP_REGEX = _regex_provider.compile(r"^!?\$([A-Z\_]+)$")

# Building blocks of the rule header, in the order they appear in a rule.
_ACTION_REGEX = _regex_provider.compile(
    r"(alert|pass|drop|reject|rejectsrc|rejectdst|rejectboth)",
)
# All digits are accepted so that protocol names such as `ipv6`, `icmpv6` or
# `krb5` are matched in full (the class used to stop at `3`).
_PROTOCOL_REGEX = _regex_provider.compile(r"[a-z0-9\-]+")
_ADDR_REGEX = _regex_provider.compile(r"[a-zA-Z0-9\$_\!\[\],\s/\.]+")
_PORT_REGEX = _regex_provider.compile(r"[a-zA-Z0-9\$_\!\[\],\s:]+")
_DIRECTION_REGEX = _regex_provider.compile(r"(\->|<>)")
# Full header: action, protocol, source address/port, direction, destination
# address/port, each separated by optional whitespace.
HEADER_REGEX = _regex_provider.compile(
    rf"{_ACTION_REGEX.pattern}\s*{_PROTOCOL_REGEX.pattern}\s*{_ADDR_REGEX.pattern}\s*{_PORT_REGEX.pattern}\s*{_DIRECTION_REGEX.pattern}\s*{_ADDR_REGEX.pattern}\s*{_PORT_REGEX.pattern}",
)
# A single `name;` or `name: value;` option. Digits are accepted in the name
# because keywords listed in this module contain them (`base64_data`,
# `to_md5`, `ja3.hash`, ...).
_OPTION_REGEX = _regex_provider.compile(
    r"[a-z0-9\-\._]+\s*(:(\s*([0-9]+|.+)\s*\,?\s*)+)?;",
)
# The parenthesised rule body: zero or more options.
_BODY_REGEX = _regex_provider.compile(rf"\((\s*{_OPTION_REGEX.pattern}\s*)*\)")
# A complete rule line: optional leading "#", header, body, optional trailing
# "#" comment.
_RULE_REGEX = _regex_provider.compile(
    rf"^(\s*#)?\s*{HEADER_REGEX.pattern}\s*{_BODY_REGEX.pattern}\s*(#.*)?$",
)
507
508
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __escape_regex(s: str) -> str:
    """Return `s` with regex metacharacters prefixed by a backslash.

    The escaped characters are the backslash itself and
    . ^ $ * + ? { } [ ] | ( )
    """
    # One translation pass: each metacharacter maps to a backslash followed by
    # itself, so backslashes added for other characters are never re-escaped.
    escapes = {ord(char): "\\" + char for char in "\\.^$*+?{}[]|()"}
    return s.translate(escapes)
531
532
[docs]
def get_options_regex(options: Iterable[str]) -> Pattern:
    """Build a pattern that matches any one of the given options.

    The options are sorted and packed into a tuple before being handed to the
    cached helper, which gives it a hashable argument that does not depend on
    the order in which the options were supplied.
    """
    cache_key = tuple(sorted(options))
    return __get_options_regex(cache_key)
536
537
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __get_options_regex(options: Sequence[str]) -> Pattern:
    """Compile a single capturing group that alternates over the escaped options."""
    alternation = "|".join(map(__escape_regex, options))
    return _regex_provider.compile("(" + alternation + ")")
543
544
def __is_group(entry: str) -> bool:
    """Tell whether `entry` is a bracketed group such as `[a, b]` or `![a, b]`."""
    return _GROUP_REGEX.match(entry) is not None
550
551
[docs]
def __split_group_entries(body: str) -> list[str]:
    """Split the inside of a group on commas that are not within nested brackets."""
    parts: list[str] = []
    depth = 0
    start = 0
    for index, char in enumerate(body):
        if char == "[":
            depth += 1
        elif char == "]":
            # Clamp at zero so unbalanced input degrades to a plain comma split.
            depth = max(depth - 1, 0)
        elif char == "," and depth == 0:
            parts.append(body[start:index])
            start = index + 1
    parts.append(body[start:])
    return parts


def get_rule_group_entries(group: str) -> Sequence[str]:
    """Returns a list of entries in a group.

    A value that is not a bracketed group is returned as a single stripped
    entry. Nested groups such as `[a, [b, c]]` or `[a, ![b, c]]` are flattened
    recursively, and a leading `!` on a group is prepended to every entry it
    contains.

    Args:
        group: An address or port value, e.g. `$HOME_NET` or `![1.1.1.1, $DNS_SERVERS]`.

    Returns:
        The flattened list of entries with surrounding whitespace removed.

    """
    stripped_group = group.strip()

    match = _GROUP_REGEX.match(stripped_group)
    if match is None:
        # Not a bracketed group: the value itself is the only entry.
        return [stripped_group]

    prefix = "!" if match.group(1) == "!" else ""

    entries: list[str] = []
    # Split only on top-level commas so that nested groups reach the recursive
    # call intact instead of being cut apart at their inner commas.
    for entry in __split_group_entries(match.group(2)):
        entries.extend(get_rule_group_entries(entry))

    return [prefix + entry for entry in entries]
575
576
[docs]
def get_variable_groups(value: str) -> Sequence[str]:
    """Returns a list of variable groups such as $HTTP_SERVERS in a variable.

    The names are returned without the leading `$` (and without any `!`
    negation), e.g. `HTTP_SERVERS`.

    Args:
        value: An address or port value, possibly a bracketed group.

    Returns:
        A new list of the variable names found in `value`.

    """
    # The helper is memoised with `lru_cache`, so its result object is shared
    # between calls; hand out a copy so callers cannot mutate the cached value.
    return list(__get_variable_groups(value))
580
581
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __get_variable_groups(value: str) -> Sequence[str]:
    """Collect the names of all `$VARIABLE` entries found in `value`.

    Entries that do not match the variable pattern are skipped.
    """
    candidates = (
        _VARIABLE_GROUP_REGEX.match(entry) for entry in get_rule_group_entries(value)
    )
    return [found.group(1) for found in candidates if found is not None]
592
593
[docs]
def get_rule_body(rule: Rule) -> str:
    """Return the parenthesised body of `rule`.

    Delegates to a cached helper, which raises `RuntimeError` when no body can
    be found in the raw rule text.
    """
    body = __get_rule_body(rule)
    return body
597
598
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __get_rule_body(rule: Rule) -> str:
    """Search the raw rule text for the body pattern and return the matched text.

    Raises:
        RuntimeError: If the body pattern is not found; the failure is also
            logged at critical level.

    """
    found = _BODY_REGEX.search(rule.raw)
    if found is not None:
        return found.group(0)

    msg = f"Could not extract rule body from rule: {rule.raw}"
    _logger.critical(msg)
    raise RuntimeError(msg)
609
610
[docs]
def is_valid_rule(rule: Rule) -> bool:
    """Tell whether the raw text of `rule` matches this module's overall rule pattern."""
    return _RULE_REGEX.match(rule.raw) is not None