Source code for suricata_check.utils.regex

  1"""The `suricata_check.utils.regex` module contains regular expressions for matching various parts of rules."""
  2
  3import importlib.util
  4import logging
  5from collections.abc import Iterable, Sequence
  6from functools import lru_cache
  7
  8import idstools.rule
  9
 10_logger = logging.getLogger(__name__)
 11
 12# Import the fastest regex provider available:
 13if importlib.util.find_spec("regex") is not None:
 14    _logger.info("Detected regex module as installed, using it.")
 15    import regex as _regex_provider
 16else:
 17    _logger.warning(
 18        """Did not detect regex module as installed, using re instead.
 19To increase suricata-check processing speed, consider isntalling the regex module \
 20by running `pip install suricata-check[performance]`.""",
 21    )
 22    import re as _regex_provider
 23
 24LRU_CACHE_SIZE = 10
 25
 26ADDRESS_GROUPS = (
 27    "HOME_NET",
 28    "EXTERNAL_NET",
 29    "HTTP_SERVERS",
 30    "SMTP_SERVERS",
 31    "SQL_SERVERS",
 32    "DNS_SERVERS",
 33    "TELNET_SERVERS",
 34    "AIM_SERVERS",
 35    "DC_SERVERS",
 36    "DNP3_SERVER",
 37    "DNP3_CLIENT",
 38    "MODBUS_CLIENT",
 39    "MODBUS_SERVER",
 40    "ENIP_CLIENT",
 41    "ENIP_SERVER",
 42)
 43
 44
 45PORT_GROUPS = (
 46    "HTTP_PORTS",
 47    "SHELLCODE_PORTS",
 48    "ORACLE_PORTS",
 49    "SSH_PORTS",
 50    "DNP3_PORTS",
 51    "MODBUS_PORTS",
 52    "FILE_DATA_PORTS",
 53    "FTP_PORTS",
 54    "GENEVE_PORTS",
 55    "VXLAN_PORTS",
 56    "TEREDO_PORTS",
 57)
 58
 59ALL_VARIABLES = ADDRESS_GROUPS + PORT_GROUPS
 60
 61CLASSTYPES = (
 62    "not-suspicious",
 63    "unknown",
 64    "bad-unknown",
 65    "attempted-recon",
 66    "successful-recon-limited",
 67    "successful-recon-largescale",
 68    "attempted-dos",
 69    "successful-dos",
 70    "attempted-user",
 71    "unsuccessful-user",
 72    "successful-user",
 73    "attempted-admin",
 74    "successful-admin",
 75    # NEW CLASSIFICATIONS
 76    "rpc-portmap-decode",
 77    "shellcode-detect",
 78    "string-detect",
 79    "suspicious-filename-detect",
 80    "suspicious-login",
 81    "system-call-detect",
 82    "tcp-connection",
 83    "trojan-activity",
 84    "unusual-client-port-connection",
 85    "network-scan",
 86    "denial-of-service",
 87    "non-standard-protocol",
 88    "protocol-command-decode",
 89    "web-application-activity",
 90    "web-application-attack",
 91    "misc-activity",
 92    "misc-attack",
 93    "icmp-event",
 94    "inappropriate-content",
 95    "policy-violation",
 96    "default-login-attempt",
 97    # Update
 98    "targeted-activity",
 99    "exploit-kit",
100    "external-ip-check",
101    "domain-c2",
102    "pup-activity",
103    "credential-theft",
104    "social-engineering",
105    "coin-mining",
106    "command-and-control",
107)
108
109NON_FUNCTIONAL_KEYWORDS = (
110    "classtype",
111    "gid",
112    "metadata",
113    "msg",
114    "priority",
115    "reference",
116    "rev",
117    "sid",
118    "target",
119)
120
121FLOW_KEYWORDS = (
122    "flow",
123    "flow.age",
124    "flowint",
125)
126
127STREAM_KEYWORDS = ("stream_size",)
128
129FLOW_STREAM_KEYWORDS: Sequence[str] = tuple(
130    sorted(set(FLOW_KEYWORDS).union(STREAM_KEYWORDS)),
131)
132
133STICKY_BUFFER_NAMING = {
134    "dce_iface": "dce.iface",
135    "dce_opnum": "dce.opnum",
136    "dce_stub_data": "dce.stub_data",
137    "dns_query": "dns.query",
138    "file_data": "file.data",
139    "http_accept": "http.accept",
140    "http_accept_enc": "http.accept_enc",
141    "http_accept_lang": "http.accept_lang",
142    "http_client_body": "http.request_body",
143    "http_connection": "http.connection",
144    "http_content_len": "http.content_len",
145    "http_content_type": "http.content_type",
146    "http_cookie": "http.cookie",
147    "http_header": "http.header",
148    "http_header_names": "http.header_names",
149    "http_host": "http.host",
150    "http_method": "http.method",
151    "http_protocol": "http.protocol",
152    "http_raw_header": "http.header.raw",
153    "http_raw_host": "http.host.raw",
154    "http_raw_uri": "http.uri.raw",
155    "http_referer": "http.referer",
156    "http_request_line": "http.request_line",
157    "http_response_line": "http.response_line",
158    "http_server_body": "http.response_body",
159    "http_start": "http.start",
160    "http_stat_code": "http.stat_code",
161    "http_stat_msg": "http.stat_msg",
162    "http_uri": "http.uri",
163    "http_user_agent": "http.user_agent",
164    "ja3_hash": "ja3.hash",
165    "tls_cert_fingerprint": "tls.cert_fingerprint",
166    "tls_cert_issuer": "tls.cert_issuer",
167    "tls_cert_serial": "tls.cert_serial",
168    "tls_cert_subject": "tls.cert_subject",
169    "tls_sni": "tls.sni",
170}
171
172BASE64_BUFFER_KEYWORDS = ("base64_data",)
173
174OTHER_BUFFERS = (
175    "http.location",
176    "http.request_header",
177    "http.response_header",
178    "http.server",
179    "ja3s.hash",
180    "tls.certs",
181    "tls.version",
182)
183
184assert set(OTHER_BUFFERS).isdisjoint(
185    set(STICKY_BUFFER_NAMING.keys()).union(STICKY_BUFFER_NAMING.values())
186)
187
188BUFFER_KEYWORDS: Sequence[str] = tuple(
189    sorted(
190        set(STICKY_BUFFER_NAMING.keys())
191        .union(STICKY_BUFFER_NAMING.values())
192        .union(BASE64_BUFFER_KEYWORDS)
193        .union(OTHER_BUFFERS),
194    ),
195)
196
197SIZE_KEYWORDS = (
198    "bsize",
199    "dsize",
200)
201
202TRANSFORMATION_KEYWORDS = (
203    "compress_whitespace",
204    "dotprefix",
205    "header_lowercase",
206    "pcrexform",
207    "strip_pseudo_headers",
208    "strip_whitespace",
209    "to_lowercase",
210    "to_md5",
211    "to_sha1",
212    "to_sha256",
213    "to_uppercase",
214    "url_decode",
215    "xor",
216)
217
218BASE64_TRANSFORMATION_KEYWORDS = ("base64_decode",)
219
220ALL_TRANSFORMATION_KEYWORDS: Sequence[str] = tuple(
221    sorted(set(TRANSFORMATION_KEYWORDS).union(BASE64_TRANSFORMATION_KEYWORDS)),
222)
223
224CONTENT_KEYWORDS = ("content", "pcre")
225
226POINTER_MOVEMENT_KEYWORDS = (
227    "depth",
228    "distance",
229    "offset",
230    "pkt_data",
231    "within",
232)
233
234COMPATIBILITY_MODIFIER_KEYWORDS = ("rawbytes",)
235
236MODIFIER_KEYWORDS = ("nocase",)
237
238ALL_MODIFIER_KEYWORDS: Sequence[str] = tuple(
239    sorted(set(COMPATIBILITY_MODIFIER_KEYWORDS).union(MODIFIER_KEYWORDS)),
240)
241
242MATCH_LOCATION_KEYWORDS = (
243    "endswith",
244    "startswith",
245)
246
247OTHER_PAYLOAD_KEYWORDS = (
248    "byte_extract",
249    "byte_jump",
250    "byte_test",
251    "isdataat",
252)
253
254IP_SPECIFIC_KEYWORDS = (
255    "ip_proto",
256    "ttl",
257)
258
259TCP_SPECIFIC_KEYWORDS = (
260    "ack",
261    "flags",  # This is a duplicate of tcp.flags
262    "seq",
263    "tcp.flags",
264    "tcp.hdr",
265)
266
267UDP_SPECIFIC_KEYWORDS = ("udp.hdr",)
268
269ICMP_SPECIFIC_KEYWORDS = (
270    "fragbits",
271    "icode",
272    "icmp_id",
273    "icmp_seq",
274    "itype",
275)
276
# Keywords that only make sense for HTTP traffic, in both the dotted
# ("http.uri") and the underscore ("http_uri") spelling.
# Every keyword is listed exactly once; earlier revisions repeated
# "http.content_len", "http.stat_code" and "http_content_len".
# NOTE(review): "http_client_body" (a key of STICKY_BUFFER_NAMING) is not listed
# here although "http_server_body" is - confirm whether that is intentional.
HTTP_SPECIFIC_KEYWORDS = (
    "file.data",
    "file_data",
    "http.accept",
    "http.accept_enc",
    "http.accept_lang",
    "http.connection",
    "http.content_len",
    "http.content_type",
    "http.cookie",
    "http.header",
    "http.header_names",
    "http.header.raw",
    "http.host",
    "http.host.raw",
    "http.location",
    "http.method",
    "http.protocol",
    "http.referer",
    "http.request_body",
    "http.request_header",
    "http.request_line",
    "http.response_body",
    "http.response_header",
    "http.response_line",
    "http.server",
    "http.start",
    "http.stat_code",
    "http.stat_msg",
    "http.uri",
    "http.uri.raw",
    "http.user_agent",
    "http_accept",
    "http_accept_enc",
    "http_accept_lang",
    "http_connection",
    "http_content_len",
    "http_content_type",
    "http_cookie",
    "http_header",
    "http_header_names",
    "http_host",
    "http_location",
    "http_method",
    "http_protocol",
    "http_raw_header",
    "http_raw_host",
    "http_raw_uri",
    "http_referer",
    "http_request_line",
    "http_response_line",
    "http_server_body",
    "http_start",
    "http_stat_code",
    "http_stat_msg",
    "http_uri",
    "http_user_agent",
    "urilen",
)
339
340DNS_SPECIFIC_KEYWORDS = (
341    "dns.opcode",
342    "dns.query",
343    "dns_query",
344)
345
346TLS_SPECIFIC_KEYWORDS = (
347    "ssl_version",
348    "ssl_state",
349    "tls.cert_fingerprint",
350    "tls.cert_issuer",
351    "tls.cert_serial",
352    "tls.cert_subject",
353    "tls.certs",
354    "tls.sni",
355    "tls.version",
356    "tls_cert_fingerprint",
357    "tls_cert_issuer",
358    "tls_cert_serial",
359    "tls_cert_subject",
360    "tls_sni",
361)
362
363SSH_SPECIFIC_KEYWORDS = ("ssh_proto",)
364
365JA3_JA4_KEYWORDS = (
366    "ja3.hash",
367    "ja3_hash",
368    "ja3.string",
369    "ja3s.hash",
370)
371
372DCERPC_SPECIFIC_KEYWORDS = (
373    "dce.iface",
374    "dce.opnum",
375    "dce.stub_data",
376    "dce_iface",
377    "dce_opnum",
378    "dce_stub_data",
379)
380
381FTP_KEYWORDS = ("ftpbounce", "ftpdata_command")
382
383APP_LAYER_KEYWORDS = (
384    "app-layer-event",
385    "app-layer-protocol",
386)
387
388PROTOCOL_SPECIFIC_KEYWORDS = tuple(
389    sorted(
390        set().union(
391            *(
392                IP_SPECIFIC_KEYWORDS,
393                TCP_SPECIFIC_KEYWORDS,
394                UDP_SPECIFIC_KEYWORDS,
395                ICMP_SPECIFIC_KEYWORDS,
396                HTTP_SPECIFIC_KEYWORDS,
397                DNS_SPECIFIC_KEYWORDS,
398                TLS_SPECIFIC_KEYWORDS,
399                SSH_SPECIFIC_KEYWORDS,
400                DCERPC_SPECIFIC_KEYWORDS,
401                JA3_JA4_KEYWORDS,
402                FTP_KEYWORDS,
403                APP_LAYER_KEYWORDS,
404            ),
405        ),
406    ),
407)
408
409PERFORMANCE_DETECTION_OPTIONS = ("fast_pattern",)
410
411LUA_KEYWORDS = ("lua", "luajit")
412
413ALL_DETECTION_KEYWORDS: Sequence[str] = tuple(
414    sorted(
415        set().union(
416            *(
417                BUFFER_KEYWORDS,
418                SIZE_KEYWORDS,
419                ALL_TRANSFORMATION_KEYWORDS,
420                CONTENT_KEYWORDS,
421                POINTER_MOVEMENT_KEYWORDS,
422                ALL_MODIFIER_KEYWORDS,
423                MATCH_LOCATION_KEYWORDS,
424                OTHER_PAYLOAD_KEYWORDS,
425                PROTOCOL_SPECIFIC_KEYWORDS,
426                PERFORMANCE_DETECTION_OPTIONS,
427                LUA_KEYWORDS,
428            ),
429        ),
430    ),
431)
432
433THRESHOLD_KEYWORDS = (
434    "detection_filter",
435    "threshold",
436)
437
438STATEFUL_KEYWORDS = ("flowbits", "flowint", "xbits")
439
440OTHER_KEYWORDS = ("noalert", "tag")
441
442ALL_KEYWORDS = tuple(
443    sorted(
444        set().union(
445            *(
446                NON_FUNCTIONAL_KEYWORDS,
447                FLOW_KEYWORDS,
448                STREAM_KEYWORDS,
449                ALL_DETECTION_KEYWORDS,
450                THRESHOLD_KEYWORDS,
451                STATEFUL_KEYWORDS,
452                OTHER_KEYWORDS,
453            ),
454        ),
455    ),
456)
457
458METADATA_DATE_KEYWORDS = (
459    "created_at",
460    "reviewed_at",
461    "updated_at",
462)
463
464METADATA_NON_DATE_KEYWORDS = (
465    "affected_product",
466    "attack_target",
467    "confidence",
468    "cve",
469    "deprecation_reason",
470    "deployment",
471    "former_category",
472    "former_sid",
473    "impact_flag",
474    "malware_family",
475    "mitre_tactic_id",
476    "mitre_tactic_name",
477    "mitre_technique_id",
478    "mitre_technique_name",
479    "performance_impact",
480    "policy",
481    "ruleset",
482    "signature_severity",
483    "tag",
484    "tls_state",
485    "first_seen",
486    "confidence_level",
487)
488
489ALL_METADATA_KEYWORDS = tuple(
490    sorted(set(METADATA_DATE_KEYWORDS).union(METADATA_NON_DATE_KEYWORDS)),
491)
492
493IP_ADDRESS_REGEX = _regex_provider.compile(r"^.*\d+\.\d+\.\d+\.\d+.*$")
494
495_GROUP_REGEX = _regex_provider.compile(r"^(!)?\[(.*)\]$")
496_VARIABLE_GROUP_REGEX = _regex_provider.compile(r"^!?\$([A-Z\_]+)$")
497
498_ACTION_REGEX = _regex_provider.compile(
499    r"(alert|pass|drop|reject|rejectsrc|rejectdst|rejectboth)",
500)
501_PROTOCOL_REGEX = _regex_provider.compile(r"[a-z0-3\-]+")
502_ADDR_REGEX = _regex_provider.compile(r"[a-zA-Z0-9\$_\!\[\],\s/\.]+")
503_PORT_REGEX = _regex_provider.compile(r"[a-zA-Z0-9\$_\!\[\],\s:]+")
504_DIRECTION_REGEX = _regex_provider.compile(r"(\->|<>)")
505HEADER_REGEX = _regex_provider.compile(
506    rf"{_ACTION_REGEX.pattern}\s*{_PROTOCOL_REGEX.pattern}\s*{_ADDR_REGEX.pattern}\s*{_PORT_REGEX.pattern}\s*{_DIRECTION_REGEX.pattern}\s*{_ADDR_REGEX.pattern}\s*{_PORT_REGEX.pattern}",
507)
508_OPTION_REGEX = _regex_provider.compile(
509    r"[a-z\-\._]+\s*(:(\s*([0-9]+|.+)\s*\,?\s*)+)?;"
510)
511_BODY_REGEX = _regex_provider.compile(rf"\((\s*{_OPTION_REGEX.pattern}\s*)*\)")
512_RULE_REGEX = _regex_provider.compile(
513    rf"^(\s*#)?\s*{HEADER_REGEX.pattern}\s*{_BODY_REGEX.pattern}\s*(#.*)?$",
514)
515
516
def get_regex_provider():  # noqa: ANN201
    """Return the module this file uses to compile its regular expressions.

    This is the third-party `regex` module when it was found at import time,
    and the standard-library `re` module otherwise.
    """
    return _regex_provider
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __escape_regex(s: str) -> str:
    """Return `s` with each regex metacharacter prefixed by a backslash.

    The characters treated as special are the backslash itself and
    . ^ $ * + ? { } [ ] | ( )
    """
    # One pass over the input is enough: the backslash is part of the set, so
    # a backslash already present in `s` is doubled exactly once and the
    # backslashes added for the other characters are never escaped again.
    metacharacters = "\\.^$*+?{}[]|()"
    return "".join("\\" + char if char in metacharacters else char for char in s)
def get_options_regex(options: Iterable[str]) -> _regex_provider.Pattern:
    """Build a compiled pattern whose single group matches any given option.

    The options are sorted and frozen into a tuple before being handed to the
    memoized helper, so the argument is hashable and the same options supplied
    in a different order reuse the same cache entry.
    """
    canonical_options = tuple(sorted(options))
    return __get_options_regex(canonical_options)
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __get_options_regex(options: Sequence[str]) -> _regex_provider.Pattern:
    """Compile one capture group that alternates over the escaped options."""
    alternatives = "|".join(map(__escape_regex, options))
    return _regex_provider.compile(f"({alternatives})")


def __is_group(entry: str) -> bool:
    """Tell whether `entry` is a bracketed group, optionally prefixed by `!`."""
    return _GROUP_REGEX.match(entry) is not None
def __split_top_level_entries(content: str) -> list[str]:
    """Split `content` on the commas that are not inside a nested `[...]` group."""
    parts = []
    depth = 0
    start = 0
    for index, char in enumerate(content):
        if char == "[":
            depth += 1
        elif char == "]":
            depth -= 1
        elif char == "," and depth <= 0:
            # `depth <= 0` keeps unbalanced input splitting on every comma, as
            # a plain `str.split(",")` would.
            parts.append(content[start:index])
            start = index + 1
    parts.append(content[start:])
    return parts


def get_rule_group_entries(group: str) -> Sequence[str]:
    """Returns a flat list of the entries in a (possibly nested) group.

    A value that is not a bracketed group is returned as a single-element list
    containing the stripped value. For a group, every entry is stripped and
    nested groups are expanded recursively. A nested group may itself contain
    commas, e.g. `[1:80,![2,4]]` yields `["1:80", "!2", "!4"]`.

    If the group is negated with a leading `!`, the `!` is prefixed to every
    returned entry (including entries that already start with `!`).

    Args:
        group: The raw group text, e.g. `[$HOME_NET, !10.0.0.1]`.

    Returns:
        The list of entries in order of appearance.
    """
    stripped_group = group.strip()

    match = _GROUP_REGEX.match(stripped_group)
    if match is None:
        return [stripped_group]

    entries = []
    # Splitting only on top-level commas keeps nested groups intact so the
    # recursive call can expand them; a plain entry comes back as a
    # single-element list holding its stripped text.
    for entry in __split_top_level_entries(match.group(2)):
        entries += get_rule_group_entries(entry)

    if match.group(1) == "!":
        entries = ["!" + entry for entry in entries]

    return entries
592 593
def get_variable_groups(value: str) -> Sequence[str]:
    """Returns a list of variable groups such as $HTTP_SERVERS in a variable.

    The names are returned without the leading `$` and without any `!`
    negation prefix. Entries that are not variables are left out.

    Args:
        value: The raw address or port value, which may be a bracketed group.

    Returns:
        A new list of variable names; callers may modify it freely.
    """
    # Hand out a copy: the helper is memoized, so returning its list object
    # directly would let a caller's in-place changes leak into later cache hits.
    return list(__get_variable_groups(value))
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __get_variable_groups(value: str) -> Sequence[str]:
    """Collect the names of all `$VARIABLE` entries found in `value`."""
    candidate_matches = (
        _VARIABLE_GROUP_REGEX.match(entry) for entry in get_rule_group_entries(value)
    )
    # Group 1 is the variable name without the `$` and without a leading `!`.
    return [found.group(1) for found in candidate_matches if found is not None]
def get_rule_body(rule: idstools.rule.Rule) -> str:
    """Extract the parenthesised options section from the raw text of `rule`.

    Raises:
        RuntimeError: If no body can be found in the raw rule text.
    """
    body = __get_rule_body(rule)
    return body
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __get_rule_body(rule: idstools.rule.Rule) -> str:
    """Return the first match of the body pattern in the raw rule text.

    Raises:
        RuntimeError: If the raw rule text contains no match; the failure is
            also logged at critical level.
    """
    body_match = _BODY_REGEX.search(rule["raw"])
    if body_match is not None:
        return body_match.group(0)

    msg = f"Could not extract rule body from rule: {rule['raw']}"
    _logger.critical(msg)
    raise RuntimeError(msg)
def is_valid_rule(rule: idstools.rule.Rule) -> bool:
    """Report whether the raw text of `rule` matches the overall rule pattern."""
    return _RULE_REGEX.match(rule["raw"]) is not None