Source code for suricata_check.utils.regex

  1"""The `suricata_check.utils.regex` module contains regular expressions for matching various parts of rules."""
  2
  3import logging
  4from collections.abc import Iterable, Sequence
  5from functools import lru_cache
  6
  7from suricata_check.utils.regex_provider import Pattern
  8from suricata_check.utils.regex_provider import (
  9    get_regex_provider as _get_regex_provider,
 10)
 11from suricata_check.utils.rule import Rule
 12
 13_logger = logging.getLogger(__name__)
 14_regex_provider = _get_regex_provider()
 15
 16LRU_CACHE_SIZE = 10
 17
 18ADDRESS_GROUPS = (
 19    "HOME_NET",
 20    "EXTERNAL_NET",
 21    "HTTP_SERVERS",
 22    "SMTP_SERVERS",
 23    "SQL_SERVERS",
 24    "DNS_SERVERS",
 25    "TELNET_SERVERS",
 26    "AIM_SERVERS",
 27    "DC_SERVERS",
 28    "DNP3_SERVER",
 29    "DNP3_CLIENT",
 30    "MODBUS_CLIENT",
 31    "MODBUS_SERVER",
 32    "ENIP_CLIENT",
 33    "ENIP_SERVER",
 34)
 35
 36
 37PORT_GROUPS = (
 38    "HTTP_PORTS",
 39    "SHELLCODE_PORTS",
 40    "ORACLE_PORTS",
 41    "SSH_PORTS",
 42    "DNP3_PORTS",
 43    "MODBUS_PORTS",
 44    "FILE_DATA_PORTS",
 45    "FTP_PORTS",
 46    "GENEVE_PORTS",
 47    "VXLAN_PORTS",
 48    "TEREDO_PORTS",
 49)
 50
 51ALL_VARIABLES = ADDRESS_GROUPS + PORT_GROUPS
 52
 53CLASSTYPES = (
 54    "not-suspicious",
 55    "unknown",
 56    "bad-unknown",
 57    "attempted-recon",
 58    "successful-recon-limited",
 59    "successful-recon-largescale",
 60    "attempted-dos",
 61    "successful-dos",
 62    "attempted-user",
 63    "unsuccessful-user",
 64    "successful-user",
 65    "attempted-admin",
 66    "successful-admin",
 67    # NEW CLASSIFICATIONS
 68    "rpc-portmap-decode",
 69    "shellcode-detect",
 70    "string-detect",
 71    "suspicious-filename-detect",
 72    "suspicious-login",
 73    "system-call-detect",
 74    "tcp-connection",
 75    "trojan-activity",
 76    "unusual-client-port-connection",
 77    "network-scan",
 78    "denial-of-service",
 79    "non-standard-protocol",
 80    "protocol-command-decode",
 81    "web-application-activity",
 82    "web-application-attack",
 83    "misc-activity",
 84    "misc-attack",
 85    "icmp-event",
 86    "inappropriate-content",
 87    "policy-violation",
 88    "default-login-attempt",
 89    # Update
 90    "targeted-activity",
 91    "exploit-kit",
 92    "external-ip-check",
 93    "domain-c2",
 94    "pup-activity",
 95    "credential-theft",
 96    "social-engineering",
 97    "coin-mining",
 98    "command-and-control",
 99)
100
101NON_FUNCTIONAL_KEYWORDS = (
102    "classtype",
103    "gid",
104    "metadata",
105    "msg",
106    "priority",
107    "reference",
108    "rev",
109    "sid",
110    "target",
111)
112
FLOW_KEYWORDS = ("flow", "flow.age", "flowint")

STREAM_KEYWORDS = ("stream_size",)

# Sorted, de-duplicated union of FLOW_KEYWORDS and STREAM_KEYWORDS.
FLOW_STREAM_KEYWORDS: Sequence[str] = tuple(
    sorted({*FLOW_KEYWORDS, *STREAM_KEYWORDS}),
)
124
125STICKY_BUFFER_NAMING = {
126    "dce_iface": "dce.iface",
127    "dce_opnum": "dce.opnum",
128    "dce_stub_data": "dce.stub_data",
129    "dns_query": "dns.query",
130    "file_data": "file.data",
131    "http_accept": "http.accept",
132    "http_accept_enc": "http.accept_enc",
133    "http_accept_lang": "http.accept_lang",
134    "http_client_body": "http.request_body",
135    "http_connection": "http.connection",
136    "http_content_len": "http.content_len",
137    "http_content_type": "http.content_type",
138    "http_cookie": "http.cookie",
139    "http_header": "http.header",
140    "http_header_names": "http.header_names",
141    "http_host": "http.host",
142    "http_method": "http.method",
143    "http_protocol": "http.protocol",
144    "http_raw_header": "http.header.raw",
145    "http_raw_host": "http.host.raw",
146    "http_raw_uri": "http.uri.raw",
147    "http_referer": "http.referer",
148    "http_request_line": "http.request_line",
149    "http_response_line": "http.response_line",
150    "http_server_body": "http.response_body",
151    "http_start": "http.start",
152    "http_stat_code": "http.stat_code",
153    "http_stat_msg": "http.stat_msg",
154    "http_uri": "http.uri",
155    "http_user_agent": "http.user_agent",
156    "ja3_hash": "ja3.hash",
157    "tls_cert_fingerprint": "tls.cert_fingerprint",
158    "tls_cert_issuer": "tls.cert_issuer",
159    "tls_cert_serial": "tls.cert_serial",
160    "tls_cert_subject": "tls.cert_subject",
161    "tls_sni": "tls.sni",
162}
163
164BASE64_BUFFER_KEYWORDS = ("base64_data",)
165
166OTHER_BUFFERS = (
167    "http.location",
168    "http.request_header",
169    "http.response_header",
170    "http.server",
171    "ja3s.hash",
172    "tls.certs",
173    "tls.version",
174)
175
# Import-time sanity check: no name in OTHER_BUFFERS may also appear as a key
# or a value of STICKY_BUFFER_NAMING.
assert not set(OTHER_BUFFERS).intersection(
    {*STICKY_BUFFER_NAMING.keys(), *STICKY_BUFFER_NAMING.values()},
)

# Sorted, de-duplicated union of every buffer name declared above: both sides
# of STICKY_BUFFER_NAMING, the base64 buffer and the remaining buffers.
BUFFER_KEYWORDS: Sequence[str] = tuple(
    sorted(
        {
            *STICKY_BUFFER_NAMING.keys(),
            *STICKY_BUFFER_NAMING.values(),
            *BASE64_BUFFER_KEYWORDS,
            *OTHER_BUFFERS,
        },
    ),
)
188
189SIZE_KEYWORDS = (
190    "bsize",
191    "dsize",
192)
193
TRANSFORMATION_KEYWORDS = (
    "compress_whitespace",
    "dotprefix",
    "header_lowercase",
    "pcrexform",
    "strip_pseudo_headers",
    "strip_whitespace",
    "to_lowercase",
    "to_md5",
    "to_sha1",
    "to_sha256",
    "to_uppercase",
    "url_decode",
    "xor",
)

BASE64_TRANSFORMATION_KEYWORDS = ("base64_decode",)

# Sorted, de-duplicated union of the two transformation keyword tuples above.
ALL_TRANSFORMATION_KEYWORDS: Sequence[str] = tuple(
    sorted({*TRANSFORMATION_KEYWORDS, *BASE64_TRANSFORMATION_KEYWORDS}),
)
215
216CONTENT_KEYWORDS = ("content", "pcre")
217
218POINTER_MOVEMENT_KEYWORDS = (
219    "depth",
220    "distance",
221    "offset",
222    "pkt_data",
223    "within",
224)
225
COMPATIBILITY_MODIFIER_KEYWORDS = ("rawbytes",)

MODIFIER_KEYWORDS = ("nocase",)

# Sorted, de-duplicated union of the two modifier keyword tuples above.
ALL_MODIFIER_KEYWORDS: Sequence[str] = tuple(
    sorted({*COMPATIBILITY_MODIFIER_KEYWORDS, *MODIFIER_KEYWORDS}),
)
233
234MATCH_LOCATION_KEYWORDS = (
235    "endswith",
236    "startswith",
237)
238
239OTHER_PAYLOAD_KEYWORDS = (
240    "byte_extract",
241    "byte_jump",
242    "byte_test",
243    "isdataat",
244)
245
246IP_SPECIFIC_KEYWORDS = (
247    "ip_proto",
248    "ttl",
249)
250
251TCP_SPECIFIC_KEYWORDS = (
252    "ack",
253    "flags",  # This is a duplicate of tcp.flags
254    "seq",
255    "tcp.flags",
256    "tcp.hdr",
257)
258
259UDP_SPECIFIC_KEYWORDS = ("udp.hdr",)
260
261ICMP_SPECIFIC_KEYWORDS = (
262    "fragbits",
263    "icode",
264    "icmp_id",
265    "icmp_seq",
266    "itype",
267)
268
# HTTP keywords, in both the dotted spelling and the underscore spelling.
# Each keyword is listed exactly once: the previously duplicated
# "http.content_len", "http.stat_code" and "http_content_len" entries were
# removed. "http_client_body" was added because STICKY_BUFFER_NAMING declares
# it as the underscore spelling of "http.request_body" and its sibling
# "http_server_body" was already listed here.
HTTP_SPECIFIC_KEYWORDS = (
    "file.data",
    "file_data",
    "http.accept",
    "http.accept_enc",
    "http.accept_lang",
    "http.connection",
    "http.content_len",
    "http.content_type",
    "http.cookie",
    "http.header",
    "http.header_names",
    "http.header.raw",
    "http.host",
    "http.host.raw",
    "http.location",
    "http.method",
    "http.protocol",
    "http.referer",
    "http.request_body",
    "http.request_header",
    "http.request_line",
    "http.response_body",
    "http.response_header",
    "http.response_line",
    "http.server",
    "http.start",
    "http.stat_code",
    "http.stat_msg",
    "http.uri",
    "http.uri.raw",
    "http.user_agent",
    "http_accept",
    "http_accept_enc",
    "http_accept_lang",
    "http_client_body",
    "http_connection",
    "http_content_len",
    "http_content_type",
    "http_cookie",
    "http_header",
    "http_header_names",
    "http_host",
    "http_location",
    "http_method",
    "http_protocol",
    "http_raw_header",
    "http_raw_host",
    "http_raw_uri",
    "http_referer",
    "http_request_line",
    "http_response_line",
    "http_server_body",
    "http_start",
    "http_stat_code",
    "http_stat_msg",
    "http_uri",
    "http_user_agent",
    "urilen",
)
331
332DNS_SPECIFIC_KEYWORDS = (
333    "dns.opcode",
334    "dns.query",
335    "dns_query",
336)
337
338TLS_SPECIFIC_KEYWORDS = (
339    "ssl_version",
340    "ssl_state",
341    "tls.cert_fingerprint",
342    "tls.cert_issuer",
343    "tls.cert_serial",
344    "tls.cert_subject",
345    "tls.certs",
346    "tls.sni",
347    "tls.version",
348    "tls_cert_fingerprint",
349    "tls_cert_issuer",
350    "tls_cert_serial",
351    "tls_cert_subject",
352    "tls_sni",
353)
354
355SSH_SPECIFIC_KEYWORDS = ("ssh_proto",)
356
357JA3_JA4_KEYWORDS = (
358    "ja3.hash",
359    "ja3_hash",
360    "ja3.string",
361    "ja3s.hash",
362)
363
364DCERPC_SPECIFIC_KEYWORDS = (
365    "dce.iface",
366    "dce.opnum",
367    "dce.stub_data",
368    "dce_iface",
369    "dce_opnum",
370    "dce_stub_data",
371)
372
373FTP_KEYWORDS = ("ftpbounce", "ftpdata_command")
374
375APP_LAYER_KEYWORDS = (
376    "app-layer-event",
377    "app-layer-protocol",
378)
379
# Sorted, de-duplicated union of all per-protocol keyword tuples.
PROTOCOL_SPECIFIC_KEYWORDS = tuple(
    sorted(
        {
            *IP_SPECIFIC_KEYWORDS,
            *TCP_SPECIFIC_KEYWORDS,
            *UDP_SPECIFIC_KEYWORDS,
            *ICMP_SPECIFIC_KEYWORDS,
            *HTTP_SPECIFIC_KEYWORDS,
            *DNS_SPECIFIC_KEYWORDS,
            *TLS_SPECIFIC_KEYWORDS,
            *SSH_SPECIFIC_KEYWORDS,
            *DCERPC_SPECIFIC_KEYWORDS,
            *JA3_JA4_KEYWORDS,
            *FTP_KEYWORDS,
            *APP_LAYER_KEYWORDS,
        },
    ),
)
400
401PERFORMANCE_DETECTION_OPTIONS = ("fast_pattern",)
402
403LUA_KEYWORDS = ("lua", "luajit")
404
# Sorted, de-duplicated union of every detection-related keyword tuple.
ALL_DETECTION_KEYWORDS: Sequence[str] = tuple(
    sorted(
        {
            *BUFFER_KEYWORDS,
            *SIZE_KEYWORDS,
            *ALL_TRANSFORMATION_KEYWORDS,
            *CONTENT_KEYWORDS,
            *POINTER_MOVEMENT_KEYWORDS,
            *ALL_MODIFIER_KEYWORDS,
            *MATCH_LOCATION_KEYWORDS,
            *OTHER_PAYLOAD_KEYWORDS,
            *PROTOCOL_SPECIFIC_KEYWORDS,
            *PERFORMANCE_DETECTION_OPTIONS,
            *LUA_KEYWORDS,
        },
    ),
)
424
425THRESHOLD_KEYWORDS = (
426    "detection_filter",
427    "threshold",
428)
429
430STATEFUL_KEYWORDS = ("flowbits", "flowint", "xbits")
431
432OTHER_KEYWORDS = ("noalert", "tag")
433
# Sorted, de-duplicated union of every keyword tuple defined in this module.
ALL_KEYWORDS = tuple(
    sorted(
        {
            *NON_FUNCTIONAL_KEYWORDS,
            *FLOW_KEYWORDS,
            *STREAM_KEYWORDS,
            *ALL_DETECTION_KEYWORDS,
            *THRESHOLD_KEYWORDS,
            *STATEFUL_KEYWORDS,
            *OTHER_KEYWORDS,
        },
    ),
)
449
450METADATA_DATE_KEYWORDS = (
451    "created_at",
452    "reviewed_at",
453    "updated_at",
454)
455
456METADATA_NON_DATE_KEYWORDS = (
457    "affected_product",
458    "attack_target",
459    "confidence",
460    "cve",
461    "deprecation_reason",
462    "deployment",
463    "former_category",
464    "former_sid",
465    "impact_flag",
466    "malware_family",
467    "mitre_tactic_id",
468    "mitre_tactic_name",
469    "mitre_technique_id",
470    "mitre_technique_name",
471    "performance_impact",
472    "policy",
473    "ruleset",
474    "signature_severity",
475    "tag",
476    "tls_state",
477    "first_seen",
478    "confidence_level",
479)
480
# Sorted, de-duplicated union of the date and non-date metadata keywords.
ALL_METADATA_KEYWORDS = tuple(
    sorted({*METADATA_DATE_KEYWORDS, *METADATA_NON_DATE_KEYWORDS}),
)
484
485IP_ADDRESS_REGEX = _regex_provider.compile(r"^.*\d+\.\d+\.\d+\.\d+.*$")
486
# Matches a bracketed group with an optional leading `!`.
# Group 1 is the negation marker (or None), group 2 is the text between the
# outermost brackets.
_GROUP_REGEX = _regex_provider.compile(r"^(!)?\[(.*)\]$")
# Matches a single, optionally negated, `$VARIABLE`; group 1 is the variable
# name without the `$`. Digits are allowed because variable names declared in
# ADDRESS_GROUPS / PORT_GROUPS contain them (DNP3_SERVER, DNP3_CLIENT,
# DNP3_PORTS).
_VARIABLE_GROUP_REGEX = _regex_provider.compile(r"^!?\$([A-Z0-9_]+)$")
489
# Building blocks for the rule header: action, protocol, addresses, ports and
# direction. Each is compiled on its own so its `.pattern` can be reused below.
_ACTION_REGEX = _regex_provider.compile(
    r"(alert|pass|drop|reject|rejectsrc|rejectdst|rejectboth)",
)
# Any digit is accepted (previously only 0-3), so protocol names such as
# `krb5` are matched as a protocol token as well as `pop3` or `http2`.
_PROTOCOL_REGEX = _regex_provider.compile(r"[a-z0-9\-]+")
_ADDR_REGEX = _regex_provider.compile(r"[a-zA-Z0-9\$_\!\[\],\s/\.]+")
_PORT_REGEX = _regex_provider.compile(r"[a-zA-Z0-9\$_\!\[\],\s:]+")
_DIRECTION_REGEX = _regex_provider.compile(r"(\->|<>)")
# The header is the seven parts below in order, separated by optional
# whitespace.
HEADER_REGEX = _regex_provider.compile(
    r"\s*".join(
        (
            _ACTION_REGEX.pattern,
            _PROTOCOL_REGEX.pattern,
            _ADDR_REGEX.pattern,
            _PORT_REGEX.pattern,
            _DIRECTION_REGEX.pattern,
            _ADDR_REGEX.pattern,
            _PORT_REGEX.pattern,
        ),
    ),
)
# A single `keyword;` or `keyword: value[, value...];` option.
_OPTION_REGEX = _regex_provider.compile(
    r"[a-z\-\._]+\s*(:(\s*([0-9]+|.+)\s*\,?\s*)+)?;",
)
# The parenthesised list of options.
_BODY_REGEX = _regex_provider.compile(rf"\((\s*{_OPTION_REGEX.pattern}\s*)*\)")
# A whole rule line: optional leading `#`, header, body and optional trailing
# `#` comment.
_RULE_REGEX = _regex_provider.compile(
    rf"^(\s*#)?\s*{HEADER_REGEX.pattern}\s*{_BODY_REGEX.pattern}\s*(#.*)?$",
)
507
508
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __escape_regex(s: str) -> str:
    """Return `s` with each regular-expression metacharacter backslash-escaped."""
    # Characters that receive a leading backslash: \ . ^ $ * + ? { } [ ] | ( )
    # A single translation pass handles every character independently, so the
    # backslashes inserted while escaping are never escaped a second time.
    metacharacters = "\\.^$*+?{}[]|()"
    escape_table = str.maketrans({char: "\\" + char for char in metacharacters})
    return s.translate(escape_table)
531
532
def get_options_regex(options: Iterable[str]) -> Pattern:
    """Build a regular expression that matches any one of the given options.

    Args:
        options: The option names to match.

    Returns:
        The compiled pattern produced by the cached helper.
    """
    # Sorting and freezing into a tuple yields a hashable argument for the
    # cached helper, independent of the order the options were supplied in.
    cache_key = tuple(sorted(options))
    return __get_options_regex(cache_key)
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __get_options_regex(options: Sequence[str]) -> Pattern:
    """Compile one capturing alternation over the regex-escaped `options`."""
    alternatives = "|".join(map(__escape_regex, options))
    return _regex_provider.compile("(" + alternatives + ")")


def __is_group(entry: str) -> bool:
    """Tell whether `entry` matches `_GROUP_REGEX`, i.e. is a bracketed group."""
    return _GROUP_REGEX.match(entry) is not None
def __split_top_level_entries(content: str) -> list[str]:
    """Split `content` on the commas that are not enclosed in square brackets."""
    parts: list[str] = []
    depth = 0
    start = 0
    for index, char in enumerate(content):
        if char == "[":
            depth += 1
        elif char == "]":
            # Clamp at zero so a stray `]` in malformed input does not stop
            # later commas from being treated as separators.
            depth = max(depth - 1, 0)
        elif char == "," and depth == 0:
            parts.append(content[start:index])
            start = index + 1
    parts.append(content[start:])
    return parts


def get_rule_group_entries(group: str) -> Sequence[str]:
    """Returns a flat list of the entries in a (possibly nested) group.

    A value that is not a bracketed group is returned as a single-element list
    containing the stripped value. For a group, every entry is stripped of
    surrounding whitespace and nested groups are flattened recursively. If the
    group is negated with a leading `!`, each returned entry is prefixed with
    `!` (entries of a nested negated group therefore carry one `!` per
    enclosing negation).

    Entries are separated on top-level commas only, so a nested group that
    itself contains commas, such as the inner group of `[1:80,![2,4]]`, is
    kept intact and flattened instead of being cut into `![2` and `4]`.

    Args:
        group: The raw address or port value from a rule header.

    Returns:
        The flattened entries of the group.
    """
    stripped_group = group.strip()

    match = _GROUP_REGEX.match(stripped_group)
    if match is None:
        return [stripped_group]

    negated = match.group(1) == "!"

    entries: list[str] = []
    for entry in __split_top_level_entries(match.group(2)):
        stripped_entry = entry.strip()
        if _GROUP_REGEX.match(stripped_entry) is not None:
            entries += get_rule_group_entries(stripped_entry)
        else:
            entries.append(stripped_entry)

    if negated:
        entries = ["!" + entry for entry in entries]

    return entries
575 576
def get_variable_groups(value: str) -> Sequence[str]:
    """Returns a list of variable groups such as $HTTP_SERVERS in a variable.

    Args:
        value: The raw address or port value from a rule header.

    Returns:
        A new list with the names of the variables found, without the `$`.
    """
    # The helper is wrapped in `lru_cache` and returns a list, so the very same
    # list object would be handed to every caller asking for this value.
    # Returning a copy keeps a caller that mutates its result from corrupting
    # what later callers receive from the cache.
    return list(__get_variable_groups(value))
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __get_variable_groups(value: str) -> Sequence[str]:
    """Collect the variable names among the group entries of `value`.

    Every entry of `value` is tested against `_VARIABLE_GROUP_REGEX`; for each
    entry that matches, the first capture group is collected, in entry order.
    """
    candidates = (
        _VARIABLE_GROUP_REGEX.match(entry)
        for entry in get_rule_group_entries(value)
    )
    return [found.group(1) for found in candidates if found is not None]
def get_rule_body(rule: Rule) -> str:
    """Extract the body of a rule.

    Args:
        rule: The rule whose `raw` text is searched for a body.

    Returns:
        The matched body text.

    Raises:
        RuntimeError: If no body can be found in the rule.
    """
    body = __get_rule_body(rule)
    return body
@lru_cache(maxsize=LRU_CACHE_SIZE)
def __get_rule_body(rule: Rule) -> str:
    """Return the first `_BODY_REGEX` match found in `rule.raw`.

    Raises:
        RuntimeError: If `rule.raw` contains no match; the failure is also
            logged at critical level.
    """
    body_match = _BODY_REGEX.search(rule.raw)
    if body_match is not None:
        return body_match.group(0)

    msg = f"Could not extract rule body from rule: {rule.raw}"
    _logger.critical(msg)
    raise RuntimeError(msg)
def is_valid_rule(rule: Rule) -> bool:
    """Check whether the raw text of `rule` matches `_RULE_REGEX`.

    Args:
        rule: The rule whose `raw` text is checked.

    Returns:
        True if the text matches, False otherwise.
    """
    return _RULE_REGEX.match(rule.raw) is not None