(g)ULP!
Loading...
Searching...
No Matches
helpers.py
Go to the documentation of this file.
1import json
2import logging
3
4import muty.file
5from sigma.processing.conditions import LogsourceCondition
6from sigma.processing.pipeline import ProcessingItem, ProcessingPipeline
7from sigma.processing.transformations import FieldMappingTransformation
8
9from gulp.api.mapping.models import FieldMappingEntry, GulpMapping
10from gulp.utils import logger
11
async def _get_mappings_internal(mapping_file_path: str) -> list[dict]:
    """Check that the mapping file exists and return its "mappings" entry.

    Args:
        mapping_file_path (str): file to load mappings from

    Raises:
        FileNotFoundError: file does not exist
        KeyError: the loaded JSON object has no "mappings" key

    Returns:
        list[dict]: the value stored under the top-level "mappings" key
            (callers in this module index and iterate it as a list of
            mapping dictionaries)
    """
    log = logger()
    if log is None:
        # fall back to the root logger when the application logger is not set
        log = logging.getLogger()

    # read mappings from file path, if it exists
    if not await muty.file.exists_async(mapping_file_path):
        log.warning("mapping file not found: %s", mapping_file_path)
        raise FileNotFoundError("mapping file not found: %s" % (mapping_file_path))

    # load the mapping file
    log.debug("loading mapping file: %s", mapping_file_path)
    buf = await muty.file.read_file_async(mapping_file_path)
    return json.loads(buf)["mappings"]
40
async def get_mappings_from_file(mapping_file_path: str) -> list[GulpMapping]:
    """
    Retrieve all mappings from a file.

    Args:
        mapping_file_path (str): The path to the mapping file.

    Returns:
        list[GulpMapping]: one GulpMapping per entry of the file's "mappings" array

    Raises:
        FileNotFoundError: if the mapping file does not exist
    """
    raw_mappings = await _get_mappings_internal(mapping_file_path)
    return [GulpMapping.from_dict(raw) for raw in raw_mappings]
67
async def get_mapping_from_file(
    mapping_file_path: str, mapping_id: str = None
) -> GulpMapping:
    """
    Retrieve the mapping from a file.

    Args:
        mapping_file_path (str): The path to the mapping file.
        mapping_id (str): The mapping id to retrieve from the file (default: None, return first mapping only).

    Returns:
        GulpMapping: the mapping

    Raises:
        FileNotFoundError: if the mapping file does not exist
        ValueError: if the mapping_id is not found in the mapping file, or if
            mapping_id is None and the mapping file contains no mappings

    NOTE: the following is an example of the mapping file format to indicate different mapping options and styles (single, multiple, variable mapping)

    ~~~json
    {
        // an array of mappings and related options
        "mappings": [{
                "fields": {
                    // source field
                    "EntryNumber": {
                        // map to single string
                        "map_to": "event.sequence"
                    }
                },
                "options": {
                    // set "event.code" to "record" if this mapping is used
                    "event_code": "record",
                    // set "agent.type" to "mftecmd" if this mapping is used
                    "agent_type": "mftecmd",
                    // this is to identify this mapping in the whole file
                    "mapping_id": "record"
                }
            },
            {
                "fields": {
                    "SourceFile": {
                        // multiple mapping
                        // SourceFile will be mapped to both "file.path" and "file.name"
                        "map_to": ["file.path", "file.name"]
                    }
                },
                "options": {
                    "mapping_id": "boot"
                }
            },
            {
                "fields": {
                    "Name": {
                        // variable mapping
                        // Name will be mapped to "user.name" and "service.name"
                        "map_to": [
                            ["service", "security", "user.name"],
                            ["service", "security", "service.name"]
                        ],
                        "is_variable_mapping": true
                    }
                },
                "options": {
                    "mapping_id": "j"
                }
            }
    ]}
    ~~~
    """
    log = logger()
    if log is None:
        # fall back to the root logger when the application logger is not set
        log = logging.getLogger()

    mappings = await _get_mappings_internal(mapping_file_path)
    if mapping_id is None:
        if not mappings:
            # an empty "mappings" array would otherwise surface as a bare IndexError
            raise ValueError(
                "no mappings found in the mapping file: %s" % (mapping_file_path)
            )
        first = mappings[0]
        log.warning("no mapping_id set, returning first element: %s", first)
        return GulpMapping.from_dict(first)

    # get specific mapping: the id lives in the entry's "options" object
    for candidate in mappings:
        options = candidate.get("options", None)
        if options is not None and options.get("mapping_id", None) == mapping_id:
            log.debug("mapping found for mapping_id=%s: %s", mapping_id, candidate)
            return GulpMapping.from_dict(candidate)
    raise ValueError("mapping_id not found in the mapping file: %s" % (mapping_id))
156
157
async def get_enriched_mapping_for_ingestion(
    pipeline: ProcessingPipeline = None,
    mapping_file_path: str = None,
    mapping_id: str = None,
    product: str = None,
) -> GulpMapping:
    """
    Retrieves an enriched mapping by merging a pipeline mapping and a file mapping.

    NOTE: This is to be used solely by the INGESTION plugins.

    Args:
        pipeline (ProcessingPipeline, optional): The processing pipeline containing the mapping. Defaults to None.
        mapping_file_path (str, optional): The file path of the mapping file. Defaults to None.
        mapping_id (str, optional): The ID of the mapping. Defaults to None.
        product (str, optional): Currently unused by this function. Defaults to None.

    Returns:
        GulpMapping: The enriched mapping, may be an empty GulpMapping if i.e. both pipeline and mapping_file_path are not provided
            (or the mapping file cannot be loaded and no pipeline is given).

    """
    log = logger()
    if log is None:
        # fall back to the root logger when the application logger is not set
        log = logging.getLogger()

    if pipeline is None and mapping_file_path is None:
        # return an empty mapping
        return GulpMapping.from_dict({})

    file_mapping: GulpMapping = None
    if mapping_file_path is not None:
        # get mapping from file; a failure is logged and is fatal only when
        # there is no pipeline to fall back on
        try:
            file_mapping = await get_mapping_from_file(
                mapping_file_path, mapping_id=mapping_id
            )
        except Exception as ex:
            log.exception(
                "error loading mapping file: %s, ex=%s", mapping_file_path, ex
            )
            if pipeline is None:
                return GulpMapping.from_dict({})

    if pipeline is None:
        # only the file mapping is available (it loaded successfully, otherwise
        # we would have returned above)
        log.warning(
            "no pipeline provided, returning file mapping: %s",
            json.dumps(file_mapping.to_dict(), indent=2),
        )
        return file_mapping

    # get mapping from pipeline, convert each FieldMappingTransformation item to GulpMapping
    log.debug("turning provided pipeline to GulpMapping ...")
    has_file_options = file_mapping is not None and file_mapping.options is not None
    d: dict = {
        "fields": {},
        # options can only come from the file mapping
        "options": file_mapping.options.to_dict() if has_file_options else None,
    }
    for item in pipeline.items:
        if isinstance(item.transformation, FieldMappingTransformation):
            for k, v in item.transformation.mapping.items():
                d["fields"][k] = {"map_to": v}
    pipeline_mapping: GulpMapping = GulpMapping.from_dict(d)

    if file_mapping is None:
        log.warning(
            "no file mapping provided, returning pipeline mapping: %s",
            json.dumps(pipeline_mapping.to_dict(), indent=2),
        )
        return pipeline_mapping

    # merge file mapping into pipeline_mapping
    log.debug("merging file mapping into pipeline mapping ...")
    for field_name, file_v in file_mapping.fields.items():
        if field_name not in pipeline_mapping.fields:
            # field only known to the file mapping: take it as is
            # this seems a pylint issue: https://github.com/pylint-dev/pylint/issues/2767 and related
            # pylint: disable=unsupported-assignment-operation
            pipeline_mapping.fields[field_name] = file_v
            continue

        # field defined by both: merge the file entry into the pipeline entry
        # (looked up through .fields, like every other access in this function)
        pipeline_v: FieldMappingEntry = pipeline_mapping.fields[field_name]
        if file_v.is_variable_mapping:
            # "map_to" is a list[list[str]]
            # where each member of the inner list is a variable mapping with 3 strings (logsourcename, logsource, mapped)
            # since we're calling this for ingestion only, we simply convert the affected "map_to" to a
            # multiple string mapping, to map the field to multiple values
            file_v.map_to = [vm[2] for vm in file_v.map_to]

        # depending if the source (file) and destination (pipeline) mapping are strings or lists,
        # we need to merge them accordingly
        if file_v.map_to is not None:
            pipeline_is_list = isinstance(pipeline_v.map_to, list)
            pipeline_is_str = isinstance(pipeline_v.map_to, str)
            file_is_list = isinstance(file_v.map_to, list)
            file_is_str = isinstance(file_v.map_to, str)
            if pipeline_is_list and file_is_list:
                pipeline_v.map_to.extend(file_v.map_to)
            elif pipeline_is_str and file_is_str:
                pipeline_v.map_to = [pipeline_v.map_to, file_v.map_to]
            elif pipeline_is_list and file_is_str:
                pipeline_v.map_to.append(file_v.map_to)
            elif pipeline_is_str and file_is_list:
                file_v.map_to.append(pipeline_v.map_to)
                pipeline_v.map_to = file_v.map_to

        # set other options from the file mapping
        pipeline_v.is_timestamp = file_v.is_timestamp
        pipeline_v.event_code = file_v.event_code

    return pipeline_mapping
281
282
async def get_enriched_pipeline(
    pipeline: ProcessingPipeline = None,
    mapping_file_path: str = None,
    mapping_id: str = None,
    product: str = None,
    **kwargs,
) -> ProcessingPipeline:
    """
    Returns an enriched pysigma processing pipeline (base ProcessingPipeline mapping + file mapping) to be used to convert SIGMA RULES to ELASTICSEARCH DSL QUERY.

    NOTE: This is to be used solely by the SIGMA plugins.

    Args:
        pipeline (ProcessingPipeline): optional, the base processing pipeline to enrich (default: None, empty pipeline)
        mapping_file_path (str): optional, the path to the mapping file to load mappings from (default: None)
        mapping_id (str): optional, the mapping id to retrieve from the mapping file (default: None, first mapping only)
        product (str): optional, the product name to set in the resulting pipeline LogSourceCondition array (default: None, inferred from the file name if mapping_file_path is provided: /path/to/product.json -> product)
        kwargs: additional keyword arguments (currently unused)
    Returns:
        ProcessingPipeline: The enriched processing pipeline (the provided pipeline is extended in place).
            if no mapping_file_path, or if the mapping file cannot be loaded, the original pipeline (or an empty pipeline if base is None) is returned.
    """
    log = logger()
    if log is None:
        # fall back to the root logger when the application logger is not set
        log = logging.getLogger()

    if pipeline is None:
        # use default pipeline as base
        log.debug("no pipeline provided, using empty pipeline.")
        pipeline = ProcessingPipeline()

    if mapping_file_path is None:
        # no file mapping provided, return the original pipeline
        log.debug("no file mapping provided, using just the provided pipeline.")
        return pipeline

    try:
        mapping = await get_mapping_from_file(mapping_file_path, mapping_id=mapping_id)
    except Exception:
        # best effort: a broken/missing mapping file leaves the pipeline untouched.
        # (not a bare except, so task cancellation and KeyboardInterrupt propagate)
        log.exception("error loading mapping file: %s", mapping_file_path)
        return pipeline

    if product is None:
        # infer the product from the file name only when the caller did not
        # provide one: /path/to/product.json -> product
        product = mapping_file_path.split("/")[-1].split(".")[0]

    # enrich pipeline

    # collect standard (single or multiple string) and variable mapping from FILE
    std_mapping = {}
    var_mapping = {}
    for k, v in mapping.fields.items():
        vv: FieldMappingEntry = v
        if isinstance(vv.map_to, str):
            # single, map k to vv.map_to
            std_mapping[k] = [vv.map_to]
        elif isinstance(vv.map_to, list):
            if vv.is_variable_mapping:
                # variable mapping, map k to vv.map_to which is a list of lists [logsource_field_name, logsource, mapped_field]
                var_mapping[k] = vv.map_to
            else:
                # multiple, map k to vv.map_to which is a list of string
                std_mapping[k] = vv.map_to

    p_items: list[ProcessingItem] = []

    # create a single processing item carrying the whole standard mapping
    # (one transformation already handles every field in the dict)
    if std_mapping:
        # use product only for rule conditions
        rule_conditions: list[LogsourceCondition] = []
        if product is not None:
            rule_conditions = [LogsourceCondition(product=product)]

        p_items.append(
            ProcessingItem(
                identifier="gulp-field_mapping",
                transformation=FieldMappingTransformation(std_mapping),
                rule_conditions=rule_conditions,
            )
        )

    # create one processing item for each variable mapping entry:
    # we use both product and logsource field/name for rule conditions
    for k, entries in var_mapping.items():
        for m in entries:
            logsrc_field, logsrc, mapped = m[0], m[1], m[2]
            p_items.append(
                ProcessingItem(
                    identifier="gulp-variable_field_mapping-%s-%s-%s"
                    % (k, logsrc_field, logsrc),
                    transformation=FieldMappingTransformation({k: mapped}),
                    rule_conditions=[
                        LogsourceCondition(
                            **{
                                "product": product,
                                logsrc_field: logsrc,
                            }
                        ),
                    ],
                )
            )

    # return the extended pipeline
    pipeline.items.extend(p_items)
    return pipeline
GulpMapping get_mapping_from_file(str mapping_file_path, str mapping_id=None)
Definition helpers.py:70
GulpMapping get_enriched_mapping_for_ingestion(ProcessingPipeline pipeline=None, str mapping_file_path=None, str mapping_id=None, str product=None)
Definition helpers.py:163
ProcessingPipeline get_enriched_pipeline(ProcessingPipeline pipeline=None, str mapping_file_path=None, str mapping_id=None, str product=None, **kwargs)
Definition helpers.py:289
list[GulpMapping] get_mappings_from_file(str mapping_file_path)
Definition helpers.py:41
dict _get_mappings_internal(str mapping_file_path)
Definition helpers.py:12