Time series anomaly detection for total volume of traffic
Id | 06a9b845-6a95-4432-a78b-83919b28c375 |
Rulename | Time series anomaly detection for total volume of traffic |
Description | Identifies anamalous spikes in network traffic logs as compared to baseline or normal historical patterns. The query leverages a KQL built-in anomaly detection algorithm to find large deviations from baseline patterns. Sudden increases in network traffic volume may be an indication of data exfiltration attempts and should be investigated. The higher the score, the further it is from the baseline value. The output is aggregated to provide summary view of unique source IP to destination IP address and port traffic observed in the flagged anomaly hour. The source IP addresses which were sending less than percentotalthreshold of the total traffic have been exluded whose value can be adjusted as needed . You may have to run queries for individual source IP addresses from SourceIPlist to determine if anything looks suspicious |
Severity | Medium |
Tactics | Exfiltration |
Techniques | T1030 |
Required data connectors | Barracuda CEF CheckPoint CiscoASA F5 Fortinet PaloAltoNetworks |
Kind | Scheduled |
Query frequency | 1d |
Query period | 14d |
Trigger threshold | 3 |
Trigger operator | gt |
Source Uri | https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/TimeSeriesAnomaly-MultiVendor_NetworkTraffic.yaml |
Version | 1.0.4 |
Arm template | 06a9b845-6a95-4432-a78b-83919b28c375.json |
let starttime = 14d;
let endtime = 1d;
let timeframe = 1h;
let scorethreshold = 5;
let percentotalthreshold = 50;
let TimeSeriesData = CommonSecurityLog
| where isnotempty(DestinationIP) and isnotempty(SourceIP)
| where TimeGenerated between (startofday(ago(starttime))..startofday(ago(endtime)))
| project TimeGenerated,SourceIP, DestinationIP, DeviceVendor
| make-series Total=count() on TimeGenerated from startofday(ago(starttime)) to startofday(ago(endtime)) step timeframe by DeviceVendor;
// Filtering specific records associated with spikes as outliers
let TimeSeriesAlerts=materialize(TimeSeriesData
| extend (anomalies, score, baseline) = series_decompose_anomalies(Total, scorethreshold, -1, 'linefit')
| mv-expand Total to typeof(double), TimeGenerated to typeof(datetime), anomalies to typeof(double),score to typeof(double), baseline to typeof(long)
| where anomalies > 0 | extend score = round(score,2), AnomalyHour = TimeGenerated
| project DeviceVendor,AnomalyHour, TimeGenerated, Total, baseline, anomalies, score);
let AnomalyHours = materialize(TimeSeriesAlerts | where TimeGenerated > ago(2d) | project TimeGenerated);
// Join anomalies with Base Data to popalate associated records for investigation - Results sorted by score in descending order
TimeSeriesAlerts
| where TimeGenerated > ago(2d)
| join (
CommonSecurityLog
| where isnotempty(DestinationIP) and isnotempty(SourceIP)
| where TimeGenerated > ago(2d)
| extend DateHour = bin(TimeGenerated, 1h) // create a new column and round to hour
| where DateHour in ((AnomalyHours)) //filter the dataset to only selected anomaly hours
| summarize HourlyCount = count(), TimeGeneratedMax = arg_max(TimeGenerated, *), DestinationIPlist = make_set(DestinationIP, 100), DestinationPortlist = make_set(DestinationPort, 100) by DeviceVendor, SourceIP, TimeGeneratedHour= bin(TimeGenerated, 1h)
| extend AnomalyHour = TimeGeneratedHour
) on AnomalyHour, DeviceVendor
| extend PercentTotal = round((HourlyCount / Total) * 100, 3)
| where PercentTotal > percentotalthreshold
| project DeviceVendor , AnomalyHour, TimeGeneratedMax, SourceIP, DestinationIPlist, DestinationPortlist, HourlyCount, PercentTotal, Total, baseline, score, anomalies
| summarize HourlyCount=sum(HourlyCount), StartTimeUtc=min(TimeGeneratedMax), EndTimeUtc=max(TimeGeneratedMax), SourceIPlist = make_set(SourceIP, 100), SourceIPMax= arg_max(SourceIP, *), DestinationIPlist = make_set(DestinationIPlist, 100), DestinationPortlist = make_set(DestinationPortlist, 100) by DeviceVendor , AnomalyHour, Total, baseline, score, anomalies
| project DeviceVendor , AnomalyHour, EndTimeUtc, SourceIPMax ,SourceIPlist, DestinationIPlist, DestinationPortlist, HourlyCount, Total, baseline, score, anomalies
version: 1.0.4
severity: Medium
queryFrequency: 1d
metadata:
categories:
domains:
- Security - Others
author:
name: Microsoft Security Research
support:
tier: Community
source:
kind: Community
triggerOperator: gt
relevantTechniques:
- T1030
OriginalUri: https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/TimeSeriesAnomaly-MultiVendor_NetworkTraffic.yaml
kind: Scheduled
triggerThreshold: 3
query: |
let starttime = 14d;
let endtime = 1d;
let timeframe = 1h;
let scorethreshold = 5;
let percentotalthreshold = 50;
let TimeSeriesData = CommonSecurityLog
| where isnotempty(DestinationIP) and isnotempty(SourceIP)
| where TimeGenerated between (startofday(ago(starttime))..startofday(ago(endtime)))
| project TimeGenerated,SourceIP, DestinationIP, DeviceVendor
| make-series Total=count() on TimeGenerated from startofday(ago(starttime)) to startofday(ago(endtime)) step timeframe by DeviceVendor;
// Filtering specific records associated with spikes as outliers
let TimeSeriesAlerts=materialize(TimeSeriesData
| extend (anomalies, score, baseline) = series_decompose_anomalies(Total, scorethreshold, -1, 'linefit')
| mv-expand Total to typeof(double), TimeGenerated to typeof(datetime), anomalies to typeof(double),score to typeof(double), baseline to typeof(long)
| where anomalies > 0 | extend score = round(score,2), AnomalyHour = TimeGenerated
| project DeviceVendor,AnomalyHour, TimeGenerated, Total, baseline, anomalies, score);
let AnomalyHours = materialize(TimeSeriesAlerts | where TimeGenerated > ago(2d) | project TimeGenerated);
// Join anomalies with Base Data to popalate associated records for investigation - Results sorted by score in descending order
TimeSeriesAlerts
| where TimeGenerated > ago(2d)
| join (
CommonSecurityLog
| where isnotempty(DestinationIP) and isnotempty(SourceIP)
| where TimeGenerated > ago(2d)
| extend DateHour = bin(TimeGenerated, 1h) // create a new column and round to hour
| where DateHour in ((AnomalyHours)) //filter the dataset to only selected anomaly hours
| summarize HourlyCount = count(), TimeGeneratedMax = arg_max(TimeGenerated, *), DestinationIPlist = make_set(DestinationIP, 100), DestinationPortlist = make_set(DestinationPort, 100) by DeviceVendor, SourceIP, TimeGeneratedHour= bin(TimeGenerated, 1h)
| extend AnomalyHour = TimeGeneratedHour
) on AnomalyHour, DeviceVendor
| extend PercentTotal = round((HourlyCount / Total) * 100, 3)
| where PercentTotal > percentotalthreshold
| project DeviceVendor , AnomalyHour, TimeGeneratedMax, SourceIP, DestinationIPlist, DestinationPortlist, HourlyCount, PercentTotal, Total, baseline, score, anomalies
| summarize HourlyCount=sum(HourlyCount), StartTimeUtc=min(TimeGeneratedMax), EndTimeUtc=max(TimeGeneratedMax), SourceIPlist = make_set(SourceIP, 100), SourceIPMax= arg_max(SourceIP, *), DestinationIPlist = make_set(DestinationIPlist, 100), DestinationPortlist = make_set(DestinationPortlist, 100) by DeviceVendor , AnomalyHour, Total, baseline, score, anomalies
| project DeviceVendor , AnomalyHour, EndTimeUtc, SourceIPMax ,SourceIPlist, DestinationIPlist, DestinationPortlist, HourlyCount, Total, baseline, score, anomalies
entityMappings:
- fieldMappings:
- columnName: SourceIPMax
identifier: Address
entityType: IP
name: Time series anomaly detection for total volume of traffic
queryPeriod: 14d
description: |
'Identifies anamalous spikes in network traffic logs as compared to baseline or normal historical patterns.
The query leverages a KQL built-in anomaly detection algorithm to find large deviations from baseline patterns.
Sudden increases in network traffic volume may be an indication of data exfiltration attempts and should be investigated.
The higher the score, the further it is from the baseline value.
The output is aggregated to provide summary view of unique source IP to destination IP address and port traffic observed in the flagged anomaly hour.
The source IP addresses which were sending less than percentotalthreshold of the total traffic have been exluded whose value can be adjusted as needed .
You may have to run queries for individual source IP addresses from SourceIPlist to determine if anything looks suspicious'
requiredDataConnectors:
- dataTypes:
- CommonSecurityLog
connectorId: Barracuda
- dataTypes:
- CommonSecurityLog
connectorId: CEF
- dataTypes:
- CommonSecurityLog
connectorId: CheckPoint
- dataTypes:
- CommonSecurityLog
connectorId: CiscoASA
- dataTypes:
- CommonSecurityLog
connectorId: F5
- dataTypes:
- CommonSecurityLog
connectorId: Fortinet
- dataTypes:
- CommonSecurityLog
connectorId: PaloAltoNetworks
id: 06a9b845-6a95-4432-a78b-83919b28c375
tactics:
- Exfiltration
{
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"workspace": {
"type": "String"
}
},
"resources": [
{
"apiVersion": "2024-01-01-preview",
"id": "[concat(resourceId('Microsoft.OperationalInsights/workspaces/providers', parameters('workspace'), 'Microsoft.SecurityInsights'),'/alertRules/06a9b845-6a95-4432-a78b-83919b28c375')]",
"kind": "Scheduled",
"name": "[concat(parameters('workspace'),'/Microsoft.SecurityInsights/06a9b845-6a95-4432-a78b-83919b28c375')]",
"properties": {
"alertRuleTemplateName": "06a9b845-6a95-4432-a78b-83919b28c375",
"customDetails": null,
"description": "'Identifies anamalous spikes in network traffic logs as compared to baseline or normal historical patterns.\nThe query leverages a KQL built-in anomaly detection algorithm to find large deviations from baseline patterns.\nSudden increases in network traffic volume may be an indication of data exfiltration attempts and should be investigated.\nThe higher the score, the further it is from the baseline value.\nThe output is aggregated to provide summary view of unique source IP to destination IP address and port traffic observed in the flagged anomaly hour.\nThe source IP addresses which were sending less than percentotalthreshold of the total traffic have been exluded whose value can be adjusted as needed .\nYou may have to run queries for individual source IP addresses from SourceIPlist to determine if anything looks suspicious'\n",
"displayName": "Time series anomaly detection for total volume of traffic",
"enabled": true,
"entityMappings": [
{
"entityType": "IP",
"fieldMappings": [
{
"columnName": "SourceIPMax",
"identifier": "Address"
}
]
}
],
"OriginalUri": "https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/TimeSeriesAnomaly-MultiVendor_NetworkTraffic.yaml",
"query": "let starttime = 14d;\nlet endtime = 1d;\nlet timeframe = 1h;\nlet scorethreshold = 5;\nlet percentotalthreshold = 50;\nlet TimeSeriesData = CommonSecurityLog\n| where isnotempty(DestinationIP) and isnotempty(SourceIP)\n| where TimeGenerated between (startofday(ago(starttime))..startofday(ago(endtime)))\n| project TimeGenerated,SourceIP, DestinationIP, DeviceVendor\n| make-series Total=count() on TimeGenerated from startofday(ago(starttime)) to startofday(ago(endtime)) step timeframe by DeviceVendor;\n// Filtering specific records associated with spikes as outliers\nlet TimeSeriesAlerts=materialize(TimeSeriesData\n| extend (anomalies, score, baseline) = series_decompose_anomalies(Total, scorethreshold, -1, 'linefit')\n| mv-expand Total to typeof(double), TimeGenerated to typeof(datetime), anomalies to typeof(double),score to typeof(double), baseline to typeof(long)\n| where anomalies > 0 | extend score = round(score,2), AnomalyHour = TimeGenerated\n| project DeviceVendor,AnomalyHour, TimeGenerated, Total, baseline, anomalies, score);\nlet AnomalyHours = materialize(TimeSeriesAlerts | where TimeGenerated > ago(2d) | project TimeGenerated);\n// Join anomalies with Base Data to popalate associated records for investigation - Results sorted by score in descending order\nTimeSeriesAlerts\n| where TimeGenerated > ago(2d)\n| join (\n CommonSecurityLog\n| where isnotempty(DestinationIP) and isnotempty(SourceIP)\n| where TimeGenerated > ago(2d)\n| extend DateHour = bin(TimeGenerated, 1h) // create a new column and round to hour\n| where DateHour in ((AnomalyHours)) //filter the dataset to only selected anomaly hours\n| summarize HourlyCount = count(), TimeGeneratedMax = arg_max(TimeGenerated, *), DestinationIPlist = make_set(DestinationIP, 100), DestinationPortlist = make_set(DestinationPort, 100) by DeviceVendor, SourceIP, TimeGeneratedHour= bin(TimeGenerated, 1h)\n| extend AnomalyHour = TimeGeneratedHour\n) on AnomalyHour, DeviceVendor\n| extend PercentTotal = round((HourlyCount / Total) * 100, 3)\n| where PercentTotal > percentotalthreshold\n| project DeviceVendor , AnomalyHour, TimeGeneratedMax, SourceIP, DestinationIPlist, DestinationPortlist, HourlyCount, PercentTotal, Total, baseline, score, anomalies\n| summarize HourlyCount=sum(HourlyCount), StartTimeUtc=min(TimeGeneratedMax), EndTimeUtc=max(TimeGeneratedMax), SourceIPlist = make_set(SourceIP, 100), SourceIPMax= arg_max(SourceIP, *), DestinationIPlist = make_set(DestinationIPlist, 100), DestinationPortlist = make_set(DestinationPortlist, 100) by DeviceVendor , AnomalyHour, Total, baseline, score, anomalies\n| project DeviceVendor , AnomalyHour, EndTimeUtc, SourceIPMax ,SourceIPlist, DestinationIPlist, DestinationPortlist, HourlyCount, Total, baseline, score, anomalies\n",
"queryFrequency": "P1D",
"queryPeriod": "P14D",
"severity": "Medium",
"subTechniques": [],
"suppressionDuration": "PT1H",
"suppressionEnabled": false,
"tactics": [
"Exfiltration"
],
"techniques": [
"T1030"
],
"templateVersion": "1.0.4",
"triggerOperator": "GreaterThan",
"triggerThreshold": 3
},
"type": "Microsoft.OperationalInsights/workspaces/providers/alertRules"
}
]
}