Possible contact with a domain generated by a DGA
Id | 4acd3a04-2fad-4efc-8a4b-51476594cec4 |
Rulename | Possible contact with a domain generated by a DGA |
Description | Identifies contacts with domains names in CommonSecurityLog that might have been generated by a Domain Generation Algorithm (DGA). DGAs can be used by malware to generate rendezvous points that are difficult to predict in advance. This detection uses the Alexa Top 1 million domain names to build a model of what normal domains look like. It uses this to identify domains that may have been randomly generated by an algorithm. The triThreshold is set to 500 - increase this to report on domains that are less likely to have been randomly generated, decrease it for more likely. The start time and end time look back over 6 hours of data and the dgaLengthThreshold is set to 8 - meaning domains whose length is 8 or more are reported. NOTE - The top1M csv zip file used in the query is dynamic and may produce different results over various time periods. It’s important to cross-check the events against the entities involved in the incident. |
Severity | Medium |
Tactics | CommandAndControl |
Techniques | T1568 |
Required data connectors | Barracuda CEF CheckPoint CiscoASA F5 Fortinet PaloAltoNetworks Zscaler |
Kind | Scheduled |
Query frequency | 6h |
Query period | 6h |
Trigger threshold | 0 |
Trigger operator | gt |
Source Uri | https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/MultiVendor-PossibleDGAContacts.yaml |
Version | 1.0.6 |
Arm template | 4acd3a04-2fad-4efc-8a4b-51476594cec4.json |
let triThreshold = 500;
let startTime = 6h;
let dgaLengthThreshold = 8;
// fetch the alexa top 1M domains
let top1M = (externaldata (Position:int, Domain:string) [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"] with (format="csv", zipPattern="*.csv"));
// extract tri grams that are above our threshold - i.e. are common
let triBaseline = top1M
| extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
| extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
| mvexpand Trigram=AllTriGrams
| summarize triCount=count() by tostring(Trigram)
| sort by triCount desc
| where triCount > triThreshold
| distinct Trigram;
// collect domain information from common security log, filter and extract the DGA candidate and its trigrams
let allDataSummarized = CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend Name = tolower(DestinationHostName)
| distinct Name
| where Name has "."
| where Name !endswith ".home" and Name !endswith ".lan"
// extract DGA candidate
| extend DGADomain = extract("([^.]*).{0,7}$", 1, Name)
| where strlen(DGADomain) > dgaLengthThreshold
// throw out domains with number in them
| where DGADomain matches regex "^[A-Za-z]{0,}$"
// extract the tri grams from summarized data
| extend AllTriGrams = array_concat(extract_all("(...)", DGADomain), extract_all("(...)", substring(DGADomain, 1)), extract_all("(...)", substring(DGADomain, 2)));
// throw out domains that have repeating tri's and/or >=3 repeating letters
let nonRepeatingTris = allDataSummarized
| join kind=leftanti
(
allDataSummarized
| mvexpand AllTriGrams
| summarize count() by tostring(AllTriGrams), DGADomain
| where count_ > 1
| distinct DGADomain
)
on DGADomain;
// find domains that do not have a common tri in the baseline
let dataWithRareTris = nonRepeatingTris
| join kind=leftanti
(
nonRepeatingTris
| mvexpand AllTriGrams
| extend Trigram = tostring(AllTriGrams)
| distinct Trigram, DGADomain
| join kind=inner
(
triBaseline
)
on Trigram
| distinct DGADomain
)
on DGADomain;
dataWithRareTris
// join DGAs back on connection data
| join kind=inner
(
CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend DestinationHostName = tolower(DestinationHostName)
| project-rename Name=DestinationHostName, DataSource=DeviceVendor
| summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SourceIP, DestinationIP, DataSource
)
on Name
| project StartTime, EndTime, Name, DGADomain, SourceIP, DestinationIP, DataSource
relevantTechniques:
- T1568
name: Possible contact with a domain generated by a DGA
requiredDataConnectors:
- dataTypes:
- CommonSecurityLog
connectorId: Zscaler
- dataTypes:
- CommonSecurityLog
connectorId: Barracuda
- dataTypes:
- CommonSecurityLog
connectorId: CEF
- dataTypes:
- CommonSecurityLog
connectorId: CheckPoint
- dataTypes:
- CommonSecurityLog
connectorId: CiscoASA
- dataTypes:
- CommonSecurityLog
connectorId: F5
- dataTypes:
- CommonSecurityLog
connectorId: Fortinet
- dataTypes:
- CommonSecurityLog
connectorId: PaloAltoNetworks
entityMappings:
- fieldMappings:
- identifier: Address
columnName: SourceIP
entityType: IP
- fieldMappings:
- identifier: DomainName
columnName: Name
entityType: DNS
triggerThreshold: 0
id: 4acd3a04-2fad-4efc-8a4b-51476594cec4
tactics:
- CommandAndControl
version: 1.0.6
OriginalUri: https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/MultiVendor-PossibleDGAContacts.yaml
queryPeriod: 6h
kind: Scheduled
metadata:
categories:
domains:
- Security - Others
author:
name: Microsoft Security Research
support:
tier: Community
source:
kind: Community
queryFrequency: 6h
severity: Medium
description: |
'Identifies contacts with domains names in CommonSecurityLog that might have been generated by a Domain Generation Algorithm (DGA). DGAs can be used by malware to generate rendezvous points that are difficult to predict in advance.
This detection uses the Alexa Top 1 million domain names to build a model of what normal domains look like. It uses this to identify domains that may have been randomly generated by an algorithm.
The triThreshold is set to 500 - increase this to report on domains that are less likely to have been randomly generated, decrease it for more likely.
The start time and end time look back over 6 hours of data and the dgaLengthThreshold is set to 8 - meaning domains whose length is 8 or more are reported.
NOTE - The top1M csv zip file used in the query is dynamic and may produce different results over various time periods. It's important to cross-check the events against the entities involved in the incident.'
query: |
let triThreshold = 500;
let startTime = 6h;
let dgaLengthThreshold = 8;
// fetch the alexa top 1M domains
let top1M = (externaldata (Position:int, Domain:string) [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"] with (format="csv", zipPattern="*.csv"));
// extract tri grams that are above our threshold - i.e. are common
let triBaseline = top1M
| extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
| extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
| mvexpand Trigram=AllTriGrams
| summarize triCount=count() by tostring(Trigram)
| sort by triCount desc
| where triCount > triThreshold
| distinct Trigram;
// collect domain information from common security log, filter and extract the DGA candidate and its trigrams
let allDataSummarized = CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend Name = tolower(DestinationHostName)
| distinct Name
| where Name has "."
| where Name !endswith ".home" and Name !endswith ".lan"
// extract DGA candidate
| extend DGADomain = extract("([^.]*).{0,7}$", 1, Name)
| where strlen(DGADomain) > dgaLengthThreshold
// throw out domains with number in them
| where DGADomain matches regex "^[A-Za-z]{0,}$"
// extract the tri grams from summarized data
| extend AllTriGrams = array_concat(extract_all("(...)", DGADomain), extract_all("(...)", substring(DGADomain, 1)), extract_all("(...)", substring(DGADomain, 2)));
// throw out domains that have repeating tri's and/or >=3 repeating letters
let nonRepeatingTris = allDataSummarized
| join kind=leftanti
(
allDataSummarized
| mvexpand AllTriGrams
| summarize count() by tostring(AllTriGrams), DGADomain
| where count_ > 1
| distinct DGADomain
)
on DGADomain;
// find domains that do not have a common tri in the baseline
let dataWithRareTris = nonRepeatingTris
| join kind=leftanti
(
nonRepeatingTris
| mvexpand AllTriGrams
| extend Trigram = tostring(AllTriGrams)
| distinct Trigram, DGADomain
| join kind=inner
(
triBaseline
)
on Trigram
| distinct DGADomain
)
on DGADomain;
dataWithRareTris
// join DGAs back on connection data
| join kind=inner
(
CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend DestinationHostName = tolower(DestinationHostName)
| project-rename Name=DestinationHostName, DataSource=DeviceVendor
| summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SourceIP, DestinationIP, DataSource
)
on Name
| project StartTime, EndTime, Name, DGADomain, SourceIP, DestinationIP, DataSource
triggerOperator: gt
{
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
"contentVersion": "1.0.0.0",
"parameters": {
"workspace": {
"type": "String"
}
},
"resources": [
{
"apiVersion": "2024-01-01-preview",
"id": "[concat(resourceId('Microsoft.OperationalInsights/workspaces/providers', parameters('workspace'), 'Microsoft.SecurityInsights'),'/alertRules/4acd3a04-2fad-4efc-8a4b-51476594cec4')]",
"kind": "Scheduled",
"name": "[concat(parameters('workspace'),'/Microsoft.SecurityInsights/4acd3a04-2fad-4efc-8a4b-51476594cec4')]",
"properties": {
"alertRuleTemplateName": "4acd3a04-2fad-4efc-8a4b-51476594cec4",
"customDetails": null,
"description": "'Identifies contacts with domains names in CommonSecurityLog that might have been generated by a Domain Generation Algorithm (DGA). DGAs can be used by malware to generate rendezvous points that are difficult to predict in advance.\nThis detection uses the Alexa Top 1 million domain names to build a model of what normal domains look like. It uses this to identify domains that may have been randomly generated by an algorithm.\nThe triThreshold is set to 500 - increase this to report on domains that are less likely to have been randomly generated, decrease it for more likely.\nThe start time and end time look back over 6 hours of data and the dgaLengthThreshold is set to 8 - meaning domains whose length is 8 or more are reported.\nNOTE - The top1M csv zip file used in the query is dynamic and may produce different results over various time periods. It's important to cross-check the events against the entities involved in the incident.'\n",
"displayName": "Possible contact with a domain generated by a DGA",
"enabled": true,
"entityMappings": [
{
"entityType": "IP",
"fieldMappings": [
{
"columnName": "SourceIP",
"identifier": "Address"
}
]
},
{
"entityType": "DNS",
"fieldMappings": [
{
"columnName": "Name",
"identifier": "DomainName"
}
]
}
],
"OriginalUri": "https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/MultiVendor-PossibleDGAContacts.yaml",
"query": "let triThreshold = 500;\nlet startTime = 6h;\nlet dgaLengthThreshold = 8;\n// fetch the alexa top 1M domains\nlet top1M = (externaldata (Position:int, Domain:string) [@\"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip\"] with (format=\"csv\", zipPattern=\"*.csv\"));\n// extract tri grams that are above our threshold - i.e. are common\nlet triBaseline = top1M\n| extend Domain = tolower(extract(\"([^.]*).{0,7}$\", 1, Domain))\n| extend AllTriGrams = array_concat(extract_all(\"(...)\", Domain), extract_all(\"(...)\", substring(Domain, 1)), extract_all(\"(...)\", substring(Domain, 2)))\n| mvexpand Trigram=AllTriGrams\n| summarize triCount=count() by tostring(Trigram)\n| sort by triCount desc\n| where triCount > triThreshold\n| distinct Trigram;\n// collect domain information from common security log, filter and extract the DGA candidate and its trigrams\nlet allDataSummarized = CommonSecurityLog\n| where TimeGenerated > ago(startTime)\n| where isnotempty(DestinationHostName)\n| extend Name = tolower(DestinationHostName)\n| distinct Name\n| where Name has \".\"\n| where Name !endswith \".home\" and Name !endswith \".lan\"\n// extract DGA candidate\n| extend DGADomain = extract(\"([^.]*).{0,7}$\", 1, Name)\n| where strlen(DGADomain) > dgaLengthThreshold\n// throw out domains with number in them\n| where DGADomain matches regex \"^[A-Za-z]{0,}$\"\n// extract the tri grams from summarized data\n| extend AllTriGrams = array_concat(extract_all(\"(...)\", DGADomain), extract_all(\"(...)\", substring(DGADomain, 1)), extract_all(\"(...)\", substring(DGADomain, 2)));\n// throw out domains that have repeating tri's and/or >=3 repeating letters\nlet nonRepeatingTris = allDataSummarized\n| join kind=leftanti\n(\n allDataSummarized\n | mvexpand AllTriGrams\n | summarize count() by tostring(AllTriGrams), DGADomain\n | where count_ > 1\n | distinct DGADomain\n)\non DGADomain;\n// find domains that do not have a common tri in the baseline\nlet dataWithRareTris = nonRepeatingTris\n| join kind=leftanti\n(\n nonRepeatingTris\n | mvexpand AllTriGrams\n | extend Trigram = tostring(AllTriGrams)\n | distinct Trigram, DGADomain\n | join kind=inner\n (\n triBaseline\n )\n on Trigram\n | distinct DGADomain\n)\non DGADomain;\ndataWithRareTris\n// join DGAs back on connection data\n| join kind=inner\n(\n CommonSecurityLog\n | where TimeGenerated > ago(startTime)\n | where isnotempty(DestinationHostName)\n | extend DestinationHostName = tolower(DestinationHostName)\n | project-rename Name=DestinationHostName, DataSource=DeviceVendor\n | summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SourceIP, DestinationIP, DataSource\n)\non Name\n| project StartTime, EndTime, Name, DGADomain, SourceIP, DestinationIP, DataSource\n",
"queryFrequency": "PT6H",
"queryPeriod": "PT6H",
"severity": "Medium",
"subTechniques": [],
"suppressionDuration": "PT1H",
"suppressionEnabled": false,
"tactics": [
"CommandAndControl"
],
"techniques": [
"T1568"
],
"templateVersion": "1.0.6",
"triggerOperator": "GreaterThan",
"triggerThreshold": 0
},
"type": "Microsoft.OperationalInsights/workspaces/providers/alertRules"
}
]
}