Possible contact with a domain generated by a DGA
| Id | 4acd3a04-2fad-4efc-8a4b-51476594cec4 |
| Rulename | Possible contact with a domain generated by a DGA |
| Description | Identifies contacts with domains names in CommonSecurityLog that might have been generated by a Domain Generation Algorithm (DGA). DGAs can be used by malware to generate rendezvous points that are difficult to predict in advance. This detection uses the Alexa Top 1 million domain names to build a model of what normal domains look like. It uses this to identify domains that may have been randomly generated by an algorithm. The triThreshold is set to 500 - increase this to report on domains that are less likely to have been randomly generated, decrease it for more likely. The start time and end time look back over 6 hours of data and the dgaLengthThreshold is set to 8 - meaning domains whose length is 8 or more are reported. NOTE - The top1M csv zip file used in the query is dynamic and may produce different results over various time periods. It’s important to cross-check the events against the entities involved in the incident. |
| Severity | Medium |
| Tactics | CommandAndControl |
| Techniques | T1568 |
| Required data connectors | Barracuda CEF CheckPoint CiscoASA F5 Fortinet PaloAltoNetworks Zscaler |
| Kind | Scheduled |
| Query frequency | 6h |
| Query period | 6h |
| Trigger threshold | 0 |
| Trigger operator | gt |
| Source Uri | https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/MultiVendor-PossibleDGAContacts.yaml |
| Version | 1.0.6 |
| Arm template | 4acd3a04-2fad-4efc-8a4b-51476594cec4.json |
let triThreshold = 500;
let startTime = 6h;
let dgaLengthThreshold = 8;
// fetch the alexa top 1M domains
let top1M = (externaldata (Position:int, Domain:string) [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"] with (format="csv", zipPattern="*.csv"));
// extract tri grams that are above our threshold - i.e. are common
let triBaseline = top1M
| extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
| extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
| mvexpand Trigram=AllTriGrams
| summarize triCount=count() by tostring(Trigram)
| sort by triCount desc
| where triCount > triThreshold
| distinct Trigram;
// collect domain information from common security log, filter and extract the DGA candidate and its trigrams
let allDataSummarized = CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend Name = tolower(DestinationHostName)
| distinct Name
| where Name has "."
| where Name !endswith ".home" and Name !endswith ".lan"
// extract DGA candidate
| extend DGADomain = extract("([^.]*).{0,7}$", 1, Name)
| where strlen(DGADomain) > dgaLengthThreshold
// throw out domains with number in them
| where DGADomain matches regex "^[A-Za-z]{0,}$"
// extract the tri grams from summarized data
| extend AllTriGrams = array_concat(extract_all("(...)", DGADomain), extract_all("(...)", substring(DGADomain, 1)), extract_all("(...)", substring(DGADomain, 2)));
// throw out domains that have repeating tri's and/or >=3 repeating letters
let nonRepeatingTris = allDataSummarized
| join kind=leftanti
(
allDataSummarized
| mvexpand AllTriGrams
| summarize count() by tostring(AllTriGrams), DGADomain
| where count_ > 1
| distinct DGADomain
)
on DGADomain;
// find domains that do not have a common tri in the baseline
let dataWithRareTris = nonRepeatingTris
| join kind=leftanti
(
nonRepeatingTris
| mvexpand AllTriGrams
| extend Trigram = tostring(AllTriGrams)
| distinct Trigram, DGADomain
| join kind=inner
(
triBaseline
)
on Trigram
| distinct DGADomain
)
on DGADomain;
dataWithRareTris
// join DGAs back on connection data
| join kind=inner
(
CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend DestinationHostName = tolower(DestinationHostName)
| project-rename Name=DestinationHostName, DataSource=DeviceVendor
| summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SourceIP, DestinationIP, DataSource
)
on Name
| project StartTime, EndTime, Name, DGADomain, SourceIP, DestinationIP, DataSource
tactics:
- CommandAndControl
query: |
let triThreshold = 500;
let startTime = 6h;
let dgaLengthThreshold = 8;
// fetch the alexa top 1M domains
let top1M = (externaldata (Position:int, Domain:string) [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"] with (format="csv", zipPattern="*.csv"));
// extract tri grams that are above our threshold - i.e. are common
let triBaseline = top1M
| extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
| extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
| mvexpand Trigram=AllTriGrams
| summarize triCount=count() by tostring(Trigram)
| sort by triCount desc
| where triCount > triThreshold
| distinct Trigram;
// collect domain information from common security log, filter and extract the DGA candidate and its trigrams
let allDataSummarized = CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend Name = tolower(DestinationHostName)
| distinct Name
| where Name has "."
| where Name !endswith ".home" and Name !endswith ".lan"
// extract DGA candidate
| extend DGADomain = extract("([^.]*).{0,7}$", 1, Name)
| where strlen(DGADomain) > dgaLengthThreshold
// throw out domains with number in them
| where DGADomain matches regex "^[A-Za-z]{0,}$"
// extract the tri grams from summarized data
| extend AllTriGrams = array_concat(extract_all("(...)", DGADomain), extract_all("(...)", substring(DGADomain, 1)), extract_all("(...)", substring(DGADomain, 2)));
// throw out domains that have repeating tri's and/or >=3 repeating letters
let nonRepeatingTris = allDataSummarized
| join kind=leftanti
(
allDataSummarized
| mvexpand AllTriGrams
| summarize count() by tostring(AllTriGrams), DGADomain
| where count_ > 1
| distinct DGADomain
)
on DGADomain;
// find domains that do not have a common tri in the baseline
let dataWithRareTris = nonRepeatingTris
| join kind=leftanti
(
nonRepeatingTris
| mvexpand AllTriGrams
| extend Trigram = tostring(AllTriGrams)
| distinct Trigram, DGADomain
| join kind=inner
(
triBaseline
)
on Trigram
| distinct DGADomain
)
on DGADomain;
dataWithRareTris
// join DGAs back on connection data
| join kind=inner
(
CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend DestinationHostName = tolower(DestinationHostName)
| project-rename Name=DestinationHostName, DataSource=DeviceVendor
| summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SourceIP, DestinationIP, DataSource
)
on Name
| project StartTime, EndTime, Name, DGADomain, SourceIP, DestinationIP, DataSource
requiredDataConnectors:
- dataTypes:
- CommonSecurityLog
connectorId: Zscaler
- dataTypes:
- CommonSecurityLog
connectorId: Barracuda
- dataTypes:
- CommonSecurityLog
connectorId: CEF
- dataTypes:
- CommonSecurityLog
connectorId: CheckPoint
- dataTypes:
- CommonSecurityLog
connectorId: CiscoASA
- dataTypes:
- CommonSecurityLog
connectorId: F5
- dataTypes:
- CommonSecurityLog
connectorId: Fortinet
- dataTypes:
- CommonSecurityLog
connectorId: PaloAltoNetworks
name: Possible contact with a domain generated by a DGA
queryPeriod: 6h
metadata:
support:
tier: Community
categories:
domains:
- Security - Others
author:
name: Microsoft Security Research
source:
kind: Community
OriginalUri: https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/MultiVendor-PossibleDGAContacts.yaml
triggerThreshold: 0
description: |
'Identifies contacts with domains names in CommonSecurityLog that might have been generated by a Domain Generation Algorithm (DGA). DGAs can be used by malware to generate rendezvous points that are difficult to predict in advance.
This detection uses the Alexa Top 1 million domain names to build a model of what normal domains look like. It uses this to identify domains that may have been randomly generated by an algorithm.
The triThreshold is set to 500 - increase this to report on domains that are less likely to have been randomly generated, decrease it for more likely.
The start time and end time look back over 6 hours of data and the dgaLengthThreshold is set to 8 - meaning domains whose length is 8 or more are reported.
NOTE - The top1M csv zip file used in the query is dynamic and may produce different results over various time periods. It's important to cross-check the events against the entities involved in the incident.'
version: 1.0.6
kind: Scheduled
queryFrequency: 6h
severity: Medium
entityMappings:
- entityType: IP
fieldMappings:
- identifier: Address
columnName: SourceIP
- entityType: DNS
fieldMappings:
- identifier: DomainName
columnName: Name
triggerOperator: gt
id: 4acd3a04-2fad-4efc-8a4b-51476594cec4
relevantTechniques:
- T1568