Microsoft Sentinel Analytic Rules
cloudbrothers.infoAzure Sentinel RepoToggle Dark/Light/Auto modeToggle Dark/Light/Auto modeToggle Dark/Light/Auto modeBack to homepage

Possible contact with a domain generated by a DGA

Back
Id: 4acd3a04-2fad-4efc-8a4b-51476594cec4
Rulename: Possible contact with a domain generated by a DGA
Description: Identifies contacts with domain names in CommonSecurityLog that might have been generated by a Domain Generation Algorithm (DGA). DGAs can be used by malware to generate rendezvous points that are difficult to predict in advance.

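This detection uses a list of the top 1 million domain names (described as the Alexa Top 1 million; the query downloads it from the umbrella-static top-1m.csv.zip URL) to build a model of what normal domains look like. It uses this to identify domains that may have been randomly generated by an algorithm.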

The triThreshold is set to 500 - increase this to report on domains that are less likely to have been randomly generated, decrease it for more likely.

The query looks back over 6 hours of data (startTime) and the dgaLengthThreshold is set to 8 - meaning only domains whose extracted candidate label is longer than 8 characters are reported (the query uses a strict "greater than" comparison).

NOTE - The top1M csv zip file used in the query is dynamic and may produce different results over various time periods. It’s important to cross-check the events against the entities involved in the incident.
Severity: Medium
Tactics: CommandAndControl
Techniques: T1568
Required data connectors: Barracuda, CEF, CheckPoint, CiscoASA, F5, Fortinet, PaloAltoNetworks, Zscaler
Kind: Scheduled
Query frequency: 6h
Query period: 6h
Trigger threshold: 0
Trigger operator: gt
Source Uri: https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/MultiVendor-PossibleDGAContacts.yaml
Version: 1.0.6
Arm template: 4acd3a04-2fad-4efc-8a4b-51476594cec4.json
Deploy To Azure
// Possible contact with a domain generated by a DGA.
// 1. Download a top-1M domain list and keep the trigrams that occur more than
//    triThreshold times in it (the "common" baseline).
// 2. From CommonSecurityLog, take the destination host names seen within the last
//    startTime, extract a candidate label and keep it only if it is longer than
//    dgaLengthThreshold characters and purely alphabetic.
// 3. Drop candidates that contain a repeated trigram, then drop candidates that
//    share at least one trigram with the baseline.
// 4. Join the surviving candidates back to the connection data.
// Output columns: StartTime, EndTime, Name, DGADomain, SourceIP, DestinationIP, DataSource
let triThreshold = 500;
let startTime = 6h;
let dgaLengthThreshold = 8;
// fetch the top 1M domain list (the URL points at the umbrella-static top-1m list)
// NOTE(review): the list is downloaded over plain http - confirm whether the https endpoint can be used instead
let top1M =  (externaldata (Position:int, Domain:string)   [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"]  with (format="csv", zipPattern="*.csv"));
// extract tri grams that are above our threshold - i.e. are common
let triBaseline =   top1M
// capture the run of non-dot characters that is followed by at most 7 trailing characters
| extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
// three passes at offsets 0, 1 and 2 yield every overlapping 3-character window
| extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
// mv-expand replaces the obsolete mvexpand spelling
| mv-expand Trigram=AllTriGrams
| summarize triCount=count() by tostring(Trigram)
// no sort here: the ordering would be discarded by the filter and distinct below
| where triCount > triThreshold
| distinct Trigram;
// collect domain information from common security log, filter and extract the DGA candidate and its trigrams
let allDataSummarized =   CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend Name = tolower(DestinationHostName)
| distinct Name
| where Name has "."
| where Name !endswith ".home" and Name !endswith ".lan"
// extract DGA candidate
| extend DGADomain = extract("([^.]*).{0,7}$", 1, Name)
// strict comparison: a candidate of exactly dgaLengthThreshold characters is NOT kept
| where strlen(DGADomain) > dgaLengthThreshold
// throw out domains with number in them
| where DGADomain matches regex "^[A-Za-z]{0,}$"
// extract the tri grams from summarized data
| extend AllTriGrams = array_concat(extract_all("(...)", DGADomain), extract_all("(...)", substring(DGADomain, 1)), extract_all("(...)", substring(DGADomain, 2)));
// throw out domains that have repeating tri's and/or >=3 repeating letters
let nonRepeatingTris =  allDataSummarized
| join kind=leftanti
(
    allDataSummarized
    | mv-expand AllTriGrams
    | summarize count() by tostring(AllTriGrams), DGADomain
    // a count above 1 means the same trigram occurs more than once in this candidate
    | where count_ > 1
    | distinct DGADomain
)
on DGADomain;
// find domains that do not have a common tri in the baseline
let dataWithRareTris =  nonRepeatingTris
| join kind=leftanti
(
    nonRepeatingTris
    | mv-expand AllTriGrams
    | extend Trigram = tostring(AllTriGrams)
    | distinct Trigram, DGADomain
    // the inner join keeps candidates that have at least one baseline trigram;
    // the enclosing leftanti join then removes exactly those candidates
    | join kind=inner
    (
        triBaseline
    )
    on Trigram
    | distinct DGADomain
)
on DGADomain;
dataWithRareTris
// join DGAs back on connection data
| join kind=inner
(
    CommonSecurityLog
    | where TimeGenerated > ago(startTime)
    | where isnotempty(DestinationHostName)
    | extend DestinationHostName = tolower(DestinationHostName)
    | project-rename Name=DestinationHostName, DataSource=DeviceVendor
    | summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SourceIP, DestinationIP, DataSource
)
on Name
| project StartTime, EndTime, Name, DGADomain, SourceIP, DestinationIP, DataSource
# Scheduled analytic rule definition: "Possible contact with a domain generated by a DGA".
# The query runs every queryFrequency over the last queryPeriod of CommonSecurityLog data
# and triggers when the number of results is greater than triggerThreshold.
relevantTechniques:
- T1568
name: Possible contact with a domain generated by a DGA
# Every listed connector supplies the CommonSecurityLog table that the query reads.
requiredDataConnectors:
- dataTypes:
  - CommonSecurityLog
  connectorId: Zscaler
- dataTypes:
  - CommonSecurityLog
  connectorId: Barracuda
- dataTypes:
  - CommonSecurityLog
  connectorId: CEF
- dataTypes:
  - CommonSecurityLog
  connectorId: CheckPoint
- dataTypes:
  - CommonSecurityLog
  connectorId: CiscoASA
- dataTypes:
  - CommonSecurityLog
  connectorId: F5
- dataTypes:
  - CommonSecurityLog
  connectorId: Fortinet
- dataTypes:
  - CommonSecurityLog
  connectorId: PaloAltoNetworks
# columnName values refer to columns projected by the last line of the query.
entityMappings:
- fieldMappings:
  - identifier: Address
    columnName: SourceIP
  entityType: IP
- fieldMappings:
  - identifier: DomainName
    columnName: Name
  entityType: DNS
triggerThreshold: 0
id: 4acd3a04-2fad-4efc-8a4b-51476594cec4
tactics:
- CommandAndControl
version: 1.0.6
OriginalUri: https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/MultiVendor-PossibleDGAContacts.yaml
queryPeriod: 6h
kind: Scheduled
metadata:
  categories:
    domains:
    - Security - Others
  author:
    name: Microsoft Security Research
  support:
    tier: Community
  source:
    kind: Community
queryFrequency: 6h
severity: Medium
description: |
  'Identifies contacts with domain names in CommonSecurityLog that might have been generated by a Domain Generation Algorithm (DGA). DGAs can be used by malware to generate rendezvous points that are difficult to predict in advance.
  This detection uses the Alexa Top 1 million domain names to build a model of what normal domains look like. It uses this to identify domains that may have been randomly generated by an algorithm.
  The triThreshold is set to 500 - increase this to report on domains that are less likely to have been randomly generated, decrease it for more likely.
  The start time and end time look back over 6 hours of data and the dgaLengthThreshold is set to 8 - meaning only domains whose length is greater than 8 are reported.
  NOTE - The top1M csv zip file used in the query is dynamic and may produce different results over various time periods. It's important to cross-check the events against the entities involved in the incident.'
query: |
  let triThreshold = 500;
  let startTime = 6h;
  let dgaLengthThreshold = 8;
  // fetch the top 1M domain list (the URL points at the umbrella-static top-1m list)
  // NOTE(review): the list is downloaded over plain http - confirm whether the https endpoint can be used instead
  let top1M =  (externaldata (Position:int, Domain:string)   [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"]  with (format="csv", zipPattern="*.csv"));
  // extract tri grams that are above our threshold - i.e. are common
  let triBaseline =   top1M
  // capture the run of non-dot characters that is followed by at most 7 trailing characters
  | extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
  // three passes at offsets 0, 1 and 2 yield every overlapping 3-character window
  | extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
  // mv-expand replaces the obsolete mvexpand spelling
  | mv-expand Trigram=AllTriGrams
  | summarize triCount=count() by tostring(Trigram)
  // no sort here: the ordering would be discarded by the filter and distinct below
  | where triCount > triThreshold
  | distinct Trigram;
  // collect domain information from common security log, filter and extract the DGA candidate and its trigrams
  let allDataSummarized =   CommonSecurityLog
  | where TimeGenerated > ago(startTime)
  | where isnotempty(DestinationHostName)
  | extend Name = tolower(DestinationHostName)
  | distinct Name
  | where Name has "."
  | where Name !endswith ".home" and Name !endswith ".lan"
  // extract DGA candidate
  | extend DGADomain = extract("([^.]*).{0,7}$", 1, Name)
  // strict comparison: a candidate of exactly dgaLengthThreshold characters is NOT kept
  | where strlen(DGADomain) > dgaLengthThreshold
  // throw out domains with number in them
  | where DGADomain matches regex "^[A-Za-z]{0,}$"
  // extract the tri grams from summarized data
  | extend AllTriGrams = array_concat(extract_all("(...)", DGADomain), extract_all("(...)", substring(DGADomain, 1)), extract_all("(...)", substring(DGADomain, 2)));
  // throw out domains that have repeating tri's and/or >=3 repeating letters
  let nonRepeatingTris =  allDataSummarized
  | join kind=leftanti
  (
      allDataSummarized
      | mv-expand AllTriGrams
      | summarize count() by tostring(AllTriGrams), DGADomain
      // a count above 1 means the same trigram occurs more than once in this candidate
      | where count_ > 1
      | distinct DGADomain
  )
  on DGADomain;
  // find domains that do not have a common tri in the baseline
  let dataWithRareTris =  nonRepeatingTris
  | join kind=leftanti
  (
      nonRepeatingTris
      | mv-expand AllTriGrams
      | extend Trigram = tostring(AllTriGrams)
      | distinct Trigram, DGADomain
      // the inner join keeps candidates that have at least one baseline trigram;
      // the enclosing leftanti join then removes exactly those candidates
      | join kind=inner
      (
          triBaseline
      )
      on Trigram
      | distinct DGADomain
  )
  on DGADomain;
  dataWithRareTris
  // join DGAs back on connection data
  | join kind=inner
  (
      CommonSecurityLog
      | where TimeGenerated > ago(startTime)
      | where isnotempty(DestinationHostName)
      | extend DestinationHostName = tolower(DestinationHostName)
      | project-rename Name=DestinationHostName, DataSource=DeviceVendor
      | summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SourceIP, DestinationIP, DataSource
  )
  on Name
  | project StartTime, EndTime, Name, DGADomain, SourceIP, DestinationIP, DataSource
triggerOperator: gt
{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "workspace": {
      "type": "String"
    }
  },
  "resources": [
    {
      "apiVersion": "2024-01-01-preview",
      "id": "[concat(resourceId('Microsoft.OperationalInsights/workspaces/providers', parameters('workspace'), 'Microsoft.SecurityInsights'),'/alertRules/4acd3a04-2fad-4efc-8a4b-51476594cec4')]",
      "kind": "Scheduled",
      "name": "[concat(parameters('workspace'),'/Microsoft.SecurityInsights/4acd3a04-2fad-4efc-8a4b-51476594cec4')]",
      "properties": {
        "alertRuleTemplateName": "4acd3a04-2fad-4efc-8a4b-51476594cec4",
        "customDetails": null,
        "description": "'Identifies contacts with domain names in CommonSecurityLog that might have been generated by a Domain Generation Algorithm (DGA). DGAs can be used by malware to generate rendezvous points that are difficult to predict in advance.\nThis detection uses the Alexa Top 1 million domain names to build a model of what normal domains look like. It uses this to identify domains that may have been randomly generated by an algorithm.\nThe triThreshold is set to 500 - increase this to report on domains that are less likely to have been randomly generated, decrease it for more likely.\nThe start time and end time look back over 6 hours of data and the dgaLengthThreshold is set to 8 - meaning only domains whose length is greater than 8 are reported.\nNOTE - The top1M csv zip file used in the query is dynamic and may produce different results over various time periods. It's important to cross-check the events against the entities involved in the incident.'\n",
        "displayName": "Possible contact with a domain generated by a DGA",
        "enabled": true,
        "entityMappings": [
          {
            "entityType": "IP",
            "fieldMappings": [
              {
                "columnName": "SourceIP",
                "identifier": "Address"
              }
            ]
          },
          {
            "entityType": "DNS",
            "fieldMappings": [
              {
                "columnName": "Name",
                "identifier": "DomainName"
              }
            ]
          }
        ],
        "OriginalUri": "https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/MultiVendor-PossibleDGAContacts.yaml",
        "query": "let triThreshold = 500;\nlet startTime = 6h;\nlet dgaLengthThreshold = 8;\n// fetch the alexa top 1M domains\nlet top1M =  (externaldata (Position:int, Domain:string)   [@\"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip\"]  with (format=\"csv\", zipPattern=\"*.csv\"));\n// extract tri grams that are above our threshold - i.e. are common\nlet triBaseline =   top1M\n| extend Domain = tolower(extract(\"([^.]*).{0,7}$\", 1, Domain))\n| extend AllTriGrams = array_concat(extract_all(\"(...)\", Domain), extract_all(\"(...)\", substring(Domain, 1)), extract_all(\"(...)\", substring(Domain, 2)))\n| mvexpand Trigram=AllTriGrams\n| summarize triCount=count() by tostring(Trigram)\n| sort by triCount desc\n| where triCount > triThreshold\n| distinct Trigram;\n// collect domain information from common security log, filter and extract the DGA candidate and its trigrams\nlet allDataSummarized =   CommonSecurityLog\n| where TimeGenerated > ago(startTime)\n| where isnotempty(DestinationHostName)\n| extend Name = tolower(DestinationHostName)\n| distinct Name\n| where Name has \".\"\n| where Name !endswith \".home\" and Name !endswith \".lan\"\n// extract DGA candidate\n| extend DGADomain = extract(\"([^.]*).{0,7}$\", 1, Name)\n| where strlen(DGADomain) > dgaLengthThreshold\n// throw out domains with number in them\n| where DGADomain matches regex \"^[A-Za-z]{0,}$\"\n// extract the tri grams from summarized data\n| extend AllTriGrams = array_concat(extract_all(\"(...)\", DGADomain), extract_all(\"(...)\", substring(DGADomain, 1)), extract_all(\"(...)\", substring(DGADomain, 2)));\n// throw out domains that have repeating tri's and/or >=3 repeating letters\nlet nonRepeatingTris =  allDataSummarized\n| join kind=leftanti\n(\n    allDataSummarized\n    | mvexpand AllTriGrams\n    | summarize count() by tostring(AllTriGrams), DGADomain\n    | where count_ > 1\n    | distinct DGADomain\n)\non DGADomain;\n// find domains that do not have a common tri in 
the baseline\nlet dataWithRareTris =  nonRepeatingTris\n| join kind=leftanti\n(\n    nonRepeatingTris\n    | mvexpand AllTriGrams\n    | extend Trigram = tostring(AllTriGrams)\n    | distinct Trigram, DGADomain\n    | join kind=inner\n    (\n        triBaseline\n    )\n    on Trigram\n    | distinct DGADomain\n)\non DGADomain;\ndataWithRareTris\n// join DGAs back on connection data\n| join kind=inner\n(\n    CommonSecurityLog\n    | where TimeGenerated > ago(startTime)\n    | where isnotempty(DestinationHostName)\n    | extend DestinationHostName = tolower(DestinationHostName)\n    | project-rename Name=DestinationHostName, DataSource=DeviceVendor\n    | summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SourceIP, DestinationIP, DataSource\n)\non Name\n| project StartTime, EndTime, Name, DGADomain, SourceIP, DestinationIP, DataSource\n",
        "queryFrequency": "PT6H",
        "queryPeriod": "PT6H",
        "severity": "Medium",
        "subTechniques": [],
        "suppressionDuration": "PT1H",
        "suppressionEnabled": false,
        "tactics": [
          "CommandAndControl"
        ],
        "techniques": [
          "T1568"
        ],
        "templateVersion": "1.0.6",
        "triggerOperator": "GreaterThan",
        "triggerThreshold": 0
      },
      "type": "Microsoft.OperationalInsights/workspaces/providers/alertRules"
    }
  ]
}