Microsoft Sentinel Analytic Rules
cloudbrothers.infoAzure Sentinel RepoToggle Dark/Light/Auto modeToggle Dark/Light/Auto modeToggle Dark/Light/Auto modeBack to homepage

Possible contact with a domain generated by a DGA

Back
Id4acd3a04-2fad-4efc-8a4b-51476594cec4
RulenamePossible contact with a domain generated by a DGA
DescriptionIdentifies contacts with domains names in CommonSecurityLog that might have been generated by a Domain Generation Algorithm (DGA). DGAs can be used by malware to generate rendezvous points that are difficult to predict in advance.

This detection uses the Alexa Top 1 million domain names to build a model of what normal domains look like. It uses this to identify domains that may have been randomly generated by an algorithm.

The triThreshold is set to 500 - increase this to report on domains that are less likely to have been randomly generated, decrease it for more likely.

The start time and end time look back over 6 hours of data and the dgaLengthThreshold is set to 8 - meaning domains whose length is 8 or more are reported.
SeverityMedium
TacticsCommandAndControl
TechniquesT1568
Required data connectorsBarracuda
CEF
CheckPoint
CiscoASA
F5
Fortinet
PaloAltoNetworks
Zscaler
KindScheduled
Query frequency6h
Query period6h
Trigger threshold0
Trigger operatorgt
Source Urihttps://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/MultiVendor-PossibleDGAContacts.yaml
Version1.0.5
Arm template4acd3a04-2fad-4efc-8a4b-51476594cec4.json
Deploy To Azure
let triThreshold = 500;
let startTime = 6h;
let dgaLengthThreshold = 8;
// fetch the alexa top 1M domains
let top1M =  (externaldata (Position:int, Domain:string)   [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"]  with (format="csv", zipPattern="*.csv"));
// extract tri grams that are above our threshold - i.e. are common
let triBaseline =   top1M
| extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
| extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
| mvexpand Trigram=AllTriGrams
| summarize triCount=count() by tostring(Trigram)
| sort by triCount desc
| where triCount > triThreshold
| distinct Trigram;
// collect domain information from common security log, filter and extract the DGA candidate and its trigrams
let allDataSummarized =   CommonSecurityLog
| where TimeGenerated > ago(startTime)
| where isnotempty(DestinationHostName)
| extend Name = tolower(DestinationHostName)
| distinct Name
| where Name has "."
| where Name !endswith ".home" and Name !endswith ".lan"
// extract DGA candidate
| extend DGADomain = extract("([^.]*).{0,7}$", 1, Name)
| where strlen(DGADomain) > dgaLengthThreshold
// throw out domains with number in them
| where DGADomain matches regex "^[A-Za-z]{0,}$"
// extract the tri grams from summarized data
| extend AllTriGrams = array_concat(extract_all("(...)", DGADomain), extract_all("(...)", substring(DGADomain, 1)), extract_all("(...)", substring(DGADomain, 2)));
// throw out domains that have repeating tri's and/or >=3 repeating letters
let nonRepeatingTris =  allDataSummarized
| join kind=leftanti
(
    allDataSummarized
    | mvexpand AllTriGrams
    | summarize count() by tostring(AllTriGrams), DGADomain
    | where count_ > 1
    | distinct DGADomain
)
on DGADomain;
// find domains that do not have a common tri in the baseline
let dataWithRareTris =  nonRepeatingTris
| join kind=leftanti
(
    nonRepeatingTris
    | mvexpand AllTriGrams
    | extend Trigram = tostring(AllTriGrams)
    | distinct Trigram, DGADomain
    | join kind=inner
    (
        triBaseline
    )
    on Trigram
    | distinct DGADomain
)
on DGADomain;
dataWithRareTris
// join DGAs back on connection data
| join kind=inner
(
    CommonSecurityLog
    | where TimeGenerated > ago(startTime)
    | where isnotempty(DestinationHostName)
    | extend DestinationHostName = tolower(DestinationHostName)
    | project-rename Name=DestinationHostName, DataSource=DeviceVendor
    | summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SourceIP, DestinationIP, DataSource
)
on Name
| project StartTime, EndTime, Name, DGADomain, SourceIP, DestinationIP, DataSource
queryPeriod: 6h
requiredDataConnectors:
- connectorId: Zscaler
  dataTypes:
  - CommonSecurityLog
- connectorId: Barracuda
  dataTypes:
  - CommonSecurityLog
- connectorId: CEF
  dataTypes:
  - CommonSecurityLog
- connectorId: CheckPoint
  dataTypes:
  - CommonSecurityLog
- connectorId: CiscoASA
  dataTypes:
  - CommonSecurityLog
- connectorId: F5
  dataTypes:
  - CommonSecurityLog
- connectorId: Fortinet
  dataTypes:
  - CommonSecurityLog
- connectorId: PaloAltoNetworks
  dataTypes:
  - CommonSecurityLog
triggerThreshold: 0
OriginalUri: https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/MultiVendor-PossibleDGAContacts.yaml
tactics:
- CommandAndControl
triggerOperator: gt
metadata:
  source:
    kind: Community
  author:
    name: Microsoft Security Research
  categories:
    domains:
    - Security - Others
  support:
    tier: Community
severity: Medium
name: Possible contact with a domain generated by a DGA
relevantTechniques:
- T1568
query: |
  let triThreshold = 500;
  let startTime = 6h;
  let dgaLengthThreshold = 8;
  // fetch the alexa top 1M domains
  let top1M =  (externaldata (Position:int, Domain:string)   [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"]  with (format="csv", zipPattern="*.csv"));
  // extract tri grams that are above our threshold - i.e. are common
  let triBaseline =   top1M
  | extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
  | extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
  | mvexpand Trigram=AllTriGrams
  | summarize triCount=count() by tostring(Trigram)
  | sort by triCount desc
  | where triCount > triThreshold
  | distinct Trigram;
  // collect domain information from common security log, filter and extract the DGA candidate and its trigrams
  let allDataSummarized =   CommonSecurityLog
  | where TimeGenerated > ago(startTime)
  | where isnotempty(DestinationHostName)
  | extend Name = tolower(DestinationHostName)
  | distinct Name
  | where Name has "."
  | where Name !endswith ".home" and Name !endswith ".lan"
  // extract DGA candidate
  | extend DGADomain = extract("([^.]*).{0,7}$", 1, Name)
  | where strlen(DGADomain) > dgaLengthThreshold
  // throw out domains with number in them
  | where DGADomain matches regex "^[A-Za-z]{0,}$"
  // extract the tri grams from summarized data
  | extend AllTriGrams = array_concat(extract_all("(...)", DGADomain), extract_all("(...)", substring(DGADomain, 1)), extract_all("(...)", substring(DGADomain, 2)));
  // throw out domains that have repeating tri's and/or >=3 repeating letters
  let nonRepeatingTris =  allDataSummarized
  | join kind=leftanti
  (
      allDataSummarized
      | mvexpand AllTriGrams
      | summarize count() by tostring(AllTriGrams), DGADomain
      | where count_ > 1
      | distinct DGADomain
  )
  on DGADomain;
  // find domains that do not have a common tri in the baseline
  let dataWithRareTris =  nonRepeatingTris
  | join kind=leftanti
  (
      nonRepeatingTris
      | mvexpand AllTriGrams
      | extend Trigram = tostring(AllTriGrams)
      | distinct Trigram, DGADomain
      | join kind=inner
      (
          triBaseline
      )
      on Trigram
      | distinct DGADomain
  )
  on DGADomain;
  dataWithRareTris
  // join DGAs back on connection data
  | join kind=inner
  (
      CommonSecurityLog
      | where TimeGenerated > ago(startTime)
      | where isnotempty(DestinationHostName)
      | extend DestinationHostName = tolower(DestinationHostName)
      | project-rename Name=DestinationHostName, DataSource=DeviceVendor
      | summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SourceIP, DestinationIP, DataSource
  )
  on Name
  | project StartTime, EndTime, Name, DGADomain, SourceIP, DestinationIP, DataSource  
queryFrequency: 6h
id: 4acd3a04-2fad-4efc-8a4b-51476594cec4
version: 1.0.5
kind: Scheduled
entityMappings:
- fieldMappings:
  - columnName: SourceIP
    identifier: Address
  entityType: IP
- fieldMappings:
  - columnName: Name
    identifier: DomainName
  entityType: DNS
description: |
  'Identifies contacts with domains names in CommonSecurityLog that might have been generated by a Domain Generation Algorithm (DGA). DGAs can be used by malware to generate rendezvous points that are difficult to predict in advance.
  This detection uses the Alexa Top 1 million domain names to build a model of what normal domains look like. It uses this to identify domains that may have been randomly generated by an algorithm.
  The triThreshold is set to 500 - increase this to report on domains that are less likely to have been randomly generated, decrease it for more likely.
  The start time and end time look back over 6 hours of data and the dgaLengthThreshold is set to 8 - meaning domains whose length is 8 or more are reported.'  
{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "workspace": {
      "type": "String"
    }
  },
  "resources": [
    {
      "apiVersion": "2024-01-01-preview",
      "id": "[concat(resourceId('Microsoft.OperationalInsights/workspaces/providers', parameters('workspace'), 'Microsoft.SecurityInsights'),'/alertRules/4acd3a04-2fad-4efc-8a4b-51476594cec4')]",
      "kind": "Scheduled",
      "name": "[concat(parameters('workspace'),'/Microsoft.SecurityInsights/4acd3a04-2fad-4efc-8a4b-51476594cec4')]",
      "properties": {
        "alertRuleTemplateName": "4acd3a04-2fad-4efc-8a4b-51476594cec4",
        "customDetails": null,
        "description": "'Identifies contacts with domains names in CommonSecurityLog that might have been generated by a Domain Generation Algorithm (DGA). DGAs can be used by malware to generate rendezvous points that are difficult to predict in advance.\nThis detection uses the Alexa Top 1 million domain names to build a model of what normal domains look like. It uses this to identify domains that may have been randomly generated by an algorithm.\nThe triThreshold is set to 500 - increase this to report on domains that are less likely to have been randomly generated, decrease it for more likely.\nThe start time and end time look back over 6 hours of data and the dgaLengthThreshold is set to 8 - meaning domains whose length is 8 or more are reported.'\n",
        "displayName": "Possible contact with a domain generated by a DGA",
        "enabled": true,
        "entityMappings": [
          {
            "entityType": "IP",
            "fieldMappings": [
              {
                "columnName": "SourceIP",
                "identifier": "Address"
              }
            ]
          },
          {
            "entityType": "DNS",
            "fieldMappings": [
              {
                "columnName": "Name",
                "identifier": "DomainName"
              }
            ]
          }
        ],
        "OriginalUri": "https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/MultiVendor-PossibleDGAContacts.yaml",
        "query": "let triThreshold = 500;\nlet startTime = 6h;\nlet dgaLengthThreshold = 8;\n// fetch the alexa top 1M domains\nlet top1M =  (externaldata (Position:int, Domain:string)   [@\"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip\"]  with (format=\"csv\", zipPattern=\"*.csv\"));\n// extract tri grams that are above our threshold - i.e. are common\nlet triBaseline =   top1M\n| extend Domain = tolower(extract(\"([^.]*).{0,7}$\", 1, Domain))\n| extend AllTriGrams = array_concat(extract_all(\"(...)\", Domain), extract_all(\"(...)\", substring(Domain, 1)), extract_all(\"(...)\", substring(Domain, 2)))\n| mvexpand Trigram=AllTriGrams\n| summarize triCount=count() by tostring(Trigram)\n| sort by triCount desc\n| where triCount > triThreshold\n| distinct Trigram;\n// collect domain information from common security log, filter and extract the DGA candidate and its trigrams\nlet allDataSummarized =   CommonSecurityLog\n| where TimeGenerated > ago(startTime)\n| where isnotempty(DestinationHostName)\n| extend Name = tolower(DestinationHostName)\n| distinct Name\n| where Name has \".\"\n| where Name !endswith \".home\" and Name !endswith \".lan\"\n// extract DGA candidate\n| extend DGADomain = extract(\"([^.]*).{0,7}$\", 1, Name)\n| where strlen(DGADomain) > dgaLengthThreshold\n// throw out domains with number in them\n| where DGADomain matches regex \"^[A-Za-z]{0,}$\"\n// extract the tri grams from summarized data\n| extend AllTriGrams = array_concat(extract_all(\"(...)\", DGADomain), extract_all(\"(...)\", substring(DGADomain, 1)), extract_all(\"(...)\", substring(DGADomain, 2)));\n// throw out domains that have repeating tri's and/or >=3 repeating letters\nlet nonRepeatingTris =  allDataSummarized\n| join kind=leftanti\n(\n    allDataSummarized\n    | mvexpand AllTriGrams\n    | summarize count() by tostring(AllTriGrams), DGADomain\n    | where count_ > 1\n    | distinct DGADomain\n)\non DGADomain;\n// find domains that do not have a common tri in the baseline\nlet dataWithRareTris =  nonRepeatingTris\n| join kind=leftanti\n(\n    nonRepeatingTris\n    | mvexpand AllTriGrams\n    | extend Trigram = tostring(AllTriGrams)\n    | distinct Trigram, DGADomain\n    | join kind=inner\n    (\n        triBaseline\n    )\n    on Trigram\n    | distinct DGADomain\n)\non DGADomain;\ndataWithRareTris\n// join DGAs back on connection data\n| join kind=inner\n(\n    CommonSecurityLog\n    | where TimeGenerated > ago(startTime)\n    | where isnotempty(DestinationHostName)\n    | extend DestinationHostName = tolower(DestinationHostName)\n    | project-rename Name=DestinationHostName, DataSource=DeviceVendor\n    | summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SourceIP, DestinationIP, DataSource\n)\non Name\n| project StartTime, EndTime, Name, DGADomain, SourceIP, DestinationIP, DataSource\n",
        "queryFrequency": "PT6H",
        "queryPeriod": "PT6H",
        "severity": "Medium",
        "subTechniques": [],
        "suppressionDuration": "PT1H",
        "suppressionEnabled": false,
        "tactics": [
          "CommandAndControl"
        ],
        "techniques": [
          "T1568"
        ],
        "templateVersion": "1.0.5",
        "triggerOperator": "GreaterThan",
        "triggerThreshold": 0
      },
      "type": "Microsoft.OperationalInsights/workspaces/providers/alertRules"
    }
  ]
}