Microsoft Sentinel Analytic Rules
cloudbrothers.infoAzure Sentinel RepoToggle Dark/Light/Auto modeToggle Dark/Light/Auto modeToggle Dark/Light/Auto modeBack to homepage

Potential communication with a Domain Generation Algorithm DGA based hostname ASIM Web Session schema

Back
Id: 9176b18f-a946-42c6-a2f6-0f6d17cd6a8a
Rulename: Potential communication with a Domain Generation Algorithm (DGA) based hostname (ASIM Web Session schema)
Description: This rule identifies communication with hosts that have a domain name that might have been generated by a Domain Generation Algorithm (DGA). DGAs are used by malware to generate rendezvous points that are difficult to predict in advance. This detection uses the top 1 million domain names to build a model of what normal domains look like and uses the model to identify domains that may have been randomly generated by an algorithm. You can modify the triThreshold and dgaLengthThreshold query parameters to change Analytic Rule sensitivity. The higher the numbers, the less noisy the rule is. <br>

This analytic rule uses ASIM and supports any built-in or custom source that supports the ASIM WebSession schema (ASIM WebSession Schema)
Severity: Medium
Tactics: CommandAndControl
Techniques: T1568
Required data connectors: SquidProxy, Zscaler
Kind: Scheduled
Query frequency: 6h
Query period: 6h
Trigger threshold: 0
Trigger operator: gt
Source Uri: https://github.com/Azure/Azure-Sentinel/blob/master/Detections/ASimWebSession/PossibleDGAContacts.yaml
Version: 1.1.3
Arm template: 9176b18f-a946-42c6-a2f6-0f6d17cd6a8a.json
Deploy To Azure
// Potential DGA host detection over ASIM Web Session data.
// Outline:
//   1. learn "common" trigrams from the Cisco Umbrella top-1M domain list,
//   2. extract a candidate label from every requested host name,
//   3. drop candidates in which any trigram occurs more than once,
//   4. keep only candidates that share no trigram with the common set,
//   5. join the survivors back to the sessions to add source IP, URL and time.
//
// Tunables:
//   triThreshold       - a trigram is "common" when it occurs more than this many times in the top-1M labels.
//   dgaLengthThreshold - only candidate labels strictly longer than this many characters are analysed.
let triThreshold = 500;
let dgaLengthThreshold = 8;
// fetch the cisco umbrella top 1M domains
// NOTE(review): the baseline is downloaded over plain HTTP; consider switching to an HTTPS endpoint after confirming one is reachable.
let top1M =  (externaldata (Position:int, Domain:string)   [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"]  with (format="csv", zipPattern="*.csv"));
// extract tri grams that are above our threshold - i.e. are common
let triBaseline =   top1M
  // capture the dot-free label that is followed by at most 7 trailing characters (intended to strip the TLD suffix)
  | extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
  // all overlapping trigrams = non-overlapping triples taken at offsets 0, 1 and 2
  | extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
  // mv-expand replaces the legacy mvexpand alias, which has a much lower default row limit
  | mv-expand Trigram=AllTriGrams to typeof(string)
  | summarize triCount=count() by Trigram
  | where triCount > triThreshold
  | distinct Trigram;
// collect host names from the ASIM Web Session parser, filter and extract the DGA candidate and its trigrams
let allDataSummarized =  _Im_WebSession
| where isnotempty(Url)
| extend Name = tolower(tostring(parse_url(Url)["Host"]))
| summarize NameCount=count() by Name
| where Name has "."
// ignore local-network suffixes
| where Name !endswith ".home" and Name !endswith ".lan"
// extract DGA candidate
| extend DGADomain = extract("([^.]*).{0,7}$", 1, Name)
| where strlen(DGADomain) > dgaLengthThreshold
// keep purely alphabetic candidates only (drops labels containing digits, hyphens or any other non-letter)
| where DGADomain matches regex "^[A-Za-z]{0,}$"
// extract the tri grams from summarized data
| extend AllTriGrams = array_concat(extract_all("(...)", DGADomain), extract_all("(...)", substring(DGADomain, 1)), extract_all("(...)", substring(DGADomain, 2)));
// throw out domains that have repeating tri's and/or >=3 repeating letters
let nonRepeatingTris =  allDataSummarized
| join kind=leftanti
(
    allDataSummarized
    | mv-expand AllTriGrams
    | summarize count() by tostring(AllTriGrams), DGADomain
    | where count_ > 1
    | distinct DGADomain
)
on DGADomain;
// find domains that do not have a common tri in the baseline
let dataWithRareTris =  nonRepeatingTris
| join kind=leftanti
(
    // candidates that contain at least one common trigram; these are removed by the anti-join
    nonRepeatingTris
    | mv-expand AllTriGrams
    | extend Trigram = tostring(AllTriGrams)
    | distinct Trigram, DGADomain
    | join kind=inner
    (
        triBaseline
    )
    on Trigram
    | distinct DGADomain
)
on DGADomain;
dataWithRareTris
// join DGAs back on connection data
| join kind=inner
(
    _Im_WebSession
    | where isnotempty(Url)
    | extend Url = tolower(Url)
    // NOTE(review): arg_max leaves a single (latest) row per Url, so the min/max below
    // aggregate one row per group and StartTime always equals EndTime - confirm whether
    // a true first-seen/last-seen window was intended.
    | summarize arg_max(TimeGenerated, EventVendor,  SrcIpAddr) by Url
    | extend Name=tostring(parse_url(Url)["Host"])
    | summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SrcIpAddr, Url
)
on Name
| project StartTime, EndTime, Name, DGADomain, SrcIpAddr, Url, NameCount
# Query columns surfaced on the alert as custom details (detail name: query column).
customDetails:
  DGAPattern: DGADomain
  NameCount: NameCount
name: Potential communication with a Domain Generation Algorithm (DGA) based hostname (ASIM Web Session schema)
id: 9176b18f-a946-42c6-a2f6-0f6d17cd6a8a
# Together with triggerOperator (gt) below: alert when the query returns more than 0 results.
triggerThreshold: 0
severity: Medium
tactics:
- CommandAndControl
OriginalUri: https://github.com/Azure/Azure-Sentinel/blob/master/Detections/ASimWebSession/PossibleDGAContacts.yaml
# Query output columns mapped to alert entities.
entityMappings:
- entityType: IP
  fieldMappings:
  - columnName: SrcIpAddr
    identifier: Address
- entityType: URL
  fieldMappings:
  - columnName: Url
    identifier: Url
# The rule runs every 6h over the preceding 6h of data.
queryPeriod: 6h
queryFrequency: 6h
version: 1.1.3
triggerOperator: gt
description: |
  'This rule identifies communication with hosts that have a domain name that might have been generated by a Domain Generation Algorithm (DGA). DGAs are used by malware to generate rendezvous points that are difficult to predict in advance. This detection uses the top 1 million domain names to build a model of what normal domains look like and uses the model to identify domains that may have been randomly generated by an algorithm. You can modify the triThreshold and dgaLengthThreshold query parameters to change Analytic Rule sensitivity. The higher the numbers, the less noisy the rule is. <br>
   This analytic rule uses [ASIM](https://aka.ms/AboutASIM) and supports any built-in or custom source that supports the ASIM WebSession schema (ASIM WebSession Schema)'
metadata:
  support:
    tier: Community
  author:
    name: Yaron
  source:
    kind: Community
  categories:
    domains:
    - Security - Threat Protection
query: |
  // Potential DGA host detection over ASIM Web Session data.
  // Outline:
  //   1. learn "common" trigrams from the Cisco Umbrella top-1M domain list,
  //   2. extract a candidate label from every requested host name,
  //   3. drop candidates in which any trigram occurs more than once,
  //   4. keep only candidates that share no trigram with the common set,
  //   5. join the survivors back to the sessions to add source IP, URL and time.
  //
  // Tunables:
  //   triThreshold       - a trigram is "common" when it occurs more than this many times in the top-1M labels.
  //   dgaLengthThreshold - only candidate labels strictly longer than this many characters are analysed.
  let triThreshold = 500;
  let dgaLengthThreshold = 8;
  // fetch the cisco umbrella top 1M domains
  // NOTE(review): the baseline is downloaded over plain HTTP; consider switching to an HTTPS endpoint after confirming one is reachable.
  let top1M =  (externaldata (Position:int, Domain:string)   [@"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"]  with (format="csv", zipPattern="*.csv"));
  // extract tri grams that are above our threshold - i.e. are common
  let triBaseline =   top1M
    // capture the dot-free label that is followed by at most 7 trailing characters (intended to strip the TLD suffix)
    | extend Domain = tolower(extract("([^.]*).{0,7}$", 1, Domain))
    // all overlapping trigrams = non-overlapping triples taken at offsets 0, 1 and 2
    | extend AllTriGrams = array_concat(extract_all("(...)", Domain), extract_all("(...)", substring(Domain, 1)), extract_all("(...)", substring(Domain, 2)))
    // mv-expand replaces the legacy mvexpand alias, which has a much lower default row limit
    | mv-expand Trigram=AllTriGrams to typeof(string)
    | summarize triCount=count() by Trigram
    | where triCount > triThreshold
    | distinct Trigram;
  // collect host names from the ASIM Web Session parser, filter and extract the DGA candidate and its trigrams
  let allDataSummarized =  _Im_WebSession
  | where isnotempty(Url)
  | extend Name = tolower(tostring(parse_url(Url)["Host"]))
  | summarize NameCount=count() by Name
  | where Name has "."
  // ignore local-network suffixes
  | where Name !endswith ".home" and Name !endswith ".lan"
  // extract DGA candidate
  | extend DGADomain = extract("([^.]*).{0,7}$", 1, Name)
  | where strlen(DGADomain) > dgaLengthThreshold
  // keep purely alphabetic candidates only (drops labels containing digits, hyphens or any other non-letter)
  | where DGADomain matches regex "^[A-Za-z]{0,}$"
  // extract the tri grams from summarized data
  | extend AllTriGrams = array_concat(extract_all("(...)", DGADomain), extract_all("(...)", substring(DGADomain, 1)), extract_all("(...)", substring(DGADomain, 2)));
  // throw out domains that have repeating tri's and/or >=3 repeating letters
  let nonRepeatingTris =  allDataSummarized
  | join kind=leftanti
  (
      allDataSummarized
      | mv-expand AllTriGrams
      | summarize count() by tostring(AllTriGrams), DGADomain
      | where count_ > 1
      | distinct DGADomain
  )
  on DGADomain;
  // find domains that do not have a common tri in the baseline
  let dataWithRareTris =  nonRepeatingTris
  | join kind=leftanti
  (
      // candidates that contain at least one common trigram; these are removed by the anti-join
      nonRepeatingTris
      | mv-expand AllTriGrams
      | extend Trigram = tostring(AllTriGrams)
      | distinct Trigram, DGADomain
      | join kind=inner
      (
          triBaseline
      )
      on Trigram
      | distinct DGADomain
  )
  on DGADomain;
  dataWithRareTris
  // join DGAs back on connection data
  | join kind=inner
  (
      _Im_WebSession
      | where isnotempty(Url)
      | extend Url = tolower(Url)
      // NOTE(review): arg_max leaves a single (latest) row per Url, so the min/max below
      // aggregate one row per group and StartTime always equals EndTime - confirm whether
      // a true first-seen/last-seen window was intended.
      | summarize arg_max(TimeGenerated, EventVendor,  SrcIpAddr) by Url
      | extend Name=tostring(parse_url(Url)["Host"])
      | summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SrcIpAddr, Url
  )
  on Name
  | project StartTime, EndTime, Name, DGADomain, SrcIpAddr, Url, NameCount
relevantTechniques:
- T1568
tags:
- ParentAlert: https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/MultiVendor-PossibleDGAContacts.yaml
  version: 1.0.0
- Schema: ASIMWebSession
  SchemaVersion: 0.2.0
requiredDataConnectors:
- dataTypes:
  - SquidProxy_CL
  connectorId: SquidProxy
- dataTypes:
  - CommonSecurityLog
  connectorId: Zscaler
alertDetailsOverride:
  # Each {{Column}} placeholder names a column projected by the query; every placeholder
  # must be closed with a double brace.
  alertDisplayNameFormat: Potential communication from {{SrcIpAddr}} with a Domain Generation Algorithm (DGA) based host {{Name}}
  alertDescriptionFormat: A client with address {{SrcIpAddr}} communicated with host {{Name}} that have a domain name that might have been generated by a Domain Generation Algorithm (DGA), identified by the pattern {{DGADomain}}. DGAs are used by malware to generate rendezvous points that are difficult to predict in advance. This detection uses the top 1 million domain names to build a model of what normal domains look like and uses the model to identify domains that may have been randomly generated by an algorithm.
kind: Scheduled
{
  "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
  "contentVersion": "1.0.0.0",
  "parameters": {
    "workspace": {
      "type": "String"
    }
  },
  "resources": [
    {
      "apiVersion": "2023-02-01-preview",
      "id": "[concat(resourceId('Microsoft.OperationalInsights/workspaces/providers', parameters('workspace'), 'Microsoft.SecurityInsights'),'/alertRules/9176b18f-a946-42c6-a2f6-0f6d17cd6a8a')]",
      "kind": "Scheduled",
      "name": "[concat(parameters('workspace'),'/Microsoft.SecurityInsights/9176b18f-a946-42c6-a2f6-0f6d17cd6a8a')]",
      "properties": {
        "alertDetailsOverride": {
          "alertDescriptionFormat": "A client with address {{SrcIpAddr}} communicated with host {{Name}} that have a domain name that might have been generated by a Domain Generation Algorithm (DGA), identified by the pattern {{DGADomain}}. DGAs are used by malware to generate rendezvous points that are difficult to predict in advance. This detection uses the top 1 million domain names to build a model of what normal domains look like and uses the model to identify domains that may have been randomly generated by an algorithm.",
          "alertDisplayNameFormat": "Potential communication from {{SrcIpAddr}} with a Domain Generation Algorithm (DGA) based host {{Name}}"
        },
        "alertRuleTemplateName": "9176b18f-a946-42c6-a2f6-0f6d17cd6a8a",
        "customDetails": {
          "DGAPattern": "DGADomain",
          "NameCount": "NameCount"
        },
        "description": "'This rule identifies communication with hosts that have a domain name that might have been generated by a Domain Generation Algorithm (DGA). DGAs are used by malware to generate rendezvous points that are difficult to predict in advance. This detection uses the top 1 million domain names to build a model of what normal domains look like and uses the model to identify domains that may have been randomly generated by an algorithm. You can modify the triThreshold and dgaLengthThreshold query parameters to change Analytic Rule sensitivity. The higher the numbers, the less noisy the rule is. <br>\n This analytic rule uses [ASIM](https://aka.ms/AboutASIM) and supports any built-in or custom source that supports the ASIM WebSession schema (ASIM WebSession Schema)'\n",
        "displayName": "Potential communication with a Domain Generation Algorithm (DGA) based hostname (ASIM Web Session schema)",
        "enabled": true,
        "entityMappings": [
          {
            "entityType": "IP",
            "fieldMappings": [
              {
                "columnName": "SrcIpAddr",
                "identifier": "Address"
              }
            ]
          },
          {
            "entityType": "URL",
            "fieldMappings": [
              {
                "columnName": "Url",
                "identifier": "Url"
              }
            ]
          }
        ],
        "OriginalUri": "https://github.com/Azure/Azure-Sentinel/blob/master/Detections/ASimWebSession/PossibleDGAContacts.yaml",
        "query": "let triThreshold = 500;\nlet querystarttime = 6h;\nlet dgaLengthThreshold = 8;\n// fetch the cisco umbrella top 1M domains\nlet top1M =  (externaldata (Position:int, Domain:string)   [@\"http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip\"]  with (format=\"csv\", zipPattern=\"*.csv\"));\n// extract tri grams that are above our threshold - i.e. are common\nlet triBaseline =   top1M\n  | extend Domain = tolower(extract(\"([^.]*).{0,7}$\", 1, Domain))\n  | extend AllTriGrams = array_concat(extract_all(\"(...)\", Domain), extract_all(\"(...)\", substring(Domain, 1)), extract_all(\"(...)\", substring(Domain, 2)))\n  | mvexpand Trigram=AllTriGrams to typeof(string)\n  | summarize triCount=count() by Trigram\n  | sort by triCount desc\n  | where triCount > triThreshold\n  | distinct Trigram;\n// collect domain information from common security log, filter and extract the DGA candidate and its trigrams\nlet allDataSummarized =  _Im_WebSession\n| where isnotempty(Url)\n| extend Name = tolower(tostring(parse_url(Url)[\"Host\"]))\n| summarize NameCount=count() by Name\n| where Name has \".\"\n| where Name !endswith \".home\" and Name !endswith \".lan\"\n// extract DGA candidate\n| extend DGADomain = extract(\"([^.]*).{0,7}$\", 1, Name)\n| where strlen(DGADomain) > dgaLengthThreshold\n// throw out domains with number in them\n| where DGADomain matches regex \"^[A-Za-z]{0,}$\"\n// extract the tri grams from summarized data\n| extend AllTriGrams = array_concat(extract_all(\"(...)\", DGADomain), extract_all(\"(...)\", substring(DGADomain, 1)), extract_all(\"(...)\", substring(DGADomain, 2)));\n// throw out domains that have repeating tri's and/or >=3 repeating letters\nlet nonRepeatingTris =  allDataSummarized\n| join kind=leftanti\n(\n    allDataSummarized\n    | mvexpand AllTriGrams\n    | summarize count() by tostring(AllTriGrams), DGADomain\n    | where count_ > 1\n    | distinct DGADomain\n)\non DGADomain;\n// find domains that do not have a 
common tri in the baseline\nlet dataWithRareTris =  nonRepeatingTris\n| join kind=leftanti\n(\n    nonRepeatingTris\n    | mvexpand AllTriGrams\n    | extend Trigram = tostring(AllTriGrams)\n    | distinct Trigram, DGADomain\n    | join kind=inner\n    (\n        triBaseline\n    )\n    on Trigram\n    | distinct DGADomain\n)\non DGADomain;\ndataWithRareTris\n// join DGAs back on connection data\n| join kind=inner\n(\n    _Im_WebSession\n    | where isnotempty(Url)\n    | extend Url = tolower(Url)\n    | summarize arg_max(TimeGenerated, EventVendor,  SrcIpAddr) by Url\n    | extend Name=tostring(parse_url(Url)[\"Host\"])\n    | summarize StartTime=min(TimeGenerated), EndTime=max(TimeGenerated) by Name, SrcIpAddr, Url\n)\non Name\n| project StartTime, EndTime, Name, DGADomain, SrcIpAddr, Url, NameCount\n",
        "queryFrequency": "PT6H",
        "queryPeriod": "PT6H",
        "severity": "Medium",
        "suppressionDuration": "PT1H",
        "suppressionEnabled": false,
        "tactics": [
          "CommandAndControl"
        ],
        "tags": [
          {
            "ParentAlert": "https://github.com/Azure/Azure-Sentinel/blob/master/Detections/CommonSecurityLog/MultiVendor-PossibleDGAContacts.yaml",
            "version": "1.0.0"
          },
          {
            "Schema": "ASIMWebSession",
            "SchemaVersion": "0.2.0"
          }
        ],
        "techniques": [
          "T1568"
        ],
        "templateVersion": "1.1.3",
        "triggerOperator": "GreaterThan",
        "triggerThreshold": 0
      },
      "type": "Microsoft.OperationalInsights/workspaces/providers/alertRules"
    }
  ]
}