Update elasticsearch fields to use "_" rather than "."
author: Matthew Newton <mcn4@leicester.ac.uk>
Mon, 18 Jan 2016 17:41:49 +0000 (17:41 +0000)
committer: Matthew Newton <mcn4@leicester.ac.uk>
Mon, 18 Jan 2016 17:41:49 +0000 (17:41 +0000)
It seems that while elasticsearch can have data in a field as well as sub-fields, logstash
can't currently do this (it stores the event in a hash, so an entry either contains data
or another hash or list).

Work around this by using underscores, which should in theory also mean these examples will
work with elasticsearch 2, though I haven't tried it.

doc/schemas/logstash/kibana3-dashboard.json
doc/schemas/logstash/kibana4-dashboard.json
doc/schemas/logstash/logstash-radius.conf
doc/schemas/logstash/radius-mapping.sh

index 6941059..498a1c8 100644 (file)
           },
           "tmode": "terms_stats",
           "tstat": "max",
-          "valuefield": "Acct-Output-Octets.long",
+          "valuefield": "Acct-Output-Octets_long",
           "title": "TopN data Output"
         },
         {
           },
           "tmode": "terms_stats",
           "tstat": "max",
-          "valuefield": "Acct-Input-Octets.long",
+          "valuefield": "Acct-Input-Octets_long",
           "title": "TopN Data Input"
         }
       ],
     "hide": false
   },
   "refresh": false
-}
\ No newline at end of file
+}
index f3cb850..eb7930e 100644 (file)
@@ -97,7 +97,7 @@
     "_type": "visualization",
     "_source": {
       "title": "RADIUS table topN data transferred by User-Name",
-      "visState": "{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"Acct-Output-Octets.long\"}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"User-Name\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}},{\"id\":\"3\",\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"Acct-Input-Octets.long\"}}],\"listeners\":{}}",
+      "visState": "{\"type\":\"table\",\"params\":{\"perPage\":10,\"showPartialRows\":false,\"showMeticsAtAllLevels\":false},\"aggs\":[{\"id\":\"1\",\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"Acct-Output-Octets_long\"}},{\"id\":\"2\",\"type\":\"terms\",\"schema\":\"bucket\",\"params\":{\"field\":\"User-Name\",\"size\":10,\"order\":\"desc\",\"orderBy\":\"1\"}},{\"id\":\"3\",\"type\":\"max\",\"schema\":\"metric\",\"params\":{\"field\":\"Acct-Input-Octets_long\"}}],\"listeners\":{}}",
       "description": "",
       "savedSearchId": "RADIUS-data",
       "version": 1,
       }
     }
   }
-]
\ No newline at end of file
+]
index a1d9f6a..8b277a0 100644 (file)
@@ -104,8 +104,8 @@ filter {
                # down into components then do that here and add
                # the data as sub-fields. For example,
                # Called-Station-Id might be able to be broken
-               # down to Called-Station-Id.mac and Called-Station-Id.ssid
-               # on some wireless systems, or to .ip and .port
+               # down to Called-Station-Id_mac and Called-Station-Id_ssid
+               # on some wireless systems, or to _ip and _port
                # with a VPN.
 
                # Multiple calls to grok otherwise it can stop
@@ -113,16 +113,16 @@ filter {
                # e.g. you want to pull both IP and port out of
                # the same field in two different regex's.
 
-               # Pull out some IP addresses as field.ip:
+               # Pull out some IP addresses as field_ip:
 
                grok {
                        break_on_match => false
                        tag_on_failure => []
                        match => [
-                               "Framed-IP-Address", "^(?<Framed-IP-Address.ip>\d+\.\d+\.\d+\.\d+$)",
-                               "NAS-IP-Address", "^(?<NAS-IP-Address.ip>\d+\.\d+\.\d+\.\d+$)",
-                               "Calling-Station-Id", "^(?<Calling-Station-Id.ip>\d+\.\d+\.\d+\.\d+)",
-                               "Called-Station-Id", "^(?<Called-Station-Id.ip>\d+\.\d+\.\d+\.\d+)"
+                               "Framed-IP-Address", "^(?<Framed-IP-Address_ip>\d+\.\d+\.\d+\.\d+$)",
+                               "NAS-IP-Address", "^(?<NAS-IP-Address_ip>\d+\.\d+\.\d+\.\d+$)",
+                               "Calling-Station-Id", "^(?<Calling-Station-Id_ip>\d+\.\d+\.\d+\.\d+)",
+                               "Called-Station-Id", "^(?<Called-Station-Id_ip>\d+\.\d+\.\d+\.\d+)"
                        ]
                }
 
@@ -133,11 +133,11 @@ filter {
                        break_on_match => false
                        tag_on_failure => []
                        match => [
-                               "User-Name", "^(?<User-Name.username>[^@]+)?(?:@(?<User-Name.realm>[^@]+))$",
-                               "Operator-Name", "^(?<Operator-Name.id>.)(?<Operator-Name.value>.+)$",
+                               "User-Name", "^(?<User-Name_username>[^@]+)?(?:@(?<User-Name_realm>[^@]+))$",
+                               "Operator-Name", "^(?<Operator-Name_id>.)(?<Operator-Name_value>.+)$",
 
-                               "Calling-Station-Id", "\[(?<Calling-Station-Id.port>\d+)\]$",
-                               "Called-Station-Id", "\[(?<Called-Station-Id.port>\d+)\]$"
+                               "Calling-Station-Id", "\[(?<Calling-Station-Id_port>\d+)\]$",
+                               "Called-Station-Id", "\[(?<Called-Station-Id_port>\d+)\]$"
                        ]
                }
 
@@ -149,13 +149,13 @@ filter {
                        break_on_match => false
                        tag_on_failure => []
                        match => [
-                               "Calling-Station-Id", "^(?<Calling-Station-Id.mac>[a-fA-F0-9:-]{17})$",
-                               "Calling-Station-Id", "^(?<Calling-Station-Id.mac>[a-fA-F0-9\.]{14})$",
-                               "Calling-Station-Id", "^(?<Calling-Station-Id.mac>[a-fA-F0-9]{12})$",
+                               "Calling-Station-Id", "^(?<Calling-Station-Id_mac>[a-fA-F0-9:-]{17})$",
+                               "Calling-Station-Id", "^(?<Calling-Station-Id_mac>[a-fA-F0-9\.]{14})$",
+                               "Calling-Station-Id", "^(?<Calling-Station-Id_mac>[a-fA-F0-9]{12})$",
 
-                               "Called-Station-Id", "^(?<Called-Station-Id.mac>[a-fA-F0-9:-]{17})(?::(?<Called-Station-Id.ssid>.*))?$",
-                               "Called-Station-Id", "^(?<Called-Station-Id.mac>[a-fA-F0-9\.]{14})(?::(?<Called-Station-Id.ssid>.*))?$",
-                               "Called-Station-Id", "^(?<Called-Station-Id.mac>[a-fA-F0-9]{12})(?::(?<Called-Station-Id.ssid>.*))?$"
+                               "Called-Station-Id", "^(?<Called-Station-Id_mac>[a-fA-F0-9:-]{17})(?::(?<Called-Station-Id_ssid>.*))?$",
+                               "Called-Station-Id", "^(?<Called-Station-Id_mac>[a-fA-F0-9\.]{14})(?::(?<Called-Station-Id_ssid>.*))?$",
+                               "Called-Station-Id", "^(?<Called-Station-Id_mac>[a-fA-F0-9]{12})(?::(?<Called-Station-Id_ssid>.*))?$"
                        ]
                }
 
@@ -167,8 +167,8 @@ filter {
 
                # sanitize_mac {
                #       match => {
-               #               "Called-Station-Id.mac" => "Called-Station-Id.mac"
-               #               "Calling-Station-Id.mac" => "Calling-Station-Id.mac"
+               #               "Called-Station-Id_mac" => "Called-Station-Id_mac"
+               #               "Calling-Station-Id_mac" => "Calling-Station-Id_mac"
                #               }
                #       separator => ":"
                #       fixcase => "lower"
@@ -182,14 +182,14 @@ filter {
 
                if ([Acct-Input-Octets]) {
                        ruby {
-                               code => "event['Acct-Input-Octets.long'] =
+                               code => "event['Acct-Input-Octets_long'] =
                                        event['Acct-Input-Octets'].to_i + ( event['Acct-Input-Gigawords'] ? (event['Acct-Input-Gigawords'].to_i * (2**32)) : 0)"
                        }
                }
 
                if ([Acct-Output-Octets]) {
                        ruby {
-                               code => "event['Acct-Output-Octets.long'] =
+                               code => "event['Acct-Output-Octets_long'] =
                                        event['Acct-Output-Octets'].to_i + ( event['Acct-Output-Gigawords'] ? (event['Acct-Output-Gigawords'].to_i * (2**32)) : 0)"
                        }
                }
index 8fe8a48..f3f4724 100755 (executable)
 
 # Additionally, the supplied logstash config will try and extract
 # MAC addresses, IP addresses and ports from the data. These are
-# stored as sub-fields under the respective attribute. For
-# example, an attribute
+# stored with suffixes on the respective attribute. For example,
+# an attribute
 #
 #   Called-Station-Id := "10.0.4.6[4500]"
 #
 # will be broken down into the following fields in elasticsearch:
 #
 #   Called-Station-Id = "10.0.4.6[4500]"
-#   Called-Station-Id.ip = "10.0.4.6"
-#   Called-Station-Id.port = "4500"
+#   Called-Station-Id_ip = "10.0.4.6"
+#   Called-Station-Id_port = "4500"
 #
 # This mapping ensures that these have an appropriate data type.
 
@@ -61,7 +61,7 @@ curl -XPUT '127.0.0.1:9200/_template/radius' -d '
         },
 
         { "ipv4_address": {
-            "path_match": "*.ip",
+            "path_match": "*_ip",
             "mapping": {
               "type": "ip",
               "doc_values": true
@@ -70,7 +70,16 @@ curl -XPUT '127.0.0.1:9200/_template/radius' -d '
         },
 
         { "network_port": {
-            "path_match": "*.port",
+            "path_match": "*_port",
+            "mapping": {
+              "type": "integer",
+              "doc_values": true
+            }
+          }
+        },
+
+        { "long_number": {
+            "path_match": "*_long",
             "mapping": {
               "type": "integer",
               "doc_values": true