8b277a005f3c89ede31ca95f08e0a3548adda95f
[freeradius.git] / doc / schemas / logstash / logstash-radius.conf
1 # logstash configuration to process RADIUS detail files
2 #
3 # Matthew Newton
4 # January 2016
5
6 # RADIUS "detail" files are textual representations of the RADIUS
7 # packets, and are written to disk by e.g. FreeRADIUS. They look
8 # something like the following, with the timestamp on the first
9 # line then all attributes/values tab-indented.
10 #
11 #       Tue Mar 10 15:32:24 2015
12 #               Packet-Type = Access-Request
13 #               User-Name = "test@example.com"
14 #               Calling-Station-Id = "01-02-03-04-05-06"
15 #               Called-Station-Id = "aa-bb-cc-dd-ee-ff:myssid"
16 #               NAS-Port = 10
17 #               NAS-IP-Address = 10.9.0.4
18 #               NAS-Identifier = "Wireless-Controller-1"
19 #               Service-Type = Framed-User
20 #               NAS-Port-Type = Wireless-802.11
21 #
22
23
24
25 # Example input - read data from a file. This can be useful for
26 # testing, but usually not so much for live service. For example,
27 # to read in a detail file with this input you could use:
28 #
29 #   /opt/logstash/bin/logstash -v -f logstash-radius.conf < detailfile
30
31 input {
32         stdin {
                # Tag each event so that the filter and output sections
                # below can match on [type] == "radiusdetail".
33                 type => radiusdetail
34         }
35 }
36
37 # Moving into production will likely need something more reliable.
38 # There are many input methods, an example here using log-courier
39 # (which supports client-side multiline processing and does not
40 # lose log events if logstash is restarted).
41
42 # input {
43 #       courier {
44 #               port => 5140
45 #               transport => "tcp"
46 #       }
47 # }
48
49
50
51 # Filter stage. Here we take the raw logs and process them into
52 # something structured ready to index. Each attribute is stored as
53 # a separate field in the output document.
54
55 filter {
56
57         if [type] == "radiusdetail" {
58
59                 # If you are using a log feeder that can join
60                 # multiple lines together then that is preferable
61                 # to using multiline here, because this cannot be
62                 # used with threaded logstash (i.e. -w<n> at
63                 # startup).
64
65                 # In that case you should comment out the following
66                 # section. For example, see the log-courier
67                 # configuration in this directory.
68
69                 multiline {
70                         pattern => "^[A-Z\t]"
71                         negate => false
72                         what => "next"
73                 }
74
75                 # Pull off the timestamp at the start of the
76                 # detail record. Note there may be additional data
77                 # after it that has been added by the local admin,
78                 # so stop at a newline OR a tab.
79
80                 grok {
81                         match => [ "message", "^(?<timestamp>[^\n\t]+)[\n\t]" ]
82                 }
83
84                 # Create the @timestamp field.
85
86                 date {
87                         match => [ "timestamp", "EEE MMM dd HH:mm:ss yyyy",
88                                                 "EEE MMM  d HH:mm:ss yyyy" ]
89                 }
90
91                 # Split the attributes and values into fields.
92                 # This is the bulk of processing that adds all of
93                 # the RADIUS attributes as elasticsearch fields.
94
95                 kv {
96                         field_split => "\n"
97                         source => "message"
98                         trim => "\" "
99                         trimkey => "\t "
100                 }
101
102                 # Now we try and add some useful additional
103                 # information. If certain fields can be broken
104                 # down into components then do that here and add
105                 # the data as sub-fields. For example,
106                 # Called-Station-Id might be able to be broken
107                 # down to Called-Station-Id_mac and Called-Station-Id_ssid
108                 # on some wireless systems, or to _ip and _port
109                 # with a VPN.
110
111                 # Multiple calls to grok otherwise it can stop
112                 # processing once it has matched one field, but
113                 # e.g. you want to pull both IP and port out of
114                 # the same field in two different regex's.
115
116                 # Pull out some IP addresses as field_ip:
117
118                 grok {
119                         break_on_match => false
120                         tag_on_failure => []
121                         match => [
122                                 "Framed-IP-Address", "^(?<Framed-IP-Address_ip>\d+\.\d+\.\d+\.\d+$)",
123                                 "NAS-IP-Address", "^(?<NAS-IP-Address_ip>\d+\.\d+\.\d+\.\d+$)",
124                                 "Calling-Station-Id", "^(?<Calling-Station-Id_ip>\d+\.\d+\.\d+\.\d+)",
125                                 "Called-Station-Id", "^(?<Called-Station-Id_ip>\d+\.\d+\.\d+\.\d+)"
126                         ]
127                 }
128
129                 # Split User-Name, Operator-Name, and pull out
130                 # some IP ports if they are there:
131
132                 grok {
133                         break_on_match => false
134                         tag_on_failure => []
135                         match => [
136                                 "User-Name", "^(?<User-Name_username>[^@]+)?(?:@(?<User-Name_realm>[^@]+))$",
137                                 "Operator-Name", "^(?<Operator-Name_id>.)(?<Operator-Name_value>.+)$",
138
139                                 "Calling-Station-Id", "\[(?<Calling-Station-Id_port>\d+)\]$",
140                                 "Called-Station-Id", "\[(?<Called-Station-Id_port>\d+)\]$"
141                         ]
142                 }
143
144                 # Extract MAC addresses (and SSIDs if there).
145                 # MAC address matching here is lazy, but should be
146                 # good enough.
147
148                 grok {
149                         break_on_match => false
150                         tag_on_failure => []
151                         match => [
152                                 "Calling-Station-Id", "^(?<Calling-Station-Id_mac>[a-fA-F0-9:-]{17})$",
153                                 "Calling-Station-Id", "^(?<Calling-Station-Id_mac>[a-fA-F0-9\.]{14})$",
154                                 "Calling-Station-Id", "^(?<Calling-Station-Id_mac>[a-fA-F0-9]{12})$",
155
156                                 "Called-Station-Id", "^(?<Called-Station-Id_mac>[a-fA-F0-9:-]{17})(?::(?<Called-Station-Id_ssid>.*))?$",
157                                 "Called-Station-Id", "^(?<Called-Station-Id_mac>[a-fA-F0-9\.]{14})(?::(?<Called-Station-Id_ssid>.*))?$",
158                                 "Called-Station-Id", "^(?<Called-Station-Id_mac>[a-fA-F0-9]{12})(?::(?<Called-Station-Id_ssid>.*))?$"
159                         ]
160                 }
161
162                 # With the optional sanitize_mac plugin, it's
163                 # possible to make sure all MAC addresses look the
164                 # same, which has obvious benefits.
165                 #
166                 # https://github.com/mcnewton/elk/blob/master/logstash-filters/sanitize_mac.rb
167
168                 # sanitize_mac {
169                 #       match => {
170                 #               "Called-Station-Id_mac" => "Called-Station-Id_mac"
171                 #               "Calling-Station-Id_mac" => "Calling-Station-Id_mac"
172                 #               }
173                 #       separator => ":"
174                 #       fixcase => "lower"
175                 # }
176
177
178                 # Gigawords presents an issue because the 64-bit
179                 # value is split across two attributes. Combine
180                 # them both back into a single attribute so that
181                 # the full value is available to use.
182
                # NOTE(review): the event['...'] get/set syntax below is the
                # legacy Ruby event API (pre Logstash 5.x); later releases
                # require event.get(...)/event.set(...) — confirm the target
                # Logstash version before reusing this config.
183                 if ([Acct-Input-Octets]) {
184                         ruby {
185                                 code => "event['Acct-Input-Octets_long'] =
186                                         event['Acct-Input-Octets'].to_i + ( event['Acct-Input-Gigawords'] ? (event['Acct-Input-Gigawords'].to_i * (2**32)) : 0)"
187                         }
188                 }
189
190                 if ([Acct-Output-Octets]) {
191                         ruby {
192                                 code => "event['Acct-Output-Octets_long'] =
193                                         event['Acct-Output-Octets'].to_i + ( event['Acct-Output-Gigawords'] ? (event['Acct-Output-Gigawords'].to_i * (2**32)) : 0)"
194                         }
195                 }
196
197         }
198 }
199
200
201
202 # Output data to the local elasticsearch cluster (called
203 # "elasticsearch") using type "detail" in index "radius-DATE".
204
205 output {
206         if [type] == "radiusdetail" {
                # NOTE(review): host/protocol/cluster/index_type/flush_size are
                # legacy elasticsearch output options (Logstash 1.x/2.x era,
                # consistent with this file's January 2016 date); later Logstash
                # releases replaced them with hosts/document_type — confirm the
                # target version before reusing this config.
207                 elasticsearch {
208                         host => localhost
209                         protocol => http
210                         cluster => elasticsearch
211                         index_type => "detail"
212                         index => "radius-%{+YYYY.MM.dd}"
213                         flush_size => 1000
214                 }
215         }
216 }
217