source: squid-ssl/trunk/fuentes/helpers/log_daemon/DB/log_db_daemon @ 5495

#!/usr/bin/perl
use strict;
use warnings;
use DBI;
use English qw( -no_match_vars );
use Getopt::Long;
use Pod::Usage;

$|=1;

=pod

=head1 NAME

log_db_daemon - Database logging daemon for Squid

=head1 SYNOPSIS

log_db_daemon DSN [options]

=head1 DESCRIPTION

This program writes Squid access.log entries to a database.
Presently it accepts only the B<squid> native log format.

=over 8

=item   B<DSN>

Database DSN encoded as a path. This is sent as the access_log file path.

Sample configuration:
  access_log daemon:/host/database/table/username/password squid

  to leave a parameter unspecified, use a double slash:
  access_log daemon://database/table/username/password squid

Default "DBI:mysql:database=squid_log"

=item   B<--debug>

Write debug messages to Squid stderr or cache.log

=back

=cut

# the first argument to this script is the log file path describing the DSN
my $log_file = shift;

# others may be options
my $debug = 0;
GetOptions(
        'debug' => \$debug,
        );


# utility routine to print messages on stderr (so they appear in cache log)
# without using warn, which would clutter the log with source line numbers
sub log_info {
    my $msg = shift;
    print STDERR "$msg\n";
}

# we use logfile to pass database access information to this script
# sample configuration:
# access_log daemon:/host/database/table/username/password squid
# to leave a parameter unspecified, e.g. the database host, use a double slash:
# access_log daemon://database/table/username/password squid
my ( $host, $database, $table, $user, $pass ) = $log_file =~ / \/(.*?)\/(.*?)\/(.*?)\/(.*?)\/(.*?) \z /xms;

if ( !$host ) {
    $host = 'localhost';
    log_info("Database host not specified. Using $host.");
}

if ( !$database ) {
    $database = 'squid_log';
    log_info("Database name not specified. Using $database.");
}

if ( !$table ) {
    $table = 'access_log';
    log_info("Table parameter not specified. Using $table.");
}

if ( !$user ) {
    $user = 'squid';
    log_info("User parameter not specified. Using $user.");
}

if ( !$pass ) {
    log_info('No password specified. Connecting with NO password.');
}

# fields that we should have in the table
# Order here must match the order of fields in the Log format and parse() output array.
my @db_fields = qw(
    id
    time_since_epoch
    time_response
    ip_client
    squid_request_status
    http_status_code
    http_reply_size
    http_method
    http_url
    http_username
    squid_hier_status
    ip_server
    http_mime_type
);

# perform db connection
my $dsn = "DBI:mysql:database=$database" . ($host ne "localhost" ? ":$host" : "");
my $dbh;
my $sth;
eval {
    warn "Connecting... dsn='$dsn', username='$user', password='...'";
    $dbh = DBI->connect($dsn, $user, $pass, { AutoCommit => 1, RaiseError => 1, PrintError => 1 });
};
if ($EVAL_ERROR) {
    die "Cannot connect to database: $DBI::errstr";
}


# a simple test to ensure the specified table exists
eval {
    my $q = 'SELECT ' . join(',',@db_fields) . " FROM $table LIMIT 1";
    my $sth = $dbh->prepare($q);
    $sth->execute;
};
if ($EVAL_ERROR) {
    # run a query to create the table with the required schema
    my $create_query = 'CREATE TABLE ' . $table . ' (' .
    " id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY," .
    " time_since_epoch     DECIMAL(15,3)," .
    " time_response        INTEGER," .
    " ip_client            CHAR(15)," .
    " ip_server            CHAR(15)," .
    " http_status_code     VARCHAR(10)," .
    " http_reply_size      INTEGER," .
    " http_method          VARCHAR(20)," .
    " http_url             TEXT," .
    " http_username        VARCHAR(20)," .
    " http_mime_type       VARCHAR(50)," .
    " squid_request_status VARCHAR(50)," .
    " squid_hier_status    VARCHAR(20)" .
    ");" ;

    $sth = $dbh->prepare($create_query);
    $sth->execute;
    # test again and fail hard if it is still broken.
    eval {
        my $q = 'SELECT ' . join(',',@db_fields) . " FROM $table LIMIT 1";
        my $sth = $dbh->prepare($q);
        $sth->execute;
    };
    if ($EVAL_ERROR) {
        die "Error initializing database table: $EVAL_ERROR";
    };
}
# test

# for better performance, prepare the statement at startup
eval {
    my $q = "INSERT INTO $table (" . join(',',@db_fields) . ") VALUES(NULL" . ',?' x (scalar(@db_fields)-1) . ')';
    #$sth = $dbh->prepare("INSERT INTO $table VALUES(NULL,?,?,?,?,?,?,?,?,?,?,?,?)");
    $sth = $dbh->prepare($q);
};
if ($EVAL_ERROR) {
    die "Error while preparing sql statement: $EVAL_ERROR";
}

sub parse($) {
    my ($line) = @_;
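    # Input: a squid native access.log line prefixed with the 'L' command byte:
    #   L<time.ms> <elapsed> <client> <result_code>/<status> <bytes> <method> <URL> <user> <hierarchy>/<peer> <mime_type>
    # The twelve captures below line up with @db_fields after the auto-increment id.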
    my (@t) = $line =~ /^L(\d+\.\d+) *(\d+?) (.*?) (.*?)\/(\d+?) (\d+?) (.*?) (.*?) (.*?) (.*?)\/(.*?) (.*)$/;
}

# main loop
while (my $line = <>) {
    chomp $line;

    my $cmd = substr($line, 0, 1);      # extract command byte

    if ( $cmd eq 'L' ) {
        my @log_entry = parse($line);
        eval {                  # we catch db errors to avoid crashing squid in case something goes wrong...
            $sth->execute(@log_entry) or die $sth->errstr
        };
        if ( $EVAL_ERROR ) {    # leave a trace of the error in the logs
            warn $EVAL_ERROR . " values=(" . join(', ', @log_entry) . ')';
        }
    }
}

$dbh->disconnect();

__END__

=pod

=head1 DESCRIPTION

This module uses the logfile daemon support available in squid 2.7 and 3.2 to store access log entries in a MySQL database.

=head1 CONFIGURATION

=head2 Squid configuration

=head3 access_log directive

The path to the access log file is used to provide the database connection parameters.

  access_log daemon:/mysql_host:port/database/table/username/password squid

The 'daemon' prefix is mandatory and tells squid that the B<logfile_daemon> helper is to be used instead of the normal file logging.

The last parameter tells squid which log format to use when writing lines to the log daemon.
Presently only the B<squid> format is supported.
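
For reference, a line in the B<squid> native log format looks like this (the field values here are purely illustrative); the daemon receives each line prefixed with the C<L> ("append a line to the log") command byte:

  L1066036250.130    245 192.168.1.10 TCP_MISS/200 8756 GET http://www.example.com/index.html - DIRECT/203.0.113.15 text/html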

=over 4

=item mysql_host:port

Host where the mysql server is running. If left empty, 'localhost' is assumed.

=item database

Name of the database to connect to. If left empty, 'squid_log' is assumed.

=item table

Name of the database table where log lines are stored. If left empty, 'access_log' is assumed.

=item username

Username to use when connecting to the database. If left empty, 'squid' is assumed.

=item password

Password to use when connecting to the database. If left empty, no password is used.

=back

To leave all fields at their default values, you can use a single slash:

  access_log daemon:/ squid

To specify only the database password, which by default is empty, you must leave all the other parameters unspecified by using empty strings:

  access_log daemon://///password squid

=head3 logfile_daemon directive

This directive tells squid where the logfile daemon resides.

  logfile_daemon /path/to/squid/libexec/logfile-daemon_mysql.pl

The script must be copied to the location specified in the directive.

=head2 Database configuration

Let's call the database 'squid_log' and the log table 'access_log'. The username and password for the db connection will both be 'squid'.

=head3 Database

Create the database:

  CREATE DATABASE squid_log;

=head3 User

Create the user:

  GRANT INSERT,SELECT,CREATE ON squid_log.* TO 'squid'@'localhost' IDENTIFIED BY 'squid';
  FLUSH PRIVILEGES;

Note that only CREATE, INSERT and SELECT privileges are granted to the 'squid' user. This ensures that the logfile daemon script cannot modify or delete existing log entries.

=head3 Table

The daemon will attempt to create this table at startup if it does not already exist.

The table created should look like:

  CREATE TABLE access_log (
    id                   INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY,
    time_since_epoch     DECIMAL(15,3),
    time_response        INTEGER,
    ip_client            CHAR(15),
    ip_server            CHAR(15),
    http_status_code     VARCHAR(10),
    http_reply_size      INTEGER,
    http_method          VARCHAR(20),
    http_url             TEXT,
    http_username        VARCHAR(20),
    http_mime_type       VARCHAR(50),
    squid_hier_status    VARCHAR(20),
    squid_request_status VARCHAR(50)
  );

=head1 VERSION INFORMATION

This document refers to C<log_db_daemon> script version 0.5.

The script has been developed and tested in the following environment:

=over 4

=item squid-2.7, squid-3.2

=item mysql 5.0.26 and 5.1

=item perl 5.8.8

=item OpenSUSE 10.2

=back

=head1 DATA EXTRACTION

=head2 Sample queries

=over 4

=item Clients accessing the cache

  SELECT DISTINCT ip_client FROM access_log;

=item Number of requests per day

  SELECT
    DATE(FROM_UNIXTIME(time_since_epoch)) AS date_day,
    COUNT(*) AS num_of_requests
  FROM access_log
  GROUP BY 1
  ORDER BY 1;

=item Request status count

To obtain the raw count of each request status:

  SELECT squid_request_status, COUNT(*) AS n
  FROM access_log
  GROUP BY squid_request_status
  ORDER BY 2 DESC;

To calculate the percentage of each request status:

  SELECT
    squid_request_status,
    (COUNT(*)/(SELECT COUNT(*) FROM access_log)*100) AS percentage
  FROM access_log
  GROUP BY squid_request_status
  ORDER BY 2 DESC;

To distinguish only between HITs and MISSes:

  SELECT
    'hits',
    (SELECT COUNT(*)
    FROM access_log
    WHERE squid_request_status LIKE '%HIT%')
    /
    (SELECT COUNT(*) FROM access_log)*100
    AS percentage
  UNION
  SELECT
    'misses',
    (SELECT COUNT(*)
    FROM access_log
    WHERE squid_request_status LIKE '%MISS%')
    /
    (SELECT COUNT(*) FROM access_log)*100
    AS percentage;

=item Response time ranges

  SELECT
    '0..500',
    COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
  FROM access_log
  WHERE time_response >= 0 AND time_response < 500
  UNION
  SELECT
    '500..1000',
    COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
  FROM access_log
  WHERE time_response >= 500 AND time_response < 1000
  UNION
  SELECT
    '1000..2000',
    COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
  FROM access_log
  WHERE time_response >= 1000 AND time_response < 2000
  UNION
  SELECT
    '>= 2000',
    COUNT(*)/(SELECT COUNT(*) FROM access_log)*100 AS percentage
  FROM access_log
  WHERE time_response >= 2000;

=item Traffic by mime type

  SELECT
    http_mime_type,
    SUM(http_reply_size) AS total_bytes
  FROM access_log
  GROUP BY http_mime_type
  ORDER BY 2 DESC;

=item Traffic by client

  SELECT
    ip_client,
    SUM(http_reply_size) AS total_bytes
  FROM access_log
  GROUP BY 1
  ORDER BY 2 DESC;

=back

=head2 Speed issues

The MyISAM storage engine is known to be faster than InnoDB; although it does not support transactions or referential integrity, it might therefore be more appropriate in this scenario. You might want to append "ENGINE=MYISAM" at the end of the table creation code in the above SQL script.

Indexes should be created according to the queries that are run most frequently. The DDL script only creates an implicit index for the primary key column.
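
For example, if reports are mostly grouped by day and by client address (as in the sample queries above), indexes along the following lines could be added; the index names are arbitrary, the columns should be adjusted to the queries you actually run, and they must be created by a user holding the INDEX privilege (the restricted 'squid' account above does not):

  CREATE INDEX idx_time_since_epoch ON access_log (time_since_epoch);
  CREATE INDEX idx_ip_client        ON access_log (ip_client);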

=head1 TODO

=head2 Table cleanup

This script currently implements only the C<L> (i.e. "append a line to the log") command, so log lines are never purged from the table. This approach has an obvious scalability problem.

One solution would be to implement e.g. the "rotate log" command in a way that would calculate some summary values, put them in a "summary table" and then delete the lines used to calculate those values.

Similar cleanup code could be implemented in an external script and run periodically, independently of squid log commands.
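
A minimal sketch of such an external cleanup, assuming a hypothetical C<access_log_summary> table with C<date_day>, C<num_requests> and C<total_bytes> columns, a 30-day retention window, and a database user that also holds the DELETE privilege, might look like:

  INSERT INTO access_log_summary (date_day, num_requests, total_bytes)
  SELECT
    DATE(FROM_UNIXTIME(time_since_epoch)),
    COUNT(*),
    SUM(http_reply_size)
  FROM access_log
  WHERE time_since_epoch < UNIX_TIMESTAMP(NOW() - INTERVAL 30 DAY)
  GROUP BY 1;

  DELETE FROM access_log
  WHERE time_since_epoch < UNIX_TIMESTAMP(NOW() - INTERVAL 30 DAY);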

=head2 Testing

This script has only been tested in low-volume scenarios (single client, less than 10 req/s). Tests in high-volume environments could reveal performance bottlenecks and bugs.

=head1 AUTHOR

Marcello Romani, marcello.romani@libero.it
Amos Jeffries, amosjeffries@squid-cache.org

=head1 COPYRIGHT AND LICENSE

 * Copyright (C) 1996-2015 The Squid Software Foundation and contributors
 *
 * Squid software is distributed under GPLv2+ license and includes
 * contributions from numerous individuals and organizations.
 * Please see the COPYING and CONTRIBUTORS files for details.

Copyright (C) 2008 by Marcello Romani

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.8 or,
at your option, any later version of Perl 5 you may have available.

=cut