Uložení logu Apache do mySQL
Petr Wiedemann, 8. březen 2009
Následující program čte řádky z logu Apache a ukládá je do mySQL databáze. Řádky logu očekává ve formátu
LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\""
Data jsou vkládána uloženou procedurou sp_insert_record_to_http_accesslog do tabulky http_accesslog. Protože program využívá uložené procedury, je nutné použít mySQL od verze 5.
Začněme vytvořením tabulky pro uložení výsledků a uložené procedury.
create table http_accesslog (
ip varchar(255) NOT NULL,
client varchar(255) NOT NULL,
user varchar(255) NOT NULL,
time datetime NOT NULL default '2007-01-01 00:00:00',
request text NOT NULL,
error_code varchar(255) NOT NULL,
data_size int(11) NOT NULL default '0',
refer text NOT NULL,
browser text NOT NULL,
KEY ip (ip),
KEY error_code (error_code),
KEY time (time)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
delimiter $$
drop procedure if exists sp_insert_record_to_http_accesslog$$
create procedure sp_insert_record_to_http_accesslog (in_ip varchar(255),
in_client varchar(255),
in_user varchar(255),
in_time datetime,
in_request text,
in_error_code varchar(255),
in_data_size int(11),
in_refer text,
in_browser text)
begin
if not exists(select * from http_accesslog where ip = in_ip and
time = in_time and request = in_request) then
insert into http_accesslog (ip, client, user, time, request, error_code,
data_size, refer, browser) values (in_ip, in_client, in_user, in_time,
in_request, in_error_code, in_data_size, in_refer, in_browser);
end if;
end$$
delimiter ;
Program je napsán v C a pro přeložení je nutné mít nainstalovanou knihovnu libmysqlclient. Dalším krokem je tedy zkopírování zdrojáku a uložení do souboru httplog.c.
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <mysql/mysql.h>
#include <unistd.h>
#define LOCK_FILE "/tmp/httplog.lock"
#define LOG_ACCESS "/var/log/apache/access_log"
#define LINE_SIZE 32768
#define SQL_CALL_INSERT_RECORD "call sp_insert_record_to_http_accesslog \
('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');"
typedef struct
{
char *ip;
char *client;
char *user;
char *time;
char *request;
char *error_code;
char *data_size;
char *refer;
char *browser;
} st_accesslog_line;
st_accesslog_line accesslog_line;
MYSQL *myData = NULL;
static const char *months[] = {
"Jan", "-01-",
"Feb", "-02-",
"Mar", "-03-",
"Apr", "-04-",
"May", "-05-",
"Jun", "-06-",
"Jul", "-07-",
"Aug", "-08-",
"Sep", "-09-",
"Oct", "-10-",
"Nov", "-11-",
"Dec", "-12-",
NULL, NULL
};
int connect2db()
{
int ret = 0;
MYSQL *myRealConnectResult;
myData = mysql_init((MYSQL*) 0);
assert(myData);
myRealConnectResult = mysql_real_connect(myData,
"localhost", "user", "password",
NULL, MYSQL_PORT, NULL, 0 );
if (!myRealConnectResult)
{
printf("%s\n", mysql_error(myData));
ret = 1;
}
if (mysql_select_db(myData, "pristupy"))
{
printf("%s\n", mysql_error(myData));
mysql_close(myData);
myData = NULL;
ret = 1;
}
return ret;
}
void addescapes(char *line)
{
int srcpos = 0, dstpos = 0;
int size = strlen(line);
if (!size)
return;
char *buffer = malloc(size*2);
assert(buffer);
while (line[srcpos])
{
if ((line[srcpos] == '\\') || (line[srcpos] == '\'') || (line[srcpos] == '"'))
buffer[dstpos++] = '\\';
buffer[dstpos++] = line[srcpos++];
}
buffer[dstpos++] = 0;
if (dstpos < LINE_SIZE)
strcpy(line, buffer);
free(buffer);
}
// z textu ve formatu [05/Aug/2007:10:21:12 +0200]
// vrati 2007-08-05 10:21:12
void extractdate(char *src, char *dst)
{
int i, dstpos = 0;
char month[4];
// rok
for (i = 8; i < 12; i++)
dst[dstpos++] = src[i];
dst[dstpos++] = 0;
// mesic
for (i = 0; i < 3; i++)
month[i] = src[i+4];
month[i] = 0;
i = 0;
while (months[i] != NULL)
{
if (!strcmp(months[i], month))
{
strcat(dst, months[i+1]);
break;
}
else i += 2;
}
// den
dstpos = strlen(dst);
for (i = 1; i < 3; i++)
dst[dstpos++] = src[i];
dst[dstpos++] = ' ';
// cas
for (i = 13; i < 21; i++)
dst[dstpos++] = src[i];
dst[dstpos++] = 0;
}
// rozseka radek na casti, oddelene mezerami.
// texty v uvozovkach jsou brany jako jedna cast.
void parseline_accesslog(char *line)
{
int stop = 0, linepos = 0, part = 0, bufferpos = 0;
char inquotation = 0, firstquote = 0, inparent = 0;
char *buffer = malloc(LINE_SIZE);
assert(buffer);
while (!stop)
{
if (line[linepos] == 0)
stop = 1;
else
{
// kontrola uvozovek
if ((line[linepos] == '"') && line[linepos-1] != '\\')
{
if (inquotation == 0)
firstquote = 1;
inquotation = ~inquotation & 1;
}
// zkopirovani znaku v textu mezi uvozovkami
if (inquotation)
{
// vyhozeni prvnich uvozovek
if (firstquote)
firstquote = 0;
else
buffer[bufferpos++] = line[linepos];
}
else
{
if (((line[linepos] == ' ') || (line[linepos+1] == 0)) && !inparent)
{
buffer[bufferpos++] = 0;
// ulozeni hodnot z radku logu
switch (part)
{
case 0:
strcpy(accesslog_line.ip, buffer);
break;
case 1:
strcpy(accesslog_line.client, buffer);
break;
case 2:
strcpy(accesslog_line.user, buffer);
break;
case 3:
extractdate(buffer, accesslog_line.time);
break;
case 4:
strcpy(accesslog_line.request, buffer);
break;
case 5:
strcpy(accesslog_line.error_code, buffer);
break;
case 6:
strcpy(accesslog_line.data_size, buffer);
break;
case 7:
strcpy(accesslog_line.refer, buffer);
break;
case 8:
strcpy(accesslog_line.browser, buffer);
break;
}
bufferpos = 0;
part++;
}
else
{
if (line[linepos] != '"')
{
if ((line[linepos] == '[') && !inparent)
inparent = 1;
buffer[bufferpos++] = line[linepos];
if ((line[linepos] == ']') && inparent)
inparent = 0;
}
}
}
}
linepos++;
}
free(buffer);
}
void process_accesslog(FILE *f)
{
char *line = malloc(LINE_SIZE);
assert(line);
while(fgets(line, LINE_SIZE, f) != NULL)
{
// pro jistotu
line[LINE_SIZE-1] = 0;
int linelen = strlen(line);
if (linelen > 4096)
continue;
parseline_accesslog(line);
addescapes(accesslog_line.ip);
addescapes(accesslog_line.client);
addescapes(accesslog_line.user);
addescapes(accesslog_line.time);
addescapes(accesslog_line.request);
addescapes(accesslog_line.error_code);
addescapes(accesslog_line.data_size);
addescapes(accesslog_line.refer);
addescapes(accesslog_line.browser);
sprintf(line, SQL_CALL_INSERT_RECORD,
accesslog_line.ip,
accesslog_line.client,
accesslog_line.user,
accesslog_line.time,
accesslog_line.request,
accesslog_line.error_code,
accesslog_line.data_size,
accesslog_line.refer,
accesslog_line.browser);
if (mysql_query(myData, line))
printf("%s\n", mysql_error(myData));
}
free(line);
}
int main(int argc, char *argv[])
{
int ret = EXIT_SUCCESS;
FILE *f_access, *f_lock;
// kontrola, jestli jiz neni program spusten
f_lock = fopen(LOCK_FILE, "r");
if (f_lock)
{
// soubor se podarilo otevrit
// program se ukonci
fclose(f_lock);
printf("httplog: lockfile already exists\n");
return EXIT_FAILURE;
}
else
{
// lockfile nebyl otevren. zkusime ho vytvorit
f_lock = fopen(LOCK_FILE, "w");
if (f_lock)
{
// lockfile vytvoren
fclose(f_lock);
}
else
{
// chyba pri vytvareni lockfile
printf("httplog: can't create lockfile\n");
return EXIT_FAILURE;
}
}
// spojeni na mySQL databazi
if (connect2db())
return(EXIT_FAILURE);
// alokace promenych pro ulozeni informaci z logu
char *accessline_ip = malloc(LINE_SIZE);
assert(accessline_ip);
char *accessline_client = malloc(LINE_SIZE);
assert(accessline_client);
char *accessline_user = malloc(LINE_SIZE);
assert(accessline_user);
char *accessline_time = malloc(LINE_SIZE);
assert(accessline_time);
char *accessline_request = malloc(LINE_SIZE);
assert(accessline_request);
char *accessline_error_code = malloc(LINE_SIZE);
assert(accessline_error_code);
char *accessline_data_size = malloc(LINE_SIZE);
assert(accessline_data_size);
char *accessline_refer = malloc(LINE_SIZE);
assert(accessline_refer);
char *accessline_browser = malloc(LINE_SIZE);
assert(accessline_browser);
// prirazeni do struktury accesslog_line
accesslog_line.ip = accessline_ip;
accesslog_line.client = accessline_client;
accesslog_line.user = accessline_user;
accesslog_line.time = accessline_time;
accesslog_line.request = accessline_request;
accesslog_line.error_code = accessline_error_code;
accesslog_line.data_size = accessline_data_size;
accesslog_line.refer = accessline_refer;
accesslog_line.browser = accessline_browser;
f_access = fopen(LOG_ACCESS, "r");
if (f_access)
{
process_accesslog(f_access);
fclose(f_access);
}
else
{
printf("can't open file %s\n", LOG_ACCESS);
ret = EXIT_FAILURE;
}
if (myData)
mysql_close(myData);
free(accessline_ip);
free(accessline_client);
free(accessline_user);
free(accessline_time);
free(accessline_request);
free(accessline_error_code);
free(accessline_data_size);
free(accessline_refer);
free(accessline_browser);
// smazani lockfile
unlink(LOCK_FILE);
return ret;
}
V konzoli spustit gcc -Wall -lmysqlclient -o httplog httplog.c
a tím vytvořit spustitelný soubor. Ten potom stačí periodicky spouštět, například každou hodinu.