Uložení logu Apache do mySQL

Petr Wiedemann, 8. březen 2009

Následující program čte řádky z logu Apache a ukládá je do mySQL databáze. Řádky logu očekává ve formátu LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\""

Data jsou vkládána uloženou procedurou sp_insert_record_to_http_accesslog do tabulky http_accesslog. Protože program využívá uložené procedury, je nutné použít mySQL od verze 5.

Začněme vytvořením tabulky pro uložení výsledků a uložené procedury.

create table http_accesslog (
  ip varchar(255) NOT NULL,
  client varchar(255) NOT NULL,
  user varchar(255) NOT NULL,
  time datetime NOT NULL default '2007-01-01 00:00:00',
  request text NOT NULL,
  error_code varchar(255) NOT NULL,
  data_size int(11) NOT NULL default '0',
  refer text NOT NULL,
  browser text NOT NULL,
  KEY ip (ip),
  KEY error_code (error_code),
  KEY time (time)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

delimiter $$
drop procedure if exists sp_insert_record_to_http_accesslog$$
create procedure sp_insert_record_to_http_accesslog (in_ip varchar(255),
  in_client varchar(255),
  in_user varchar(255),
  in_time datetime,
  in_request text,
  in_error_code varchar(255),
  in_data_size int(11),
  in_refer text,
  in_browser text)
begin
  if not exists(select * from http_accesslog where ip = in_ip and
  time = in_time and request = in_request) then
  insert into http_accesslog (ip, client, user, time, request, error_code,
  data_size, refer, browser) values (in_ip, in_client, in_user, in_time,
  in_request, in_error_code, in_data_size, in_refer, in_browser);
end if;
end$$
delimiter ;

Program je napsán v C a pro přeložení je nutné mít nainstalovanou knihovnu libmysqlclient. Dalším krokem je tedy zkopírování zdrojáku a uložení do souboru httplog.c.

#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <mysql/mysql.h>
#include <unistd.h>

#define LOCK_FILE "/tmp/httplog.lock"
#define LOG_ACCESS "/var/log/apache/access_log"
#define LINE_SIZE  32768

#define SQL_CALL_INSERT_RECORD "call sp_insert_record_to_http_accesslog \
  ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s');"

typedef struct
{
  char *ip;
  char *client;
  char *user;
  char *time;
  char *request;
  char *error_code;
  char *data_size;
  char *refer;
  char *browser;
} st_accesslog_line;

st_accesslog_line accesslog_line;
MYSQL *myData = NULL;
static const char *months[] = {
  "Jan", "-01-",
  "Feb", "-02-",
  "Mar", "-03-",
  "Apr", "-04-",
  "May", "-05-",
  "Jun", "-06-",
  "Jul", "-07-",
  "Aug", "-08-",
  "Sep", "-09-",
  "Oct", "-10-",
  "Nov", "-11-",
  "Dec", "-12-",
  NULL, NULL
};

int connect2db()
{
  int ret = 0;
  MYSQL *myRealConnectResult;
  myData = mysql_init((MYSQL*) 0);
  assert(myData);

  myRealConnectResult =  mysql_real_connect(myData,
                                            "localhost", "user", "password",
                                            NULL, MYSQL_PORT, NULL, 0 );
  if (!myRealConnectResult)
  {
    printf("%s\n", mysql_error(myData));
    ret = 1;
  }

  if (mysql_select_db(myData, "pristupy"))
  {
    printf("%s\n", mysql_error(myData));
    mysql_close(myData);
    myData = NULL;
    ret = 1;
  }

  return ret;
}

void addescapes(char *line)
{
  int srcpos = 0, dstpos = 0;
  int size = strlen(line);
  if (!size)
    return;

  char *buffer = malloc(size*2);
  assert(buffer);

  while (line[srcpos])
  {
    if ((line[srcpos] == '\\') || (line[srcpos] == '\'') || (line[srcpos] == '"'))
      buffer[dstpos++] = '\\';
    buffer[dstpos++] = line[srcpos++];
  }
  buffer[dstpos++] = 0;

  if (dstpos < LINE_SIZE)
    strcpy(line, buffer);

  free(buffer);
}

// z textu ve formatu [05/Aug/2007:10:21:12 +0200]
// vrati 2007-08-05 10:21:12
void extractdate(char *src, char *dst)
{
  int i, dstpos = 0;
  char month[4];

  // rok
  for (i = 8; i < 12; i++)
    dst[dstpos++] = src[i];
  dst[dstpos++] = 0;

  // mesic
  for (i = 0; i < 3; i++)
    month[i] = src[i+4];
  month[i] = 0;

  i = 0;
  while (months[i] != NULL)
  {
    if (!strcmp(months[i], month))
    {
      strcat(dst, months[i+1]);
      break;
    }
    else i += 2;
  }

  // den
  dstpos = strlen(dst);
  for (i = 1; i < 3; i++)
    dst[dstpos++] = src[i];
  dst[dstpos++] = ' ';

  // cas
  for (i = 13; i < 21; i++)
    dst[dstpos++] = src[i];
  dst[dstpos++] = 0;
}

// rozseka radek na casti, oddelene mezerami.
// texty v uvozovkach jsou brany jako jedna cast.
void parseline_accesslog(char *line)
{
  int stop = 0, linepos = 0, part = 0, bufferpos = 0;
  char inquotation = 0, firstquote = 0, inparent = 0;
  char *buffer = malloc(LINE_SIZE);
  assert(buffer);

  while (!stop)
  {
    if (line[linepos] == 0)
      stop = 1;
    else
    {
      // kontrola uvozovek
      if ((line[linepos] == '"') && line[linepos-1] != '\\')
      {
        if (inquotation == 0)
          firstquote = 1;
        inquotation = ~inquotation & 1;
      }

      // zkopirovani znaku v textu mezi uvozovkami
      if (inquotation)
      {
        // vyhozeni prvnich uvozovek
        if (firstquote)
          firstquote = 0;
        else
          buffer[bufferpos++] = line[linepos];
      }
      else
      {
        if (((line[linepos] == ' ') || (line[linepos+1] == 0)) && !inparent)
        {
          buffer[bufferpos++] = 0;

          // ulozeni hodnot z radku logu
          switch (part)
          {
            case 0:
              strcpy(accesslog_line.ip, buffer);
              break;
            case 1:
              strcpy(accesslog_line.client, buffer);
              break;
            case 2:
              strcpy(accesslog_line.user, buffer);
              break;
            case 3:
              extractdate(buffer, accesslog_line.time);
              break;
            case 4:
              strcpy(accesslog_line.request, buffer);
              break;
            case 5:
              strcpy(accesslog_line.error_code, buffer);
              break;
            case 6:
              strcpy(accesslog_line.data_size, buffer);
              break;
            case 7:
              strcpy(accesslog_line.refer, buffer);
              break;
            case 8:
              strcpy(accesslog_line.browser, buffer);
              break;
          }
          bufferpos = 0;
          part++;
        }
        else
        {
          if (line[linepos] != '"')
          {
            if ((line[linepos] == '[') && !inparent)
              inparent = 1;
            buffer[bufferpos++] = line[linepos];
            if ((line[linepos] == ']') && inparent)
              inparent = 0;
          }
        }
      }
    }
    linepos++;
  }

  free(buffer);
}

void process_accesslog(FILE *f)
{
  char *line = malloc(LINE_SIZE);
  assert(line);

  while(fgets(line, LINE_SIZE, f) != NULL)
  {
    // pro jistotu
    line[LINE_SIZE-1] = 0;

    int linelen = strlen(line);
    if (linelen > 4096)
      continue;

    parseline_accesslog(line);

    addescapes(accesslog_line.ip);
    addescapes(accesslog_line.client);
    addescapes(accesslog_line.user);
    addescapes(accesslog_line.time);
    addescapes(accesslog_line.request);
    addescapes(accesslog_line.error_code);
    addescapes(accesslog_line.data_size);
    addescapes(accesslog_line.refer);
    addescapes(accesslog_line.browser);

    sprintf(line, SQL_CALL_INSERT_RECORD,
            accesslog_line.ip,
            accesslog_line.client,
            accesslog_line.user,
            accesslog_line.time,
            accesslog_line.request,
            accesslog_line.error_code,
            accesslog_line.data_size,
            accesslog_line.refer,
            accesslog_line.browser);

    if (mysql_query(myData, line))
      printf("%s\n", mysql_error(myData));
  }

  free(line);
}

int main(int argc, char *argv[])
{
  int ret = EXIT_SUCCESS;
  FILE *f_access, *f_lock;

  // kontrola, jestli jiz neni program spusten
  f_lock = fopen(LOCK_FILE, "r");
  if (f_lock)
  {
    // soubor se podarilo otevrit
    // program se ukonci
    fclose(f_lock);
    printf("httplog: lockfile already exists\n");
    return EXIT_FAILURE;
  }
  else
  {
    // lockfile nebyl otevren. zkusime ho vytvorit
    f_lock = fopen(LOCK_FILE, "w");
    if (f_lock)
    {
      // lockfile vytvoren
      fclose(f_lock);
    }
    else
    {
      // chyba pri vytvareni lockfile
      printf("httplog: can't create lockfile\n");
      return EXIT_FAILURE;
    }
  }

  // spojeni na mySQL databazi
  if (connect2db())
    return(EXIT_FAILURE);

  // alokace promenych pro ulozeni informaci z logu
  char *accessline_ip = malloc(LINE_SIZE);
  assert(accessline_ip);
  char *accessline_client = malloc(LINE_SIZE);
  assert(accessline_client);
  char *accessline_user = malloc(LINE_SIZE);
  assert(accessline_user);
  char *accessline_time = malloc(LINE_SIZE);
  assert(accessline_time);
  char *accessline_request = malloc(LINE_SIZE);
  assert(accessline_request);
  char *accessline_error_code = malloc(LINE_SIZE);
  assert(accessline_error_code);
  char *accessline_data_size = malloc(LINE_SIZE);
  assert(accessline_data_size);
  char *accessline_refer = malloc(LINE_SIZE);
  assert(accessline_refer);
  char *accessline_browser = malloc(LINE_SIZE);
  assert(accessline_browser);

  // prirazeni do struktury accesslog_line
  accesslog_line.ip = accessline_ip;
  accesslog_line.client = accessline_client;
  accesslog_line.user = accessline_user;
  accesslog_line.time = accessline_time;
  accesslog_line.request = accessline_request;
  accesslog_line.error_code = accessline_error_code;
  accesslog_line.data_size = accessline_data_size;
  accesslog_line.refer = accessline_refer;
  accesslog_line.browser = accessline_browser;

  f_access = fopen(LOG_ACCESS, "r");
  if (f_access)
  {
    process_accesslog(f_access);
    fclose(f_access);
  }
  else
  {
    printf("can't open file %s\n", LOG_ACCESS);
    ret = EXIT_FAILURE;
  }

  if (myData)
    mysql_close(myData);

  free(accessline_ip);
  free(accessline_client);
  free(accessline_user);
  free(accessline_time);
  free(accessline_request);
  free(accessline_error_code);
  free(accessline_data_size);
  free(accessline_refer);
  free(accessline_browser);

  // smazani lockfile
  unlink(LOCK_FILE);

  return ret;
}

V konzoli spustit gcc -Wall -lmysqlclient -o httplog httplog.c a tím vytvořit spustitelný soubor. Ten potom stačí periodicky spouštět, například každou hodinu.