|
我用perl写了一个程序,主要是要把一个文本形式的日志文件导入到数据库中去。平时运行的很好,有一次日志文件特别大了(差不多300M吧),就出现问题了,程序运行很慢了,最近出错,反复检查才发现,这个程序用内存非常厉害,计算机上有2G的内存,很快就用的差不多了,而这时候,任务还没完成1/3呢。
附:
日志文件片断:
- onlinetime='2005-02-21 12:15:00' seq=1226107 isptype='myim' accountid='2902051' mmid=2902051
- onlinetime='2005-02-21 12:15:00' seq=1226108 isptype='myim' accountid='9711337' mmid=9711337
- offlinetime='2005-02-21 12:15:00' seq=1121372 mmid=7590051
- offlinetime='2005-02-21 12:15:00' seq=1226092 mmid=10500688
- onlinetime='2005-02-21 12:15:00' seq=1226109 isptype='myim' accountid='3658613' mmid=3658613
- onlinetime='2005-02-21 12:15:00' seq=1226110 isptype='myim' accountid='11360610' mmid=11360610
- offlinetime='2005-02-21 12:15:00' seq=1173632 mmid=2698105
- onlinetime='2005-02-21 12:15:00' seq=1226111 isptype='myim' accountid='11311292' mmid=11311292
- offlinetime='2005-02-21 12:15:00' seq=1226100 mmid=11310226
复制代码
我的程序(修改过多次了,日志文件不大时运行还可以。200M以上肯定有问题的)
[PHP]#!/usr/bin/perl
$bakFileName = "./UserTraceLog.bak";
@keyflags = ("seq", "isptype", "accountid", "mmid");
%data = ();
use IO::File;
use DBI;
my $dbh = DBI->connect("DBI:mysqlog", 'username', 'passwd')
or die "Can NOT connect to MySQL server!\n";
#open(bakFileHandle,$bakFileName);
#while($line = readline(bakFileHandle)) {
$filehandle = new IO::File;
$filehandle->open("<UserTraceLog.bak") or die "Could not open file";
while($line = $filehandle->getline()) {
$data{seq} = 0;
$data{mmid} = "";
$data{isptype} = "";
$data{accountid} => "";
$data{onlinetime} = "0000-00-00 00:00:00";
$data{offlinetime} = "0000-00-00 00:00:00";
$data{flag} = 0;
# 去掉时间字段中的引号
$line =~ s/'//g;
# 为后面的字段名前加一、_",方便分隔,又不会与日期时间中的空格混淆
foreach $keyflag (@keyflags) {
$line =~ s/\ $keyflag/_$keyflag/;
}
@fields = split (/_/, $line);
foreach $field (@fields) {
@keyvalues = split (/=/, $field);
$k = 0;
foreach $keyvalue (@keyvalues) {
$k++;
if ($k % 2 == 0) {
$value = $keyvalue;
} else {
$key = $keyvalue;
}
$data{$key} = $value;
undef $value, $key;
}
undef $keyvalue, @kevalues;
}
# 检测记录是否存在
$strSQLisExist = sprintf("SELECT * FROM UserTraceLog WHERE seq=%s", $data{seq});
my $sth = $dbh->prepare($strSQLisExist);
$sth->execute();
if ($sth->rows > 0) {
$isRecExist = 1;
} else {
$isRecExist = 0;
}
# 检查是否存在非法下线的记录
$strSQLerrorOffline = sprintf("SELECT * FROM UserTraceLog WHERE mmid='%s' AND isptype='%s' AND accountid='%s' AND offlinetime='0000-00 00:00:00' AND flag='0'", $data{mmid}, $data{isptype}, $data{accountid});
if ($data{offlinetime} eq "0000-00-00 00:00:00") {
# 添加新记录
$strSQL = sprintf("INSERT INTO UserTraceLog VALUES ('%d', '%s', '%s', '%s', '%s', '%s', '%d')", $data{seq}, $data{mmid}, $data{isptype}, $data{accountid}, $data{onlinetime}, $data{offlinetime}, $data{flag});
if ($isRecExist) {
printf("Insert record Error: seq=%s", $data{seq});
} else {
my $sth = $dbh->prepare($strSQLerrorOffline);
$sth->execute();
# 检查到非法下线的记录后对其做标记
if ($sth->rows > 0) {
$strErrorenFlag = sprintf("UPDATE UserTraceLog SET flag='1' WHERE flag='0' AND mmid='%s' AND accountid='%s' AND offlinetime='0000-00-00 00:00:00'", $data{mmid}, $data{accountid});
$dbh->do($strErrorenFlag);
}
$dbh->do($strSQL) or print "[INSERT]: ft! Errors can occur in here?\n";
}
} else {
# 下线记录
$strSQL = sprintf("UPDATE UserTraceLog SET offlinetime='%s' WHERE seq='%s' AND mmid='%s'", $data{offlinetime}, $data{seq}, $data{mmid});
if ($isRecExist) {
$dbh->do($strSQL) or print "[UPDATE]: ft! Errors can occur in here?\n";
} else {
printf("[UPDATE]: funny! No online record to offline. (seq=%s)\n", $data{seq});
$strSQL = sprintf("INSERT INTO UserTraceLog VALUES ('%d', '%s', '%s', '%s', '%s', '%s', '%d')", $data{seq}, $data{mmid}, $data{isptype}, $data{accountid}, $data{onlinetime}, $data{offlinetime}, $data{flag});
$dbh->do($strSQL) or print "[UPDATE]: ft! Errors can occur in here?\n";
}
}
delete ($data{seq});
delete ($data{mmid});
delete ($data{accountid});
delete ($data{onlinetime});
delete ($data{offlinetime});
delete ($data{flag});
undef $isRecExist;
undef $strSQL, $strSQLisExist, $strErrorenFlag;
}
[/PHP] |
|