#! /usr/bin/env python """ Module for Cleaning up databases, logs etc. when a crawl is finished """ # Copyright 2005, 2006 EIAO Consoritum # This program is distributed under the terms of the GNU General # Public License. # # This file is part of the European Internet Accessibility Observatory # (EIAO) # # EIAO is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # EIAO is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with EIAO; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, # MA 02110-1301 USA __author__ = "Morten Goodwin Olsen" __maintainer__ = 'Nils Ulltveit-Moe' __version__ = "$Id$" from eiaotime import * import SOAPpy import os import time import urlparse import MySQLdb import sys from sc import SystemConfiguration sco=SystemConfiguration() PIDFILE="/var/run/dbcleaner.pid" class Log: """Ensure auto-flush after each write""" def __init__(self, f): self.f = f def write(self, s): self.f.write(s) self.f.flush() def flush(self): self.f.flush() log=Log(open(sco.loglocation+'/dbcleaner.log','a')) class DBCleaner: """DBCleaner class """ def __init__(self,port=8892,host=None): """initialise the class KeyWord arguments: port -- [Optional] Port to serve at. 8892 as default. host -- [Optional] Host to server at. Localhost as default. """ self.port = port self.host = host self.connection = MySQLdb.connect(host='localhost', user='root') self.cur = self.connection.cursor() def removeoldlogs(self): """ Removes logs older than 7 days """ print 'Starting to remove old mysql databases.' mysqllocation = '/data/mysql/' for file in os.listdir(mysqllocation): if file.startswith('rdf_'): #Makes sure only RDF databases are removed... stat = '/usr/bin/stat -c"%s" '+mysqllocation + file t = os.popen(stat %'%z').read().strip() dur = time.mktime(time.localtime())-time.mktime(time.strptime(t[:t.rfind('.')],'%Y-%m-%d %H:%M:%S')) if dur>60*60*24*7: self.cleandb(file) #print 'The following database is really old:',file,dur print 'Starting to remove old logs' for file in os.listdir(sco.loglocation): stat = '/usr/bin/stat -c"%s" '+sco.loglocation+file t = os.popen(stat %'%z').read().strip() dur = time.mktime(time.localtime())-time.mktime(time.strptime(t[:t.rfind('.')],'%Y-%m-%d %H:%M:%S')) if dur>60*60*24*7: if not os.system('rm '+sco.loglocation+file): print 'Successfully removed log '+sco.loglocation+file else: print 'Error removing log '+sco.loglocation+file print 'Starting to removing old config files.' for file in os.listdir(sco.configdirectory): stat = '/usr/bin/stat -c"%s" '+sco.configdirectory+file try: t = os.popen(stat %'%z').read().strip() except TypeError: continue #The config files may have very strange filenames and could because of this cause typeerrors dur = time.mktime(time.localtime())-time.mktime(time.strptime(t[:t.rfind('.')],'%Y-%m-%d %H:%M:%S')) if dur>60*60*24*7: if not os.system('rm '+sco.configdirectory+file): print 'Successfully removed log '+sco.configdirectory+file else: print 'Error removing log '+sco.configdirectory+file def cleandb(self,db): """ Cleaning up a db, including all logs Returns None """ #Just to make sure everything is stopped. The next command is strictly not needed it the ETL server is used correctly. However, it is no harm in calling it too much. print 'Starting to remove DB and logs for crawl %(db)s'%locals() #Dropping actual database try: self.cur.execute('DROP DATABASE %(db)s;'%locals()) except MySQLdb.OperationalError: print 'Error removing databse %(db)s. Please investigate.'%locals() else: print 'Successfully removed database %(db)s is dropped'%locals() loglocation = sco.loglocation #Removing logs (This is needed since the logs sometimes takes a lot of space for component,output in [('crawler','stdout.log'),('crawler','stderr.log'),('sampler','stdout.log'),('sampler','stderr.log'),('etl','stdout.log'),('etl','stderr.log')]: if not os.system('rm %(loglocation)s%(component)s_%(db)s_%(output)s'%locals()): print 'Successfully removed log %(loglocation)s%(component)s_%(db)s_%(output)s'%locals() else: print 'Error removing log %(loglocation)s%(component)s_%(db)s_%(output)s'%locals() print 'All logs for crawl towards %(db)s removed'%locals() def printHelp(): """For printing help This is needed as input to any user starting the DB Cleaner server. Exits all active modules. Returns None """ print 'Usage:\npython dbcleaner [OPTIONS]' print 'Options:' print '-p N - port to use. 8892 as default.' print '--help - This help file' sys.exit(1) def main(port): sc = SystemConfiguration() host,port = urlparse.urlsplit(sc.dbcleaner)[1].split(':') s = DBCleaner(host=host,port=int(port)) tries = 1 error = True print 'Starting DBCleaner, please wait...' while tries<5 and error: #If address is taken, try several times. The reason is most often that restart has happened... try: server = SOAPpy.SOAPServer((host, int(port))) except socket.error: time.sleep(random.randint(1,30)*tries) tries += 1 else: error = False if tries==5: #If max tries, this will throw an exeption server = SOAPpy.SOAPServer((host, int(port))) print 'DBCleaner started.' server.registerFunction(s.cleandb) server.registerFunction(s.removeoldlogs) try: server.serve_forever() except KeyboardInterrupt: print 'DB Cleaner stopped' def daemonize(): # Disconnect from controlling TTY as a service try: pid = os.fork() if pid > 0: sys.exit(0) except OSError, e: print >>sys.stderr, "fork #1 failed: %d (%s)" % (e.errno, e.strerror) sys.exit(1) # Do not prevent unmounting... os.chdir("/") os.setsid() os.umask(0) # do second fork try: pid = os.fork() if pid > 0: # exit from second parent, print eventual PID before #print "Daemon PID %d" % pid open(PIDFILE,'w').write("%d"%pid) sys.exit(0) except OSError, e: print >>sys.stderr, "fork #2 failed: %d (%s)" % (e.errno, e.strerror) sys.exit(1) # Redirect stdout/stderr to log file sys.stdout=sys.stderr=log # UID and GID Nobody os.setegid(99) os.seteuid(99) if __name__ == '__main__': if '--help' in sys.argv: printHelp() try: if '-p' in sys.argv: port = sys.argv[sys.argv.index('-p')+1] else: port = '8892' if '-t' in sys.argv: numthreads = sys.argv[sys.argv.index('-t')+1] else: numthreads = 1 if '-d' in sys.argv: daemonise=True else: daemonise=False except IndexError: printHelp() if daemonise: daemonize() main(port)