# -*- coding: UTF-8 -*- # Copyright 2005, 2006 EIAO Consoritum # This program is distributed under the terms of the GNU General # Public License. # # This file is part of the European Internet Accessibility Observatory # (EIAO) # # EIAO is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # EIAO is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with EIAO; if not, write to the Free Software # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, # MA 02110-1301 USA __author__ = "Morten Goodwin Olsen, Jens Frøkjær" __maintainer__ = "Nils Ulltveit-Moe" __version__ = "$Id: DWReader2.py 2662 2007-18-01 09:21:19Z goodwin $" import psycopg import sys import pickle import urllib import threading from threading import Semaphore from DWError import * import sc2 """Module for reading from the Data Warehouse. This module contains a wrapper of the DW to be part of the User Interface Model""" def region_compare(x, y): return len(x)-len(y) def nfloat(input): if input is None: return input else: return float(input) def nint(input): if input is None: return input else: return int(input) def cap(input): ret = '' for n in input.split('-'): for m in n.split(' '): if m.startswith('ö'): m = m.replace('ö','Ö') if not m.startswith('Ü'): m = m.replace('Ü','ü') ret += m.capitalize() + ' ' ret = ret.strip(' ') ret +='-' ret = ret.strip('-') return ret def localinfo(input): """Returns 'presentable' locale variables which can be written to a log Keywords arguments: input -- Dictionary of variable, typically locals() returns List of tuples to be presented. """ return [l for l in input.items() if 'self' not in l[0] and not l[0].startswith('_')] class CacheWriter(threading.Thread): """Thread for writing cache""" def __init__(self): threading.Thread.__init__(self) self.cachetowrite = {} self.cachesemaphore=Semaphore(1) self.shouldrun = True def addcache(self,nameofdumpfile,dictionary): self.cachesemaphore.acquire() self.cachetowrite[nameofdumpfile] = dictionary self.cachesemaphore.release() def run(self): import time import random while(self.shouldrun): time.sleep(random.uniform(0,10)) self.writecache() def stop(self): self.shouldrun = False def writecache(self): self.cachesemaphore.acquire() for nameofdumpfile,dictionary in self.cachetowrite.items(): error = False try: f = open('/tmp/'+nameofdumpfile+'.dmp','w') pickle.dump(dictionary,f) f.close() except: error = True if not error: self.cachetowrite.pop(nameofdumpfile) self.cachesemaphore.release() class DWReader2: """ Class for reading the second version of the Data Warehouse data. This class is basically a wrapper for the CWAM functions, in order to have a model for the Graphical User Interface """ def __init__(self, username=None, password=None,database=None, searchpath=None,cached=False,host=None): """Creates an connection to the data warehouse and generated statical data available as lists / dictionaries as they are needed from the controller. Keyword arguments: username -- [Optional] Data Warehouse username. Retrieved from EIAOConfiguration if left empty. password -- [Optional] Data Warehouse password. Retrieved from EIAOConfiguration if left empty. database -- [Optional] Data Warehouse database. Retrieved from EIAOConfiguration if left empty. searchpath -- [Optional] To set a DW searchpath other than default. cached -- [Optional] If the results should be cached. This is turned off by default. Note that by turning this off, latest results cannot be guarantied. However, the results should be retrieved much quicker. It does not make sence to let this variable be True once the materialised views are implemented. host -- [Optional] Host to connect to. Localhost as default. """ #Import eiaoconfiguration #import pdb #pdb.set_trace() #sys.path.append('/etc/') #self.sc = sc.SystemConfiguration() self.sc = sc2.SystemConfiguration() #import eiaoconfiguration #dwconf = eiaoconfiguration.dwconf() #self.t = dwconf.writeTimeLog #self.t('__init__') #if not username: # username=dwconf.username #if not password: # password=dwconf.password #if not database: # database=dwconf.database #if not host: # host=dwconf.host username = self.sc.dwuser password = self.sc.dwpassword database = self.sc.dwdatabase host = self.sc.dwhost try: if not host: self.con = psycopg.connect(user=username, database=database, password=password) else: self.con = psycopg.connect(user=username, database=database, password=password,host=host) except psycopg.OperationalError: #Connection cannot be made. Raises an DWReader2 specific exception. raise DWConnectionError(username,database,host) self.con.set_isolation_level(0); cur = self.con.cursor() if searchpath: cur.execute('set search_path to %(searchpath)s', locals()) if cached: self.cacheddomains = self.__filecache('/tmp/cacheddomains.dmp') self.cachednuts = self.__filecache('/tmp/cachednuts.dmp') self.cacheddomain = self.__filecache('/tmp/cachedomain.dmp') self.cachedcategory = self.__filecache('/tmp/cachedcategory.dmp') self.cachedaccessibilitygroups = self.__filecache('/tmp/cachedaccessibilitygroups.dmp') self.cacheuwemids = self.__filecache('/tmp/cacheuwemids.dmp') self.cachesectors = self.__filecache('/tmp/cachesectors.dmp') self.cachecountries = self.__filecache('/tmp/cachecountries.dmp') self.cachedeu = self.__filecache('/tmp/cachedeu.dmp') self.testrunids = self.__filecache('/tmp/testrunids.dmp') self.cachemonthandyear = self.__filecache('/tmp/cachedmonthandyear.dmp') self.stddev = self.__filecache('/tmp/cachedstddev.dmp') self.stddevlang = self.__filecache('/tmp/cachedstdlang.dmp') self.pagelist = self.__filecache('/tmp/cachedpagelist.dmp') self.mainsiteinfo = self.__filecache('/tmp/cachedmainsiteinfo.dmp') self.cachedlanguages = self.__filecache('/tmp/cachedlanguages.dmp') self.indicatorsfortest = self.__filecache('/tmp/cachedindicatorsfortest.dmp') self.indicatorsforcp = self.__filecache('/tmp/cachedindicatorsforcp.dmp') self.cachecps = self.__filecache('/tmp/cachecpids.dmp') self.cachedmimetypes = self.__filecache('/tmp/cachedmimetypes.dmp') self.sitecontent = self.__filecache('/tmp/cachedsitecontent.dmp') self.languagestatistics = self.__filecache('/tmp/cachedlanguagestatisitics.dmp') self.cachednace = self.__filecache('/tmp/cachednace.dmp') self.sitestddev = self.__filecache('/tmp/cachedstddev.dmp') self.sitelist = self.__filecache('/tmp/cachedsitelist.dmp') self.cachednuts = self.__filecache('/tmp/cachednuts.dmp') self.cachedparentnuts = self.__filecache('/tmp/cachedparentnuts.dmp') self.cachedpagescore = self.__filecache('/tmp/cachedpagescore.dmp') self.cachedscenarioids = self.__filecache('/tmp/cachedscenariourl.dmp') self.cachedinclusiontypes = self.__filecache('/tmp/cachedinclusiontypes.dmp') self.cachedtestrunids = self.__filecache('/tmp/cachedtestrunids.dmp') self.cachednumbersites = self.__filecache('/tmp/cachednumbersites.dmp') else: self.cachedscenarioids = None self.cachedpagescore = None self.cacheddomains = None self.cachednuts = None self.cacheddomain = None self.cachedcategory = None self.cachedaccessibilitygroups= None self.cacheuwemids = None self.cachesectors = None self.cachecountries = None self.cachedeu = None self.testrunids = None self.cachemonthandyear = None self.stddevlang = None self.pagelist = None self.mainsiteinfo = None self.cachedlanguages = None self.stddev = None self.indicatorsfortest = None self.indicatorsforcp = None self.cachecps = None self.languagestatistics = None self.cachedmimetypes = None self.sitecontent = None self.languagestatistics = None self.cachednace = None self.sitestddev = None self.sitelist = None self.cachednuts = None self.cachedparentnuts = None self.cachedscenariodids = None self.cachedinclusiontypes = None self.cachedtestrunids = None self.cachednumbersites = None self.cw = CacheWriter() self.cw.start() self.supercachecache = {} self.allmimetypes = self.mimetypes() self.allinclusions = self.inclusions() self.allcountries = self.countries() self.alldomains = self.domains() self.alluwemids = self.uwemIds() self.testrunids = self.currentTestRun() self.latesttestrun,self.secondlatesttestrun = self.testrunids[0],self.testrunids[1] self.monthandyear= self.currentMonth() self.latestmonthandyear = self.currentMonth(self.secondlatesttestrun) self.currentyear,self.currentmonth = self.monthandyear[0],self.monthandyear[1] self.secondlatestyear,self.secondlatestmonth = self.latestmonthandyear[0],self.latestmonthandyear[1] self.alllanguages = self.languages() self.allnace = self.nace() self.allnuts = self.nuts() if not self.currentyear: raise DWEmptyInformationError('year',[self.currentyear]) if not self.currentmonth: raise DWEmptyInformationError('month',[self.currentmonth]) if not self.allcountries: raise DWEmptyInformationError('Countries',self.allcountries) if not self.alldomains: raise DWEmptyInformationError('Domains',self.alldomains) if not self.alluwemids: raise DWEmptyInformationError('UWEM IDs',self.alluwemids) if not self.allnuts: raise DWEmptyInformationError('NUTS',self.allnuts) if not self.allnace: raise DWEmptyInformationError('NACE',self.allnace) if not self.allmimetypes: raise DWEmptyInformationError('Mimetypes',self.allmimetypes) if not self.allinclusions: raise DWEmptyInformationError('Inclusions',self.allinclusions) #self.t('__init__',0) def stop(self): self.cw.stop() def supercache(self,sql,input): sql = sql % input if sql in self.supercachecache.keys(): return self.supercachecache[sql] else: if not self.supercachecache: try: f = open('/tmp/supercachecache.dmp','r') self.supercachecache = pickle.load(f) f.close() except: pass cur = self.con.cursor() cur.execute(sql) result = cur.fetchall() self.supercachecache[sql] = result f = open('/tmp/supercachecache.dmp','w') pickle.dump(self.supercachecache,f) f.close() return self.supercachecache[sql] def __filecache(self,nameofdumpfile): """Gets file from written cache Keyword arguments: nameofdumpfile -- The filename of the dump file Returns cached dictionary or {} of not exists """ #if 'monthandyear' in nameofdumpfile: # import pdb # pdb.set_trace() try: return pickle.load(open(nameofdumpfile)) except: return {} def __cached(self, key,dictionary,result=None,nameofdumpfile=None,read=True): """Internal function for retrieved cached result from a given dictionary if such a result exists. A FIFO cache of the most common lookups makes sence because holding the data in Memory with Python/Zope is a lot more efficient than retrieving the the DW. This function _may_ be redundant when the materialised views are implemented. However, without the materialised views it makes the lookups a lot more efficient. Keyword arguments: key -- Key to perform lookup on dictionary -- Dictionary to lookup results result -- Result to be set if any nameofdumpfile -- The name of the dump file Returns looked up results or False """ if not nameofdumpfile: raise DWNoCacheProvided(key,dictionary,result,nameofdumpfile) #This cache is not needed if the materialized views are fast enough. However, if there exists performance issues with these, this internal cache will remove most of these issues. if dictionary is None: return False if not dictionary: if nameofdumpfile: try: f = open('/tmp/'+nameofdumpfile+'.dmp','r') dictionary = pickle.load(f) exec('self.'+nameofdumpfile + ' = pickle.load(f)') f.close() except: pass #if result is not None: if read==False: dictionary[key] = result if nameofdumpfile: self.cw.addcache(nameofdumpfile,dictionary) #f = open('/tmp/'+nameofdumpfile+'.dmp','w') #pickle.dump(dictionary,f) #f.close() return result #if len(dictionary)>1000000: # #To prevent too large dictionaries and memory overflow. # dictionary.popitem() return dictionary.get(key,False) def nuts(self): """Retrieves all nuts codes Returns all nuts codes as a list of tuples [(nuts1code,nuts2code,nuts3code,nuts1name,nuts2name,nuts3name),...] Examples: >>> nace = dwr.nuts() >>> len(nuts)>0 True >>> type(nuts)==type([]) True >>> dwr.allnuts==nuts True >>> type(nuts[0][0])==type('') True >>> type(nuts[0][1])==type('') True """ #self.t('nuts') result = self.__cached((None),self.cachednuts,nameofdumpfile='cachednuts') if result is not False: #self.t('nuts',0) return result cur = self.con.cursor() cur.execute('select distinct nuts1code,nuts2code,nuts3code,nuts1name,nuts2name,nuts3name from datastaging.nutslevel2,datastaging.nutslevel1,datastaging.nutslevel3 where datastaging.nutslevel2.nutslevel1id= datastaging.nutslevel1.nutslevel1id and datastaging.nutslevel3.nutslevel2id=datastaging.nutslevel2.nutslevel2id;') result = [(d[0],d[1],d[2],cap(d[3]),cap(d[4]),cap(d[5])) for d in cur.fetchall()] self.__cached((None),self.cachednuts,result,nameofdumpfile='cachednuts',read=False) #self.t('nuts',0,1) return result def nace(self): """Retrieves all nace codes Returns all nace codes as a list of tuples [(nacecode,nacedescription),(nacecode,nacedescription),...] Examples: >>> nace = dwr.nace() >>> len(nace)>0 True >>> type(nace)==type([]) True >>> dwr.allnace==nace True >>> type(nace[0][0])==type('') True >>> type(nace[0][1])==type('') True """ #self.t('nace') result = self.__cached((None),self.cachednace,nameofdumpfile='cachednace') if result is not False: #self.t('nace',0) return result cur = self.con.cursor() cur.execute('select distinct nacecode,nacedescription from datastaging.nace;') result = [(d[0],d[1]) for d in cur.fetchall()] self.__cached((None),self.cachednace,result,nameofdumpfile='cachednace',read=False) #self.t('nace',0,1) return result def getURLsFromScenarioID(self,scenarioid,testrun=None): """Gets the URLs from a given scenarioid Keyword agruments: scenariodif -- ID of the Scenario testrun -- Testrunid. Current testrun if left empty """ if not scenarioid: return [] linfo = localinfo(locals()) #self.t('getURLsFromScenarioID') if not testrun: testrun = self.latesttestrun result = self.__cached((scenarioid,testrun),self.cachedscenarioids,nameofdumpfile='cachedscenariourl') if result is not False: #self.t('getURLsFromScenarioID',0,0,linfo) return result cur = self.con.cursor() cur.execute('select * from DURLListForScenario(%(scenarioid)s, %(testrun)s)%(testrun)s);',locals()) result = [urllib.unquote(d[0]) for d in cur.fetchall()] self.__cached((scenarioid,testrun),self.cachedscenarioids,result,nameofdumpfile='cachedscenariourl',read=False) #self.t('getURLsFromScenarioID',0,1,linfo) return result def getTestRun(self,testrun=None): """Gets the latest testrun if testrun is empty Keyword arguments: testrun -- [Optional] Testrun to us. Uses latest if left empty Returns testrun from input or latest testrun """ if testrun: return testrun else: return self.latesttestrun def getSiteStddevByGroup(self,nace,nuts,testrun=None): """Retrieves the standard deviation from a given group Keyword arguments: nace -- Nace category nuts -- Nuts category testrun -- [Optional] Testrun to get. Currenttestrun if None Returns standard deviation as float Examples: >>> nace = dwr.allnace[0][0] >>> nuts = dwr.allnuts[0][0] >>> std = dwr.getSiteStddevByGroup(nace,nuts,dwr.currenttestrun) >>> type(std) == type(0.0) True """ #self.t('getSiteStddevByGroup') linfo = localinfo(locals()) testrun = self.getTestRun(testrun) result = self.__cached((nace,nuts,testrun),self.sitestddev,nameofdumpfile='cachedstddev') if result is not False: #self.t('getSiteStddevByGroup',0,0,linfo) return result cur = self.con.cursor() cur.execute('select DgroupStddev(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),0::smallint,%(testrun)s);',locals()) results = nfloat(cur.fetchall()[0][0]) self.__cached((nace,nuts,testrun),self.sitestddev,results,nameofdumpfile='cachedstddev',read=False) #self.t('getSiteStddevByGroup',0,1,linfo) return results def getParentRegions(self,nuts): """Retrieves parent-regions from a given region nuts -- Nuts region e.g. DE142 Examples: >>> getParentRegions('DE142') ['All', 'EU', 'DE', 'DE1' og 'DE14','DE142'] """ #self.t('getParentRegions') linfo = localinfo(locals()) result = self.__cached((nuts),self.cachedparentnuts,nameofdumpfile='cachedparentnuts') if result is not False: #self.t('getParentRegions',0,0,linfo) return result if nuts in ['E','F','C', 'U']: results = ['All']+[nuts] elif nuts in ['All']: results = ['All'] else: cur = self.con.cursor() if nuts.startswith('NO') and len(nuts)==4: realnuts = nuts nuts = 'NO0' results = [nuts[:i] for i in range(2,len(nuts))] + [realnuts] nuts = realnuts elif nuts.startswith('CH') and len(nuts)==4: realnuts = nuts nuts = 'CH0' results = [nuts[:i] for i in range(2,len(nuts))] + [realnuts] nuts = realnuts elif nuts.startswith('RO') and len(nuts)==4: realnuts = nuts nuts = 'RO0' results = [nuts[:i] for i in range(2,len(nuts))] + [realnuts] nuts = realnuts else: results = [nuts[:i] for i in range(2,len(nuts))] + [nuts] country = results[0] cur.execute("select distinct eumember from datastaging.country where countryabbreviation=%(country)s;",locals()) eumember = cur.fetchall()[0][0] if eumember=='EU member': eumember = 'EU' elif eumember=='Outside EU': eumember = 'FT' elif eumember=='Applicant country': eumember = 'CC' results = list(set(results)) results.sort(region_compare) results = ['All',eumember] + results self.__cached((nuts),self.cachedparentnuts,results,nameofdumpfile='cachedparentnuts',read=False) #self.t('getParentRegions',0,1,linfo) return results def getSubRegions(self,nuts): """Retrieves sub-regions from a given region Keyword arguments: nuts -- Nuts region e.g. AT1 Examples: >>> getSubRegions('AT1') ['AT11','AT12','AT13'] """ #self.t('getSubRegions') linfo = localinfo(locals()) result = self.__cached((nuts),self.cachednuts,nameofdumpfile='cachednuts') if result is not False: #self.t('getSubRegions',0,0,linfo) return result cur = self.con.cursor() if nuts=='All': results = ['E','C','F'] elif nuts=='E': cur.execute("select distinct countryabbreviation from datastaging.country where eumember='EU member';") results = list(set([a[0] for a in cur.fetchall()])) elif nuts=='F': cur.execute("select distinct countryabbreviation from datastaging.country where eumember='Outside EU';") results = list(set([a[0] for a in cur.fetchall()])) elif nuts=='C': cur.execute("select distinct countryabbreviation from datastaging.country where eumember='Applicant country';") results = list(set([a[0] for a in cur.fetchall()])) else: if len(nuts)==2: if nuts in ['NO','CH','RO']: fuzzynuts = ''.join((nuts,'__')) else: fuzzynuts = ''.join((nuts,'_')) cur.execute("select distinct nuts1code,nuts2code,nuts3code from datastaging.nutslevel2,datastaging.nutslevel1,datastaging.nutslevel3 where datastaging.nutslevel2.nutslevel1id= datastaging.nutslevel1.nutslevel1id and datastaging.nutslevel3.nutslevel2id=datastaging.nutslevel2.nutslevel2id and (nuts1code like %(fuzzynuts)s or nuts2code like %(fuzzynuts)s);",locals()) else: cur.execute('select distinct nuts1code,nuts2code,nuts3code from datastaging.nutslevel2,datastaging.nutslevel1,datastaging.nutslevel3 where datastaging.nutslevel2.nutslevel1id= datastaging.nutslevel1.nutslevel1id and datastaging.nutslevel3.nutslevel2id=datastaging.nutslevel2.nutslevel2id and (nuts1code=%(nuts)s or nuts2code=%(nuts)s);',locals()) if nuts in ['NO','CH','RO']: results = list(set([a[1] for a in cur.fetchall()])) elif len(nuts)==3: results = list(set([a[1] for a in cur.fetchall() if a[1]])) elif len(nuts)==4: results = list(set([a[2] for a in cur.fetchall() if a[2]])) else: results = list(set([a[0] for a in cur.fetchall()])) self.__cached((nuts),self.cachednuts,results,nameofdumpfile='cachednuts',read=False) #self.t('getSubRegions',0,1,linfo) return results def getGroupContent(self,nace,nuts,testrun,mime,inclusion): """Retrieves group content from a given group Keyword arguments: nace -- Nace category nuts -- Nuts category testrun -- Testrun to get mime -- Mimetype to get inclusion -- Inclusion to get Returns groupcontent as float Examples: >>> onegroup = dwr.allgroups[0][0] >>> mimetype = dwr.allmimetypes[0] >>> inclusion = dwr.allinclusions[0] >>> onenace = dwr.allnace[0] >>> onenuts = dwr.allnuts[0] >>> content = dwr.getGroupContent(onenace,onenuts,dwr.lastesttestrun,onmime,) >>> type(content)==type(0.0) True """ #self.t('getGroupContent') if nace in ['Unknown',None]: nace = 'All' if nuts in ['Unknown',None]: nuts = 'All' if nuts=='E': nuts='EU' elif nuts=='F': nuts='NOTEU' elif nuts=='C': nuts='AC' elif nuts == 'U': nuts='UX' result = self.__cached((nace,nuts,testrun,mime,inclusion),self.sitecontent,nameofdumpfile='cachedsitecontent') linfo = localinfo(locals()) if result is not False: #self.t('getGroupContent',0,0,linfo) return result cur = self.con.cursor() if nuts == 'UX': cur.execute('select DgroupContent(nutsGroupEULevelSites(%(testrun)s),getMimeTypeID(%(mime)s),getInclusionTypeID(%(inclusion)s),%(testrun)s);',locals()) else: cur.execute('select DgroupContent(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),getMimeTypeID(%(mime)s),getInclusionTypeID(%(inclusion)s),%(testrun)s);',locals()) results = cur.fetchall()[0][0] if results: results = nfloat(results) self.__cached((nace,nuts,testrun,mime,inclusion),self.sitecontent,results,nameofdumpfile='cachedsitecontent',read=False) #self.t('getGroupContent',0,1,linfo) return results def getPageContent(self,scenarioid,testrun,mime,inclusion): """Retrieves page content from a given page Keyword arguments: scenarioid -- Scenarioid to get testrun -- Testrun to get mime -- Mimetype to get inclusion -- Inclusion to get Returns sitecontent as float Examples: >>> onedomain = dwr.alldomains[0] >>> scenarioid = dwr.getPageList(onedomain,dwr.currenttestrun) >>> mimetype = dwr.allmimetypes[0] >>> inclusion = dwr.allinclusions[0] >>> content = dwr.getPageContent(scenarioid,dwr.latesttestrun,mimetype,inclusion) >>> type(content)==type(0.0) True """ #self.t('getPageContent') result = self.__cached((scenarioid,testrun,mime,inclusion),self.sitecontent,nameofdumpfile='cachedsitecontent') if result is not False: #self.t('getPageContent',0,0,linfo) return result cur = self.con.cursor() cur.execute('select DpageContent(%(scenarioid)s,getMimeTypeID(%(mime)s),getInclusionTypeID(%(inclusion)s),%(testrun)s);',locals()) results = float(cur.fetchall()[0][0]) self.__cached((scenarioid,testrun,mime,inclusion),self.sitecontent,results,nameofdumpfile='cachedsitecontent',read=False) #self.t('getPageContent',0,1,linfo) return results def getSiteContent(self,domain,testrun,mime,inclusion): """Retrieves site content from a given site Keyword arguments: domain -- Domain to get testrun -- Testrun to get mime -- Mimetype to get inclusion -- Inclusion to get Returns sitecontent as float Examples: >>> onedomain = dwr.alldomains[0] >>> mimetype = dwr.allmimetypes[0] >>> inclusion = dwr.allinclusions[0] >>> content = dwr.getSiteContent(onedomain,dwr.latesttestrun,mimetype,inclusion) >>> type(content)==type(0.0) True """ #self.t('getSiteContent') #result = self.__cached((domain,testrun,mime,inclusion),self.sitecontent,nameofdumpfile='cachedsitecontent') if not testrun: testrun = self.latesttestrun linfo = localinfo(locals()) result = self.__cached((domain,testrun,mime,inclusion),self.sitecontent,nameofdumpfile='cachedsitecontent') if result is not False: #self.t('getSiteContent',0,0,linfo) return result results = nfloat(self.supercache("select DsiteContent(getSiteID('%(domain)s',%(testrun)s),getMimeTypeID('%(mime)s'),getInclusionTypeID('%(inclusion)s'),%(testrun)s);",locals())[0][0]) #cur = self.con.cursor() #cur.execute('select DsiteContent(getSiteID(%(domain)s,%(testrun)s),getMimeTypeID(%(mime)s),getInclusionTypeID(%(inclusion)s),%(testrun)s);',locals()) #results = float(cur.fetchall()[0][0]) self.__cached((domain,testrun,mime,inclusion),self.sitecontent,results,nameofdumpfile='cachedsitecontent',read=False) #self.t('getSiteContent',0,1,linfo) return results def getIndicatorsForPage(self,scenarioid,testrun,test): """Retrieves Indicator information from a page given a test Keyword arguments: scenarioid -- ScenarioID to get testrun -- Testrun to get test -- Test the current indicators are a basis for Returns a tuple as (groupTestRatio,groupTestMin,groupTestMax) Example: >>> scenarioid = dwr.getPageList(dwr.domains[0],dwr.currenttestrun) >>> testid = dwr.alluwemids[0] >>> result = dwr.getIndicatorsForPage(scenarioid,dwr.latesttestrun,testid) >>> type(result) == type(0.0) """ linfo = localinfo(locals()) #self.t('getIndicatorsForPage') if not scenarioid: return 0.0 result = self.__cached((scenarioid,testrun,test),self.indicatorsfortest,nameofdumpfile='cachedindicatorsfortest') if result is not False: #self.t('getIndicatorsForPage',0,0,linfo) return result cur = self.con.cursor() #results = nfloat(self.supercache("select DsiteContent(getSiteID('%(domain)s',%(testrun)s),getMimeTypeID('%(mime)s'),getInclusionTypeID('%(inclusion)s'),%(testrun)s);",locals())[0][0]) #print "select DpageTestRatio(%(scenarioid)s,getBarrierComputationID('%(test)s'),%(testrun)s);"%locals() results = nfloat(self.supercache("select DpageTestRatio(%(scenarioid)s,getBarrierComputationID('%(test)s'),%(testrun)s);",locals())[0][0]) #cur.execute('select DpageTestRatio(%(scenarioid)s,getBarrierComputationID(%(test)s),%(testrun)s);',locals()) #sitetestratio = nfloat(cur.fetchall()[0][0]) #results = sitetestratio self.__cached((scenarioid,testrun,test),self.indicatorsfortest,results,nameofdumpfile='cachedindicatorsfortest',read=False) #self.t('getIndicatorsForPage',0,1,linfo) return results def getIndicatorsForTestByGroup(self,nace,nuts,testrun,test): """Retrieves Indicator information from a site given a test Keyword arguments: nace -- Nace category nuts -- Nuts category testrun -- Testrun to get test -- Test the current indicators are a basis for Returns a tuple as (groupTestRatio,groupTestMin,groupTestMax) Example: >>> nace = dwr.allnace[0][0] >>> nuts = dwr.allnuts[0][0] >>> testid = dwr.alluwemids[0] >>> grouptestratio,grouptestmin,grouptestmax = dwr.getIndicatorsForTestByGroup(nace,nuts,dwr.latesttestrun,testid) >>> type(sitetestration) == type(0.0) >>> type(sitetestmin) == type(0.0) >>> type(sitetestmax) == type(0.0) """ #self.t('getIndicatorsForTestByGroup') if nuts=='E': nuts='EU' elif nuts=='F': nuts='NOTEU' elif nuts=='C': nuts='AC' elif nuts=='U': nuts='UX' linfo = localinfo(locals()) result = self.__cached((nace,nuts,testrun,test),self.indicatorsfortest,nameofdumpfile='cachedindicatorsfortest') if result is not False: #self.t('getIndicatorsForTestByGroup',0,0,linfo) return result cur = self.con.cursor() if nuts.lower() == 'ux': cur.execute('select DgroupTestRatio(nutsGroupEULevelSites(%(testrun)s),getBarrierComputationID(%(test)s),%(testrun)s);',locals()) grouptestratio = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupTestMin(nutsGroupEULevelSites(%(testrun)s),getBarrierComputationID(%(test)s),%(testrun)s);',locals()) grouptestmin = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupTestMax(nutsGroupEULevelSites(%(testrun)s),getBarrierComputationID(%(test)s),%(testrun)s);',locals()) grouptestmax = nfloat(cur.fetchall()[0][0]) else: cur.execute('select DgroupTestRatio(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),getBarrierComputationID(%(test)s),%(testrun)s);',locals()) grouptestratio = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupTestMin(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),getBarrierComputationID(%(test)s),%(testrun)s);',locals()) grouptestmin = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupTestMax(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),getBarrierComputationID(%(test)s),%(testrun)s);',locals()) grouptestmax = nfloat(cur.fetchall()[0][0]) results = (grouptestratio,grouptestmin,grouptestmax) self.__cached((nace,nuts,testrun,test),self.indicatorsfortest,results,nameofdumpfile='cachedindicatorsfortest',read=False) #self.t('getIndicatorsForTestByGroup',0,1,linfo) return results def getIndicatorsForTest(self,domain,testrun,test): """Retrieves Indicator information from a site given a test Keyword arguments: domain -- Domain to get testrun -- Testrun to get test -- Test the current indicators are a basis for Returns a tuple as (siteTestRatio,siteTestMin,siteTestMax) Example: >>> domain = dwr.domains[0] >>> testid = dwr.alluwemids[0] >>> sitetestratio,sitetestmin,sitetestmax = dwr.getIndicatorsForTest(domain,dwr.latesttestrun,testid) >>> type(sitetestration) == type(0.0) >>> type (sitetestmin) == type(0.0) >>> type (sitetestmax) == type(0.0) """ linfo = localinfo(locals()) #self.t('getIndicatorsForTest') result = self.__cached((domain,testrun,test),self.indicatorsfortest,nameofdumpfile='cachedindicatorsfortest') if result is not False: #self.t('getIndicatorsForTest',0,0,linfo) return result cur = self.con.cursor() cur.execute('select DsiteTestRatio(getSiteID(%(domain)s,%(testrun)s),getBarrierComputationID(%(test)s),%(testrun)s);',locals()) sitetestratio = nfloat(cur.fetchall()[0][0]) cur.execute('select DsiteTestMin(getSiteID(%(domain)s,%(testrun)s),getBarrierComputationID(%(test)s),%(testrun)s);',locals()) sitetestmin = nfloat(cur.fetchall()[0][0]) cur.execute('select DsiteTestMax(getSiteID(%(domain)s,%(testrun)s),getBarrierComputationID(%(test)s),%(testrun)s);',locals()) sitetestmax = nfloat(cur.fetchall()[0][0]) results = (sitetestratio,sitetestmin,sitetestmax) self.__cached((domain,testrun,test),self.indicatorsfortest,results,nameofdumpfile='cachedindicatorsfortest',read=False) #self.t('getIndicatorsForTest',0,1,linfo) return results def getLanguageStatisticsByGroup(self,nace,nuts,testrun,language): """Retrieves language information from a site Keyword arguments: nace -- Nace cateogory nuts -- Nuts cateogury testrun -- Testrun to get language -- Language to get Returns a tuple of (pagenumberlang, groupmeanlang, grouperrormarginlang, groupminlang, groupmaxlang) Examples: >>> nuts = dwr.allnuts[0] >>> nace = dwr.allnace[0] >>> language = dwr.alllanguages[0] >>> pagenumberlang, sitemeanlang, siteerrormarginlang, siteminlang, sitemaxlang = dwr.getLanguageStatisticsByGroup(nace,nuts,dwr.latesttestrun,language) >>> type(pagenumberlang) == type(0) True >>> type(sitemeanlang) == type(0.0) True >>> type(siteerrormarginlang) == type(0.0) True >>> type(siteminlang) == type(0.0) True >>> type(sitemaxlang) == type(0.0) True """ #self.t('getLanguageStatisticsByGroup') linfo = localinfo(locals()) if type(language)==type(()): language = language[1] result = self.__cached((nace,nuts,language,testrun),self.languagestatistics,nameofdumpfile='cachedlanguagestatistics') if result is not False: #self.t('getLanguageStatisticsByGroup',0,0,linfo) return result cur = self.con.cursor() cur.execute('select DpageNumberLangGroup(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),getLanguageID(%(language)s),%(testrun)s);',locals()) pagenumber = nint(cur.fetchall()[0][0]) cur.execute('select DgroupMeanLang(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),getLanguageID(%(language)s),0::smallint,%(testrun)s);',locals()) sitemean = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupErrorMarginLang(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),getLanguageID(%(language)s),0::smallint,%(testrun)s);',locals()) siteerrormargin = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupMinLang(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),getLanguageID(%(language)s),0::smallint,%(testrun)s);',locals()) sitemin = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupMaxLang(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),getLanguageID(%(language)s),0::smallint,%(testrun)s);',locals()) sitemax = nfloat(cur.fetchall()[0][0]) results = (pagenumber,sitemean,siteerrormargin,sitemin,sitemax) self.__cached((nace,nuts,language,testrun),self.languagestatistics,results,nameofdumpfile='cachedlanguagestatisitics',read=False) #self.t('getLanguageStatisticsByGroup',0,1,linfo) return results def getLanguageStatistics(self,domain,testrun,language): """Retrieves language information from a site Keyword arguments: domain -- Domain to get testrun -- Testrun to get language -- Language to get Returns a tuple of (pagenumberlang, sitemeanlang, siteerrormarginlang, siteminlang, sitemaxlang) Examples: >>> domain = dwr.alldomains[0] >>> language = dwr.alllanguages[0] >>> pagenumberlang, sitemeanlang, siteerrormarginlang, siteminlang, sitemaxlang = dwr.getLanguages(domain,dwr.latesttestrun,language) >>> type(pagenumberlang) == type(0) True >>> type(sitemeanlang) == type(0.0) True >>> type(siteerrormarginlang) == type(0.0) True >>> type(siteminlang) == type(0.0) True >>> type(sitemaxlang) == type(0.0) True """ #self.t('getLanguageStatistics') linfo = localinfo(locals()) if type(language)==type(()): language=language[1] result = self.__cached((domain,testrun),self.languagestatistics,nameofdumpfile='cachedlanguagestatistics') if result is not False: #self.t('getLanguageStatistics',0,0,linfo) return result cur = self.con.cursor() cur.execute('select DpageNumberLangSite(getSiteID(%(domain)s,%(testrun)s),getLanguageID(%(language)s),%(testrun)s);',locals()) pagenumber = int(cur.fetchall()[0][0]) cur.execute('select DsiteMeanLang(getSiteID(%(domain)s,%(testrun)s),getLanguageID(%(language)s),0::smallint,%(testrun)s);',locals()) sitemean = nfloat(cur.fetchall()[0][0]) cur.execute('select DsiteErrorMarginLang(getSiteID(%(domain)s,%(testrun)s),getLanguageID(%(language)s),0::smallint,%(testrun)s);',locals()) siteerrormargin = nfloat(cur.fetchall()[0][0]) cur.execute('select DsiteMinLang(getSiteID(%(domain)s,%(testrun)s),getLanguageID(%(language)s),0::smallint,%(testrun)s);',locals()) sitemin = nfloat(cur.fetchall()[0][0]) cur.execute('select DsiteMaxLang(getSiteID(%(domain)s,%(testrun)s),getLanguageID(%(language)s),0::smallint,%(testrun)s);',locals()) sitemax = nfloat(cur.fetchall()[0][0]) results = (pagenumber,sitemean,siteerrormargin,sitemin,sitemax) self.__cached((domain,testrun),self.languagestatistics,results,nameofdumpfile='cachedlanguagestatisitics',read=False) #self.t('getLanguageStatistics',0,1,linfo) return results def getMainGroupInformation(self,nace,nuts,testrun): """Retrieved main site information Keyword arguments: nace -- Nace cateogory nuts -- Nuts cateogory testrun -- Testrun to get returns a tuple of (sitenumber, groupMean, groupErrorMargin,groupMin, groupMax) Example: >>> nace = dwr.allnace[0][0] >>> nuts = dwr.allnuts[0][0] >>> sitenumber, groupmean, grouperrormargin,groupmin, groupmax = dwr.getMainGrouptInformation(nace,nuts,dwr.latesttestrun) >>> type(sitenumber) == type(0) True >>> type(groupmean) == type(0.0) True >>> type(grouperrormargn) == type(0.0) True >>> type(groupmin) == type(0.0) True >>> type(groupmax) == type(0.0) True >>> groupmax >= groupmin True """ #self.t('getMainGroupInformation') linfo = localinfo(locals()) if nace in [None,'Unknown']: nace = 'All' if nuts in [None,'Unknown']: nuts = 'All' if nuts=='E': nuts='EU' elif nuts=='F': nuts='NOTEU' elif nuts=='C': nuts='AC' elif nuts=='U': nuts='UX' result = self.__cached((nace,nuts,testrun),self.mainsiteinfo,nameofdumpfile='cachedmainsiteinfo') result = False if result is not False: #self.t('getMainGroupInformation',0,0,linfo) return result cur = self.con.cursor() nuts_s=nuts if nuts.lower() == 'ux': nuts = nuts.rstrip('xX') cur.execute('select DsiteNumber(nutsGroupEULevelSites(%(testrun)s),%(testrun)s);',locals()) sitenumber = nint(cur.fetchall()[0][0]) cur.execute('select DgroupMean(nutsGroupEULevelSites(%(testrun)s),0::smallint,%(testrun)s);',locals()) groupmean = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupErrorMargin(nutsGroupEULevelSites(%(testrun)s),0::smallint,%(testrun)s);',locals()) grouperrormargin = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupMin(nutsGroupEULevelSites(%(testrun)s),0::smallint,%(testrun)s);',locals()) groupmin = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupMax(nutsGroupEULevelSites(%(testrun)s),0::smallint,%(testrun)s);',locals()) groupmax = nfloat(cur.fetchall()[0][0]) change = '2' # TODO: This should be fixed... elif nuts.endswith('x') or nuts.endswith('X'): nuts = nuts.rstrip('xX') cur.execute('select DsiteNumber(nutsGroupUnknownNUTS1(%(nuts)s,%(testrun)s),%(testrun)s);',locals()) sitenumber = nint(cur.fetchall()[0][0]) cur.execute('select DgroupMean(nutsGroupUnknownNUTS1(%(nuts)s,%(testrun)s),0::smallint,%(testrun)s);',locals()) groupmean = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupErrorMargin(nutsGroupUnknownNUTS1(%(nuts)s,%(testrun)s),0::smallint,%(testrun)s);',locals()) grouperrormargin = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupMin(nutsGroupUnknownNUTS1(%(nuts)s,%(testrun)s),0::smallint,%(testrun)s);',locals()) groupmin = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupMax(nutsGroupUnknownNUTS1(%(nuts)s,%(testrun)s),0::smallint,%(testrun)s);',locals()) groupmax = nfloat(cur.fetchall()[0][0]) change = '2' # TODO: This should be fixed... elif len(nuts)==2: cur.execute('select DsiteNumber(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),%(testrun)s);',locals()) sitenumber = nint(cur.fetchall()[0][0]) cur.execute('select DgroupMean(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),0::smallint,%(testrun)s);',locals()) groupmean = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupErrorMargin(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),0::smallint,%(testrun)s);',locals()) grouperrormargin = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupMin(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),0::smallint,%(testrun)s);',locals()) groupmin = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupMax(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),0::smallint,%(testrun)s);',locals()) groupmax = nfloat(cur.fetchall()[0][0]) change = '2' # TODO: This should be fixed... else: cur.execute('select DsiteNumber(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),%(testrun)s);',locals()) sitenumber = nint(cur.fetchall()[0][0]) cur.execute('select DgroupMean(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),0::smallint,%(testrun)s);',locals()) groupmean = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupErrorMargin(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),0::smallint,%(testrun)s);',locals()) grouperrormargin = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupMin(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),0::smallint,%(testrun)s);',locals()) groupmin = nfloat(cur.fetchall()[0][0]) cur.execute('select DgroupMax(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),0::smallint,%(testrun)s);',locals()) groupmax = nfloat(cur.fetchall()[0][0]) change = '2' # TODO: This should be fixed... results = (sitenumber,groupmean,grouperrormargin,groupmin,groupmax,change) self.__cached((nace,nuts_s,testrun),self.mainsiteinfo,results,nameofdumpfile='cachedmainsiteinfo',read=False) #self.t('getMainGroupInformation',0,1,linfo) return results def getMainSiteInformation(self,domain,testrunid): """Retrieved main site information Keyword arguments: domain -- Domain to get testrun -- Testrun to get returns a tuple of (site,cwam,pageaverage,stddev,errormarging,mincwam,maxcwam,scenariocount,downloadcount,unavailablepages,unparablepages,tidypages,exahaustivescane,exhaustivesampled,nace,nuts3,nuts2,nuts1,country) """ #self.t('getMainSiteInformation') linfo = localinfo(locals()) if not domain: return (domain,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0) result = self.__cached((domain,testrunid),self.mainsiteinfo,nameofdumpfile='cachedmainsiteinfo') if result is not False: #self.t('getMainSiteInformation',0,0,linfo) return result cur = self.con.cursor() cur.execute('select site,cwam,pageaverage,stddev,errormargin,mincwam,maxcwam,scenariocount,downloadcount,unavailablepages,unparsablepages,tidypages,exhaustivescan,eiaodw.exhautivesampled(siteid,%(testrunid)s) from datastaging.site natural join matviews.site where site=%(domain)s and testrunid=%(testrunid)s;',locals()) results = cur.fetchall()[0] cur.execute('select nacecode,nuts3code,nuts2code,nuts1code,countryabbreviation FROM getNutsNace(getSiteID(%(domain)s,%(testrunid)s), %(testrunid)s);',locals()) results += cur.fetchall()[0] #cur.execute('select DpageNumber(getSiteID(%(domain)s,%(testrun)s::smallint),%(testrun)s::smallint)',locals()) #pagenumber = nint(cur.fetchall()[0][0]) #cur.execute('select DsiteMean(getSiteID(%(domain)s,%(testrun)s::smallint),0::smallint,%(testrun)s::smallint);',locals()) #sitemean = nfloat(cur.fetchall()[0][0]) #cur.execute('select DsiteErrorMargin(getSiteID(%(domain)s,%(testrun)s::smallint),0::smallint,%(testrun)s::smallint);',locals()) #siteerrormargin = nfloat(cur.fetchall()[0][0]) #cur.execute('select DsiteMin(getSiteID(%(domain)s,%(testrun)s::smallint),0::smallint,%(testrun)s::smallint);',locals()) #sitemin = nfloat(cur.fetchall()[0][0]) #cur.execute('select DsiteMax(getSiteID(%(domain)s,%(testrun)s::smallint),0::smallint,%(testrun)s::smallint);',locals()) #sitemax = nfloat(cur.fetchall()[0][0]) #change = '2' # TODO: This should be fixed... #results = (pagenumber,sitemean,siteerrormargin,sitemin,sitemax,change) self.__cached((domain,testrunid),self.mainsiteinfo,results,nameofdumpfile='cachedmainsiteinfo',read=False) #self.t('getMainSiteInformation',0,1,linfo) return results def getNumberSites(self,nace,nuts,testrun): """Retrieved the number of web sites from a group Keyword arguments: nace -- Nace category nuts -- Nuts category testrun -- testrun to get number of sites from Example >>> nace = dwr.allnace[0][0] >>> nuts = dwr.allnuts[0][0] >>> numbersites = dwr.getNumberSites(nace,nuts,dwr.latesttestrun) """ linfo = localinfo(locals()) if nuts in [None,'Unknown']: nuts = 'All' if nace in [None,'Unknown']: nace = 'All' if nuts=='E': nuts='EU' elif nuts=='F': nuts='NOTEU' elif nuts=='C': nuts='AC' elif nuts == 'U': nuts='UX' result = self.__cached((nace,nuts,testrun),self.cachednumbersites,nameofdumpfile='cachednumbersites') #Check the above if result is not False: return result cur = self.con.cursor() if nuts=='UX': cur.execute('select DSiteNumber(nutsGroupEULevelSites(%(testrun)s),%(testrun)s);',locals()) else: cur.execute('select DSiteNumber(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),%(testrun)s);',locals()) result = int(cur.fetchall()[0][0]) self.__cached((nace,nuts,testrun),self.cachednumbersites,result,nameofdumpfile='cachednumbersites') return result def getGroupStdDevLang(self,nace,nuts,language,testrun): """Retrieves the standard deviation of a language within a group Keyword arguments: nace -- Nace category nuts -- Nuts category testrun -- testrun to get standard deviaion from language -- language to get to get standard deviation from Examples: >>> nace = dwr.allnace[0][0] >>> nuts = dwr.allnuts[0][0] >>> language = dwr.alllanguages[0] >>> std = getSiteStdDevLang(nace,nuts,language,dwr.latesttestrun) """ #self.t('getSiteStdDevLangByGroup') linfo = localinfo(locals()) if nuts in [None,'Unknown']: nuts = 'All' if nace in [None,'Unknown']: nace = 'All' if type(language)==type(()): language = language[1] result = self.__cached((nace,nuts,language,testrun),self.stddevlang,nameofdumpfile='cachedstdlang') if result is not False: #self.t('getSiteStdDevLangByGroup',0,0,linfo) return result cur = self.con.cursor() cur.execute('select DgroupStddevLang(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),getLanguageID(%(language)s),0::smallint,%(testrun)s);',locals()) results = float(cur.fetchall()[0][0]) self.__cached((nace,nuts,language,testrun),self.stddevlang,results,nameofdumpfile='cachedstdlang',read=False) #self.t('getSiteStdDevLangByGroup',0,1,linfo) return results def getSiteStdDevLang(self,domain,language,testrun): """Retrieves the standard deviation of a language within a site Keyword arguments: domain -- domain to get testrun -- testrun to get language -- language to get Examples: >>> domain = dwr.domains[0] >>> language = dwr.alllanguages[0] >>> std = getSiteStdDevLang(domain,language,dwr.latesttestrun) """ #self.t('getSiteStdDevLang') linfo = localinfo(locals()) if type(language)==type(()): language = language[1] result = self.__cached((domain,language,testrun),self.stddevlang,nameofdumpfile='cachedstdlang') if result is not False: #self.t('getSiteStdDevLang',0,0,linfo) return result cur = self.con.cursor() cur.execute('select DsiteStddevLang(getSiteID(%(domain)s,%(testrun)s),getLanguageID(%(language)s),0::smallint,%(testrun)s);',locals()) results = nfloat(cur.fetchall()[0][0]) self.__cached((domain,language,testrun),self.stddevlang,results,nameofdumpfile='cachedstdlang',read=False) #self.t('getSiteStdDevLang',0,1,linfo) return results def getNutsNace(self,domain,testrunid=None): """Retrieves nuts and nace for a given site Keyword arguments: domain -- Site to get testrunid returns a tuple of (country,nuts1,nuts2,nuts3,nace) Examples: >>> onedomain = dwr.alldomains[0] >>> dwr.getNutsNace(onedomain) """ #self.t('getNutsNace') linfo = localinfo(locals()) if not testrunid: testrunid = self.latesttestrun result = self.__cached((domain,testrunid),self.cachednuts,nameofdumpfile='cachednuts') if result is not False: #self.t('getNutsNace',0,0,linfo) return result cur = self.con.cursor() cur.execute('SELECT * FROM getNutsNace(getSiteID(%(domain)s,%(testrunid)s::smallint), %(testrunid)s::smallint);',locals()) data = cur.fetchall()[0] results = (data[0],data[1],data[2],data[3],data[4]) self.__cached((domain,testrunid),self.cachednuts,results,nameofdumpfile='cachednuts',read=False) #self.t('getNutsNace',0,1,linfo) return results def getSiteList(self,nace,nuts,testrun): """Retrieves a list of sites within a group Keyword arguments: nace -- Nace category nuts -- Nuts catogory testrun -- testrun to get Examples: >>> nace = dwr.allnace[0][0] >>> nuts = dwr.allnuts[0][0] >>> sites = dwr.getSiteList(nace,nuts,dwr.latesttestrun) >>> type(sites) == type([]) """ #self.t('getSiteList') linfo = localinfo(locals()) if nuts=='E': nuts='EU' elif nuts=='F': nuts='NOTEU' elif nuts=='C': nuts='AC' elif nuts =='U': nuts='UX' cur = self.con.cursor() cur.execute('select * from DsiteList(nutsGroupEULevelSites(%(testrun)s),%(testrun)s);',locals()); results = list(set([res[0] for res in cur.fetchall()])) return results result = self.__cached((nace,nuts,testrun),self.sitelist,nameofdumpfile='cachedsitelist') if result is not False: #self.t('getSiteList',0,0,linfo) return result cur = self.con.cursor() cur.execute('select * from DsiteList(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),%(testrun)s);',locals()); results = list(set([res[0] for res in cur.fetchall()])) self.__cached((nace,nuts,testrun),self.sitelist,results,nameofdumpfile='cachedsitelist',read=False) #self.t('getSiteList',0,1,linfo) return results def getPageScore(self,scenarioid,testrun): """Retrieves page score from a URL Keyword arguments: scenarioid -- Scenariood to get testrun -- Testrun to get Examples: >>> scenarioid = dwr.getPageList(dwr.domains[0],dwr.latesttestrun)[0] >>> score = dwr.getPageScore(scenarioid,dwr.latesttestrun) """ #self.t('getPageScore') linfo = localinfo(locals()) result = self.__cached((scenarioid,scenarioid),self.cachedpagescore,nameofdumpfile='cachedpagescore') if result is not False: #self.t('getPageScore',0,0,linfo) return result cur = self.con.cursor() cur.execute('select CWAM_scenario(%(scenarioid)s,0::smallint,%(testrun)s);',locals()) result = cur.fetchall()[0][0] self.__cached((scenarioid,scenarioid),self.cachedpagescore,result,nameofdumpfile='cachedpagescore',read=False) #self.t('getPageScore',0,1,linfo) return result def getPageList(self,domain,testrun): """Retrieves a list of scenarioids within a site Keyword arguments: domain -- domain to get testrun -- testrun to get Examples: >>> domain = dwr.domains[0] >>> pages = dwr.getPageList(domain,dwr.latesttestrun) >>> type(pages) == type([]) True """ #self.t('getPageList') linfo = localinfo(locals()) result = self.__cached((domain,testrun),self.pagelist,nameofdumpfile='cachedpagelist') if result is not False: #self.t('getPageList',0,0,linfo) return result cur = self.con.cursor() cur.execute('select * from DpageList(getSiteID(%(domain)s,%(testrun)s),%(testrun)s);',locals()) results = [res[0] for res in cur.fetchall()] self.__cached((domain,testrun),self.pagelist,results,nameofdumpfile='cachedpagelist',read=False) #self.t('getPageList',0,1,linfo) return results def getPageListLangByGroup(self,nace,nuts,language,testrun): """Retrieves a list of pages within a site with a specific language Keyword arguments: nace -- Nace category nuts -- Nuts cateogyr testrun -- testrun to get to get pages from language -- language to get get pages from Examples: >>> nuts = dwr.allnuts[0][0] >>> nace = dwl.allnace[0][0] >>> language = dwr.alllanguages[0] >>> languages = dwr.getPageListLang(nace,nuts,language,dwr.latesttestrun) """ #self.t('getPageListLangByGroup') if nace in [None,'Unkmown']: nace = 'All' if nuts in [None,'Unknown']: nuts = 'All' if type(language)==type(()): language = language[1] result = self.__cached((nace,nuts,language,testrun),self.pagelist,nameofdumpfile='cachedpagelist') if result is not False: #self.t('getPageListLangByGroup',0,0,linfo) return result cur = self.con.cursor() cur.execute('select * FROM DpageListLangGroup(naceNutsGroup(%(nace)s,%(nuts)s,%(testrun)s),getLanguageID(%(language)s),%(testrun)s)',locals()) results = [res[0] for res in cur.fetchall()] self.__cached((nace,nuts,language,testrun),self.pagelist,results,nameofdumpfile='cachedpagelist',read=False) #self.t('getPageListLangByGroup',0,1,linfo) return results def getPageListLang(self,domain,language,testrun): """Retrieves a list of pages within a site with a specific language Keyword arguments: domain -- domain to get testrun -- testrun to get language -- language to get Examples: >>> domain = dwr.domains[0] >>> language = dwr.alllanguages[0] >>> languages = dwr.getPageListLang(domain,language,dwr.latesttestrun) """ #self.t('getPageListLang') linfo = localinfo(locals()) if type(language)==type(()): language = language[1] result = self.__cached((domain,language,testrun),self.pagelist,nameofdumpfile='cachedpagelist') if result is not False: #self.t('getPageListLang',0,0,linfo) return result cur = self.con.cursor() cur.execute('select * from DpageListLangSite(getSiteID(%(domain)s,%(testrun)s),getLanguageID(%(language)s),%(testrun)s)',locals()) results = [res[0] for res in cur.fetchall()] self.__cached((domain,language,testrun),self.pagelist,results,nameofdumpfile='cachedpagelist',read=False) #self.t('getPageListLang',0,1,linfo) return results def getSiteStdDev(self,domain,testrun): """Retrieves the standard deviation for a site Keyword arguments: domain -- domain to get testrun -- testrun to get Examples: >>> domain = dwr.domains[0] >>> std = getSiteStdDev(domain,dwr.latesttestrun) >>> type(std) == type (0.0) True """ #self.t('getSiteStdDev') linfo = localinfo(locals()) result = self.__cached((domain,testrun),self.stddev,nameofdumpfile='cachedstddev') if result is not False: #self.t('getSiteStdDev',0,0,linfo) return result cur = self.con.cursor() cur.execute('select DsiteStddev(getSiteID(%(domain)s,%(testrun)s),0::smallint,%(testrun)s)',locals()) results = float(cur.fetchall()[0][0]) self.__cached((domain,testrun),self.stddev,results,nameofdumpfile='cachedstddev',read=False) #self.t('getSiteStdDev',0,1,linfo) return results def currentMonth(self, testrunid=None): """Retrieves the date for the year and month of the current testrun and optionally the year and month for the second latest testrun. Because the month and year for the results are the same for the current testrun, it makes sense to store the results as temporary variables. Keyword arguments: testrunid -- [Optional] Latest testrunid if left empty Returns a tuple in the following format (year,monthid,month) Examples: >>> months = dwr.currentMonth(dwr.latesttestrun) #(year,month,year,month) >>> year,month,lastyear,lastmonth = months[0][0],months[0][1],months[1][0],months[1][1] >>> type(year)==type(1) True >>> month in (1,2,3,4,5,6,7,8,9,10,11,12) True >>> (type(lastyear)==type(1)) or (type(lastyear)==type(None)) True >>> (lastmonth in (1,2,3,4,5,6,7,8,9,10,11,12)) or (type(lastmonth)==type(None)) True """ #self.t('currentMonth') linfo = localinfo(locals()) if not testrunid: testrunid=self.latesttestrun result = self.__cached((testrunid,),self.cachemonthandyear,nameofdumpfile='cachedmonthandyear') if result is not False: #self.t('currentMonth',0,0,linfo) return result if not testrunid: testrunid=self.latesttestrun cur = self.con.cursor() results = [] cur.execute('select year,monthnumberinyear,monthname from TestResult_T,subject,date where TestResult_T.subjectid=subject.subjectid and date.dateid=TestResult_T.dateid and subject.testrunnumber=%(testrunid)s limit 1;',locals()) result = cur.fetchall() if result: year,month,monthname = result[0][0],result[0][1],result[0][2] else: year,month,monthname = None,None,None results = (year,month,monthname) self.__cached((testrunid,),self.cachemonthandyear,results,nameofdumpfile='cachedmonthandyear',read=False) #self.t('currentMonth',0,1,linfo) return results def currentTestRun(self): """Retrieves largest and second largest testrunid Returns a tuple of largest and second largest testrunid Examples: >>> runone,runtwo = dwr.currentTestRun() >>> type(runone) == type(1) True >>> type(runtwo) in (type(1),type(None)) True >>> runone==dwr.latesttestrun True >>> runtwo==dwr.secondlatesttestrun True """ #self.t('currentTestRun') result = self.__cached((None),self.cachedtestrunids,nameofdumpfile='cachedtestrunids') if result is not False: #self.t('currentTestRun',0) return result #cur = self.con.cursor() #cur.execute('select distinct testrunid from subject;') #result = cur.fetchall() #Hack to force selecting only from the large dataset #testruns = [res[0] for res in result] testruns = ['200805'] if len(testruns)<12: #We have less than 12 months with crawling testruns += [None for i in range(12-len(testruns))] self.__cached((None),self.cachedtestrunids,testruns,nameofdumpfile='cachedtestrunids',read=False) #self.t('currentTestRun',0,1) return testruns def uwemIds(self): """Returns the UWEM Id's present. This is static information with no need for input. Returns a list of UWEMIds Examples: >>> uwemids = dwr.uwemIds() >>> len(uwemids)>0 True >>> type(uwemids)==type([]) True >>> type(uwemids[0])==type('') True >>> dwr.alluwemids==uwemids True """ #self.t('uwemIds') result = self.__cached((None),self.cacheuwemids,nameofdumpfile='cacheuwemids') if result is not False: #self.t('uwemIds',0) return result cur = self.con.cursor() #cur.execute('select distinct uwemtestname from uwemtest order by uwemtestname;') cur.execute('select barriercomputationname from datastaging.barriercomputation;') result = [uwemid[0] for uwemid in cur.fetchall()] self.__cached((None),self.cacheuwemids,result,nameofdumpfile='cacheuwemids',read=False) #self.t('uwemIds',0,1) return result def countries(self): """Returns all countries. This is static information with no need for input. Returns a list of countries Examples: >>> countries = dwr.countries() >>> len(countries)>0 True >>> type(countries)==type([]) True >>> 'NO' in countries True >>> dwr.allcountries==countries True """ #self.t('countries') result = self.__cached((None),self.cachecountries,nameofdumpfile='cachecountries') if result is not False: #self.t('countries',0) return result cur = self.con.cursor() cur.execute('select distinct country,countryabbreviation,eumember from datastaging.country;') result = [(d[0],d[1],d[2]) for d in cur.fetchall()] self.__cached((None),self.cachecountries,result,nameofdumpfile='cachecountries',read=False) #self.t('countries',0,1) return result def CWAM_EU(self,member='EU',testrunid=None): """Retrieved the CWAM results for EU members keyword arguments: member -- [Optional] String value to view if member of EU or not. EU as default. testrunid -- [Optional] Testrunid of which the data should be retrieved. Latest testrun as default """ #self.t('CWAM_EU') result = self.__cached((member,testrunid),self.cachedeu,nameofdumpfile='cacheeu') if result is not False: #self.t('CWAM_EU',0) return result result = [] if member=='EU': memberquery = 'EU member' elif member=='Non-EU': memberquery = 'Outside EU' elif member=='EFTA': memberquery = 'Outside EU' else: memberquery = 'Applicant country' if not testrunid: testrunid = self.latesttestrun cur.execute("select DgroupMean(EUMembershipGroup(%(memberquery)s, %(testrunid)s::smallint);),0::smallint,%(testrunid)s::smallint);",locals()) result = cur.fetchall()[0][0] self.__cached((member,testrunid),self.cachedeu,result,nameofdumpfile='cacheeu',read=False) #self.t('CWAM_EU',0,1) return result def inclusions(self): """Returns the inclusion types. This is static information with no need for input. Returns all inclusion types present in the latest testrun. Examples: >>> inclusions = dwr.inclusions() >>> len(inclusions)>0 True >>> type(inclusions)==type([]) True >>> dwr.allinclusions==inclusions True """ #self.t('inclusions') result = self.__cached((None),self.cachedinclusiontypes,nameofdumpfile='cachedinclusiontypes') if result is not False: #self.t('inclusions',0) return result cur = self.con.cursor() cur.execute('select InclusionType from InclusionType;') result = [d[0] for d in cur.fetchall()] self.__cached((None),self.cachedinclusiontypes,result,nameofdumpfile='cachedinclusiontypes',read=False) #self.t('inclusions',0,1) return result def mimetypes(self): """Returns the mimetypes. This is static information with no need for input. Returns all mimetypes present in the latest testrun. Examples: >>> mimetypes = dwr.mimetypes() >>> len(mimetypes)>0 True >>> type(mimetypes)==type([]) True >>> dwr.allmimetypes==mimetypes True """ #self.t('mimetypes') result = self.__cached((None),self.cachedmimetypes,nameofdumpfile='cachedmimetypes') if result is not False: #self.t('mimetypes',0) return result cur = self.con.cursor() cur.execute("select MimeType||'/'||MimeSubType from MimeType where mimetypeid>0;") result = [d[0] for d in cur.fetchall()] self.__cached((None),self.cachedmimetypes,result,nameofdumpfile='cachedmimetypes',read=False) #self.t('mimetypes',0,1) return result def languages(self): """Returns the languages. This is static information with no need for input. Returns all domains present in the latest testrun. Examples: >>> languages = dwr.languages() >>> len(languages)>0 True >>> type(languages)==type([]) True >>> dwr.alllanguages==languages True """ #self.t('languages') result = self.__cached((None),self.cachedlanguages,nameofdumpfile='cachedlanguages') if result is not False: #self.t('languages',0) return result cur = self.con.cursor() cur.execute('select distinct language,languageabbreviation from datastaging.language;') result = [(d[0],d[1]) for d in cur.fetchall()] self.__cached((None),self.cachedlanguages,result,nameofdumpfile='cachedlanguages',read=False) #self.t('languages',0,1) return result def domains(self): """Returns the domains. This is static information with no need for input. Returns all domains present in the latest testrun. Examples: >>> domains = dwr.domains() >>> len(domains)>0 True >>> type(domains)==type([]) True >>> dwr.alldomains==domains True """ #self.t('domains') result = self.__cached((None),self.cacheddomains,nameofdumpfile='cacheddomains') if result is not False: #self.t('domains',0) return result cur = self.con.cursor() #cur.execute('select distinct site from subject;') cur.execute('select distinct site from datastaging.site;') result = [d[0] for d in cur.fetchall()] self.__cached((None),self.cacheddomains,result,nameofdumpfile='cacheddomains',read=False) #self.t('domains',0,1) return result def getChange(self,function,**parameters): """Returning the changed values between two testruns. The value changed is set to None if only the latest testrun exists. Keyword arguments: function -- Function to use for retrieaving results. parameters -- Parameters to push to the function. Returns a list of dictionaries of results as the following; [{...,'Change':0.5,...},{...}] Examples: >>> changedresult = dwr.getChange(dwr.CWAM_EU,member='EU') """ #self.t('getChange') parameters['testrunid']=self.latesttestrun latest = function(**parameters) parameters['testrunid']=self.secondlatesttestrun secondlatest = function(**parameters) changedlist = [] #Calculating the changes between the testruns for current in latest: result = current.pop('Result') found = False for l in secondlatest: if not [c for c in current.items() if c not in l.items()]: thisresult = l['Result'] current['Result'] = result current['Change'] = result-thisresult found = True if not found: current['Result'] = result current['Change'] = None changedlist.append(current) #self.t('getChange',0,1) return changedlist if __name__ == "__main__": #dwr = DWReader2(cached=True) #import pdb #pdb.set_trace() #print dwr.getIndicatorsForTest('www.sdm.dk',dwr.latesttestrun,dwr.alluwemids[1])# #'12.4_HTML_02') #for id in dwr.alluwemids: # dwr.getIndicatorsForPage('50275','120',id) #print id,dwr.getIndicatorsForPage('50274','120',id) #print dwr.getIndicatorsForPage('65283','120','UWEM.B.10.7.2.2.CSS.DEF.2') #dwr.stop() """ region = 'NO0' regions = {'All':'All regions','CC':'Candidate countries','EU':'EU countries','FT':'EFTA countries','AT':'Austria','IT':'Italy','GR':'Greece','BE':'Belgium','SE':'Sweden','DK':'Denmark','PT':'Portugal','IE':'Ireland','FI':'Finland','CH':'Switzerland','LU':'Luxemburg','IS':'Iceland','DE':'Germany','UK':'United Kingdom','ES':'Spain','FR':'France','NL':'The Netherlands','NO':'Norway','LI':'Liechtenstein','CY':'Cyprus','CZ':'Czech Republic','EE':'Estonia','HU':'Hungary','LV':'Latvia','LT':'Lithuania','MT':'Malta','PL':'Poland','SK':'Slovakia','SI':'Slovenia','BG':'Bulgaria','HR':'Croatia','RO':'Romania','TR':'Turkey',None:'Unknown Sector'} regionname = regions.get(region,False) regionname: print regionname sys.exit(0) allnuts = dict([(nuts[0],nuts[3]) for nuts in dwr.allnuts] + [(nuts[1],nuts[4]) for nuts in dwr.allnuts] + [(nuts[2],nuts[4]) for nuts in dwr.allnuts]) regionname = allnuts.get(region,False) print regionname sys.exit(0) for nuts in dwr.allnuts: if nuts[0]==region: print nuts[3] sys.exit(0) #return nuts[3] for nuts in dwr.allnuts: if nuts[1]==region: print nuts[4] sys.exit(0) #return nuts[4] for nuts in dwr.allnuts: if nuts[2]==region: #return nuts[5] print nuts[5] sys.exit(0) import sys sys.exit(0) import time i = 0 for domain in dwr.alldomains: pre = time.time() print i,domain,dwr.getMainSiteInformation(domain,dwr.latesttestrun) print time.time()-pre i +=1 i = 0 pre = time.time() for domain in dwr.alldomains: for language in dwr.alllanguages: pre = time.time() print i,dwr.getLanguageStatistics(domain,dwr.latesttestrun,language), print time.time()-pre i = 0 for nace in dwr.allnace: for nuts in dwr.allnuts: pre = time.time() print i, dwr.getMainGroupInformation(nace[0],nuts[0],dwr.latesttestrun) print time.time()-pre i +=1 print dwr.allcountries """