# -*- coding: UTF-8 -*-
"""Unit test for the ETL extraction functions"""
# Copyright 2005, 2006 EIAO Consortium
# This program is distributed under the terms of the GNU General
# Public License.
#
# This file is part of the European Internet Accessibility Observatory
# (EIAO)
#
# EIAO is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# EIAO is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with EIAO; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301 USA
__author__ = 'Morten Goodwin Olsen, Jens Frøkjær, Tom Oddershede'
__maintainer__ = 'Nils Ulltveit-Moe'
__version__ = "$Id$"
import doctest, unittest
import RDF
from RDFTraverser import *
# Text concatenated before and after each test's RDF body (the tests build
# rdfheader + rdf + rdffooter) before it is handed to the parser.
# NOTE(review): the header holds only a newline and the footer is empty, so no
# RDF/XML envelope is supplied here -- confirm this is intended.
rdfheader = "\n"
rdffooter = ""
# Module-level list, empty at import time.
result = list()
class dummyrdfreaderwriter:
    """In-memory stand-in for an RDF store, used only by the ETL unit tests.

    write() parses RDF/XML into a fresh RDF.Model kept on the instance and
    read() answers triple-pattern queries against that model.
    """

    def write(self, RDFXML):
        """Simulate writing RDF by parsing it into a new in-memory model.

        Any model held from an earlier call is replaced.

        RDFXML -- RDF as RDF/XML
        Returns None
        """
        self.model = RDF.Model()
        parser = RDF.RDFXMLParser()
        parser.parse_string_into_model(
            self.model, RDFXML, base_uri="http://www.eiao.net/rdf/2.0#")

    def read(self, subject, predicate, object):
        """Return every triple in the held model that matches a pattern.

        Keyword arguments:
        subject -- Subject as string or None for all
        predicate -- Predicate as string or None for all
        object -- Object as string or None for all

        Returns a list of dicts with the keys 'subject', 'predicate' and
        'object', each mapped to a string.
        """
        def as_text(node, literal_key=None):
            # Nodes whose type is 1 are rendered through their uri attribute
            # (presumably Redland's "resource" node type -- confirm); every
            # other node is rendered through literal_value, optionally
            # narrowed to a single key of it.
            if node.type == 1:
                return str(node.uri)
            value = node.literal_value
            if literal_key is not None:
                value = value[literal_key]
            return str(value)

        # Build the query pattern: truthy subject/predicate strings become
        # URIs, and an object is only treated as a URI when its string form
        # starts with 'http'. Everything else is passed through unchanged.
        s_term, p_term, o_term = subject, predicate, object
        if s_term:
            s_term = RDF.Uri(s_term)
        if p_term:
            p_term = RDF.Uri(p_term)
        if str(o_term).startswith('http'):
            o_term = RDF.Uri(o_term)
        pattern = RDF.Statement(subject=s_term, predicate=p_term, object=o_term)

        triples = []
        for hit in self.model.find_statements(pattern):
            s_node, p_node, o_node = hit.subject, hit.predicate, hit.object
            triples.append({
                'subject': as_text(s_node),
                'predicate': as_text(p_node),
                # Only the object is reduced to the 'string' entry of its
                # literal value.
                'object': as_text(o_node, 'string'),
            })
        return triples
class TestSequenceFunctions(unittest.TestCase):
def setUp(self):
self.rdfrw = dummyrdfreaderwriter()
def testearlresult(self):
rdf = """
0.8
109
501
12
104
12
102
"""
self.rdfrw.write(rdfheader + rdf + rdffooter)
results = getResultsFromPageScenario(self.rdfrw.read,'http://www.eiao.net/rdf/2.0#PageSurvey_http://www.eiao.net/rdf/2.0/PageSurvey_0')
self.assert_(('fail',0.8,12,102) in results)
self.assert_(('fail',None,12,104) in results)
self.assert_(('pass',None,109,501) in results)
def testmetadatawithninawebpage(self):
rdf = """
application/pdf
application/img
EN
all,aural,braille
application/pdf
application/pdf
2833
12288
13
12
36
14
1
12
97
112
97
122
"""
self.rdfrw.write(rdfheader + rdf + rdffooter)
metadata = getMetaDataFromPageSurvey(self.rdfrw.read,'http://www.eiao.net/rdf/2.0#PageSurvey_http://www.eiao.net/rdf/2.0/PageSurvey_0')
self.assert_(('technology','application/img',97,112,'http://www.example.org/index.html') in metadata)
self.assert_(('technology','application/pdf',97,122,'http://www.example.org/index.html') in metadata)
self.assert_(('language','EN',1,12,'http://www.example.org/index.html') in metadata)
self.assert_(('mediatype','all,aural,braille',36,14,'http://www.example.org/index.html') in metadata)
self.assert_(('externalLinks','application/pdf',13,12,'http://www.example.org/index.html') in metadata)
self.assert_(('internalLinks','application/pdf',2833,12288,'http://www.example.org/index.html') in metadata)
#Testing that restriction works
language = getMetaDataFromPageSurvey(self.rdfrw.read,'http://www.eiao.net/rdf/2.0#PageSurvey_http://www.eiao.net/rdf/2.0/PageSurvey_0',restriction='language')
self.assert_(('language','EN',1,12,'http://www.example.org/index.html') in language)
self.assert_(('mediatype','all,aural,braille',36,14,'http://www.example.org/index.html') not in language)
def testtestsubjectfrompagesurvey(self):
rdf = """
2
1
238
239
Automatically sampled by EIAO Observatory
2006-11-15 02:37+0100
"""
self.rdfrw.write(rdfheader + rdf + rdffooter)
testsubject = getTestSubjectFromPageSurvey(self.rdfrw.read,'http://www.eiao.net/rdf/2.0#PageSurvey_http://www.eiao.net/rdf/2.0/PageSurvey_0')
self.assert_(testsubject=='http://odin.dep.no/odinarkiv/norsk/jagland/ld/dok/nn.html')
def testscenariodata(self):
rdf = """
pagescenario
0.0975
"""
self.rdfrw.write(rdfheader + rdf + rdffooter)
typeofscenario,rangelocation,barrierindicator,pagesurvey = getScenarioData(self.rdfrw.read,'http://www.eiao.net/rdf/2.0#Scenario_1')
self.assert_(typeofscenario=='pagescenario')
self.assert_(rangelocation=='http://www.eiao.net/rdf/2.0#Scenario_1_range-1')
self.assert_(round(barrierindicator,10)==round(0.0975,10))
self.assert_(pagesurvey=='http://www.eiao.net/rdf/2.0#PageSurvey_http://www.eiao.net/rdf/2.0/PageSurvey_0')
def testsitesurveydata(self):
rdf = """
/var/local/cache/0/
http://example.com/
0.29385738292649372
0.010944420079764546
0.049026470588235393
583
"""
self.rdfrw.write(rdfheader + rdf + rdffooter)
basedownloaddir, website, barrierindicator, variance, errormargin, urlcount = getSiteSurveyData(self.rdfrw.read,'http://www.eiao.net/rdf/2.0/SiteSurvey_0')
self.assert_(basedownloaddir=='/var/local/cache/0/')
self.assert_(website=='http://example.com/')
self.assert_(round(barrierindicator,10)==round(0.29385738292649372,10))
self.assert_(round(variance,10)==round(0.010944420079764546,10))
self.assert_(round(errormargin,10)==round(0.049026470588235393,10))
self.assert_(urlcount==583)
def testmetadatarfomtestsubject(self):
rdf = """
2
1
238
239
Automatically sampled by EIAO Observatory
2006-11-15 02:37+0100
"""
self.rdfrw.write(rdfheader + rdf + rdffooter)
uncheckable, unavailable, contentlength, calculatedsize, title, date = getDataFromTestSubject(self.rdfrw.read,'http://odin.dep.no/odinarkiv/norsk/jagland/ld/dok/nn.html')
self.assert_(uncheckable==2)
self.assert_(unavailable==1)
self.assert_(contentlength==238)
self.assert_(calculatedsize==239)
self.assert_(title=='Automatically sampled by EIAO Observatory')
self.assert_(date=='2006-11-15 02:37+0100')
if __name__ == "__main__":
    # unittest.main() runs the tests and then leaves the interpreter through
    # sys.exit(), so there is no return value to collect; even with exiting
    # disabled it returns a TestProgram object, which cannot be added to a
    # list. Just run the suite.
    unittest.main()