-- Create the datastaging schema. This schema is only used by the ETL tool -- during load of the DW and should *not* be used in any other situation! drop schema if exists datastaging cascade; create schema datastaging; revoke all on schema datastaging from public; -- Populate the datastaging schema. -- Sources for the time dimensions -- No sequence for this table which will be prefilled create table datastaging.Hour( HourID int2, Hour char(2) ); -- No sequence for this table which will be prefilled create table datastaging.Minute( MinuteID int2, Minute char(2), HourID int2 ); -- Sources for the date dimensions -- No sequence for this table. The ID will be calculated by the ETL tool create table datastaging.Year( YearID int2, Year int2 ); -- No sequence for this table. The ID will be calculated by the ETL tool create table datastaging.Quarter( QuarterID int2, QuarterName varchar(7), QuarterNumberInYear int2, YearID int2 ); -- No sequence for this table. The ID will be calculated by the ETL tool create table datastaging.Month( MonthID int2, MonthNumberInYear int2, MonthName varchar(9), DaysInMonth int2, MonthNumberAfterEIAOStart int2, QuarterID int2 ); -- No sequence for this table. The ID will be calculated by the ETL tool create table datastaging.Week( WeekID int2, WeekNumberInYear int2, YearID int2 ); -- No sequence for this table. The ID will be calculated by the ETL tool create table datastaging.Date( DateID int2, DayNumberInMonth int2, DayNumberInWeek int2, DayName varchar(9), Date date, MonthID int2, WeekID int2 ); -- Sources for Subject dimension -- No sequence for this table which will be prefilled create table datastaging.Country( CountryID int2, Country varchar(128), CountryAbbreviation varchar(2), EUMember varchar(17) ); -- No sequence for this table which will be prefilled create table datastaging.NutsLevel1( NutsLevel1ID int2, Nuts1Code varchar(3), Nuts1Name varchar(256), CountryID int2 ); -- No sequence for this table which will be prefilled create table datastaging.NutsLevel2( NutsLevel2ID int2, Nuts2Code varchar(4), Nuts2Name varchar(256), NutsLevel1ID int2 ); -- No sequence for this table which will be prefilled create table datastaging.NutsLevel3( NutsLevel3ID int2, Nuts3Code varchar(5), Nuts3Name varchar(256), NutsLevel2ID int2 ); create sequence datastaging.NaceID_seq; create table datastaging.Nace( NaceID int2 default nextval('datastaging.NaceID_seq'), NaceCode varchar(32), NaceDescription varchar(1024) ); create sequence datastaging.TopLevelDomainID_seq; create table datastaging.TopLevelDomain( TopLevelDomainID int2 default nextval('datastaging.TopLevelDomainID_seq'), TopLevelDomain varchar(10) -- currently the longest ("museum") is 6 ); create index TopLevelDomain_TopLevelDomain_idx on datastaging.TopLevelDomain(TopLevelDomain varchar_pattern_ops); create sequence datastaging.SecondLevelDomainID_seq; create table datastaging.SecondLevelDomain( SecondLevelDomainID int2 default nextval('datastaging.SecondLevelDomainID_seq'), SecondLevelDomain varchar(74), -- 63 (max. label length) + 10 (top level) + 1 (dot) TopLevelDomainID int2 ); create index SecondLevelDomain_SecondLevelDomain_idx on datastaging.SecondLevelDomain(SecondLevelDomain varchar_pattern_ops); create sequence datastaging.SiteID_seq; create table datastaging.Site( SiteID int2 default nextval('datastaging.SiteID_seq'), Site varchar(255), SiteTitle varchar(1024), AddedBy varchar(256), SecondLevelDomainID int2, NutsLevel3ID int2, NaceID int2 ); create index Site_DomainID_idx on datastaging.Site(SecondLevelDomainID); create sequence datastaging.ResourceID_seq; create table datastaging.Resource( ResourceID int4 default nextval('datastaging.ResourceID_seq'), URL varchar(4096), SiteID int2 ); create index Resource_URL_idx on datastaging.Resource(URL varchar_pattern_ops); create sequence datastaging.LanguageFamilyID_seq; create table datastaging.LanguageFamily( LanguageFamilyID int2 default nextval('datastaging.LanguageFamilyID_seq'), LanguageFamily varchar(50), LanguageFamilyAbbreviation varchar(2) ); create index LanguageFamily_LanguageFamilyAbbreviation_idx on datastaging.LanguageFamily(LanguageFamilyAbbreviation varchar_pattern_ops); create sequence datastaging.LanguageID_seq; create table datastaging.Language( LanguageID int2 default nextval('datastaging.LanguageID_seq'), Language varchar(100), LanguageAbbreviation varchar(5), LanguageFamilyID int2 ); create index Language_LanguageAbbreviation_idx on datastaging.Language(LanguageAbbreviation varchar_pattern_ops); -- No sequence for this table which will be prefilled create table datastaging.OperatingSystemFamily( OperatingSystemFamilyID int2, OperatingSystemFamily varchar(20) ); create sequence datastaging.ServerID_seq; create table datastaging.Server( ServerID int2 default nextval('datastaging.ServerID_seq'), Server varchar(256), ServerVersion varchar(256), CompleteServerName varchar(1024), OperatingSystemFamilyID int2 ); create index Server_CompleteServerName_idx on datastaging.Server(CompleteServerName varchar_pattern_ops); -- No sequence for this table. Currently, TestRunID is given by TestRunNumber (but this might change later on!) create table datastaging.TestRun( TestRunID int4, TestRunNumber int4 ); create sequence datastaging.ResourceVersionID_seq; create table datastaging.ResourceVersion( ResourceVersionID int4 default nextval('datastaging.ResourceVersionID_seq'), Doctype varchar(1024), Encoding varchar(20), ContentType varchar(64), ContentLength int4, ActualSize int4, CopyLocation varchar(4096), AuthoringTool varchar(1024), ResourceID int4, ServerID int2, TestRunID int4, MinuteID int2, DateID int2 ); create index ResourceVersion_ResourceID_CopyLocation_idx on datastaging.ResourceVersion(ResourceID, CopyLocation varchar_pattern_ops); create table datastaging.LanguageUsage( ResourceVersionID int4, LanguageID int2 ); create sequence datastaging.SubjectID_seq maxvalue 1073741823 cache 5000; create sequence datastaging.SubjectID_seq_for_ETL_tool increment by 10000 minvalue 1073741824; create table datastaging.Subject( SubjectID int4 default nextval('datastaging.SubjectID_seq'), Line int4, Col int4, ResourceVersionID int4 ); create index Subject_ResourceVersionID_Line_Col_idx on datastaging.Subject(ResourceVersionID, Line, Col); -- MediaType dimension create sequence datastaging.MediaTypeID_seq; create table datastaging.MediaType( MediaTypeID int2, MediaType varchar(256) ); create table datastaging.ResourceMediaTypeUsage( ResourceVersionID int4, MediaTypeID int2 ); create table datastaging.ScenarioMediaTypeUsage( ScenarioID int4, MediaTypeID int2 ); -- Sources for Scenario dimension create sequence datastaging.ScenarioID_seq maxvalue 1073741823; create sequence datastaging.ScenarioID_seq_for_ETL_tool increment by 10000 minvalue 1073741824; create table datastaging.Scenario( ScenarioID int4 default nextval('datastaging.ScenarioID_seq'), SiteID int2, TestRunID int4 ); create table datastaging.ScenarioCoverage( ScenarioID int4, ResourceVersionID int4 ); -- Sources for Result dimension -- No sequence for this table which will be prefilled create table datastaging.ResultType( ResultTypeID int2, ResultType varchar(10) ); -- No sequence for this table which will be prefilled create table datastaging.Result( ResultID int2, TextualDescription varchar(1024), ShortDescription varchar(128), ResultTypeID int2 ); -- Sources for UWEMTest dimension -- No sequence for this table which will be prefilled create table datastaging.UWEMTestType( UWEMTestTypeID int2, UWEMTestType varchar(64) ); -- No sequence for this table which will be prefilled create table datastaging.UWEMTest( UWEMTestID int2, UWEMTestName varchar(64), UWEMTestTypeID int2, BarrierComputationID int2 ); create index UWEMTest_UWEMTestName on datastaging.UWEMTest(UWEMTestName varchar_pattern_ops); -- Source for DisabilityGroup dimension -- No sequence for this table which will be prefilled create table datastaging.DisabilityGroup( DisabilityGroupID int2, DisabilityGroup varchar(128) ); create table datastaging.DisabilityGroupRelevance_Fcui( DisabilityGroupID int2, BarrierComputationID int2, BarrierProbability real ); -- Sources for BarrierComputationVersion dimension -- No sequence for this table which will be prefilled create table datastaging.WCAGType( WCAGTypeID int2, WCAGType char(3) ); -- No sequence for this table which will be prefilled create table datastaging.WCAGMajor( WCAGMajorID int2, WCAGMajor int2, Guideline varchar(1024), GuidelineURL varchar(256), WCAGTypeID int2 ); -- No sequence for this table which will be prefilled create table datastaging.WCAGMinor( WCAGMinorID int2, WCAGMinor int2, Priority int2, Checkpoint varchar(1024), CheckpointURL varchar(256), WCAGMajorID int2 ); create table datastaging.Technique( TechniqueID int2, Technique varchar(255) ); create table datastaging.TestMode( TestModeID int2, TestMode varchar(255) ); -- No sequence for this table which will be prefilled create table datastaging.BarrierComputation( BarrierComputationID int2, BarrierComputationName varchar(256), BarrierComputationRelease int2, BarrierComputationProducer varchar(256), Iterator int2, WCAGMinorID int2, TechniqueID int2, TestModeID int2 ); create index BarrierComputation_BarrierComputationName on datastaging.BarrierComputation(BarrierComputationName varchar_pattern_ops); create sequence datastaging.BarrierComputationVersionID_seq; create table datastaging.BarrierComputationVersion( BarrierComputationVersionID int2 default nextval('datastaging.BarrierComputationVersionID_seq'), BarrierComputationVersionName varchar(256), Version int2, FalseNegativeProb real, FalsePositiveProb real, BarrierComputationID int2 ); create sequence datastaging.ETLRunID_seq; create table datastaging.ETLRun( ETLRunID int2, SourceModel varchar(256), SourceDB varchar(256), TimeUsage int4, ETLVersionID int2, DateID int2, MinuteID int2 ); create sequence datastaging.ETLVersionID_seq; create table datastaging.ETLVersion( ETLVersionID int2, Major int2, Minor int2, Build int2 ); -- Fact table create table datastaging.TestResult( Probability real, SubjectID int4, MinuteID int2, DateID int2, ResultID int2, BarrierComputationVersionID int2, ETLRunID int2, TestRunID int4 ); -- TechnologyFinding fact table create sequence datastaging.MimeTypeID_seq; create table datastaging.MimeType( MimeTypeID int2, MimeType varchar(256), MimeSubtype varchar(256) ); create table datastaging.InclusionType( InclusionTypeID int2, InclusionType varchar(256) ); create table datastaging.TechnologyFinding( MimeTypeID int2, InclusionTypeID int2, SubjectID int4, DateID int2, MinuteID int2, TestRunID int4 ); -- bridgeDataValid will check if the data sets in the bridge tables -- languageusage, resourcemediatypeusage and scenariomediatypeusage are valid. -- In other words, the following is done: -- It is tested that either a scenario or resource belongs to both -- None or unknown and at the same time also belongs to another result CREATE OR REPLACE FUNCTION datastaging.bridgeDataValid() RETURNS BOOLEAN AS $$ DECLARE data RECORD; res BOOLEAN := 't'; BEGIN FOR data IN SELECT a.LanguageID, a.ResourceVersionID FROM datastaging.LanguageUsage a JOIN datastaging.LanguageUsage b ON (a.ResourceVersionID = b.ResourceVersionID) WHERE (b.LanguageID = -1 -- for Unknown OR b.LanguageID = -2) -- for None AND a.LanguageID != b.LanguageID LOOP RAISE NOTICE 'ResourceVersion with ID % has language Unknown or None but also a language with ID %', data.ResourceVersionID, data.LanguageID; res := 'f'; END LOOP; FOR data IN SELECT a.MediaTypeID, a.ResourceVersionID FROM datastaging.ResourceMediaTypeUsage a JOIN datastaging.ResourceMediaTypeUsage b ON (a.ResourceVersionID = b.ResourceVersionID) WHERE (b.MediaTypeID = -1 -- for Unknown OR b.MediaTypeID = -2) -- for None AND a.MediaTypeID != b.MediaTypeID LOOP RAISE NOTICE 'ResourceVersion with ID % has MediaType Unknown or None but also a MediaType with ID %', data.ResourceVersionID, data.MediaTypeID; res := 'f'; END LOOP; FOR data IN SELECT a.MediaTypeID, a.ScenarioID FROM datastaging.ScenarioMediaTypeUsage a JOIN datastaging.ScenarioMediaTypeUsage b ON (a.ScenarioID = b.ScenarioID) WHERE (b.MediaTypeID = -1 -- for Unknown OR b.MediaTypeID = -2) -- for None AND a.MediaTypeID != b.MediaTypeID LOOP RAISE NOTICE 'Scenario with ID % has MediaType Unknown or None but also a MediaType with ID %', data.ScenarioID, data.MediaTypeID; res := 'f'; END LOOP; RETURN res; END; $$ LANGUAGE plpgsql; -- Returns the number of ambiguous tests, this should always be 0. -- An ambiguous test is one that has two different outcomes, this -- could be different types of failed, or two different outcomes -- altogether passed/failed/cannotTell. CREATE OR REPLACE FUNCTION datastaging.NumberOfAmbiguousTests() RETURNS INT AS $$ DECLARE result INT := -1; BEGIN SELECT count(*) INTO result FROM datastaging.TestResult as tr1, datastaging.TestResult as tr2 WHERE tr1.SubjectID=tr2.SubjectID AND tr1.BarrierComputationVersionID=tr2.BarrierComputationVersionID AND tr1.ResultID