drop SCHEMA eiaourlrep CASCADE; create schema eiaourlrep; grant all on schema eiaourlrep to eiaourlrep; grant all on database eiaourlrep to eiaourlrep; create table eiaourlrep.Site( -- Table containting site information siteid serial, -- Dumb ID for site to increase performance, domain varchar(1024) not null, -- Domain URL, title varchar(1024) not null, -- Title of the Site, isActive boolean default TRUE, -- Boolaen value if the site is active or not, smallSite boolean default False, -- Boolean value when a site is small or not source varchar(1024) not null, -- Name of the person added the URL, nuts varchar(11) not null, -- continent, country, nuts3 - E.g. EU-AT-AT010 nace varchar(5) not null, --categoryTop, categorySecond - E.g. 47.91 for Retail sale via mail order houses or via Internet sampleCount int2, -- Number of samples extracted for the site at the last crawl schedulingerror int2 default 0, --Number of times a site failed after rescheduling timeout int2 -- If a timeout has happened or not. ); create table eiaourlrep.inclusionexclusion( patternid serial, -- Dumb ID to increase performance patterntype varchar(1), -- + for inclusion, - for exlusion regexp text, -- actual regular expression orderid smallint, -- order of the regular expressions siteid int2-- ID of the corresponding site. ); -- Note that the rules of inclusionexlusion have to be processed starting with the most specific moving to -- the more general. Example of such a rules for a site with ID 1234 is (note that patternid is not used for anything). -- In the following example, the scope rules include everything from the site http://www.example.com/site1/ except -- http://www.example.com/site1/admin/. However, the parts of the sites under /admin/private/ and /admin/public/ should -- be included: -- patternid, patterntype, regexp,orderid,siteid -- 0, +, http://www\.example\.com/site1/admin/private/.*,0, 1234 -- 1, +, http://www\.example\.com/site1/admin/public/.*, 1, 1234 -- 2, -, http://www\.example\.com/site1/admin/.*, 2, 1234 -- 3, +, http://www\.example\.com/site1/.*, 3, 1234 alter table eiaourlrep.inclusionexclusion add primary key(patternid); alter table eiaourlrep.Site add primary key(domain); create table eiaourlrep.Page( -- Table containing page information domain character varying(1024) not null, -- Domain URL url character varying(1024) not null, -- URL of part of the page, e.g. (X)HTML or CSS. typeofpage int2 default 0, -- type of page 0 for HTMl 1 for PDF unavailable int2 default 0, -- Number of times a page is unavailable pageindex integer not null, -- Position of the page in the scenario - The order of which URLs are imported in the sceanrios timestamplastmodified double precision default 0, -- Timestamp of last-modified if any timestampdownloaded double precision not null default 0, --Timestamp of last time downloaded isdownloaded boolean default false, -- If a page is downloaded or not - deprecated testrunid character varying(8) default '0', -- ID of the testrun filename character varying(1024), --filename location (optional) id serial, -- Stupid primary key pagehash numeric not null default 0, -- hash of page, scenarioid numeric not null default 0 -- ID of scenario ); create table eiaourlrep.TypeOfPage( typeofdocument character varying(1024) not null, typeofpage int2 not null ); insert into eiaourlrep.TypeOfPage (typeofdocument,typeofpage) values('HTML',0); insert into eiaourlrep.TypeOfPage (typeofdocument,typeofpage) values('PDF',1); create table eiaourlrep.SiteExhaustiveScan( domain character varying(1024) not null, -- Domain URL testrunid character varying(8) default '0', -- ID of the testrun isexhaustive boolean default False ); create table eiaourlrep.Domains( -- Table containing domain informaion domain varchar(1024) not null, -- Domain URL alias varchar(1024) not null --Alias URL of the domain ); create index Page_domain_alias_IDX on eiaourlrep.Domains(domain); create index Page_domain_IDX on eiaourlrep.Page(domain,testrunid); create UNIQUE index page_scenriourl on eiaourlrep.Page("domain", url, testrunid, scenarioid); alter table eiaourlrep.Page add foreign key(domain) references eiaourlrep.Site(domain); grant all on TABLE eiaourlrep.Site to eiaourlrep; grant all on TABLE eiaourlrep.Page to eiaourlrep; grant all on TABLE eiaourlrep.Domains to eiaourlrep; grant all on eiaourlrep.site_siteid_seq to eiaourlrep; grant all on eiaourlrep.page_id_seq to eiaourlrep;