Tuesday, March 11, 2014

Mysql script for importing OECD regpat


OECD regpat is a set plug and play tables that set on Patstat adding a batch of useful info as NUTS code, IPC, counts etc (see for more info http://www.oecd.org/sti/inno/oecdpatentdatabases.htm)

I put here a small script for importing in mysql 2 of the gived tables: ep_app_reg and ep_inv_reg giving especially regional NUTS3 code for EP inventors and applicants.

just be careful to replace YOURDIRECTORYHERE with the folder where you put the files from regpat.



-- Import REGPAT Mysql script
-- List of EPO applicants

drop table if exists EP_APP_REG;
drop table if exists EP_INV_REG;

create table  EP_APP_REG
(App_nbr varchar(13),
Appln_id varchar(11),
Pub_nbr varchar(11), -- EPO patent publication number
Person_id varchar(11), -- Surrogate key - person identifier in PATSTAT, October 2013
App_name varchar(300), -- Applicantfs name
Address varchar(500), -- Address
Reg_code varchar(10), -- NUTS3/TL3 region code
Ctry_code varchar(2), -- ISO 2 country code
Reg_share2 varchar(7), -- Multiple allocation to a region - Share . 1
App_share3 varchar(7)) -- Applicantfs share . 1
ENGINE=MyISAM
CHARACTER SET utf8 COLLATE utf8_general_ci;

create table  EP_INV_REG
(App_nbr varchar(13),
Appln_id varchar(11),
Pub_nbr varchar(11), -- EPO patent publication number
Person_id varchar(11), -- Surrogate key - person identifier in PATSTAT, October 2013
Inv_name varchar(300), -- Applicantfs name
Address varchar(500), -- Address
Reg_code varchar(10), -- NUTS3/TL3 region code
Ctry_code varchar(2), -- ISO 2 country code
Reg_share2 varchar(7), -- Multiple allocation to a region - Share . 1
Inv_share3 varchar(7)) -- Applicantfs share . 1
ENGINE=MyISAM
CHARACTER SET utf8 COLLATE utf8_general_ci;

LOAD DATA INFILE 'YOURDIRECTORYHERE/201401_EPO_App_reg.txt' INTO TABLE EP_APP_REG
FIELDS TERMINATED BY '|' OPTIONALLY ENCLOSED BY '"'
lines terminated by '\r\n'
IGNORE 1 LINES ;

LOAD DATA INFILE 'patstat/raw/201401_EPO_Inv_reg.txt' INTO TABLE EP_INV_REG
FIELDS TERMINATED BY '|' OPTIONALLY ENCLOSED BY '"'
lines terminated by '\r\n'
IGNORE 1 LINES -- fixes format data: default import = text

ALTER TABLE ep_app_reg MODIFY COLUMN Appln_id INT(11) UNSIGNED DEFAULT NULL,
 MODIFY COLUMN Person_id INT(11) UNSIGNED DEFAULT NULL,
 MODIFY COLUMN Reg_share2 DOUBLE DEFAULT NULL,
 MODIFY COLUMN App_share3 DOUBLE DEFAULT NULL;

ALTER TABLE ep_inv_reg MODIFY COLUMN Appln_id INT(11) UNSIGNED DEFAULT NULL,
 MODIFY COLUMN Person_id INT(11) UNSIGNED DEFAULT NULL,
 MODIFY COLUMN Reg_share2 DOUBLE DEFAULT NULL,
 MODIFY COLUMN Inv_share3 DOUBLE DEFAULT NULL;

No comments:

Post a Comment