2012-03-20 8 views
1

MySQL版のCatalogue of Lifeデータベース(http://www.catalogueoflife.org/services/index.html)を扱おうとしています。このデータベースには、種の学名、すなわち有効な受容名と、無効または古くなったシノニム(異名)に関する情報が含まれています。データベースの仕組みはこのPDF(http://www.catalogueoflife.org/services/Database_documentation.pdf)で説明されています。以下は、テーブルを作成してデータを投入するために使われるコードです。(質問タイトル:Catalogue of Lifeデータベース(MySQL)と種名を照合するには)

-- Create the target schema only when it is missing, so the script can be re-run
-- (the table definitions below already use DROP TABLE IF EXISTS).
-- utf8 is made the schema default to match the DEFAULT CHARSET=utf8 declared on
-- every table in this script; LOAD DATA without a CHARACTER SET clause decodes
-- its input file using the schema default (character_set_database).
CREATE DATABASE IF NOT EXISTS icol2011ac DEFAULT CHARACTER SET utf8;
USE icol2011ac;
--
-- Table `author_string`: author citation texts, one row per distinct text
-- (enforced by the UNIQUE key on `string`).
--
DROP TABLE IF EXISTS `author_string`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `author_string` (
    `id`     INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `string` VARCHAR(255)     NOT NULL COMMENT 'Name of author(s), who described the taxon or published the current combination and the year when appropriate.',
    PRIMARY KEY (`id`),
    UNIQUE KEY `string` (`string`)
) ENGINE=MyISAM AUTO_INCREMENT=79193 DEFAULT CHARSET=utf8 COMMENT='Author citations of taxa and synonyms';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `hybrid`: pairs a hybrid taxon with a parent taxon; the composite
-- primary key allows one row per (taxon_id, parent_taxon_id) combination.
--
DROP TABLE IF EXISTS `hybrid`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `hybrid` (
    `taxon_id`        INT(10) UNSIGNED NOT NULL,
    `parent_taxon_id` INT(10) UNSIGNED NOT NULL COMMENT 'References two (or three) parent taxon ids',
    PRIMARY KEY (`taxon_id`, `parent_taxon_id`),
    -- Secondary index: the primary key cannot serve lookups by parent alone.
    KEY `parent_taxon_id` (`parent_taxon_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Links to parent taxa of hybrids';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `reference`: bibliographic references (authors, year, title, free text)
-- with an optional `uri_id`; indexed on authors, year and uri_id.
--
DROP TABLE IF EXISTS `reference`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `reference` (
    `id`      INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `authors` VARCHAR(255)     DEFAULT NULL COMMENT 'Complete author string',
    `year`    VARCHAR(25)      DEFAULT NULL COMMENT 'Year(s) of publication',
    `title`   VARCHAR(255)     DEFAULT NULL COMMENT 'Title of the publication',
    `text`    TEXT             COMMENT 'Additional information pertaining to the publication',
    -- NOTE(review): the table that `uri_id` points to is not created in the visible script.
    `uri_id`  INT(10) UNSIGNED DEFAULT NULL COMMENT 'Link to downloadable version',
    PRIMARY KEY (`id`),
    KEY `authors` (`authors`),
    KEY `year` (`year`),
    KEY `uri_id` (`uri_id`)
) ENGINE=MyISAM AUTO_INCREMENT=60462 DEFAULT CHARSET=utf8 COMMENT='References used for taxa, common names and synonyms';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `reference_to_synonym`: junction table holding (reference_id, synonym_id)
-- pairs; the composite primary key keeps each pair unique.
--
DROP TABLE IF EXISTS `reference_to_synonym`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `reference_to_synonym` (
    `reference_id` INT(10) UNSIGNED NOT NULL,
    `synonym_id`   INT(10) UNSIGNED NOT NULL,
    PRIMARY KEY (`reference_id`, `synonym_id`),
    -- Secondary index: the primary key cannot serve lookups by synonym alone.
    KEY `synonym_id` (`synonym_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Links references to synonyms';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `reference_to_taxon`: junction table holding (reference_id, taxon_id)
-- pairs; the composite primary key keeps each pair unique.
--
DROP TABLE IF EXISTS `reference_to_taxon`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `reference_to_taxon` (
    `reference_id` INT(10) UNSIGNED NOT NULL,
    `taxon_id`     INT(10) UNSIGNED NOT NULL,
    PRIMARY KEY (`reference_id`, `taxon_id`),
    -- Secondary index: the primary key cannot serve lookups by taxon alone.
    KEY `taxon_id` (`taxon_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Links references to taxa';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `scientific_name_element`: the individual words that scientific names
-- are assembled from; each element text is stored once (UNIQUE key).
--
DROP TABLE IF EXISTS `scientific_name_element`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `scientific_name_element` (
    `id`           INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `name_element` VARCHAR(100)     NOT NULL COMMENT 'Basic element of a scientific name; e.g. the epithet argentatus as used in Larus argentatus argenteus',
    PRIMARY KEY (`id`),
    UNIQUE KEY `name_element` (`name_element`)
) ENGINE=MyISAM AUTO_INCREMENT=204459 DEFAULT CHARSET=utf8 COMMENT='Individual elements used to generate a scientific name';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `scientific_name_status`: lookup table of name statuses; each status
-- text is unique. Referenced by the `scientific_name_status_id` columns of
-- `synonym` and `taxon_detail` in this script.
--
DROP TABLE IF EXISTS `scientific_name_status`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `scientific_name_status` (
    `id`          TINYINT(2) UNSIGNED NOT NULL AUTO_INCREMENT,
    `name_status` VARCHAR(50)         NOT NULL COMMENT 'Name status of a taxon',
    PRIMARY KEY (`id`),
    UNIQUE KEY `name_status` (`name_status`)
) ENGINE=MyISAM AUTO_INCREMENT=6 DEFAULT CHARSET=utf8 COMMENT='Predetermined list of name statuses';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `scrutiny`: scrutiny records, each combining a parsed date, the date
-- text as originally supplied and the specialist involved. The three values
-- together must be unique.
--
DROP TABLE IF EXISTS `scrutiny`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `scrutiny` (
    `id`                     INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `scrutiny_date`          DATE             DEFAULT NULL COMMENT 'Most recent date a taxon name was verified; must parse correctly',
    `original_scrutiny_date` VARCHAR(100)     DEFAULT NULL COMMENT 'Date as used in the original database; may be incomplete',
    `specialist_id`          INT(10) UNSIGNED NOT NULL COMMENT 'Link to the specialist who examined the validity of a taxon',
    PRIMARY KEY (`id`),
    UNIQUE KEY `unique` (`scrutiny_date`, `specialist_id`, `original_scrutiny_date`),
    -- NOTE(review): `scrutiny_date` is also the leading column of `unique`, so this
    -- index overlaps it; kept because it is much narrower than the three-column key.
    KEY `scrutiny_date` (`scrutiny_date`),
    KEY `specialist_id` (`specialist_id`)
) ENGINE=MyISAM AUTO_INCREMENT=1271 DEFAULT CHARSET=utf8 COMMENT='Latest scrutiny date of a taxon';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `source_database`: descriptive metadata for each contributing source
-- database. The (name, abbreviated_name) pair must be unique.
--
DROP TABLE IF EXISTS `source_database`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `source_database` (
    `id`                    INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `name`                  VARCHAR(255)     NOT NULL COMMENT 'Full name of the source database',
    `abbreviated_name`      VARCHAR(50)      DEFAULT NULL COMMENT 'Abbreviated name of the source database',
    `group_name_in_english` VARCHAR(255)     DEFAULT NULL COMMENT 'Name in English of the group(s) treated in the database',
    `authors_and_editors`   VARCHAR(255)     DEFAULT NULL COMMENT 'Optional author(s) and editor(s) of the source database',
    `organisation`          VARCHAR(255)     DEFAULT NULL COMMENT 'Optional organisation which has compiled or is owning the source database',
    `contact_person`        VARCHAR(255)     DEFAULT NULL COMMENT 'Optional contact person of the source database',
    `version`               VARCHAR(25)      DEFAULT NULL COMMENT 'Optional version number of the source database',
    `release_date`          DATE             DEFAULT NULL COMMENT 'Optional most recent release date of the source database',
    `abstract`              TEXT             COMMENT 'Optional free text field describing the source database',
    -- Column disabled in the original script and left out of the table:
    --     `taxonomic_coverage` text,
    PRIMARY KEY (`id`),
    UNIQUE KEY `name` (`name`, `abbreviated_name`)
) ENGINE=MyISAM AUTO_INCREMENT=79 DEFAULT CHARSET=utf8 COMMENT='Information about source databases';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `specialist`: specialist names, each stored once (UNIQUE key on `name`).
-- Referenced by `scrutiny`.`specialist_id` in this script.
--
DROP TABLE IF EXISTS `specialist`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `specialist` (
    `id`   INT(10) UNSIGNED NOT NULL AUTO_INCREMENT,
    `name` VARCHAR(100)     NOT NULL,
    PRIMARY KEY (`id`),
    UNIQUE KEY `name` (`name`)
) ENGINE=MyISAM AUTO_INCREMENT=182 DEFAULT CHARSET=utf8 COMMENT='Specialists who have verified the validity of taxa';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `synonym`: one row per synonym, pointing at the valid taxon it belongs
-- to, its (optional) author citation and its name status.
--
DROP TABLE IF EXISTS `synonym`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `synonym` (
    `id`                        INT(10) UNSIGNED    NOT NULL AUTO_INCREMENT,
    `taxon_id`                  INT(10) UNSIGNED    NOT NULL COMMENT 'Link to valid taxon to which the synonym relates',
    `author_string_id`          INT(10) UNSIGNED    DEFAULT NULL COMMENT 'Link to author citation of the synonym',
    `scientific_name_status_id` TINYINT(2) UNSIGNED NOT NULL COMMENT 'Link to the name status of the synonym',
    -- NOTE(review): presumably the identifier used by the source database — confirm.
    `original_id`               VARCHAR(100)        DEFAULT NULL,
    PRIMARY KEY (`id`),
    KEY `taxon_id` (`taxon_id`),
    KEY `author_string_id` (`author_string_id`),
    KEY `scientific_name_status_id` (`scientific_name_status_id`)
) ENGINE=MyISAM AUTO_INCREMENT=7618428 DEFAULT CHARSET=utf8 COMMENT='Synonym details linked to a valid taxon';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `synonym_name_element`: the name elements that make up a synonym.
-- Each row ties one `scientific_name_element` to a synonym at a given
-- taxonomic rank; a synonym can have at most one element per rank
-- (UNIQUE key on taxonomic_rank_id + synonym_id).
--
DROP TABLE IF EXISTS `synonym_name_element`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client  = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `synonym_name_element` (
    `taxonomic_rank_id` tinyint(3) UNSIGNED NOT NULL,
    `scientific_name_element_id` int(10) UNSIGNED NOT NULL,
    `synonym_id` int(10) UNSIGNED NOT NULL,
    `hybrid_order` tinyint(1) UNSIGNED DEFAULT NULL COMMENT 'Order of parents if synonym is a hybrid; see documentation for details',
    UNIQUE KEY `unique` (`taxonomic_rank_id`,`synonym_id`),
    -- No separate single-column index on `taxonomic_rank_id`: it is the leftmost
    -- column of `unique`, which already serves lookups by rank alone. The
    -- duplicate index only added disk usage and load time.
    KEY `scientific_name_element_id` (`scientific_name_element_id`),
    KEY `synonym_id` (`synonym_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Name elements of a complete synonym';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `taxon`: one row per taxon, carrying its taxonomic rank and an
-- optional source database.
--
DROP TABLE IF EXISTS `taxon`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `taxon` (
    `id`                 INT(10) UNSIGNED    NOT NULL AUTO_INCREMENT,
    `taxonomic_rank_id`  TINYINT(3) UNSIGNED NOT NULL,
    `source_database_id` INT(10) UNSIGNED    DEFAULT NULL,
    -- NOTE(review): presumably the identifier used by the source database — confirm.
    `original_id`        VARCHAR(100)        DEFAULT NULL,
    PRIMARY KEY (`id`),
    KEY `taxonomic_rank_id` (`taxonomic_rank_id`),
    KEY `source_database_id` (`source_database_id`)
) ENGINE=MyISAM AUTO_INCREMENT=7618427 DEFAULT CHARSET=utf8 COMMENT='Scientific name elements and hierarchy of a taxon';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `taxonomic_coverage`: rows combining a source database, a taxon, a
-- sector number and a `point_of_attachment` flag (default 0). The table has no
-- primary or unique key; all three id/sector columns are indexed individually.
--
-- NOTE(review): `source_database_id` and `taxon_id` are signed INT here, while
-- `source_database`.`id` and `taxon`.`id` are INT UNSIGNED — confirm whether the
-- mismatch is intentional before relying on these columns in joins.
--
DROP TABLE IF EXISTS `taxonomic_coverage`;
CREATE TABLE `taxonomic_coverage` (
    `source_database_id`  INT(10)    NOT NULL,
    `taxon_id`            INT(10)    NOT NULL,
    `sector`              TINYINT(2) NOT NULL,
    `point_of_attachment` TINYINT(1) NOT NULL DEFAULT '0',
    KEY `source_database_id` (`source_database_id`),
    KEY `sector` (`sector`),
    KEY `taxon_id` (`taxon_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
--
-- Table `taxon_detail`: at most one detail row per taxon (primary key is
-- `taxon_id`), holding its author citation, name status, scrutiny record and
-- free-text notes.
--
DROP TABLE IF EXISTS `taxon_detail`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `taxon_detail` (
    `taxon_id`                  INT(10) UNSIGNED    NOT NULL,
    `author_string_id`          INT(10) UNSIGNED    DEFAULT NULL COMMENT 'Link to author citation of the taxon',
    `scientific_name_status_id` TINYINT(2) UNSIGNED NOT NULL,
    `scrutiny_id`               INT(10) UNSIGNED    DEFAULT NULL,
    `additional_data`           TEXT                COMMENT 'Optional free text field describing the taxon',
    PRIMARY KEY (`taxon_id`),
    KEY `author_string_id` (`author_string_id`),
    -- NOTE(review): the index name is misspelled ("taxononomic") and does not match
    -- the column it covers; left unchanged so the schema matches the original dump.
    KEY `taxononomic_status_id` (`scientific_name_status_id`),
    KEY `scrutiny_id` (`scrutiny_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='Details pertaining to species and infraspecies';
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `taxon_name_element`: assigns exactly one scientific name element to a
-- taxon (primary key is `taxon_id`) together with an optional `parent_id`.
-- NOTE(review): `parent_id` presumably refers to the parent taxon in the
-- hierarchy — confirm against the database documentation.
--
DROP TABLE IF EXISTS `taxon_name_element`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `taxon_name_element` (
    `taxon_id`                   INT(10) UNSIGNED NOT NULL,
    `scientific_name_element_id` INT(10) UNSIGNED NOT NULL,
    `parent_id`                  INT(10) UNSIGNED DEFAULT NULL,
    PRIMARY KEY (`taxon_id`),
    KEY `scientific_name_element_id` (`scientific_name_element_id`),
    KEY `parent_id` (`parent_id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table `taxonomic_rank`: lookup table of rank names, each stored once, with an
-- optional display marker and a `standard` flag (default 0).
--
DROP TABLE IF EXISTS `taxonomic_rank`;
-- Save the client character set, use utf8 while the table is defined, restore afterwards.
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `taxonomic_rank` (
    `id`               TINYINT(3) UNSIGNED NOT NULL AUTO_INCREMENT,
    -- Keep the backticks around `rank`: RANK is a reserved word in MySQL 8.0.
    `rank`             VARCHAR(50)         NOT NULL COMMENT 'Taxonomic rank (e.g. family, subspecies)',
    `marker_displayed` VARCHAR(50)         DEFAULT NULL,
    `standard`         TINYINT(1)          NOT NULL DEFAULT '0',
    PRIMARY KEY (`id`),
    UNIQUE KEY `rank` (`rank`)
) ENGINE=MyISAM AUTO_INCREMENT=132 DEFAULT CHARSET=utf8 COMMENT='Predetermined list of taxonomic ranks';
/*!40101 SET character_set_client = @saved_cs_client */;

-- Restore the session settings held in the @OLD_* user variables.
-- The variable assignments had been mangled into "[email protected]_..." by
-- e-mail obfuscation, which is not valid SQL.
-- NOTE(review): the statements that fill the @OLD_* variables are not part of the
-- visible script (mysqldump emits them in its header) — confirm they run first.
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;

-- Dump completed on 2010-12-16 15:47:12 

-- Quick fix so that non-ISO countries and languages can be stored in the ISO tables:
-- `language` and `country` gain a `standard` column (default 1), and the country
-- code columns in `country` and `common_name` are defined as CHAR(3).
-- NOTE(review): `language`, `country` and `common_name` are not created in the
-- visible part of this script — confirm they exist before these statements run.

ALTER TABLE `language`
    ADD COLUMN `standard` TINYINT(1) NOT NULL DEFAULT '1';
ALTER TABLE `country`
    ADD COLUMN `standard` TINYINT(1) NOT NULL DEFAULT '1';
-- MODIFY COLUMN redefines the column in place; the column name stays the same.
ALTER TABLE `country`
    MODIFY COLUMN `iso` CHAR(3) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT 'ISO 3166-1-Alpha-2 code';
ALTER TABLE `common_name`
    MODIFY COLUMN `country_iso` CHAR(3) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT 'Optional country code if usage is restricted to a particular country';


#PART2: importing files into MySQL

-- Each tab-separated dump file is loaded into the table of the same name.
--
-- CHARACTER SET utf8 is stated explicitly: without it the server decodes the file
-- using character_set_database (the schema default), not the table's character
-- set, so non-ASCII text would be corrupted whenever the schema default is not utf8.
-- NOTE(review): assumes the .txt files are UTF-8 encoded, as the utf8 table
-- definitions suggest — confirm against the downloaded dump.
-- NOTE(review): lines are split on '\n' only; if the files use Windows line
-- endings, the last field of each row will carry a trailing '\r' — confirm.
-- NOTE(review): `region` is loaded below but is not created in the visible part
-- of this script — confirm the table exists.

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/author_string.txt'
    INTO TABLE author_string
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/hybrid.txt'
    INTO TABLE hybrid
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/reference.txt'
    INTO TABLE reference
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/reference_to_synonym.txt'
    INTO TABLE reference_to_synonym
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/reference_to_taxon.txt'
    INTO TABLE reference_to_taxon
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/region.txt'
    INTO TABLE region
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/scientific_name_element.txt'
    INTO TABLE scientific_name_element
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/scientific_name_status.txt'
    INTO TABLE scientific_name_status
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/scrutiny.txt'
    INTO TABLE scrutiny
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/source_database.txt'
    INTO TABLE source_database
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/specialist.txt'
    INTO TABLE specialist
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/synonym.txt'
    INTO TABLE synonym
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/synonym_name_element.txt'
    INTO TABLE synonym_name_element
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/taxon.txt'
    INTO TABLE taxon
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/taxon_detail.txt'
    INTO TABLE taxon_detail
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/taxon_name_element.txt'
    INTO TABLE taxon_name_element
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

LOAD DATA INFILE 'C:/Quasi Eigene Dateien/Catalogue of Life/2011AC_baseschema_dump/taxonomic_rank.txt'
    INTO TABLE taxonomic_rank
    CHARACTER SET utf8
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';

さらに、博物館標本の長いテーブルがあり、そこに含まれる学名(有効な名前と無効な名前の両方)をCatalogue of Lifeデータベースと照合する必要があります。つまり、私のテーブルの各名前について、有効な名前かどうかを確認し、無効なシノニムであれば現在有効な名前を割り当てる必要があります。

博物館標本テーブルの学名は次の形式です:属名 種小名 亜種小名 著者/年(例:Bellis perennis - L.)。各行には属と種の情報が必ず含まれますが、亜種および著者/年の情報は必ずしも与えられているとは限りません。

この分類学的な照合に必要なクエリを組み立てるのは、私には非常に困難です。1つのテーブルにある最大3つの名前要素(属名要素、種名要素、場合によっては亜種名要素)を、他のテーブルにあるtaxon id、taxonomic rank、taxon name statusの情報と組み合わせて、有効な名前とシノニムを組み立てる必要があるためです。

私の博物館の標本は、属名、種名、時には亜種名および/または著者がそれぞれ異なる列に格納された1つの大きなテーブルに格納されています。

それに応じてクエリコードを構造化するソリューションは、非常に高く評価されます。これまでに時間をかけてくれてありがとう!

答えて

0

私なら、この問題をMySQLのクエリだけで解決しようとはしません。手続き型プログラミング言語を使って、あなたが述べている複雑さに対応できるプログラムを書きます。あなた(または周りの人)が慣れているものに応じて、Perl、Python、PHPのいずれかでコードを書くとよいでしょう。

そのプログラムは、博物館テーブルの各行を順に処理し、1行につき何通りもの方法でCatalogue of Lifeとの照合を試みることができます。複数の行に一致して、亜種を加えて曖昧さを解消する必要がある場合もあれば、完全一致は見つからず部分一致しか得られない場合もあるでしょう。プログラムでこれらのケースをそれぞれ網羅的に処理し、最も適切な出力を返すようにします。

関連する問題