2016-12-23 9 views
-1

ファイルから二重にシリアル化された(double serialized)JSONオブジェクトをpandasのデータフレームに読み込みたいです。JSONの例は以下のとおりで、これをpandasのデータフレームで読み込みます。(題:pandasのデータフレームでシリアル化されたJSONを読み取る)

{"input":"8\t140630920\t.\tC\tT\t840.948\t.","assembly_name":"GRCh37","end":140630920,"seq_region_name":"8","transcript_consequences":[{"source":"Ensembl","variant_allele":"T","cdna_end":770,"phenotypes":[{"source":"MIM_disease","end":140715299,"seq_region_name":"8","attrib_type":"Gene","external_id":612292,"strand":"-","phenotype":"BIRK-BAREL MENTAL RETARDATION DYSMORPHISM SYNDROME","type":"Gene","id":"ENSG00000169427","start":140613081},{"source":"OMIM","risk_allele":1,"end":140630920,"seq_region_name":"8","strand":"+","phenotype":"BIRK-BAREL SYNDROME","associated_gene":"KCNK9","variation_names":"rs121908332","type":"Variation","id":"rs121908332","start":140630920},{"source":"ClinVar","clinvar_clin_sig":"pathogenic","review_status":"no assertion criteria provided","risk_allele":"T","end":140630920,"seq_region_name":"8","external_id":"RCV000005007.1","associated_gene":"KCNK9","phenotype":"Birk Barel mental retardation dysmorphism syndrome","strand":"+","type":"Variation","id":"rs121908332","start":140630920}],"codons":"Ggg/Agg","protein_end":236,"strand":-1,"amino_acids":"G/R","cdna_start":770,"transcript_id":"ENST00000520439","cds_start":706,"gene_id":"ENSG00000169427","protein_start":236,"cds_end":706,"consequence_terms":["missense_variant"],"impact":"MODERATE"},{"source":"RefSeq","variant_allele":"T","cdna_end":770,"phenotypes":[{"source":"MIM_disease","end":140715299,"seq_region_name":"8","attrib_type":"Gene","external_id":612292,"strand":"-","phenotype":"BIRK-BAREL MENTAL RETARDATION DYSMORPHISM SYNDROME","type":"Gene","id":"ENSG00000169427","start":140613081},{"source":"OMIM","risk_allele":1,"end":140630920,"seq_region_name":"8","strand":"+","phenotype":"BIRK-BAREL SYNDROME","associated_gene":"KCNK9","variation_names":"rs121908332","type":"Variation","id":"rs121908332","start":140630920},{"source":"ClinVar","clinvar_clin_sig":"pathogenic","review_status":"no assertion criteria 
provided","risk_allele":"T","end":140630920,"seq_region_name":"8","external_id":"RCV000005007.1","associated_gene":"KCNK9","phenotype":"Birk Barel mental retardation dysmorphism syndrome","strand":"+","type":"Variation","id":"rs121908332","start":140630920}],"codons":"Ggg/Agg","protein_end":236,"strand":-1,"amino_acids":"G/R","cdna_start":770,"transcript_id":"NM_016601.2","cds_start":706,"gene_id":51305,"protein_start":236,"cds_end":706,"consequence_terms":["missense_variant"],"impact":"MODERATE"},{"source":"RefSeq","variant_allele":"T","cdna_end":764,"phenotypes":[{"source":"MIM_disease","end":140715299,"seq_region_name":"8","attrib_type":"Gene","external_id":612292,"strand":"-","phenotype":"BIRK-BAREL MENTAL RETARDATION DYSMORPHISM SYNDROME","type":"Gene","id":"ENSG00000169427","start":140613081},{"source":"OMIM","risk_allele":1,"end":140630920,"seq_region_name":"8","strand":"+","phenotype":"BIRK-BAREL SYNDROME","associated_gene":"KCNK9","variation_names":"rs121908332","type":"Variation","id":"rs121908332","start":140630920},{"source":"ClinVar","clinvar_clin_sig":"pathogenic","review_status":"no assertion criteria provided","risk_allele":"T","end":140630920,"seq_region_name":"8","external_id":"RCV000005007.1","associated_gene":"KCNK9","phenotype":"Birk Barel mental retardation dysmorphism syndrome","strand":"+","type":"Variation","id":"rs121908332","start":140630920}],"codons":"Ggg/Agg","protein_end":236,"strand":-1,"amino_acids":"G/R","cdna_start":764,"transcript_id":"XM_005250954.1","cds_start":706,"gene_id":51305,"protein_start":236,"cds_end":706,"consequence_terms":["missense_variant"],"impact":"MODERATE"},{"source":"Ensembl","variant_allele":"T","cdna_end":755,"phenotypes":[{"source":"MIM_disease","end":140715299,"seq_region_name":"8","attrib_type":"Gene","external_id":612292,"strand":"-","phenotype":"BIRK-BAREL MENTAL RETARDATION DYSMORPHISM 
SYNDROME","type":"Gene","id":"ENSG00000169427","start":140613081},{"source":"OMIM","risk_allele":1,"end":140630920,"seq_region_name":"8","strand":"+","phenotype":"BIRK-BAREL SYNDROME","associated_gene":"KCNK9","variation_names":"rs121908332","type":"Variation","id":"rs121908332","start":140630920},{"source":"ClinVar","clinvar_clin_sig":"pathogenic","review_status":"no assertion criteria provided","risk_allele":"T","end":140630920,"seq_region_name":"8","external_id":"RCV000005007.1","associated_gene":"KCNK9","phenotype":"Birk Barel mental retardation dysmorphism syndrome","strand":"+","type":"Variation","id":"rs121908332","start":140630920}],"codons":"Ggg/Agg","protein_end":236,"strand":-1,"amino_acids":"G/R","cdna_start":755,"transcript_id":"ENST00000522317","cds_start":706,"gene_id":"ENSG00000169427","protein_start":236,"cds_end":706,"consequence_terms":["missense_variant","NMD_transcript_variant"],"impact":"MODERATE"},{"source":"Ensembl","variant_allele":"T","cdna_end":770,"phenotypes":[{"source":"MIM_disease","end":140715299,"seq_region_name":"8","attrib_type":"Gene","external_id":612292,"strand":"-","phenotype":"BIRK-BAREL MENTAL RETARDATION DYSMORPHISM SYNDROME","type":"Gene","id":"ENSG00000169427","start":140613081},{"source":"OMIM","risk_allele":1,"end":140630920,"seq_region_name":"8","strand":"+","phenotype":"BIRK-BAREL SYNDROME","associated_gene":"KCNK9","variation_names":"rs121908332","type":"Variation","id":"rs121908332","start":140630920},{"source":"ClinVar","clinvar_clin_sig":"pathogenic","review_status":"no assertion criteria provided","risk_allele":"T","end":140630920,"seq_region_name":"8","external_id":"RCV000005007.1","associated_gene":"KCNK9","phenotype":"Birk Barel mental retardation dysmorphism 
syndrome","strand":"+","type":"Variation","id":"rs121908332","start":140630920}],"codons":"Ggg/Agg","protein_end":236,"strand":-1,"amino_acids":"G/R","cdna_start":770,"transcript_id":"ENST00000303015","cds_start":706,"gene_id":"ENSG00000169427","protein_start":236,"cds_end":706,"consequence_terms":["missense_variant"],"impact":"MODERATE"},{"gene_id":"ENSG00000169427","source":"Ensembl","distance":1672,"variant_allele":"T","phenotypes":[{"source":"MIM_disease","end":140715299,"seq_region_name":"8","attrib_type":"Gene","external_id":612292,"strand":"-","phenotype":"BIRK-BAREL MENTAL RETARDATION DYSMORPHISM SYNDROME","type":"Gene","id":"ENSG00000169427","start":140613081},{"source":"OMIM","risk_allele":1,"end":140630920,"seq_region_name":"8","strand":"+","phenotype":"BIRK-BAREL SYNDROME","associated_gene":"KCNK9","variation_names":"rs121908332","type":"Variation","id":"rs121908332","start":140630920},{"source":"ClinVar","clinvar_clin_sig":"pathogenic","review_status":"no assertion criteria provided","risk_allele":"T","end":140630920,"seq_region_name":"8","external_id":"RCV000005007.1","associated_gene":"KCNK9","phenotype":"Birk Barel mental retardation dysmorphism syndrome","strand":"+","type":"Variation","id":"rs121908332","start":140630920}],"consequence_terms":["upstream_gene_variant"],"strand":-1,"transcript_id":"ENST00000523477","impact":"MODIFIER"},{"gene_id":"ENSG00000169427","source":"Ensembl","distance":2630,"variant_allele":"T","phenotypes":[{"source":"MIM_disease","end":140715299,"seq_region_name":"8","attrib_type":"Gene","external_id":612292,"strand":"-","phenotype":"BIRK-BAREL MENTAL RETARDATION DYSMORPHISM SYNDROME","type":"Gene","id":"ENSG00000169427","start":140613081},{"source":"OMIM","risk_allele":1,"end":140630920,"seq_region_name":"8","strand":"+","phenotype":"BIRK-BAREL 
SYNDROME","associated_gene":"KCNK9","variation_names":"rs121908332","type":"Variation","id":"rs121908332","start":140630920},{"source":"ClinVar","clinvar_clin_sig":"pathogenic","review_status":"no assertion criteria provided","risk_allele":"T","end":140630920,"seq_region_name":"8","external_id":"RCV000005007.1","associated_gene":"KCNK9","phenotype":"Birk Barel mental retardation dysmorphism syndrome","strand":"+","type":"Variation","id":"rs121908332","start":140630920}],"consequence_terms":["upstream_gene_variant"],"strand":-1,"transcript_id":"ENST00000519923","impact":"MODIFIER"}],"strand":1,"id":"8_140630920_C/T","allele_string":"C/T","most_severe_consequence":"missense_variant","start":140630920} 

これを次のコードでテーブル(データフレーム)として読み込みました:

dft = pd.read_json(filename, lines = True) 

結果は以下に示すような表になりました。(画像:Table generated by pandas read-json)

しかし、私は['transcript_consequences']列から情報を抽出し、さらに['transcript_consequences']列の中にある['phenotypes']からも情報を抽出したいです。

pandasのデータフレームでこれを実現するにはどうすればよいですか?

+0

「二重にシリアル化された」とはどういう意味ですか? –

+0

@John Zwinck 処理したいJSONの例を挙げました。ありがとうございます。 – nilesh

+0

@nilesh 私の提案はうまくいきましたか?あなたの実際のデータとの違いは何ですか? – erasmortg

答えて

0

一つの方法として、ここでは次のようにできます:

>>> import pandas as pd 
>>> jsona = pd.read_json('jsona.json') #here the file is named 'jsona.json' 
>>> transcript_consequences = jsona['transcript_consequences'].apply(pd.Series) 

意図的かどうかはわかりませんが、transcript_consequences[0] は transcript_consequences[6] と同じであるように見えます。

続いて、次の操作を行うことができます:

>>> phenotypes0 = pd.DataFrame(transcript_consequences.phenotypes[0]) #and so on 

>>> isinstance(phenotypes0,pd.DataFrame) 
True 
>>> isinstance(transcript_consequences,pd.DataFrame) 
True 
>>> isinstance(jsona,pd.DataFrame) 
True 

#in order to get one dataframe, concatenate and pass the dataframes in a list, like so: 
>>> pd.concat([transcript_consequences, phenotypes0], axis=1) #with more elements (more phenotypes) add them to the list 
関連する質問