2017-06-11 11 views
1

Avroの仕様では、互いにマッチする限り、書き込みスキーマと読み取りスキーマに異なるものを使用できます。さらにこの仕様では、読み取りスキーマと書き込みスキーマの差異をエイリアスで吸収できるとされています。以下のPython 2.7のコードは、これを示そうとしたものです。（タイトル：Python avroライブラリを使用するときの読み取りスキーマと書き込みスキーマ）

import uuid 
import avro.schema 
import json 
from avro.datafile import DataFileReader, DataFileWriter 
from avro.io import DatumReader, DatumWriter 


# Writer schema, kept as a plain Python dict (it is NOT parsed into an avro
# schema object anywhere in this script).
write_schema = { 
    "namespace": "example.avro", 
    "type": "record", 
    "name": "User", 
    "fields": [ 
     {"name": "name", "type": "string"}, 
     {"name": "favorite_number", "type": ["int", "null"]}, 
     {"name": "favorite_color", "type": ["string", "null"]} 
    ] 
} 
# The raw dict is handed straight to DatumWriter. The traceback reported for
# this script shows the first append() failing inside avro's validate() with
# "AttributeError: 'dict' object has no attribute 'type'".
writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(write_schema)) 
# The first record omits "favorite_color"; the second supplies all three fields.
writer.append({"name": "Alyssa", "favorite_number": 256}) 
writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) 
writer.close() 

# Reader schema, also a plain dict. It differs from write_schema only in the
# first field: it is called "first_name" and lists "name" under "aliases".
read_schema = { 
    "namespace": "example.avro", 
    "type": "record", 
    "name": "User", 
    "fields": [ 
     {"name": "first_name", "type": "string", "aliases": ["name"]}, 
     {"name": "favorite_number", "type": ["int", "null"]}, 
     {"name": "favorite_color", "type": ["string", "null"]} 
    ] 
} 

# 1. open avro and extract passport + data 
# Both schema dicts are passed to DatumReader; the reader is then closed
# without any records being read from it.
reader = DataFileReader(open("users.avro", "rb"), DatumReader(write_schema, read_schema)) 
reader.close() 

このコードを実行すると、後述のエラーメッセージが表示されます。ただし、異なるスキーマを指定せずに、次の行

reader = DataFileReader(open("users.avro", "rb"), DatumReader()) 

を使用して実行すると、正常に動作します。エラーメッセージは次のとおりです。

/Library/Frameworks/Python.framework/Versions/2.7/bin/python2.7 /Users/simonshapiro/python_beam/src/avrov_test.py 
Traceback (most recent call last): 
    File "/Users/simonshapiro/python_beam/src/avrov_test.py", line 67, in <module> 
    writer.append({"name": "Alyssa", "favorite_number": 256}) 
    File "/Library/Python/2.7/site-packages/avro/datafile.py", line 196, in append 
    self.datum_writer.write(datum, self.buffer_encoder) 
    File "/Library/Python/2.7/site-packages/avro/io.py", line 768, in write 
    if not validate(self.writers_schema, datum): 
    File "/Library/Python/2.7/site-packages/avro/io.py", line 103, in validate 
    schema_type = expected_schema.type 
AttributeError: 'dict' object has no attribute 'type' 

Process finished with exit code 1 

回答

1

もう少し調べた結果、スキーマが正しく設定されていなかったことがわかりました。次のコードは意図したとおりに動作します。

import uuid 
import avro.schema 
import json 
from avro.datafile import DataFileReader, DataFileWriter 
from avro.io import DatumReader, DatumWriter 


# Writer schema. The dict is serialised to JSON and parsed into an avro schema
# object, rather than being passed to the library as a raw dict.
write_schema = avro.schema.parse(json.dumps({
    "namespace": "example.avro",
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "name", "type": "string"},
        {"name": "favorite_number", "type": ["int", "null"]},
        {"name": "favorite_color", "type": ["string", "null"]},
    ],
}))

# The parsed schema goes to DataFileWriter (third argument); DatumWriter is
# created without arguments.
writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), write_schema)
try:
    # The first record omits "favorite_color"; the second supplies all fields.
    writer.append({"name": "Alyssa", "favorite_number": 256})
    writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"})
finally:
    # Always release the output file, even if an append() raises.
    writer.close()

# Reader schema. The first field is named "first_name", lists "name" under
# "aliases" and carries a default of "".
# NOTE(review): whether the installed avro version resolves the alias or falls
# back to the default value is not shown here -- verify the contents of
# ``users`` before relying on it.
read_schema = avro.schema.parse(json.dumps({
    "namespace": "example.avro",
    "type": "record",
    "name": "User",
    "fields": [
        {"name": "first_name", "type": "string", "default": "", "aliases": ["name"]},
        {"name": "favorite_number", "type": ["int", "null"]},
        {"name": "favorite_color", "type": ["string", "null"]},
    ],
}))

# 1. open the avro file and read every record using both schemas
reader = DataFileReader(open("users.avro", "rb"), DatumReader(write_schema, read_schema))
try:
    # Value of the "avro.schema" entry in the file's metadata (not used below).
    new_schema = reader.get_meta("avro.schema")
    # Drain the reader into a list of records.
    users = list(reader)
finally:
    # Always release the input file, even if reading raises.
    reader.close()
関連する問題