Google BigQueryのI/O APIを使用するパイプラインをApache Beamでローカル実行すると、「'module' object has no attribute 'BigqueryV2'」というエラーになります - ローカルApache Beam
Beam Python quickstartの手順どおりにVirtualenvで環境を構築しており、wordcount.pyの例は実行できます。beam.Createとbeam.ParDoを使ったカスタムパイプラインも正しく実行できます。
しかし、BigQuery I/Oを使うパイプラインは実行できません。何が間違っているのか、心当たりはありますか?
pythonスクリプトは次のとおりです。
import apache_beam as beam
from apache_beam.utils.pipeline_options import PipelineOptions
from apache_beam.io import WriteToText
class MyDoFn(beam.DoFn):
    """Pass-through DoFn that re-emits each input element unchanged."""

    def process(self, element):
        """Emit ``element`` as exactly one output element.

        Beam iterates over whatever ``process`` returns, so returning the
        bare element would split it apart (a dict would be emitted key by
        key, a string character by character). Yielding keeps the element
        whole.

        Args:
            element: One element of the input PCollection.

        Yields:
            The same ``element``, unmodified.
        """
        yield element
def run():
    """Build and run the BigQuery-to-text pipeline, blocking until it ends.

    The pipeline reads the ``name`` column via a BigQuery query, passes each
    row through ``MyDoFn`` and writes the results to ``output.txt``.
    """
    opts = {
        'project': 'gc-project-name'
    }
    p = beam.Pipeline(options=PipelineOptions(**opts))
    input_query = "SELECT name FROM `gc-project-name.dataset_name.table_name`"
    # The backtick-quoted table reference above is standard-SQL syntax, while
    # BigQuerySource defaults to legacy SQL; opt in explicitly so the query
    # is parsed with the dialect it is written in.
    (p
     | beam.io.Read(beam.io.BigQuerySource(query=input_query,
                                           use_standard_sql=True))
     | beam.ParDo(MyDoFn())
     | beam.io.WriteToText('output.txt')
    )
    result = p.run()
    # Block until the runner reports the pipeline as finished.
    result.wait_until_finish()


# Run the pipeline only when this file is executed as a script.
if __name__ == '__main__':
    run()
実行すると、次のエラーが発生します。
WARNING:root:Task failed: Traceback (most recent call last):
File "/Users/localuser/Virtualenvs/abeam/lib/python2.7/site-packages/apache_beam/runners/direct/executor.py", line 300, in __call__
result = evaluator.finish_bundle()
File "/Users/localuser/Virtualenvs/abeam/lib/python2.7/site-packages/apache_beam/runners/direct/transform_evaluator.py", line 208, in finish_bundle
with self._source.reader() as reader:
File "/Users/localuser/Virtualenvs/abeam/lib/python2.7/site-packages/apache_beam/io/gcp/bigquery.py", line 590, in __enter__
self.client = BigQueryWrapper(client=self.test_bigquery_client)
File "/Users/localuser/Virtualenvs/abeam/lib/python2.7/site-packages/apache_beam/io/gcp/bigquery.py", line 682, in __init__
self.client = client or bigquery.BigqueryV2(
AttributeError: 'module' object has no attribute 'BigqueryV2'
Traceback (most recent call last):
File "/Users/localuser/Virtualenvs/abeam/lib/python2.7/site-packages/apache_beam/runners/direct/executor.py", line 300, in __call__
result = evaluator.finish_bundle()
File "/Users/localuser/Virtualenvs/abeam/lib/python2.7/site-packages/apache_beam/runners/direct/transform_evaluator.py", line 208, in finish_bundle
with self._source.reader() as reader:
File "/Users/localuser/Virtualenvs/abeam/lib/python2.7/site-packages/apache_beam/io/gcp/bigquery.py", line 590, in __enter__
self.client = BigQueryWrapper(client=self.test_bigquery_client)
File "/Users/localuser/Virtualenvs/abeam/lib/python2.7/site-packages/apache_beam/io/gcp/bigquery.py", line 682, in __init__
self.client = client or bigquery.BigqueryV2(
AttributeError: 'module' object has no attribute 'BigqueryV2'
WARNING:root:A task failed with exception.
'module' object has no attribute 'BigqueryV2'
Traceback (most recent call last):
File "frombigquery.py", line 54, in <module>
run()
File "frombigquery.py", line 51, in run
result.wait_until_finish()
File "/Users/localuser/Virtualenvs/abeam/lib/python2.7/site-packages/apache_beam/runners/direct/direct_runner.py", line 157, in wait_until_finish
self._executor.await_completion()
File "/Users/localuser/Virtualenvs/abeam/lib/python2.7/site-packages/apache_beam/runners/direct/executor.py", line 335, in await_completion
self._executor.await_completion()
File "/Users/localuser/Virtualenvs/abeam/lib/python2.7/site-packages/apache_beam/runners/direct/executor.py", line 300, in __call__
result = evaluator.finish_bundle()
File "/Users/localuser/Virtualenvs/abeam/lib/python2.7/site-packages/apache_beam/runners/direct/transform_evaluator.py", line 208, in finish_bundle
with self._source.reader() as reader:
File "/Users/localuser/Virtualenvs/abeam/lib/python2.7/site-packages/apache_beam/io/gcp/bigquery.py", line 590, in __enter__
self.client = BigQueryWrapper(client=self.test_bigquery_client)
File "/Users/localuser/Virtualenvs/abeam/lib/python2.7/site-packages/apache_beam/io/gcp/bigquery.py", line 682, in __init__
self.client = client or bigquery.BigqueryV2(
AttributeError: 'module' object has no attribute 'BigqueryV2'
は、問題を解決しました。 – MRvaino