0

Dataflowパイプラインをストリーミングモードで実行すると、Google Cloud Storage上のパスの親ディレクトリを見つけられないという java.io.IOException が発生します(Google Cloud Dataflow が Google Storage の親ディレクトリを見つけることができません)。

gsutil ls でファイルを一覧表示でき、また同じパイプラインをバッチモードで実行した場合は正常に動作するため、アクセス権の問題ではないと思います。

何が原因でしょうか?

[WARNING] 
java.lang.reflect.InvocationTargetException 
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) 
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 
    at java.lang.reflect.Method.invoke(Method.java:498) 
    at org.codehaus.mojo.exec.ExecJavaMojo$1.run(ExecJavaMojo.java:293) 
    at java.lang.Thread.run(Thread.java:748) 
Caused by: java.lang.IllegalStateException: Failed to validate gs://my-bucket/*.txt 
--streaming 
    at com.google.cloud.dataflow.sdk.io.TextIO$Read$Bound.apply(TextIO.java:328) 
    at com.google.cloud.dataflow.sdk.io.TextIO$Read$Bound.apply(TextIO.java:213) 
    at com.google.cloud.dataflow.sdk.runners.PipelineRunner.apply(PipelineRunner.java:75) 
    at com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner.apply(DataflowPipelineRunner.java:474) 
    at com.google.cloud.dataflow.sdk.Pipeline.applyInternal(Pipeline.java:368) 
    at com.google.cloud.dataflow.sdk.Pipeline.applyTransform(Pipeline.java:275) 
    at com.google.cloud.dataflow.sdk.values.PBegin.apply(PBegin.java:47) 
    at com.google.cloud.dataflow.sdk.Pipeline.apply(Pipeline.java:157) 
    at com.google.cloud.dataflow.examples.complete.AutoComplete.main(AutoComplete.java:287) 
    ... 6 more 
Caused by: java.io.IOException: Unable to find parent directory of gs://my-bucket/*.txt 
--streaming 
    at com.google.cloud.dataflow.sdk.util.FileIOChannelFactory.match(FileIOChannelFactory.java:59) 
    at com.google.cloud.dataflow.sdk.io.TextIO$Read$Bound.apply(TextIO.java:323) 
    ... 14 more 
[INFO] ------------------------------------------------------------------------ 
[INFO] BUILD FAILURE 
[INFO] ------------------------------------------------------------------------ 
[INFO] Total time: 3.784 s 
[INFO] Finished at: 2017-10-11T10:19:05-04:00 
[INFO] Final Memory: 30M/541M 
[INFO] ------------------------------------------------------------------------ 
[ERROR] Failed to execute goal org.codehaus.mojo:exec-maven-plugin:1.4.0:java (default-cli) on project google-cloud-dataflow-java-examples-all: An exception occured while executing the Java class. null: InvocationTargetException: Failed to validate gs://my-bucket/*.txt 
[ERROR] --streaming: Unable to find parent directory of gs://my-bucket/*.txt 
[ERROR] --streaming 
[ERROR] -> [Help 1] 
org.apache.maven.lifecycle.LifecycleExecutionException: Failed to execute goal org.codehaus.mojo:exec-maven-plugin:1.4.0:java (default-cli) on project google-cloud-dataflow-java-examples-all: An exception occured while executing the Java class. null 
    at org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:212) 
    at org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:153) 
    at org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:145) 
    at org.apache.maven.lifecycle.internal.LifecycleModuleBuilder.buildProject(LifecycleModuleBuilder.java:116) 
    at org.apache.maven.lifecycle.internal.LifecycleModuleBuilder.buildProject(LifecycleModuleBuilder.java:80) 
    at org.apache.maven.lifecycle.internal.builder.singlethreaded.SingleThreadedBuilder.build(SingleThreadedBuilder.java:51) 
    at org.apache.maven.lifecycle.internal.LifecycleStarter.execute(LifecycleStarter.java:128) 
    at org.apache.maven.DefaultMaven.doExecute(DefaultMaven.java:307) 
    at org.apache.maven.DefaultMaven.doExecute(DefaultMaven.java:193) 
    at org.apache.maven.DefaultMaven.execute(DefaultMaven.java:106) 
    at org.apache.maven.cli.MavenCli.execute(MavenCli.java:863) 
    at org.apache.maven.cli.MavenCli.doMain(MavenCli.java:288) 
    at org.apache.maven.cli.MavenCli.main(MavenCli.java:199) 
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) 
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 
    at java.lang.reflect.Method.invoke(Method.java:498) 
    at org.codehaus.plexus.classworlds.launcher.Launcher.launchEnhanced(Launcher.java:289) 
    at org.codehaus.plexus.classworlds.launcher.Launcher.launch(Launcher.java:229) 
    at org.codehaus.plexus.classworlds.launcher.Launcher.mainWithExitCode(Launcher.java:415) 
    at org.codehaus.plexus.classworlds.launcher.Launcher.main(Launcher.java:356) 
Caused by: org.apache.maven.plugin.MojoExecutionException: An exception occured while executing the Java class. null 
    at org.codehaus.mojo.exec.ExecJavaMojo.execute(ExecJavaMojo.java:345) 
    at org.apache.maven.plugin.DefaultBuildPluginManager.executeMojo(DefaultBuildPluginManager.java:134) 
    at org.apache.maven.lifecycle.internal.MojoExecutor.execute(MojoExecutor.java:207) 
    ... 20 more 
Caused by: java.lang.reflect.InvocationTargetException 
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) 
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 
    at java.lang.reflect.Method.invoke(Method.java:498) 
    at org.codehaus.mojo.exec.ExecJavaMojo$1.run(ExecJavaMojo.java:293) 
    at java.lang.Thread.run(Thread.java:748) 
Caused by: java.lang.IllegalStateException: Failed to validate gs://my-bucket/*.txt 
--streaming 
    at com.google.cloud.dataflow.sdk.io.TextIO$Read$Bound.apply(TextIO.java:328) 
    at com.google.cloud.dataflow.sdk.io.TextIO$Read$Bound.apply(TextIO.java:213) 
    at com.google.cloud.dataflow.sdk.runners.PipelineRunner.apply(PipelineRunner.java:75) 
    at com.google.cloud.dataflow.sdk.runners.DataflowPipelineRunner.apply(DataflowPipelineRunner.java:474) 
    at com.google.cloud.dataflow.sdk.Pipeline.applyInternal(Pipeline.java:368) 
    at com.google.cloud.dataflow.sdk.Pipeline.applyTransform(Pipeline.java:275) 
    at com.google.cloud.dataflow.sdk.values.PBegin.apply(PBegin.java:47) 
    at com.google.cloud.dataflow.sdk.Pipeline.apply(Pipeline.java:157) 
    at com.google.cloud.dataflow.examples.complete.AutoComplete.main(AutoComplete.java:287) 
    ... 6 more 
Caused by: java.io.IOException: Unable to find parent directory of gs://my-bucket/*.txt 
--streaming 
    at com.google.cloud.dataflow.sdk.util.FileIOChannelFactory.match(FileIOChannelFactory.java:59) 
    at com.google.cloud.dataflow.sdk.io.TextIO$Read$Bound.apply(TextIO.java:323) 
    ... 14 more 
[ERROR] 
[ERROR] 
[ERROR] For more information about the errors and possible solutions, please read the following articles: 
[ERROR] [Help 1] http://cwiki.apache.org/confluence/display/MAVEN/MojoExecutionException 
+0

使用しているSDKのバージョンはどれですか? –

+0

バージョン1.9.1を使用しています。 –

回答

1

TextIOの実装では、glob(*.txt)はバケット直下ではなく、ディレクトリ内に配置する必要があります。したがって、gs://my-bucket/object/*.txt であればうまくいくはずです。具体的には、globには親オブジェクトが必要です(ここでの「親」は gs://my-bucket/object となります)。

+0

これは、SDKのバージョン2.0で修正されていると思います。 – jkff

関連する問題