Apache Flink 1.11无法通过Java Flink Streaming作业中的SQL函数DDL使用Python UDF

// Batch example: call a Python UDF that is registered through SQL function DDL.
BatchTableEnvironment tEnv = BatchTableEnvironment.create(env);

// Set the Python file option and the client-side Python executable option.
tEnv.getConfig()
    .getConfiguration()
    .setString("python.files", "/home/my/test1.py");
tEnv.getConfig()
    .getConfiguration()
    .setString("python.client.executable", "python3");

// Register 'test1.func1' as the temporary system function func1 (language: Python).
tEnv.sqlUpdate("create temporary system function func1 as 'test1.func1' language python");

// Build a one-column table ("str") from three string elements and apply func1 to it.
Table table =
    tEnv.fromDataSet(env.fromElements("1", "2", "3"))
        .as("str")
        .select("func1(str)");

// Convert the result table to a DataSet of String and collect it.
tEnv.toDataSet(table, String.class).collect();

// Streaming attempt: same Python UDF registration, but on a StreamTableEnvironment.
final StreamTableEnvironment fsTableEnv =
    StreamTableEnvironment.create(EnvironmentConfiguration.getEnv(), fsSettings);

// Set the Python file option and the client-side Python executable option.
fsTableEnv.getConfig()
    .getConfiguration()
    .setString("python.files", "/Users/jf/Desktop/flink/fca/test.py");
fsTableEnv.getConfig()
    .getConfiguration()
    .setString("python.client.executable", "/Users/jf/opt/anaconda3/bin/python");

// Register 'test.func1' as the temporary system function func1 (language: Python).
fsTableEnv.sqlUpdate("CREATE TEMPORARY SYSTEM FUNCTION func1 AS 'test.func1' LANGUAGE PYTHON");

// Build a one-column table ("str") from three literal values and apply func1 to it.
Table table =
    fsTableEnv.fromValues("1", "2", "3")
        .as("str")
        .select("func1(str)");

/* Missing line */

// Streaming attempt fed from an existing DataStream instead of literal values.
final StreamTableEnvironment fsTableEnv =
    StreamTableEnvironment.create(EnvironmentConfiguration.getEnv(), fsSettings);

// Set the Python file option and the client-side Python executable option.
fsTableEnv.getConfig()
    .getConfiguration()
    .setString("python.files", "/Users/jf/Desktop/flink/fca/test.py");
fsTableEnv.getConfig()
    .getConfiguration()
    .setString("python.client.executable", "/Users/jf/opt/anaconda3/bin/python");

// Register 'test.func1' as the temporary system function func1 (language: Python).
fsTableEnv.sqlUpdate("CREATE TEMPORARY SYSTEM FUNCTION func1 AS 'test.func1' LANGUAGE PYTHON");

// Map each stream element to its idsUmid field, apply func1 to column f0,
// and name the resulting column "umid".
final Table table =
    fsTableEnv.fromDataStream(stream_filtered.map(x->x.idsUmid))
        .select("func1(f0)")
        .as("umid");

// Concatenate the value of table.select($("umid")) into a message and print it.
System.out.println(
    "Result --> "
        + table.select($("umid"))
        + " --> End of Result");

from pyflink.table.types import DataTypes
from pyflink.table.udf import udf
from os import getcwd


@udf(input_types=[DataTypes.STRING()], result_type=DataTypes.STRING())
def func1(line):
    """Debugging UDF: echo the input and return it unchanged.

    Prints the input value and the current working directory, appends the
    input to ``test.txt`` (relative path), and returns the input as-is.
    """
    print(line)
    print(getcwd())
    # Append mode, so repeated calls accumulate in the same file.
    with open("test.txt", "a") as myfile:
        myfile.write(line)
    return line

1条回答

网友
1楼 · 发布于 2024-09-23 22:23:40

您可以尝试以下方法：
// Streaming job that calls a Python UDF registered through SQL function DDL.
final StreamTableEnvironment fsTableEnv =
    StreamTableEnvironment.create(EnvironmentConfiguration.getEnv(), fsSettings);

fsTableEnv.getConfig()
    .getConfiguration()
    .setString("python.files", "/Users/jf/Desktop/flink/fca/test.py");
fsTableEnv.getConfig()
    .getConfiguration()
    .setString("python.client.executable", "/Users/jf/opt/anaconda3/bin/python");

// You need to specify the python interpreter used to run the python udf on cluster.
// I assume this is a local program so it is the same as the "python.client.executable".
fsTableEnv.getConfig()
    .getConfiguration()
    .setString("python.executable", "/Users/jf/opt/anaconda3/bin/python");

// Register 'test.func1' as the temporary system function func1 (language: Python).
fsTableEnv.sqlUpdate("CREATE TEMPORARY SYSTEM FUNCTION func1 AS 'test.func1' LANGUAGE PYTHON");

// Map each stream element to its idsUmid field, apply func1 to column f0,
// and name the resulting column "umid".
final Table table =
    fsTableEnv.fromDataStream(stream_filtered.map(x->x.idsUmid))
        .select("func1(f0)")
        .as("umid");

// 'table.select($("umid"))' will not trigger job execution.
// You need to call the "execute()" method explicitly.
table.execute().print();

相关问题更多 >

编程相关推荐

热门问题

热门文章