有 Java 编程相关的问题?

你可以在下面搜索框中键入要查询的问题!

SQL Spark Java拆分连接的字符串

我正在尝试使用 selectExpr() 函数,但得到的表看起来像这样:

+--------------------+--------------------+--------------------+
|              genres|              genres|             genres1|
+--------------------+--------------------+--------------------+
|Adventure|Animati...|[A, d, v, e, n, t...|[A, d, v, e, n, t...|
|Adventure|Childre...|[A, d, v, e, n, t...|[A, d, v, e, n, t...|
|      Comedy|Romance|[C, o, m, e, d, y...|[C, o, m, e, d, y...|
|Comedy|Drama|Romance|[C, o, m, e, d, y...|[C, o, m, e, d, y...|
|              Comedy|[C, o, m, e, d, y, ]|[C, o, m, e, d, y, ]|
|Action|Crime|Thri...|[A, c, t, i, o, n...|[A, c, t, i, o, n...|
|      Comedy|Romance|[C, o, m, e, d, y...|[C, o, m, e, d, y...|
|  Adventure|Children|[A, d, v, e, n, t...|[A, d, v, e, n, t...|
|              Action|[A, c, t, i, o, n, ]|[A, c, t, i, o, n, ]|
|Action|Adventure|...|[A, c, t, i, o, n...|[A, c, t, i, o, n...|
|Comedy|Drama|Romance|[C, o, m, e, d, y...|[C, o, m, e, d, y...|
|       Comedy|Horror|[C, o, m, e, d, y...|[C, o, m, e, d, y...|
|Adventure|Animati...|[A, d, v, e, n, t...|[A, d, v, e, n, t...|
|               Drama|   [D, r, a, m, a, ]|   [D, r, a, m, a, ]|
|Action|Adventure|...|[A, c, t, i, o, n...|[A, c, t, i, o, n...|
|         Crime|Drama|[C, r, i, m, e, |...|[C, r, i, m, e, |...|
|       Drama|Romance|[D, r, a, m, a, |...|[D, r, a, m, a, |...|
|              Comedy|[C, o, m, e, d, y, ]|[C, o, m, e, d, y, ]|
|              Comedy|[C, o, m, e, d, y, ]|[C, o, m, e, d, y, ]|
|Action|Comedy|Cri...|[A, c, t, i, o, n...|[A, c, t, i, o, n...|
+--------------------+--------------------+--------------------+

我的代码是:

movies.selectExpr("genres", "split (genres, '\\|') as genres","split (genres, '\\|') as genres1").show();

每个单词应保持完整(例如 [Adventure, Animation, ...]),而不是被逐个字母拆分并用逗号隔开。


共 (1) 个答案

  1. # 1 楼答案

    你的转义不够。请注意,你的代码实际传给正则引擎的是 `|`,即两个空模式的"或"(交替),因此会在每个字符之间拆分:

    scala> spark.range(1).selectExpr("split('Action|Comedy|Drama', '\\|')").show(false)
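    +-----------------------------------------------------------+
    |split(Action|Comedy|Drama, |)                              |
    +-----------------------------------------------------------+
    |[A, c, t, i, o, n, |, C, o, m, e, d, y, |, D, r, a, m, a, ]|
    +-----------------------------------------------------------+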
    

    虽然您需要:

    scala> spark.range(1).selectExpr("split('Action|Comedy|Drama', '\\\\|')").show(false)
    +------------------------------+
    |split(Action|Comedy|Drama, \|)|
    +------------------------------+
    |[Action, Comedy, Drama]       |
    +------------------------------+
    

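    这是一个细微但重要的区别:Java/Scala 字符串字面量会先消耗一层反斜杠,Spark SQL 解析器在解析 SQL 字符串字面量时(默认配置下)又会消耗一层,所以源代码中要写四个反斜杠,正则引擎最终才能收到 `\|`(字面意义的竖线)。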