擅长:python、mysql、java
<p>我在这里应用了一系列转换,并用注释说明了每一步。不过这种做法有点“取巧”(hacky)。</p>
<pre class="lang-py prettyprint-override"><code>from pyspark.sql import functions as F
# Expand each row's `content` value into one row per byte, shown as a decimal
# string. Assumes `df` has a binary `content` column -- TODO confirm with caller.
(df
    # convert bytes to hex: 0125EB8C4889
    .withColumn('content', F.hex('content'))
    # append a comma after every two hex digits: 01,25,EB,8C,48,89,
    # (raw string so `\w` reaches the regex engine without an invalid-escape warning)
    .withColumn('content', F.regexp_replace('content', r'(\w{2})', '$1,'))
    # remove the redundant trailing comma: 01,25,EB,8C,48,89
    # NOTE(review): relies on Spark treating substring position 0 like position 1
    .withColumn('content', F.expr('substring(content, 0, length(content) - 1)'))
    # split hex values by comma: [01, 25, EB, 8C, 48, 89]
    .withColumn('content', F.split('content', ','))
    # explode hex values to multiple rows (one row per byte)
    .withColumn('content', F.explode('content'))
    # convert each value from base 16 to base 10
    .withColumn('content', F.conv('content', 16, 10))
    # print up to 10 rows without truncating cell values
    .show(10, False)
)
# Output
# +-------+
# |content|
# +-------+
# |1      |
# |37     |
# |235    |
# |140    |
# |72     |
# |137    |
# +-------+
</code></pre>