<pre><code>>>> from pyspark.sql.functions import *
#Input DataFrame
>>> df.show()
+----+---+-----+-----+-----+-------+----------+
|  id|  x|    a|    b|    c|country|     param|
+----+---+-----+-----+-----+-------+----------+
|40.0|9.0|5.284|5.047|6.405|   13.0|avg_length|
+----+---+-----+-----+-----+-------+----------+
>>> avgDF = df.groupBy(df["id"],df["x"],df["a"],df["b"],df["c"],df["country"]).pivot("param").agg(concat_ws("",collect_list(to_json(struct("id","x","a","b","c","country"))))).drop("id","x","a","b","c","country")
>>> avgDF.show(2,False)
+----------------------------------------------------------------------------+
|avg_length                                                                  |
+----------------------------------------------------------------------------+
|{"id":"40.0","x":"9.0","a":"5.284","b":"5.047","c":"6.405","country":"13.0"}|
+----------------------------------------------------------------------------+
>>> finalDF = avgDF.withColumn("value", explode(split(regexp_replace(col("avg_length"),"""[\\{ " \\}]""",""),","))).withColumn("avg_length", split(col("value"), ":")[1]).withColumn("col_names", split(col("value"), ":")[0]).drop("value")
>>> finalDF.show(10,False)
+----------+---------+
|avg_length|col_names|
+----------+---------+
|40.0      |id       |
|9.0       |x        |
|5.284     |a        |
|5.047     |b        |
|6.405     |c        |
|13.0      |country  |
+----------+---------+
#other dataframe
>>> df2.show()
+---------+------+
|col_names|dtypes|
+---------+------+
|       id|string|
|        x|   int|
|        a|string|
|        b|string|
|        c|string|
|  country|string|
+---------+------+
>>> df2.join(finalDF,"col_names").show(10,False)
+---------+------+----------+
|col_names|dtypes|avg_length|
+---------+------+----------+
|id       |string|40.0      |
|x        |int   |9.0       |
|a        |string|5.284     |
|b        |string|5.047     |
|c        |string|6.405     |
|country  |string|13.0      |
+---------+------+----------+
</code></pre>