<p>我就是这样做的</p>
<pre><code>sc = SparkContext('local[*]')
# Wrap the already-created SparkContext in a session to get the DataFrame reader.
spark = SparkSession(sparkContext=sc)

# Both CSV files are parsed with the same reader options.
csv_options = {'inferSchema': 'True', 'delimiter': ',', 'header': 'True'}
df2 = spark.read.options(**csv_options).csv("D:\\bop\\small_input_spark.csv")
df1 = spark.read.options(**csv_options).csv("D:\\bop\\player_points.csv")
# Bring the player/points table to the driver once and index points by name.
dictn = {row['Playername']: row['points'] for row in df1.collect()}
print(dictn)
# Store the points as strings: na.replace rejects mappings whose keys and
# values are of mixed types, and the keys here are player-name strings.
dictn = {k: str(v) for k, v in dictn.items()}
# Columns of df2 whose values are looked up in dictn and then cast to integers.
integer_type = ["captain", "v-captain", "MoM", "p1", "p2", "p3", "p4", "p5",
                "p6", "p7", "p8", "p9", "p10", "p11"]

# dictn is a dict, so it already carries the replacement values; na.replace
# ignores a separate value argument in that case, hence none is passed.
df3 = df2.na.replace(dictn, subset=integer_type)

# Cast the replaced columns to integers before doing arithmetic on them.
for c in integer_type:
    df3 = df3.withColumn(c, df3[c].cast(IntegerType()))

# Every column from the fifth one onward is summed into the total below.
numeric_col_list = df3.schema.names[4:]

# Weight the two special columns, then add all numeric columns row by row.
df3 = df3.withColumn('v-captain', col('v-captain') / 2)
df3 = df3.withColumn('MoM', col('MoM') * 2)
df3 = df3.withColumn('points', reduce(add, [col(x) for x in numeric_col_list]))
</code></pre>