Skilled in: Python, MySQL, Java
<p>Take a look at this. You can implement it with Spark SQL.</p>
<pre><code> from pyspark.sql import SparkSession
spark = SparkSession.builder \
.appName('SO')\
.getOrCreate()
sc= spark.sparkContext
df = sc.parallelize([([3]), ([4]), ([5])]).toDF(["age"])
df1 = spark.createDataFrame([(0, 10), (7, 15), (5, 10), (3, 20), (5, 35), (4, 5),]
, ['age_start', 'age_end'])
df.createTempView("table1")
df1.createTempView("table2")
spark.sql('select t1.age as age_id, count(*) as count from table1 t1 join table2 t2 on t1.age >=t2.age_start and t1.age<=t2.age_end group by t1.age order by count').show()
# + + -+
# |age_id|count|
# + + -+
# | 3| 2|
# | 4| 3|
# | 5| 5|
# + + -+
</code></pre>