你能帮我解决PySpark(Zeppelin)中的这个错误吗
java.net.ConnectException: Connection refused (Connection refused)
at java.net.PlainSocketImpl.socketConnect(Native Method)
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
at java.net.Socket.connect(Socket.java:589)
at org.apache.thrift.transport.TSocket.open(TSocket.java:182)
at org.apache.zeppelin.interpreter.remote.ClientFactory.create(ClientFactory.java:51)
at org.apache.zeppelin.interpreter.remote.ClientFactory.create(ClientFactory.java:37)
at org.apache.commons.pool2.BasePooledObjectFactory.makeObject(BasePooledObjectFactory.java:60)
at org.apache.commons.pool2.impl.GenericObjectPool.create(GenericObjectPool.java:861)
at org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:435)
at org.apache.commons.pool2.impl.GenericObjectPool.borrowObject(GenericObjectPool.java:363)
at org.apache.zeppelin.interpreter.remote.RemoteInterpreterProcess.getClient(RemoteInterpreterProcess.java:62)
at org.apache.zeppelin.interpreter.remote.RemoteInterpreterProcess.callRemoteFunction(RemoteInterpreterProcess.java:133)
at org.apache.zeppelin.interpreter.remote.RemoteInterpreter.open(RemoteInterpreter.java:139)
at org.apache.zeppelin.interpreter.remote.RemoteInterpreter.getFormType(RemoteInterpreter.java:299)
at org.apache.zeppelin.notebook.Paragraph.jobRun(Paragraph.java:408)
at org.apache.zeppelin.scheduler.Job.run(Job.java:188)
at org.apache.zeppelin.scheduler.RemoteScheduler$JobRunner.run(RemoteScheduler.java:315)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
我正在尝试运行此PySpark代码:
%pyspark
df = pd.read_csv('/datos/cite75_99.txt.bz2', compression='bz2', header=0, sep=',', quotechar='"')
df.show()
我在用Docker Compose构建的Zeppelin笔记本上运行这段代码
这是docker-compose.yml文件包含的内容:
version: '2'
services:
zeppelin:
build: .
ports:
- "8080:8080"
- "4040:4040"
volumes:
- ./logs:/logs
- ./notebook:/notebook
- ./datos:/datos
environment:
- ZEPPELIN_LOG_DIR=/logs
- ZEPPELIN_NOTEBOOK_DIR=/notebook
Zeppelin服务的Dockerfile如下:
FROM apache/zeppelin:0.8.2
MAINTAINER tf.pena@usc.es
USER root
ENV LANG=es_ES.UTF-8 \
JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
ENV ZEPPELIN_ADDR=0.0.0.0
RUN echo "Instalando locales" && \
apt-get -y update && \
apt-get install -y locales
RUN locale-gen $LANG &&\
update-locale LANG=$LANG
RUN echo "Instalando paquetes adicionales" && \
apt-get install -y netcat-openbsd
ENV SPARK_VERSION=2.2.0 \
HADOOP_VERSION=2.7 \
SPARK_DIR=/usr/local
ENV SPARK_HOME=${SPARK_DIR}/spark
RUN echo "Instalando Spark" && \
mkdir -p ${SPARK_DIR} && \
wget -O /tmp/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \
tar -zxf /tmp/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \
rm -rf /tmp/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz && \
mv -f spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} ${SPARK_DIR} && \
ln -s ${SPARK_DIR}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} ${SPARK_HOME}
EXPOSE 8080
EXPOSE 4040
值得一提的是,shell(%sh)代码在同一个Zeppelin笔记本上可以成功运行
期待您的意见
谢谢大家!
目前没有回答
相关问题 更多 >
编程相关推荐