# Create the directory Spark writes its event logs to, then start the
# history-server UI (reads the logs so finished jobs stay inspectable).
# -p makes this idempotent: plain `mkdir` errors out if the directory
# already exists, which breaks re-running this setup.
mkdir -p /tmp/spark-events
/usr/local/share/spark/sbin/start-history-server.sh
# Launch an interactive spark-shell with:
#   - event logging into /tmp/spark-events (picked up by the history server),
#   - Kryo serialization plus the SANSA/Ontop/Sparqlify Kryo registrators,
#   - cross joins allowed in Spark SQL,
#   - a 2000 (MiB — Spark's default unit for this key; must stay < 2048) Kryo buffer cap.
# Replace <PATH_SANSA> with the real path to the assembled SANSA jar before running.
# NOTE(review): comments cannot be placed between the backslash-continued
# lines below — a '#' line would terminate the command early.
/usr/local/share/spark/bin/spark-shell \
--conf "spark.eventLog.enabled=true" \
--conf "spark.eventLog.dir=file:///tmp/spark-events" \
--conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
--conf "spark.sql.crossJoin.enabled=true" \
--conf "spark.kryo.registrator=net.sansa_stack.rdf.spark.io.JenaKryoRegistrator,net.sansa_stack.query.spark.ontop.OntopKryoRegistrator,net.sansa_stack.query.spark.sparqlify.KryoRegistratorSparqlify" \
--conf "spark.kryoserializer.buffer.max=2000" \
--executor-memory 1G \
--num-executors 1 \
--jars <PATH_SANSA>/sansa-ml-spark_2.12-0.8.0-RC3-SNAPSHOT-jar-with-dependencies.jar
# Il faut adapter les options "executor-memory" et "num-executors" et fournir le jar assemblé (ici tp.jar).
# Submit the assembled application jar (assembly/tp.jar) with event
# logging enabled so the run appears in the history server.
# The options are collected in an array for readability; the expanded
# argv is identical to passing them inline.
spark_submit=/usr/local/share/spark/bin/spark-submit
submit_opts=(
  --conf "spark.eventLog.enabled=true"
  --conf "spark.eventLog.dir=file:///tmp/spark-events"
  --executor-memory 1G
  --num-executors 1
  --jars ./sansa-ml-spark_2.12-0.8.0-RC3-SNAPSHOT-jar-with-dependencies.jar
)
"$spark_submit" "${submit_opts[@]}" assembly/tp.jar