tt
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Load an Elasticsearch index into a Spark RDD via the elasticsearch-hadoop connector."""
import os  # kept from original: used by the JAVA_HOME line below when uncommented

from pyspark import SparkConf, SparkContext

# os.environ['JAVA_HOME'] = '/opt/usr/lib/jdk1.8.0_211'  # path where the Java bin directory lives
# conf = SparkConf().setAppName("lg").setMaster("spark://192.168.10.182:7077")  # cluster-mode alternative

# Local 4-core Spark application.
conf = SparkConf().setAppName("miniProject").setMaster("local[4]")
sc = SparkContext(conf=conf)

# Connector configuration; "es.resource" is "index/type".
# assume Elasticsearch is running on localhost defaults
con = {"es.resource": "index/type"}

# Each record arrives as (NullWritable key, LinkedMapWritable document).
rdd = sc.newAPIHadoopRDD(
    "org.elasticsearch.hadoop.mr.EsInputFormat",
    "org.apache.hadoop.io.NullWritable",
    "org.elasticsearch.hadoop.mr.LinkedMapWritable",
    conf=con,
)
總結
- 上一篇: word2vec负采样
- 下一篇: PySpark中RDD与DataFrame