Once the HDFS environment is installed, run a test command to verify that the installation succeeded:
hdfs dfs -help
Saving a MySQL table to HDFS with Spark. First, generate partition predicates so the table can be read in parallel:
// Build an array of LIKE predicates that shard a table on the first `num`
// hexadecimal characters of the column `ID`; each predicate later becomes
// one JDBC partition when the table is read in parallel.
def getAsciiPartitions(ID: String, num: Int): Array[String] = {
  var sql_partition: String = ""
  val list = List("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "a", "b", "c", "d", "e", "f")
  if (num == 1) {
    for (i <- list) {
      sql_partition = sql_partition + ID + " LIKE " + "'" + i + "%'" + "/"
    }
  }
  if (num == 2) {
    for (i <- list; j <- list) {
      sql_partition = sql_partition + ID + " LIKE " + "'" + i + j + "%'" + "/"
    }
  }
  if (num == 3) {
    for (i <- list; j <- list; x <- list) {
      sql_partition = sql_partition + ID + " LIKE " + "'" + i + j + x + "%'" + "/"
    }
  }
  if (num == 4) {
    for (i <- list; j <- list; x <- list; y <- list) {
      sql_partition = sql_partition + ID + " LIKE " + "'" + i + j + x + y + "%'" + "/"
    }
  }
  // Predicates are joined with "/" while being built, so split them back into an array.
  sql_partition.split("/")
}
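As a quick illustration (the column name USER_ID below is only a placeholder), num controls how many leading hexadecimal characters are matched, so num = 1 yields 16 predicates, num = 2 yields 256, and so on:

// Hypothetical usage: generate 16 predicates, one per leading hex character of USER_ID.
val predicates: Array[String] = getAsciiPartitions("USER_ID", 1)
// Prints: USER_ID LIKE '0%', USER_ID LIKE '1%', ..., USER_ID LIKE 'f%'
predicates.foreach(println)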
Single-table data sharding: load one table in parallel, one JDBC partition per predicate.

import java.util.Properties
import org.apache.spark.sql.DataFrame

// Read `tableName` from MySQL over JDBC, creating one partition per entry in `predicates`.
def loadDataWithTimePartion(tableName: String, predicates: Array[String]): DataFrame = {
  val spark = SparkConfig._spark
  val properties: Properties = new Properties()
  properties.setProperty("user", SparkConfig.user)
  properties.setProperty("password", SparkConfig.pwd)
  // The JDBC driver class name is case-sensitive.
  properties.setProperty("driver", "com.mysql.cj.jdbc.Driver")
  properties.setProperty("isolationLevel", "READ_UNCOMMITTED")
  // SparkConfig.host holds the JDBC URL of the MySQL instance.
  spark.read.jdbc(SparkConfig.host, tableName, predicates, properties)
}
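A minimal end-to-end sketch of how the two functions fit together, assuming SparkConfig.host points at the MySQL instance; the ID column name and the output path are examples only:

// Shard the table on the first hex character of its ID column,
// load all 16 shards in parallel over JDBC, and persist the result to HDFS.
val predicates = getAsciiPartitions("ID", 1)              // 16 LIKE predicates -> 16 JDBC partitions
val df = loadDataWithTimePartion("BCONSUMER", predicates) // one read task per predicate

// Write out as Parquet (any DataFrame writer format would do);
// the HDFS path here is only an example.
df.write
  .mode("overwrite")
  .parquet("hdfs://mycluster/estee/test20210201/spark/BCONSUMER")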
After the Spark job has written the table, pull the data from HDFS to the local filesystem; prefixing the command with time measures how long the transfer takes:
time hdfs dfs -get hdfs://mycluster/estee/test20210201/spark/BCONSUMER