import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Converts a text file whose fields are separated by the multi-character
 * delimiter `!~` into Parquet, partitioned by the `input` column.
 *
 * The header is the first line that contains the text `input`; its fields
 * become the column names and every column is typed as a string.
 *
 * Usage: `PaymentFile [inputPath [outputPath]]`. Both arguments are optional
 * and fall back to the default paths below.
 *
 * An explicit `main` is used instead of `extends App`: Spark's documentation
 * warns that subclasses of `scala.App` may not work correctly, because the
 * delayed initialisation of `App` can leave fields uninitialised when they
 * are referenced from closures.
 */
object PaymentFile {

  private val DefaultInputPath = "C:\\Senthil\\SenStudy\\Scala\\Files\\multidelimiter.txt"
  private val DefaultOutputPath = "C:\\Senthil\\SenStudy\\Scala\\Files\\multidelimiter"

  /** Regular expression matching the literal field delimiter `!~`. */
  private val DelimiterRegex = "\\!\\~"

  /** Text identifying the header line; also the name of the partition column. */
  private val HeaderMarker = "input"

  /**
   * Program entry point.
   *
   * @param args optional overrides: `args(0)` is the input text file,
   *             `args(1)` is the Parquet output directory
   * @throws IllegalArgumentException if no line of the input contains the header marker
   */
  def main(args: Array[String]): Unit = {
    val inputPath = args.lift(0).getOrElse(DefaultInputPath)
    val outputPath = args.lift(1).getOrElse(DefaultOutputPath)

    implicit val spark: SparkSession = SparkSession.builder()
      .appName("PaymentFile")
      .config("spark.master", "local")
      .getOrCreate()

    try {
      // Local copies so the closures below capture plain strings only.
      val delimiter = DelimiterRegex
      val marker = HeaderMarker

      val lines = spark.sparkContext.textFile(inputPath)

      // take(1) instead of first(): an input without a header yields a clear
      // error message rather than "empty collection".
      val headerLine = lines
        .filter(_.contains(marker))
        .take(1)
        .headOption
        .getOrElse(
          throw new IllegalArgumentException(
            s"No header line containing '$marker' found in $inputPath"
          )
        )

      val columnNames = headerLine.split(delimiter)
      val columnCount = columnNames.length
      val schema = StructType(columnNames.map(name => StructField(name, StringType)).toSeq)

      val rows = lines
        // Skip blank lines and (possibly repeated) header lines only. Matching
        // the whole header line keeps data rows whose values merely contain
        // the marker text.
        .filter(line => line.trim.nonEmpty && line != headerLine)
        .map { line =>
          // String.split drops trailing empty strings, so a row ending in
          // empty fields comes back shorter than the schema; pad those fields
          // back in. Rows with too many fields are left untouched.
          Row.fromSeq(line.split(delimiter).toSeq.padTo(columnCount, ""))
        }

      spark.createDataFrame(rows, schema)
        .write
        .partitionBy(marker)
        .mode("overwrite")
        .parquet(outputPath)
    } finally {
      // Release the local Spark context even when the job fails.
      spark.stop()
    }
  }
}
Sunday, April 28, 2019
Multi-delimiter Spark
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment