import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;

// Spark configuration; application name and master are normally supplied by spark-submit.
// Raise the driver result size limit to 3g for large results returned to the driver.
SparkConf spconf = new SparkConf();
spconf.set("spark.driver.maxResultSize", "3g");

JavaSparkContext sc = null;
try {
    sc = new JavaSparkContext(spconf);
    SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc);
    // Use a HiveContext instead if the source is a Hive-managed table:
    // HiveContext sqlContext = new org.apache.spark.sql.hive.HiveContext(sc.sc());

    // Read the non-partitioned source Parquet file and print its schema.
    DataFrame df = sqlContext.read().parquet("/path/hive/warehouse/nextgen_rjr.db/source.parquet");
    df.printSchema();

    // Write the same data back as a Parquet file partitioned by col_A
    // (one col_A=<value> sub-directory per distinct value).
    // To partition by more than one column:
    // df.write().partitionBy("col_A", "col_B").parquet("/targetPath/source_partitioned.parquet");
    df.write().partitionBy("col_A").parquet("/targetPath/source_partitioned_by_single_col.parquet");

    System.out.println("Partitioned Parquet File created");
} finally {
    if (sc != null) {
        sc.close();   // guard against NPE when context creation fails
    }
}
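As a quick sanity check, something like the following (same Spark 1.x Java API, reusing the sqlContext from the snippet above) could read the partitioned output back; the value "A1" is only a hypothetical example of a col_A value. Filtering on the partition column lets Spark prune the scan to the matching col_A=<value> sub-directories.

// Minimal sketch: read the partitioned output back and filter on the partition column.
// "A1" is a hypothetical partition value used purely for illustration.
DataFrame partitioned = sqlContext.read()
        .parquet("/targetPath/source_partitioned_by_single_col.parquet");
partitioned.filter(partitioned.col("col_A").equalTo("A1")).show();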