Wednesday, July 20, 2016

How to read a Parquet file and make a dataframe and create Hive temp table


package packagename;



import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.hive.HiveContext;
import org.apache.spark.sql.hive.thriftserver.*;



public class SparkReadParquetAndRegTempTable {


public static void main(String[] args) throws ClassNotFoundException {


SparkConf spconf = new SparkConf();

spconf.set("spark.driver.maxResultSize", "3g");

JavaSparkContext sc=null;
try
{
sc = new JavaSparkContext(spconf);

//SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc);

HiveContext sqlContext = new org.apache.spark.sql.hive.HiveContext(sc.sc());


DataFrame df = sqlContext.read().parquet("/path/parquetFolderName.parquet");


df.printSchema();


//To Query the table via beeline as Spark Hive table

df.registerTempTable("tempTable_spark");

HiveThriftServer2.startWithContext(sqlContext);

}
catch(Exception e)
{
System.out.print("Error is"+e.toString());
}



}

 


}


How to submit the job

hadoop_classpath=$(hadoop classpath)
HBASE_CLASSPATH=$(hbase classpath)

# Notes:
#  * --class must match the Java package exactly (names are case-sensitive).
#  * A repeated --conf key overrides the earlier value, so the HBase and Hadoop
#    classpaths are joined into a single spark.executor.extraClassPath entry.
#  * local[4] is quoted so the shell never treats the brackets as a glob pattern.
sudo -u userName /spark/spark-1.5.2/bin/spark-submit \
  --name tempSparkTable \
  --class packagename.SparkReadParquetAndRegTempTable \
  --master "local[4]" \
  --num-executors 8 \
  --executor-cores 8 \
  --executor-memory 8G \
  --conf "spark.executor.extraClassPath=${HBASE_CLASSPATH}:${hadoop_classpath}" \
  --conf "spark.driver.extraClassPath=${HBASE_CLASSPATH}" \
  --jars /path/projectName-0.0.1-SNAPSHOT-jar-with-dependencies.jar \
  /path/projectName-0.0.1-SNAPSHOT.jar

Make sure no other Hive Thrift server is already running on port 10000 on the machine where you run this program.


Connect via /opt/mapr/hive/hive-1.2/bin/beeline -u jdbc:hive2://serverName:10000 -n UserName

Show tables;

should list your table name in the list

tempTable_spark

Then you can run the queries against this temp table


Maven Dependencies:

<dependencies>

  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.10</artifactId>
    <version>1.5.2</version>
  </dependency>

  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive_2.10</artifactId>
    <version>1.5.2</version>
  </dependency>

  <dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>0.98.12-hadoop2</version>
  </dependency>

  <dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-common</artifactId>
    <version>0.98.12-hadoop2</version>
  </dependency>

  <dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-examples</artifactId>
    <version>0.98.12-hadoop2</version>
  </dependency>

  <dependency>
    <groupId>org.postgresql</groupId>
    <artifactId>postgresql</artifactId>
    <version>9.4.1208</version>
  </dependency>

  <!-- The Scala suffix must match the Spark artifacts (_2.10); mixing in a
       _2.11 build puts two incompatible Scala binary versions on the classpath. -->
  <dependency>
    <groupId>com.databricks</groupId>
    <artifactId>spark-csv_2.10</artifactId>
    <version>1.2.0</version>
  </dependency>

  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive-thriftserver_2.10</artifactId>
    <version>1.5.2</version>
  </dependency>

</dependencies>





How to create Spark Dataframe from (Read) PostgreSql and write processed data frame to PostgreSql/MySql

package com.packagename;


import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;

/**
 * Loads a PostgreSQL table into a Spark DataFrame through the JDBC data source,
 * prints its schema and first rows, and writes the DataFrame back to PostgreSQL.
 */
public class SparkReadFromPostgres {

    /**
     * Entry point; intended to be launched through {@code spark-submit}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {

        Map<String, String> options = new HashMap<String, String>();
        options.put("url", "jdbc:postgresql://servername:5432/dbname");
        options.put("user", "username");
        options.put("password", "<somePassword>");
        // Spark's JDBC data source documents the driver-class option key as "driver".
        options.put("driver", "org.postgresql.Driver");
        options.put("dbtable", "schema.tableName");

        SparkConf spconf = new SparkConf();
        spconf.set("spark.driver.maxResultSize", "3g");

        JavaSparkContext sc = null;
        try {
            sc = new JavaSparkContext(spconf);

            SQLContext sqlContext = new org.apache.spark.sql.SQLContext(sc);

            DataFrame jdbcDF = sqlContext.read().format("jdbc").options(options).load();
            jdbcDF.printSchema();
            jdbcDF.show(10);

            // If you want to create the table again in Postgres.
            createPostGresTable(jdbcDF, "testTable");
        } finally {
            // sc is still null when the constructor itself failed; guarding here
            // keeps a NullPointerException from masking the original error.
            if (sc != null) {
                sc.close();
            }
        }
    }

    /**
     * Writes {@code output} to the given PostgreSQL table in "overwrite" save
     * mode, using the connection settings hard-coded in this method.
     *
     * @param output            the DataFrame to persist
     * @param postgresTableName name of the target table
     */
    public static void createPostGresTable(DataFrame output, String postgresTableName) {
        String url = "jdbc:postgresql://serverName:5432/dbName";

        Properties props = new Properties();
        props.setProperty("user", "userName");
        props.setProperty("password", "password");
        // Same documented key as the read path ("driver").
        // NOTE(review): confirm the Spark version in use honours it on write.
        props.setProperty("driver", "org.postgresql.Driver");

        output.write().mode("overwrite").jdbc(url, postgresTableName, props);
    }
}

Command to Run the Spark Program

hadoop_classpath=$(hadoop classpath)
HBASE_CLASSPATH=$(hbase classpath)

# A repeated --conf key overrides the earlier value, so every classpath entry is
# joined into ONE spark.executor.extraClassPath and ONE spark.driver.extraClassPath.
POSTGRES_JAR=/sharedpath/postgresql-9.4.1208.jar

sudo -u userId /spark/spark-1.5.2/bin/spark-submit \
  --name SparkReadFromPostgres \
  --class com.packagename.SparkReadFromPostgres \
  --master yarn \
  --deploy-mode client \
  --num-executors 8 \
  --executor-cores 8 \
  --executor-memory 4G \
  --conf "spark.executor.extraClassPath=${HBASE_CLASSPATH}:${hadoop_classpath}:${POSTGRES_JAR}" \
  --conf "spark.driver.extraClassPath=${HBASE_CLASSPATH}:${POSTGRES_JAR}" \
  --jars /projectName-0.0.1-SNAPSHOT-jar-with-dependencies.jar \
  /projectName-0.0.1-SNAPSHOT.jar

pom.xml dependencies:

<dependencies>

  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.10</artifactId>
    <version>1.5.2</version>
  </dependency>

  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive_2.10</artifactId>
    <version>1.5.2</version>
  </dependency>

  <dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>0.98.12-hadoop2</version>
  </dependency>

  <dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-common</artifactId>
    <version>0.98.12-hadoop2</version>
  </dependency>

  <dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-examples</artifactId>
    <version>0.98.12-hadoop2</version>
  </dependency>

  <dependency>
    <groupId>org.postgresql</groupId>
    <artifactId>postgresql</artifactId>
    <version>9.4.1208</version>
  </dependency>

  <!-- The Scala suffix must match the Spark artifacts (_2.10); mixing in a
       _2.11 build puts two incompatible Scala binary versions on the classpath. -->
  <dependency>
    <groupId>com.databricks</groupId>
    <artifactId>spark-csv_2.10</artifactId>
    <version>1.2.0</version>
  </dependency>

  <dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive-thriftserver_2.10</artifactId>
    <version>1.5.2</version>
  </dependency>

</dependencies>


Thursday, April 25, 2013

How to view your movies in TV from your computer or mobile without connecting the Computer using cable

Hi All,

I was tired of connecting my computer to the TV with an HDMI or VGA (projector) cable every time I wanted to watch a video.

I searched online for a wireless solution: I wanted to download a movie to my computer and play it on the TV without connecting the computer to the TV. In the same way, I wanted to play YouTube, Dailymotion and other videos from my mobile.

Ok no more stories,

If you want to play your movies from your Laptop

If you want to play your movies or videos from iPhone or Android



1) You need a Streaming player. I have Roku 2.0

You can buy it from here.


http://www.roku.com/roku-products

It is available in amazon and other sites as well .


2) Obviously a TV

3) iPhone or Android or a computer (Mac or Windows)


To play video from your phone, there is an app called "Twonky". You need to install it on your Roku, and you also need to install Twonky Beam on your iPhone or Android.

The YouTube video explains the steps to install Twonky and configure it on an iPad.


I will take one more video to show how to play your videos from your computer via Roku.

There is an app called Plex; I will post about it here soon.

Saturday, September 17, 2011

How to make a cassette adapter to work / fix Auto Reverse Problem / Cure Flip-flop (to turn off Auto Reverse in Audio Player) in Toyota Audio player

Hey there


I was having a problem playing a cassette adapter in my old Toyota audio player: it kept auto-reversing, would not play either side, and ejected the cassette. I found a workaround for that. I wasted 3 adapters learning this, so I thought it would help you fix it too. To do this, please follow these steps.



Step 1. Buy a Philips cassette adapter from Walmart (it is $10 and stereo; if you buy one online for $2 or $3 it will be mono, and you will hear only the music or only the voice of a song, with an annoying noise in the background).



Step 2. You need to remove the gear from the cassette that triggers the Auto Reverse function in the Player

Step 3. Take a small screw driver and Unscrew the Cassettes Screws, Unscrew the Bigger screw using a penny.

Step 4. Remove the bottom part and carefully remove the upper part.

Step 5. Don't touch the head or the wire; just remove the gear that is connected to the rubber wheel (to see the exact gear, watch the video that I uploaded to YouTube. Note: the video won't show you all the steps; I uploaded it just to show which gear needs to be removed).

Step 6. Move the wire to the opposite side of the cassette (opposite to the default side set by the cassette) to avoid the noise. If you use the default side, it will play the music but you will hear additional noise.

step 7. Now your cassette adapter is ready to play the Songs from your iPod or Phone in Old cassette car Audio player.