hive seq 파일 생성하기
#!/bin/bash
# Build the Java classpath used to launch the Text2Sequence converter.
#
# Every entry found directly under JAR_HOME is appended to whatever CLASSPATH
# was inherited from the environment, then the Hadoop core jar is appended and
# the result is exported for child processes.
JAR_HOME="/data10/conf"
HADOOP_HOME="/home/hadoop/hadoop" # not referenced in the visible part of this script

# If JAR_HOME is empty or missing, bash leaves the pattern unexpanded and the
# literal "$JAR_HOME/*" string is appended instead.
for f in "$JAR_HOME"/*; do
  CLASSPATH="${CLASSPATH}:${f}"
done

# Appended explicitly; if this jar lives in JAR_HOME the loop above has already
# added it, in which case the entry is simply listed twice.
export CLASSPATH="${CLASSPATH}:${JAR_HOME}/hadoop-0.20.2-core.jar"
#######################################
# Run the Text2Sequence Java tool with the current CLASSPATH.
# Globals:   CLASSPATH (read)
# Arguments: forwarded unchanged to the Java program
#            (its usage line is "Text2Sequence inputFile outputFile")
# Returns:   exit status of the java process
#######################################
text2seq() {
  # Disabled in the original script and kept that way:
  #export CLASSPATH=$CLASSPATH:$JAR_HOME/conf

  # "$@" keeps every argument as a single word; the previous unquoted $*
  # re-split arguments on whitespace and expanded glob characters in them.
  java -cp "$CLASSPATH" net.newdaysoft.meerkat.tool.Text2Sequence "$@"
}
# Convert every log named in the list file from JSON text to a SequenceFile.
# Each list entry is a base name: <srcDir>/<name>.json is the input and
# <dstDir>/<name>.json.seq is the output.
srcDir="/data5/sdclog/json"
dstDir="/data5/sdclog/seq"
listFile="/data4/sdclog/lst1"

# Load the whitespace-separated names into an array. This keeps the original
# tokenisation (split on spaces, tabs and newlines) but no longer glob-expands
# the entries.
lst=()
if [[ -r "$listFile" ]]; then
  # read returns non-zero on reaching end of file even though the array has
  # been filled, hence the "|| true".
  read -r -d '' -a lst < "$listFile" || true
else
  echo "cannot read list file: $listFile" >&2
fi

date
for file in "${lst[@]}"; do
  echo "$file"
  # Keep going after a failed conversion, as before, but report it on stderr
  # instead of ignoring it.
  if ! text2seq "$srcDir/$file.json" "$dstDir/$file.json.seq"; then
    echo "conversion failed: $srcDir/$file.json" >&2
  fi
  date
  echo ""
done
echo "========== End =========="
package net.newdaysoft.meerkat.tool;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.PrintStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.Text;
/**
 * Command-line tool that converts a line-oriented text file into a Hadoop
 * SequenceFile.
 *
 * <p>Each input line becomes one record: the key is the zero-based line index
 * as a {@link LongWritable} and the value is the line content (without its
 * line terminator) as a {@link Text}. The output is written with BLOCK
 * compression.
 */
public class Text2Sequence
{
    /** Prints the expected command line to standard error. */
    private static void usage()
    {
        System.err.println("<Usage> Text2Sequence inputFile outputFile");
    }

    /**
     * Entry point.
     *
     * @param args exactly two elements: the input text file path and the
     *             output SequenceFile path; any other count prints the usage
     *             line and exits with status 1
     * @throws Exception if reading the input or writing the output fails
     */
    public static void main(String[] args) throws Exception
    {
        if (args.length != 2) {
            usage();
            System.exit(1);
        }
        String inFile = args[0];
        String outFile = args[1];

        // NOTE(review): FileReader decodes with the JVM's default charset while
        // Text stores UTF-8; confirm the input encoding matches that default.
        BufferedReader reader = new BufferedReader(new FileReader(inFile));
        try {
            Configuration conf = new Configuration();
            Path outPath = new Path(outFile);
            // Resolve the file system from the output path's URI.
            FileSystem fs = FileSystem.get(outPath.toUri(), conf);
            SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outPath,
                    LongWritable.class, Text.class, SequenceFile.CompressionType.BLOCK);
            try {
                // long rather than int: the key type is LongWritable, and an int
                // counter would wrap to negative keys past Integer.MAX_VALUE lines.
                long lineNum = 0L;
                String line;
                while ((line = reader.readLine()) != null) {
                    writer.append(new LongWritable(lineNum++), new Text(line));
                }
            } finally {
                // Close even when a read or append fails.
                writer.close();
            }
        } finally {
            reader.close();
        }
    }
}