티스토리 뷰
#!/bin/bash
# Builds and exports the Java CLASSPATH used to run the Text2Sequence tool.

# Directory whose entries are put on the classpath.
JAR_HOME="/data10/conf"
# Hadoop installation directory (not referenced elsewhere in the visible script).
HADOOP_HOME="/home/hadoop/hadoop"

# Append every entry found directly under JAR_HOME.
# ${CLASSPATH:+...} avoids an empty leading entry when CLASSPATH starts unset.
for f in "$JAR_HOME"/*; do
  # An unmatched glob stays literal; skip it instead of adding a bogus path.
  [[ -e "$f" ]] || continue
  CLASSPATH="${CLASSPATH:+${CLASSPATH}:}${f}"
done

# The Hadoop core jar is always appended last, as in the original script.
export CLASSPATH="${CLASSPATH:+${CLASSPATH}:}${JAR_HOME}/hadoop-0.20.2-core.jar"
#######################################
# Runs the Text2Sequence Java tool with the current CLASSPATH.
# Globals:   CLASSPATH (read)
# Arguments: forwarded unchanged to the Java program
#            (its usage string is "Text2Sequence inputFile outputFile")
# Returns:   exit status of the java process
#######################################
text2seq () {
  #export CLASSPATH=$CLASSPATH:$JAR_HOME/conf
  # "$@" keeps each argument as one word; an unquoted $* would re-split
  # paths containing whitespace and expand glob characters.
  java -cp "$CLASSPATH" net.newdaysoft.meerkat.tool.Text2Sequence "$@"
}
# Converts every file named in the list file from JSON text to a sequence file
# by calling text2seq once per entry, printing a timestamp after each one.
srcDir="/data5/sdclog/json"
dstDir="/data5/sdclog/seq"
lst=$(cat /data4/sdclog/lst1)

# Split the list on whitespace into an array. Unlike an unquoted `for x in $lst`,
# this performs no glob expansion on the entries. `read` returns non-zero at
# end of input even though the array is filled, hence the `|| true`.
files=()
read -r -d '' -a files <<< "$lst" || true

date
for file in "${files[@]}"; do
  printf '%s\n' "$file"
  # Keep going after a failed conversion, but report it instead of hiding it.
  if ! text2seq "$srcDir/$file.json" "$dstDir/$file.json.seq"; then
    printf 'text2seq failed for %s\n' "$file" >&2
  fi
  date
  echo ""
done
echo "========== End =========="
package net.newdaysoft.meerkat.tool;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.PrintStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.Text;
/**
 * Command-line tool that copies a text file, line by line, into a Hadoop
 * SequenceFile. Each record's key is the zero-based line number
 * (LongWritable) and its value is the line's text (Text). The output is
 * written with BLOCK compression.
 */
public class Text2Sequence
{
    /** Prints the expected command-line form to standard error. */
    private static void usage()
    {
        System.err.println("<Usage> Text2Sequence inputFile outputFile");
    }

    /**
     * Entry point.
     *
     * @param args exactly two elements: the input text file path and the
     *             output sequence file path; otherwise usage is printed and
     *             the process exits with status 1
     * @throws Exception if reading the input or writing the output fails
     */
    public static void main(String[] args) throws Exception
    {
        if (args.length != 2) {
            usage();
            System.exit(1);
        }
        String inFile = args[0];
        String outFile = args[1];

        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that matches the encoding of the input files.
        BufferedReader reader = new BufferedReader(new FileReader(inFile));
        try {
            Configuration conf = new Configuration();
            Path outPath = new Path(outFile);
            // The file system is resolved from the output path's URI.
            FileSystem fs = FileSystem.get(outPath.toUri(), conf);
            SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outPath, LongWritable.class, Text.class, SequenceFile.CompressionType.BLOCK);
            try {
                String line;
                // long, not int: the key type is LongWritable, and an int
                // counter would wrap negative past Integer.MAX_VALUE lines.
                long lineNum = 0L;
                while ((line = reader.readLine()) != null)
                {
                    LongWritable key = new LongWritable(lineNum++);
                    Text value = new Text(line);
                    writer.append(key, value);
                }
            } finally {
                // Close even when a read or append throws.
                writer.close();
            }
        } finally {
            reader.close();
        }
    }
}
'개발 노트 > Hive' 카테고리의 다른 글
hive json 형태 사용하기 (0) | 2014.09.15 |
---|---|
Hive CLI 옵션 사용 팁 (0) | 2013.07.15 |
- Total
- Today
- Yesterday
- grep
- ssh
- HTTPS
- 웹보안
- gz
- Strings
- 32bit
- OpenSSL
- 리터럴
- bz2
- 풀이
- X32
- Linux
- BASE64
- 압축파일
- nc
- Natas
- find
- over the wire
- java
- Bandit
- solution
- OverTheWire
- 웹보안공부
- Encode
- SSL
- tr
- 리눅스
- tar
- natas7
일 | 월 | 화 | 수 | 목 | 금 | 토 |
---|---|---|---|---|---|---|
 |  |  |  |  | 1 | 2 |
3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 11 | 12 | 13 | 14 | 15 | 16 |
17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 | 25 | 26 | 27 | 28 | 29 | 30 |