-
Hdfs的特點:
數據冗余,硬件容錯(3個備份)
流式的數據訪問(一次寫入,多次讀取,無法刪改,只能通過寫入到新的塊刪除舊文件)
存儲大文件 (否則namenode消耗高,頭大身小)
實用性和局限性:
適合數據批量讀寫,吞吐量高
不適合交互式應用,低延遲很難滿足
適合一次寫入多次讀取,順序讀寫
不支持多用戶并發寫相同文件
查看全部 -
Hdfs文件讀取流程:
Hdfs文件寫入流程:
查看全部 -
HDFS數據管理:
1.?數據塊副本:每個數據塊3個副本,分布在2機架3節點上(容錯性)
2.?心跳檢測:DataNode定期向NameNode發心跳消息,刷新存在感!!
?
3.?二級NameNode:NameNode定期同步元數據映像文件到二級NameNode(SecondaryNameNode),一旦故障,備胎轉正,成正宮娘娘!
查看全部 -
HDFS的文件被分成塊進行存儲,HDFS塊默認大小是64MB,塊是整個文件存儲處理的邏輯單元
HDFS中有兩類節點NameNode和DataNode
namenode是管理節點,存放文件元數據,元數據包含兩個部分
文件與數據塊的映射表
數據塊與數據節點的映射表
namenode是唯一的管理節點,里面存放大量元數據,客戶進行訪問請求,首先會到namenode查看元數據,這個文件放在哪些節點上面然后從這些節點拿數據塊,然后組裝成想要的文件
DataNode是HDFS的工作節點,存放數據塊
查看全部 -
Linux下Java程序運行:
先創建Java程序,
然后編譯:
后打包:
查看全部 -
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class WordCount {
public static class WordCountMap extends
Mapper<LongWritable, Text, Text, IntWritable> {
private final IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String line = value.toString();
StringTokenizer token = new StringTokenizer(line);
while (token.hasMoreTokens()) {
word.set(token.nextToken());
context.write(word, one);
}
}
}
public static class WordCountReduce extends
Reducer<Text, IntWritable, Text, IntWritable> {
public void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
context.write(key, new IntWritable(sum));
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
Job job = new Job(conf);
job.setJarByClass(WordCount.class);
job.setJobName("wordcount");
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(WordCountMap.class);
job.setReducerClass(WordCountReduce.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
}
查看全部 -
/*
 * Stack-smashing lecture demo #1: function() computes a pointer past its
 * own local buffers and adds 5 to the word it finds there.  The intent is
 * to patch the caller's saved return address so that main()'s `x = x + 1`
 * is skipped and "0" is printed instead of "1".
 * NOTE(review): the "+28" offset presumes one specific compiler/ABI stack
 * layout -- this is undefined behavior and will crash or do nothing on
 * most modern toolchains.  Also non-standard as written: `void main`,
 * `function` used before declaration, and printf without <stdio.h>.
 */
void main()
{
int x;
x = 0;
function(1,2,3);
x =x+ 1; /* the instruction the overflow is meant to skip */
printf("%d\n",x);
}
void function(int a, int b, int c)
{
char buffer1[15]; /* overflow springboard; buffer2 sits below it on the stack */
char buffer2[10];
int *ret;
ret = buffer1 + 28; /* presumed address of the saved return address -- ABI-specific, TODO confirm */
(*ret) += 5; /* advance return address past the skipped increment */
}查看全部 -
/*
 * Stack-smashing lecture demo #2: same idea as demo #1 with smaller
 * buffers -- function() overwrites its saved return address (offset +12
 * from buffer1, adding 8) so that main()'s `x = 1` assignment is skipped
 * and "0" is printed.
 * NOTE(review): offsets are compiler/ABI-specific and the write is
 * undefined behavior; this is a classroom illustration, not portable
 * code.  `}void` on one line is a paste artifact, kept as found.
 */
void main()
{
int x;
x = 0;
function(1,2,3);
x = 1; /* the assignment the overwrite tries to skip */
printf("%d\n",x);
}void function(int a, int b, int c)
{
char buffer1[5]; /* overflow springboard */
char buffer2[10];
int *ret;
ret = buffer1 + 12; /* presumed location of the saved return address -- TODO confirm for target ABI */
(*ret) += 8; /* jump over the `x = 1` instruction in the caller */
}查看全部 -
#include <stdio.h>

/*
 * Stack-smashing lecture demo #3 (format-string variant): instead of
 * writing through the out-of-bounds pointer directly (the commented-out
 * line), printf's %hn conversion stores the number of characters printed
 * so far (237, via the %237x padding) through the supplied pointer.
 * The goal, as in demos #1/#2, is to corrupt the saved return address so
 * main()'s `x = 1` is skipped.
 *
 * NOTE(review): fixed from the original paste -- the printf format string
 * used typographic quotes (“ ”), which does not compile; `void main` and
 * the missing <stdio.h> were also non-standard.  The "+28" offset and the
 * %hn write remain deliberately undefined behavior: this is a classroom
 * exploit illustration, not portable code.
 */
void function(int a, int b, int c)
{
    char buffer1[5];   /* overflow springboard */
    char buffer2[10];
    int *ret;
    ret = buffer1 + 28; /* presumed address of the saved return address -- ABI-specific */
    // (*ret) += 8;     /* direct-write variant, kept for comparison with demo #2 */
    /* %237x pads to 237 chars, then %hn stores that count through &ret;
       the trailing "n" is a literal character in the output. */
    printf("%237x%hnn\n", 0,
           (int *)&ret);
}

int main(void)
{
    int x;
    x = 0;
    function(1, 2, 3);
    x = 1; /* the assignment the corrupted return address tries to skip */
    printf("%d\n", x);
    return 0;
}
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Global sort of integers (one per line) via MapReduce: the mapper emits
 * each number as a key, a range partitioner sends disjoint key ranges to
 * reducers in order, and the reducer outputs (rank, number) pairs.
 *
 * Usage: Sort &lt;in&gt; &lt;out&gt;
 */
public class Sort {

    /** Mapper: parses each line as an int and emits (value, 1). */
    public static class Map extends
            Mapper<Object, Text, IntWritable, IntWritable> {
        private static IntWritable data = new IntWritable();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // Skip blank lines instead of crashing in Integer.parseInt.
            String line = value.toString().trim();
            if (line.isEmpty()) {
                return;
            }
            data.set(Integer.parseInt(line));
            context.write(data, new IntWritable(1));
        }
    }

    /**
     * Reducer: keys arrive sorted; emit (global line number, value) once
     * per occurrence.  Correct global order relies on the range partitioner.
     */
    public static class Reduce extends
            Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private static IntWritable linenum = new IntWritable(1);

        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            for (IntWritable val : values) {
                context.write(linenum, key);
                linenum = new IntWritable(linenum.get() + 1);
            }
        }
    }

    /**
     * Range partitioner: partition i receives keys in [bound*i, bound*(i+1)).
     *
     * BUG FIX: the original loop tested {@code key < bound*i && key >= bound*(i-1)}
     * and returned {@code i - 1}, so keys in the top range never matched and
     * fell through to {@code return 0} -- sending the largest keys to the
     * first partition and breaking the global sort order.
     */
    public static class Partition extends Partitioner<IntWritable, IntWritable> {
        @Override
        public int getPartition(IntWritable key, IntWritable value,
                int numPartitions) {
            int maxNumber = 65223; // assumed upper bound on input values -- TODO confirm against data
            int bound = maxNumber / numPartitions + 1;
            int keynumber = key.get();
            for (int i = 0; i < numPartitions; i++) {
                if (keynumber < bound * (i + 1)) {
                    return i;
                }
            }
            // Keys above maxNumber (if any) go to the last partition.
            return numPartitions - 1;
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args)
                .getRemainingArgs();
        if (otherArgs.length != 2) {
            // Fixed: message named the wrong program ("WordCount") and had "<int>".
            System.err.println("Usage: Sort <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "Sort");
        job.setJarByClass(Sort.class);
        job.setMapperClass(Map.class);
        job.setPartitionerClass(Partition.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
查看全部 -
3
查看全部 -
需要配置Hadoop的環境變量
查看全部 -
hdfs:
高成本
低成本
成熟生態圈
hive
查看全部 -
Hadoop
查看全部 -
大數據 是個好東西
查看全部
舉報