ruby
#!/usr/bin/env ruby
STDIN.each_line do |line|
val = line
year, temp, q = val[15,4], val[87,5], val[92,1]
puts "#{year}t#{temp}" if (temp != "+9999" && q =~ /[01459]/)
end
ruby
#!/usr/bin/env ruby
last_key, max_val = nil, -1000000
STDIN.each_line do |line|
key, val = line.split("t")
if last_key && last_key != key
puts "#{last_key}t#{max_val}"
last_key, max_val = key, val.to_i
else
last_key, max_val = key, [max_val, val.to_i].max
end
end
puts "#{last_key}t#{max_val}" if last_key
% hadoop jar $HADOOP_INSTALL/contrib/streaming/hadoop-*-streaming.jar -input input/ncdc/sample.txt -output output -mapper ch02/src/main/ruby/max_temperature_map.rb -reducer ch02/src/main/ruby/max_temperature_reduce.rb
#!/usr/bin/env python
import re
import sys
for line in sys.stdin:
val = line.strip()
(year, temp, q) = (val[15:19], val[87:92], val[92:93])
if (temp != "+9999" and re.match("[01459]", q)):
print "%st%s" % (year, temp)
#!/usr/bin/env python
import sys
(last_key, max_val) = (None, -sys.maxint)
for line in sys.stdin:
(key, val) = line.strip().split("t")
if last_key and last_key != key:
print "%st%s" % (last_key, max_val)
(last_key, max_val) = (key, int(val))
else:
(last_key, max_val) = (key, max(max_val, int(val)))
if last_key:
print "%st%s" % (last_key, max_val)
% hadoop jar $HADOOP_INSTALL/contrib/streaming/hadoop-*-streaming.jar -input input/ncdc/sample.txt -output output -mapper ch02/src/main/ruby/max_temperature_map.py -reducer ch02/src/main/ruby/max_temperature_reduce.py
#!/usr/bin/env bash # NLineInputFormat gives a single line: key is offset, value is S3 URI read offset s3file # Retrieve file from S3 to local disk echo "reporter:status:Retrieving $s3file" >&2 $HADOOP_INSTALL/bin/hadoop fs -get $s3file . # Un-bzip and un-tar the local file target=`basename $s3file .tar.bz2` mkdir -p $target echo "reporter:status:Un-tarring $s3file to $target" >&2 tar jxf `basename $s3file` -C $target # Un-gzip each station file and concat into one file echo "reporter:status:Un-gzipping $target" >&2 for file in $target/*/* do gunzip -c $file >> $target.all echo "reporter:status:Processed $file" >&2 done # Put gzipped version into HDFS echo "reporter:status:Gzipping $target and putting in HDFS" >&2 gzip -c $target.all | $HADOOP_INSTALL/bin/hadoop fs -put - gz/$target.gz
% hadoop jar $HADOOP_INSTALL/contrib/streaming/hadoop-*-streaming.jar -D mapred.reduce.tasks=0 -D mapred.map.tasks.speculative.execution=false -D mapred.task.timeout=12000000 -input ncdc_files.txt -inputformat org.apache.hadoop.mapred.lib.NLineInputFormat -output output -mapper load_ncdc_map.sh -file load_ncdc_map.sh
% hadoop jar $HADOOP_INSTALL/contrib/streaming/hadoop-*-streaming.jar -input input/ncdc/all -output output -mapper "ch02/src/main/ruby/max_temperature_map.rb | sort | ch02/src/main/ruby/max_temperature_reduce.rb" -reducer ch02/src/main/ruby/max_temperature_reduce.rb -file ch02/src/main/ruby/max_temperature_map.rb -file ch02/src/main/ruby/max_temperature_reduce.rb
机械节能产品生产企业官网模板...
大气智能家居家具装修装饰类企业通用网站模板...
礼品公司网站模板
宽屏简约大气婚纱摄影影楼模板...
蓝白WAP手机综合医院类整站源码(独立后台)...苏ICP备2024110244号-2 苏公网安备32050702011978号 增值电信业务经营许可证编号:苏B2-20251499 | Copyright 2018 - 2025 源码网商城 (www.ymwmall.com) 版权所有