1.1. Pandas分析步骤
- 载入数据
- 将 access_time 按小时(hh)分组后进行 COUNT。类似如下SQL:
SELECT DATE_FORMAT(access_time, '%H'), count(*) FROM log GROUP BY DATE_FORMAT(access_time, '%H');
1.2. 代码
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File: pd_ng_log_stat.py (the article displays it with `cat pd_ng_log_stat.py`)
"""Count access-log records per hour of day with pandas.

Every log line is parsed by ``NgLineParser``, the parsed records are loaded
into a pandas ``DataFrame``, and the rows are grouped by the hour found in
their ``access_time`` field.
"""

from ng_line_parser import NgLineParser

import pandas as pd
import socket
import struct

try:
    _STRING_TYPES = (basestring,)  # Python 2: matches both str and unicode
except NameError:
    _STRING_TYPES = (str,)  # Python 3: basestring no longer exists


class PDNgLogStat(object):
    """Load parsed access-log records into a DataFrame and aggregate them."""

    def __init__(self):
        # A single parser instance is reused for every line of every file.
        self.ng_line_parser = NgLineParser()

    def _log_line_iter(self, pathes):
        """Yield one parsed record for each line of each file in ``pathes``.

        Every line is handed to ``self.ng_line_parser.parse`` and the value
        returned by ``self.ng_line_parser.to_dict()`` is yielded.
        """
        for path in pathes:
            with open(path, 'r') as f:
                for line in f:
                    self.ng_line_parser.parse(line)
                    yield self.ng_line_parser.to_dict()

    def load_data(self, path):
        """Build ``self.df`` from the log file(s) named by ``path``.

        ``path`` may be an iterable of file paths or a single path string.
        A lone string is wrapped in a list so that it is not iterated
        character by character.
        """
        if isinstance(path, _STRING_TYPES):
            path = [path]
        self.df = pd.DataFrame(self._log_line_iter(path))

    def pv_hour(self):
        """Return the number of records for each hour of the day.

        The grouping key is derived from ``access_time``: the last
        whitespace-separated token is taken and everything before its first
        ':' is used as the hour (hh). The result is what
        ``groupby(...).agg(['count'])`` returns for the ``access_time``
        column. ``load_data`` must have been called first, because this
        method reads ``self.df``.
        """
        # Only this column is selected, so only it is counted and displayed.
        group_by_cols = ['access_time']
        # Grouping strategy: map every access_time value to its hh part.
        hour_key = self.df['access_time'].map(
            lambda x: x.split().pop().split(':')[0])
        pv_hour_grp = self.df[group_by_cols].groupby(hour_key)
        return pv_hour_grp.agg(['count'])


def main():
    """Load the sample access log and print the page views per hour."""
    file_pathes = ['www.ttmark.com.access.log']
    pd_ng_log_stat = PDNgLogStat()
    pd_ng_log_stat.load_data(file_pathes)
    # Hourly PV. A parenthesized single-argument print behaves the same on
    # Python 2 and Python 3, whereas the print statement is Python 2 only.
    print(pd_ng_log_stat.pv_hour())


if __name__ == '__main__':
    main()
运行统计和输出结果
python pd_ng_log_stat.py
            access_time
                  count
access_time
00                31539
01                34824
02                27895
03                29669
04                27742
05                26797
06                29384
07                31102
08                38257
09                43060
10                48064
11                57923
12                56413
13                57971
14                47260
15                46364
16                45721
17                48884
18                49318
19                49162
20                43641
21                42525
22                40371
23                34953
昵称: HH文章源自运维生存时间-https://www.ttlsa.com/python/python-big-data-analysis-point-time-pv-pandas/
QQ: 275258836文章源自运维生存时间-https://www.ttlsa.com/python/python-big-data-analysis-point-time-pv-pandas/
ttlsa群交流沟通(QQ群②: 6690706 QQ群③: 168085569 QQ群④: 415230207(新) 微信公众号: ttlsacom)文章源自运维生存时间-https://www.ttlsa.com/python/python-big-data-analysis-point-time-pv-pandas/
感觉本文内容不错,读后有收获?文章源自运维生存时间-https://www.ttlsa.com/python/python-big-data-analysis-point-time-pv-pandas/
逛逛衣服店,鼓励作者写出更好文章。文章源自运维生存时间-https://www.ttlsa.com/python/python-big-data-analysis-point-time-pv-pandas/ 文章源自运维生存时间-https://www.ttlsa.com/python/python-big-data-analysis-point-time-pv-pandas/

我的微信
微信公众号
扫一扫关注运维生存时间公众号,获取最新技术文章~
评论