# [Data Analysis & Data Mining] Time data in pandas
#
# jzlixiao 2019-12-29

import pandas as pd
"""
pandas默认支持的时间点类型——Timestamp
pandas默认支持的时间序列类型——DatetimeIndex
numpy默认支持的时间点数据类型——datetime64
"""

def _show(value):
    """Print *value* and then its type, using this script's usual labels."""
    print("res: \n", value)
    print("res的类型: \n", type(value))


# Date strings shared by the two sequence-conversion demos below.
date_strings = ["2019-11-11", "2019-12-12", "2020-02-14", "2020-03-07"]

# pd.to_datetime converts a single point in time to pandas' default
# time-point type.
res = pd.to_datetime("2019-11-11")
_show(res)

# Given a sequence of date strings, pd.to_datetime converts it to pandas'
# time-series type.
res = pd.to_datetime(date_strings)
_show(res)

# pd.DatetimeIndex performs the same sequence conversion; it cannot be used
# to convert a single point in time.
res = pd.DatetimeIndex(date_strings)
_show(res)

# Load the order-detail spreadsheet, then show its contents, column names
# and per-column dtypes.
detail = pd.read_excel("../day05/meal_order_detail.xlsx")
print("detail: \n", detail)
print("detail的列名称: \n", detail.columns)
print(detail.dtypes)

# Convert place_order_time to pandas' datetime type.
# The column is replaced with detail[...] instead of detail.loc[:, ...]:
# on pandas 2.x a whole-column .loc assignment writes into the existing
# column in place, so a column that was read as object dtype would keep that
# dtype and the dtypes printed below would not reflect the conversion.
detail["place_order_time"] = pd.to_datetime(detail["place_order_time"])
print(detail.dtypes)
# Extract calendar attributes from every value of place_order_time.
# Assumes each element is a pandas Timestamp (the column is passed through
# pd.to_datetime earlier in this script).
# The column is looked up once and reused by every comprehension below.
order_times = detail.loc[:, "place_order_time"]

# Year
year = [ts.year for ts in order_times]
print("year: \n", year)

# Month
month = [ts.month for ts in order_times]
print("month: \n", month)

# Day of the month
day = [ts.day for ts in order_times]
print("day: \n", day)

# Week number within the year (two equivalent spellings)
week = [ts.week for ts in order_times]
print("week: \n", week)

week_of_year = [ts.weekofyear for ts in order_times]
print("week_of_year: \n", week_of_year)

# Day number within the year
day_of_year = [ts.dayofyear for ts in order_times]
print("day_of_year: \n", day_of_year)

# Position of the day within its week
day_of_week = [ts.dayofweek for ts in order_times]
print("day_of_week: \n", day_of_week)

# Day of the week as an integer. weekday is a method on Timestamp, so it has
# to be called; without the parentheses the list would hold bound methods.
weekday = [ts.weekday() for ts in order_times]
print("weekday: \n", weekday)

# Name of the weekday. The weekday_name attribute was removed in pandas 1.0;
# day_name() is its replacement.
weekday_name = [ts.day_name() for ts in order_times]
print("weekday_name: \n", weekday_name)

# Quarter of the year
quarter = [ts.quarter for ts in order_times]
print("quarter: \n", quarter)

# Arithmetic on time data: shift a Timestamp by a Timedelta.
# Each shifted value gets its own name and is printed, so no result is
# silently overwritten before it can be inspected.
base_day = pd.to_datetime("2019-11-11")
two_days_later = base_day + pd.Timedelta(days=2)
one_week_later = base_day + pd.Timedelta(weeks=1)
one_week_earlier = base_day + pd.Timedelta(weeks=-1)
print("res: \n", two_days_later)
print("res: \n", one_week_later)
print("res: \n", one_week_earlier)

# Difference between two time points; .days gives the number of days.
res = base_day - pd.to_datetime("2002-1-8")
print("res: \n", res)
res = res.days
print("res: \n", res)
# Approximate age in years: divides by a flat 365-day year, so leap days
# are not accounted for.
res = res / 365
print("年龄: \n", res)

# pd.Timestamp.min / pd.Timestamp.max (the printed labels call these the
# machine's minimum and maximum time).
print("本机的最小时间: \n", pd.Timestamp.min)
print("本机的最大时间: \n", pd.Timestamp.max)

# pd.date_range generates time data. Parameters used below:
#   start   -- first date
#   end     -- last date
#   periods -- how many dates to generate (used here when end is not given)
#   freq    -- spacing between dates; daily by default
# At most three of start / end / periods / freq may be given at once.
# Every generated range is printed so no result is discarded unseen.
five_days = pd.date_range(start="2019-11-11", periods=5)
print(five_days)
through_end = pd.date_range(start="2019-11-11", end="2019-11-16")
print(through_end)
# One date every 36 days.
res = pd.date_range(start="2019-11-11", end="2020-11-16", freq="36D")
print(res)

# Related recommendations