HMHYY 2020-06-28
# -*- coding: utf-8 -*-
"""Build task lines from ``update_video_task.xlsx`` and save them to text files.

Rows whose "data" column matches ``DATA`` are grouped by name; each group is
rendered as one ``"<name> <url>"`` line and written to ``zzz_list.txt`` or
``sss_list.txt`` in the current working directory, depending on the prefix of
the name.
"""
import json
from collections import defaultdict
from pathlib import Path

import pandas as pd

# Value compared (as a string) against each row's "data" column to pick the
# rows to export.
DATA = "6.28"

# name -> [(source_token, mid), ...]; refilled on every create_task_list() call.
dd = defaultdict(list)


def save_file(l, filename="sss"):
    """Write the strings in *l*, one per line, to ``<filename>_list.txt``.

    The file is created in the current working directory, encoded as UTF-8,
    and overwritten if it already exists.
    """
    file_path = Path.cwd() / f"{filename}_list.txt"
    file_path.write_text("\n".join(l), encoding="utf-8")


def excel_to_list():
    """Load sheet ``sheet1`` of ``./update_video_task.xlsx`` as a list of dicts.

    Empty cells are forward-filled from the row above and columns whose header
    starts with ``Unnamed`` are dropped.

    Returns:
        One ``{column: value}`` dict per spreadsheet row.
    """
    excel_file = "./update_video_task.xlsx"
    df = pd.read_excel(excel_file, sheet_name="sheet1")
    # DataFrame.ffill() replaces the deprecated fillna(method="ffill").
    df = df.ffill()
    df = df.loc[:, ~df.columns.str.contains("^Unnamed")]
    return df.to_dict("records")


def gen_new_list(l, data=DATA):
    """Yield a trimmed record for every row of *l* whose "data" equals *data*.

    Args:
        l: Iterable of row dicts with the keys "data", "name", "token_id"
            and "mid".
        data: String compared against ``str(row["data"])``.
            NOTE(review): presumably a date label such as "6.28" -- confirm
            against the spreadsheet.

    Yields:
        ``{"name": ..., "source_token": int(token_id), "mid": ...}``.
    """
    for row in l:
        if str(row["data"]) != data:
            continue
        yield {
            "name": row["name"],
            "source_token": int(row["token_id"]),
            "mid": row["mid"],
        }


def _build_tasks(groups):
    """Return ``(zzz_tasks, sss_tasks)`` built from name -> [(token, mid), ...].

    Each task line is ``"<name> <url>"``. Names that start with neither "zzz"
    nor "sss" are skipped.
    """
    zzz_list = []
    sss_list = []
    for name, pairs in groups.items():
        # The first row's token represents the whole group.
        item = {
            "source_token": pairs[0][0],
            "mid_list": [mid for _, mid in pairs],
        }
        url = f'http://{{"data":[{json.dumps(item)}]}}'
        task = f"{name} {url}"
        if name.startswith("zzz"):
            zzz_list.append(task)
        elif name.startswith("sss"):
            sss_list.append(task)
    return zzz_list, sss_list


def create_task_list():
    """Read the spreadsheet, group matching rows by name and save the tasks.

    Writes ``sss_list.txt`` and ``zzz_list.txt`` to the current working
    directory via :func:`save_file`.
    """
    # Start from an empty grouping so repeated calls do not duplicate entries.
    dd.clear()
    for item in gen_new_list(excel_to_list()):
        dd[item["name"]].append((item["source_token"], item["mid"]))

    zzz_list, sss_list = _build_tasks(dd)
    save_file(sss_list, filename="sss")
    save_file(zzz_list, filename="zzz")


if __name__ == "__main__":
    create_task_list()
计算的时候总共分 3 步,1 到 2 是第二组……lower:取 i,即这组数据中较小的值;higher:取 j,即这组数据中较大的值;fraction 是第三步中得到的小数部分,表示分位点在当前这组数据(i 与 j)之间 0 到 1 的相对位置。
Series 是一种类似于一维数组的对象,由一组数据以及一组与之对应的索引组成。index:索引序列,长度必须与数据的长度相同,索引值必须可哈希(pandas 允许索引值重复,但通常建议保持唯一)。如果没有传入索引参数,则默认会自动创建一个从 0 到 N-1 的整数索引(N 为数据长度)。