# zhangpan 2020-09-10
# -*- coding: utf-8 -*-
"""Submit the URLs listed in a site's sitemaps to search-engine push endpoints.

The script downloads a sitemap index, walks every sitemap it lists, and posts
each URL that has not been recorded in the local history file to the
configured endpoint.  Successfully pushed URLs are appended to a per-engine
history file (``push_baidu.txt``, ``push_shenma.txt``) so later runs skip them.
"""
import json
import os
import re
import time
from time import sleep
from urllib import parse

import requests
import schedule

# Matches the content of every <loc> element in a sitemap / sitemap index.
_LOC_PATTERN = re.compile('<loc>(.*?)</loc>', re.S)

# Seconds to wait for any single HTTP request before giving up, so that one
# unresponsive server cannot block the whole run indefinitely.
REQUEST_TIMEOUT = 30

urlSitemap = ''  # Your sitemap url, like 'http://blog.kxrr.us/index.php/sitemap'
urlPost = ''  # Your Baidu API, like 'http://data.zz.baidu.com/urls?site=blog.kxrr.us&token=xxxxxxxxxxxx'
push_baidu = 'http://data.zz.baidu.com/urls?site=meishih.com&token='
# Push endpoints for Shenma and Sogou.  They were referenced but never defined;
# fill them in to enable postShenma() / postSougou().
push_shenma = ''
push_sogou = ''


def _read_history(history):
    """Return the set of URLs recorded in an open history file.

    The file is expected to be opened in ``a+`` mode, which positions the
    cursor at the end; rewind first, otherwise ``read()`` returns nothing.
    URLs are compared as whole lines so that a URL which is merely a prefix
    of an already-pushed URL is not mistaken for a pushed one.
    """
    history.seek(0, 0)
    content = history.read()
    print("content:" + content)
    return {line.strip() for line in content.splitlines() if line.strip()}


class Pusher(object):
    """Collects URLs from a sitemap and pushes them to search engines."""

    def __init__(self):
        print('init_')
        # URLs of the most recently fetched sitemap; filled by getSitemap().
        self.urls = []

    def getSitemapIndex(self, sitemapIndexUrl):
        """Download a sitemap index and return the list of its <loc> values.

        Raises whatever ``requests.get`` raises on network failure.
        """
        print('getSitemapIndex:' + sitemapIndexUrl)
        result = requests.get(sitemapIndexUrl, timeout=REQUEST_TIMEOUT)
        print(result)
        sitemapUrls = _LOC_PATTERN.findall(result.content.decode('utf-8'))
        print(sitemapUrls)
        return sitemapUrls

    def getSitemap(self, sitemapUrl):
        """Download one sitemap and store its <loc> values in ``self.urls``."""
        print("getSitemap:" + sitemapUrl)
        result = requests.get(sitemapUrl, timeout=REQUEST_TIMEOUT)
        self.urls = _LOC_PATTERN.findall(result.content.decode('utf-8'))
        print(self.urls)

    def postBaidu(self):
        """Push every not-yet-pushed URL in ``self.urls`` to ``push_baidu``.

        URLs are posted one at a time.  A URL is appended to
        ``push_baidu.txt`` when the response contains ``"success":1``; the
        loop stops as soon as a response starts reporting ``"remain":0``.
        """
        print("postBaidu:=================================")
        # No hard-coded Content-Length: requests derives it from the body.
        headers = {
            "User-Agent": "curl/7.12.1",
            "Host": "data.zz.baidu.com",
            "Content-Type": "text/plain",
        }
        with open('push_baidu.txt', 'a+') as history:
            pushed = _read_history(history)
            for url in self.urls:
                if url in pushed:
                    print("已经推送过:" + url)
                    continue
                try:
                    result = requests.post(push_baidu, data=url, headers=headers,
                                           timeout=REQUEST_TIMEOUT)
                    body = result.content.decode('utf-8')
                    print(url + body)
                    if '"success":1' in body:
                        history.write(url + '\n')
                        history.flush()
                        pushed.add(url)
                    if '{"remain":0' in body:
                        break
                except Exception as e:
                    # Best effort: one failed URL must not abort the run.
                    print(e)
                # Pause between requests.
                sleep(1)

    def postShenma(self):
        """Push all not-yet-pushed URLs in ``self.urls`` to ``push_shenma``.

        The pending URLs are sent in a single newline-separated request and
        recorded in ``push_shenma.txt`` when both the HTTP status and the
        ``returnCode`` field of the JSON reply are 200.
        """
        print("postShenma:=================================")
        if not push_shenma:
            print("push_shenma is not configured")
            return
        headers = {
            "User-Agent": "curl/7.12.1",
            "Host": "data.zhanzhang.sm.cn",
            "Content-Type": "text/plain",
        }
        with open('push_shenma.txt', 'a+') as history:
            pushed = _read_history(history)
            pending = []
            for url in self.urls:
                if url in pushed:
                    print("已经推送过:" + url)
                else:
                    pending.append(url)
            if not pending:
                # Nothing new to submit; do not send an empty request.
                return
            data = ''.join(url + '\n' for url in pending)
            try:
                result = requests.post(push_shenma, data=data, headers=headers,
                                       timeout=REQUEST_TIMEOUT)
                body = result.content.decode('utf-8')
                print("url:" + data)
                print("status_code:" + str(result.status_code))
                print("content:" + body)
                if str(result.status_code) == "200":
                    reply = json.loads(body)
                    print("returnCode " + str(reply['returnCode']))
                    if str(reply['returnCode']) == "200":
                        history.write(data)
                        history.flush()
            except Exception as e:
                # Best effort: report the failure and leave history untouched.
                print(e)

    def postSougou(self):
        """Submit every not-yet-pushed URL in ``self.urls`` to ``push_sogou``.

        NOTE(review): nothing is ever written to ``push_sogou.txt``, so every
        run resubmits all URLs -- the success condition for this endpoint is
        not visible here; confirm and record successes.
        NOTE(review): ``Shoulu`` is a nested dict passed as form data; verify
        that this is the encoding the endpoint expects.
        """
        print("postSougou:=================================")
        if not push_sogou:
            print("push_sogou is not configured")
            return
        headers = {
            "User-Agent": "Opera/9.80 (Windows NT 6.2; Win64; x64) Presto/2.12.388 Version/12.15",
            "Host": "sogou.com",
            "Content-Type": "application/x-www-form-urlencoded",
        }
        with open('push_sogou.txt', 'a+') as history:
            pushed = _read_history(history)
            for url in self.urls:
                if url in pushed:
                    print("已经推送过:" + url)
                    continue
                try:
                    result = requests.post(
                        push_sogou,
                        data={"source": "1", "site_type": "1",
                              "Shoulu": {"webAdr": url, "email": "[email protected]",
                                         "reason": "网站收录不正常,恳请收录!"}},
                        headers=headers,
                        timeout=REQUEST_TIMEOUT)
                    print(url + result.content.decode('utf-8'))
                except Exception as e:
                    # Best effort: one failed URL must not abort the run.
                    print(e)
                # Pause between requests.
                sleep(1)

    def get360Token(self, url, key):
        """Interleave the first 16 characters of *key* with the reversed *url*.

        For each position 0..15 the key character is emitted, followed by the
        character at the same position of the reversed URL when the URL is
        long enough.  For example, url ``"ab"`` and key ``"0123456789abcdef"``
        give ``"0b1a23456789abcdef"``.

        Raises:
            IndexError: if *key* has fewer than 16 characters.
        """
        reversed_url = url[::-1]
        pieces = []
        for idx in range(16):
            if idx < len(reversed_url):
                pieces.append(key[idx] + reversed_url[idx])
            else:
                # URL shorter than 16 characters: only the key char remains.
                pieces.append(key[idx])
        return ''.join(pieces)

    def run(self, sitemapUrl):
        """Fetch one sitemap and push its URLs to Baidu."""
        self.getSitemap(sitemapUrl)
        self.postBaidu()


def job():
    """Periodic task: push every sitemap listed in the site's sitemap index."""
    print("I'm working...")
    pusher = Pusher()
    sitemapUrls = pusher.getSitemapIndex("http://meishih.com/sitemap_index.xml")
    for sitemapUrl in sitemapUrls:
        pusher.run(sitemapUrl)


# Register job to run every day at 17:32.  Registration alone does nothing;
# the task only fires while a schedule.run_pending() loop is running.
schedule.every().day.at("17:32").do(job)

if __name__ == '__main__':
    # One-shot run.  To run on the daily schedule instead, replace the call
    # below with:
    #     while True:
    #         schedule.run_pending()  # run all tasks that are due
    #         time.sleep(10)
    job()
    # Example:
    # print(Pusher().get360Token("http://meishih.com/", "d182b3f28525f2db83acfaaf6e696db"))