wuqiongrj 2018-12-14
欢迎点击右上角关注小编,除了分享技术文章之外还有很多福利,私信“学习资料”即可领取,包括但不限于Python实战演练、PDF电子文档、面试集锦、学习资料等。

本次通过对BOSS直聘、拉勾网上数据分析岗位的数据进行分析,了解数据分析岗位的行业情况,并以此了解从事数据分析所需要的技能。
版本:Python3
系统:Windows
相关模块:pyspider、requests、pymysql
直接在命令行pip3 install pyspider requests pymysql即可。

获取BOSS直聘索引页信息,主要是岗位名称、薪资、地点、工作年限、学历要求,公司名称、类型、状态、规模。

获取拉勾网索引页信息,主要是岗位名称、地点、薪资、工作年限、学历要求,公司名称、类型、状态、规模,工作技能,工作福利。
网页为Ajax请求,采用PyCharm编写代码,轻车熟路。
import requests
import pymysql
import random
import time
import json
# Running total of postings processed; get_message() passes it to add_Mysql()
# as the row id.
count = 0

# Request URL and request headers.
url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    # Placeholder value: replace it with the Cookie string of your own session.
    'Cookie': '你的Cookie值',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Connection': 'keep-alive',
    'Host': 'www.lagou.com',
    'Origin': 'https://www.lagou.com',
    # The scheme was previously misspelled as "ttps://", which is not a valid URL.
    'Referer': 'https://www.lagou.com/jobs/list_%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90?labelWords=sug&fromSearch=true&suginput=shuju',
}
# Connect to the database. The connection is created once at import time and
# stored in the module-level name `db`.
# NOTE(review): the credentials are hard-coded here; consider loading them
# from configuration before sharing or deploying this script.
db = pymysql.connect(
    host='127.0.0.1',
    port=3306,
    user='root',
    password='774110919',
    db='lagou_job',
    charset='utf8mb4',
)
def add_Mysql(id, job_title, job_salary, job_city, job_experience, job_education, company_name, company_type, company_status, company_people, job_tips, job_welfare):
    """Insert one job posting into the ``job`` table.

    The values are passed to the driver as bound parameters instead of being
    spliced into the SQL text, so quotes or other special characters in the
    scraped strings can neither break the statement nor inject SQL.

    Args:
        id: Row id for the new record.
        job_title, job_salary, job_city, job_experience, job_education,
        company_name, company_type, company_status, company_people,
        job_tips, job_welfare: Column values, stored in the columns of the
            same name.

    Returns:
        None. On any error the exception is printed and the transaction is
        rolled back; nothing is re-raised.
    """
    sql = (
        'insert into job(id, job_title, job_salary, job_city, job_experience, '
        'job_education, company_name, company_type, company_status, '
        'company_people, job_tips, job_welfare) '
        'values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
    )
    params = (
        id, job_title, job_salary, job_city, job_experience, job_education,
        company_name, company_type, company_status, company_people,
        job_tips, job_welfare,
    )
    try:
        # The context manager closes the cursor even if execute() fails.
        with db.cursor() as cursor:
            # Keep the debug output of the statement that is about to run.
            print(cursor.mogrify(sql, params))
            cursor.execute(sql, params)
            print(cursor.lastrowid)
        db.commit()
    except Exception as e:
        print(e)
        db.rollback()
def _extract_job_fields(job):
    """Return the stored column values of one posting dict, in insert order."""
    labels = job['positionLables']
    # Truthiness covers both an empty list and a null value, which len()
    # would reject with a TypeError.
    job_tips = ','.join(labels) if labels else 'None'
    return (
        job['positionName'],      # job title
        job['salary'],            # salary
        job['city'],              # city
        job['workYear'],          # required experience
        job['education'],         # required education
        job['companyShortName'],  # company name
        job['industryField'],     # company type
        job['financeStage'],      # company status
        job['companySize'],       # company size
        job_tips,                 # skill labels, comma-joined
        job['positionAdvantage'],  # job benefits
    )


def get_message():
    """Fetch search result pages 1-30 and store every posting via add_Mysql().

    For each page the function sleeps a random 10-20 seconds, POSTs the search
    form to ``url`` with ``headers``, decodes the JSON body and, for every
    posting in it, prints the extracted fields and inserts them into the
    database. The module-level ``count`` is incremented once per posting and
    used as the row id.

    Returns:
        None.
    """
    global count
    for page in range(1, 31):
        print('第' + str(page) + '页')
        time.sleep(random.randint(10, 20))
        data = {
            'first': 'false',
            'pn': page,
            'kd': '数据分析',
        }
        response = requests.post(url=url, data=data, headers=headers)
        result = json.loads(response.text)
        try:
            job_messages = result['content']['positionResult']['result']
        except (KeyError, TypeError):
            # The payload does not have the expected nesting (for example an
            # error response); show it and skip this page rather than
            # aborting the whole crawl.
            print(result)
            continue
        for job in job_messages:
            count += 1
            fields = _extract_job_fields(job)
            for value in fields[:-1]:
                print(value)
            # Blank line after the last field separates postings in the log.
            print(fields[-1] + '\n')
            # Write to the database.
            add_Mysql(count, *fields)
if __name__ == '__main__':
    get_message()

获取数据:

import requests
import pymysql
import random
import time
import json
# Running total of postings processed; get_message() passes it to add_Mysql()
# as the row id.
count = 0

# Request URL and request headers.
url = 'https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    # Placeholder value: replace it with the Cookie string of your own session.
    'Cookie': '你的Cookie值',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Connection': 'keep-alive',
    'Host': 'www.lagou.com',
    'Origin': 'https://www.lagou.com',
    # The scheme was previously misspelled as "ttps://", which is not a valid URL.
    'Referer': 'https://www.lagou.com/jobs/list_%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90?labelWords=sug&fromSearch=true&suginput=shuju',
}
# Connect to the database. The connection is created once at import time and
# stored in the module-level name `db`.
# NOTE(review): the credentials are hard-coded here; consider loading them
# from configuration before sharing or deploying this script.
db = pymysql.connect(
    host='127.0.0.1',
    port=3306,
    user='root',
    password='774110919',
    db='lagou_job',
    charset='utf8mb4',
)
def add_Mysql(id, job_title, job_salary, job_city, job_experience, job_education, company_name, company_type, company_status, company_people, job_tips, job_welfare):
    """Insert one job posting into the ``job`` table.

    The values are passed to the driver as bound parameters instead of being
    spliced into the SQL text, so quotes or other special characters in the
    scraped strings can neither break the statement nor inject SQL.

    Args:
        id: Row id for the new record.
        job_title, job_salary, job_city, job_experience, job_education,
        company_name, company_type, company_status, company_people,
        job_tips, job_welfare: Column values, stored in the columns of the
            same name.

    Returns:
        None. On any error the exception is printed and the transaction is
        rolled back; nothing is re-raised.
    """
    sql = (
        'insert into job(id, job_title, job_salary, job_city, job_experience, '
        'job_education, company_name, company_type, company_status, '
        'company_people, job_tips, job_welfare) '
        'values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
    )
    params = (
        id, job_title, job_salary, job_city, job_experience, job_education,
        company_name, company_type, company_status, company_people,
        job_tips, job_welfare,
    )
    try:
        # The context manager closes the cursor even if execute() fails.
        with db.cursor() as cursor:
            # Keep the debug output of the statement that is about to run.
            print(cursor.mogrify(sql, params))
            cursor.execute(sql, params)
            print(cursor.lastrowid)
        db.commit()
    except Exception as e:
        print(e)
        db.rollback()
def _extract_job_fields(job):
    """Return the stored column values of one posting dict, in insert order."""
    labels = job['positionLables']
    # Truthiness covers both an empty list and a null value, which len()
    # would reject with a TypeError.
    job_tips = ','.join(labels) if labels else 'None'
    return (
        job['positionName'],      # job title
        job['salary'],            # salary
        job['city'],              # city
        job['workYear'],          # required experience
        job['education'],         # required education
        job['companyShortName'],  # company name
        job['industryField'],     # company type
        job['financeStage'],      # company status
        job['companySize'],       # company size
        job_tips,                 # skill labels, comma-joined
        job['positionAdvantage'],  # job benefits
    )


def get_message():
    """Fetch search result pages 1-30 and store every posting via add_Mysql().

    For each page the function sleeps a random 10-20 seconds, POSTs the search
    form to ``url`` with ``headers``, decodes the JSON body and, for every
    posting in it, prints the extracted fields and inserts them into the
    database. The module-level ``count`` is incremented once per posting and
    used as the row id.

    Returns:
        None.
    """
    global count
    for page in range(1, 31):
        print('第' + str(page) + '页')
        time.sleep(random.randint(10, 20))
        data = {
            'first': 'false',
            'pn': page,
            'kd': '数据分析',
        }
        response = requests.post(url=url, data=data, headers=headers)
        result = json.loads(response.text)
        try:
            job_messages = result['content']['positionResult']['result']
        except (KeyError, TypeError):
            # The payload does not have the expected nesting (for example an
            # error response); show it and skip this page rather than
            # aborting the whole crawl.
            print(result)
            continue
        for job in job_messages:
            count += 1
            fields = _extract_job_fields(job)
            for value in fields[:-1]:
                print(value)
            # Blank line after the last field separates postings in the log.
            print(fields[-1] + '\n')
            # Write to the database.
            add_Mysql(count, *fields)
if __name__ == '__main__':
    get_message()

获取数据:

01 城市分布图


岗位的分布情况,这里可以看出岗位大多都分布在东部地区,中部也有一些。
02 城市分布热力图


京津冀、长三角、珠三角密集度不相上下,成都、重庆地区也有一小部分需求。
可以说北上广深,这四个一线城市包揽了大部分的岗位需求。
03 工作经验薪水图


04 学历薪水图


总的来说「硕士」>「本科」>「大专」,当然大专、本科中也有高薪水的。
毕竟越往后能力就越重要,学历算是一个重要的加分项。
05 公司状态薪水图


06 公司规模薪水图


07 公司类型TOP10


08 工作技能图

09 工作福利图

最后贴两张BOSS直聘以及拉勾网薪水TOP20,以此来作为勉励。
01 BOSS直聘薪水TOP20

02 拉勾网薪水TOP20

毕竟我们不能仅仅当条咸鱼,我们要当就当一只有梦想的咸鱼!!!
