1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
| import requests import time, datetime import json
# Reporting window: a fixed first day up to today, both formatted as
# YYYYMMDD for the query string.
start_date = '20230520'
date = datetime.datetime.now()
end_date = date.strftime('%Y%m%d')

# OAuth state (refresh token, API key, secret and the last access token).
# Read inside a context manager so the file handle is closed promptly.
with open('./token.json') as token_file:
    config = json.load(token_file)

# Prefer the access token that update() persists under 'access' in
# token.json; fall back to the literal when no token has been saved yet.
access_token = config.get('access') or '121.xxx'
site_id = '191xxx'

# Common prefix of every report request; callers append the date range,
# metrics and API method.
dataUrl = (
    'https://openapi.baidu.com/rest/2.0/tongji/report/getData?access_token='
    + access_token + '&site_id=' + site_id
)
metrics = 'pv_count'
def _with_access_token(url, token):
    """Return *url* with the value of its ``access_token`` parameter set to *token*.

    The URL is returned unchanged when it has no ``access_token=`` parameter.
    """
    head, marker, tail = url.partition('access_token=')
    if not marker:
        return url
    # Drop the stale value (everything up to the next '&') and keep the rest.
    _stale, amp, rest = tail.partition('&')
    return head + marker + token + amp + rest


def downFile(url, fileName, prefix='./'):
    """Download a JSON report from *url* and write it to ``prefix + fileName``.

    If the response contains an ``error_code`` key, the OAuth tokens are
    refreshed through :func:`update` and the request is retried once with the
    new access token substituted into the URL.

    Args:
        url: Full request URL, including the ``access_token`` parameter.
        fileName: Name of the output file.
        prefix: Directory prefix for the output file and for ``token.json``.

    Raises:
        RuntimeError: If the response still carries ``error_code`` after the
            token refresh. Nothing is written in that case, so an existing
            output file is not overwritten with an error payload.
    """
    for attempt in range(2):
        print('downloading :', url)
        res = json.loads(requests.get(url).content)
        if 'error_code' not in res:
            break
        if attempt == 0:
            # Retrying the unchanged URL would resend the rejected token, so
            # swap in the freshly issued one before the second attempt.
            url = _with_access_token(url, update(prefix=prefix))
    else:
        raise RuntimeError('request still failing after token refresh: %r' % (res,))

    with open(prefix + fileName, 'w') as f:
        json.dump(res, f)
    print('writing :', prefix + fileName)


def update(prefix, url=None):
    """Refresh the OAuth tokens and persist them to ``prefix + 'token.json'``.

    Args:
        prefix: Directory prefix of the ``token.json`` file to rewrite.
        url: Optional explicit refresh URL. When omitted, it is built from the
            current ``config`` values at call time.

    Returns:
        The new access token (also stored in ``config['access']``).

    Raises:
        RuntimeError: If the response lacks ``access_token`` or
            ``refresh_token``.
    """
    if url is None:
        # Built per call (not as a default argument, which is evaluated once at
        # definition time) so a later refresh uses the latest refresh token.
        # HTTPS is used because the query string carries the client secret.
        # 'serect_key' is spelled as the key this script reads from token.json.
        url = (
            'https://openapi.baidu.com/oauth/2.0/token?grant_type=refresh_token'
            + '&refresh_token=' + config['refresh']
            + '&client_id=' + config['api_key']
            + '&client_secret=' + config['serect_key']
        )
    res = json.loads(requests.get(url).content)
    print(res)
    if 'access_token' not in res or 'refresh_token' not in res:
        raise RuntimeError('token refresh failed: %r' % (res,))
    config['access'] = res['access_token']
    config['refresh'] = res['refresh_token']
    with open(prefix + 'token.json', 'w') as f:
        json.dump(config, f)
    return config['access']
# Every aggregate report shares the same date window and metric; only the API
# method (plus any extra query arguments) and the output file name differ.
_report_query = '&start_date=' + start_date + '&end_date=' + end_date + '&metrics=' + metrics
for _method_args, _out_name in (
    ('visit/district/a', 'map.json'),
    ('trend/time/a&gran=month', 'trends.json'),
    ('source/all/a', 'sources.json'),
    ('source/engine/a', 'engine.json'),
    ('source/link/a', 'link.json'),
):
    downFile(dataUrl + _report_query + '&method=' + _method_args, _out_name)
# The visit calendar needs one year of data. A year of 365 days is slightly
# more than 52 weeks, so the calendar has 52 complete rows; those are obtained
# simply by moving the start date back one year.
# The 53rd (partial) row is handled next: date.weekday() returns 0 for Monday,
# so (date.weekday() + 1) % 7 maps Sunday to 0. Subtracting that many days on
# top of the 52 weeks gives the new start_date.
try:
    date = datetime.datetime(date.year - 1, date.month, date.day)
except ValueError:
    # Only Feb 29 has no counterpart in the previous year; use Feb 28 instead.
    date = datetime.datetime(date.year - 1, date.month, 28)
# Calendar-day arithmetic with timedelta; subtracting seconds from a timestamp
# can land on the wrong day across a DST change.
date -= datetime.timedelta(days=(date.weekday() + 1) % 7)
start_date = date.strftime('%Y%m%d')
downFile(
    dataUrl + '&method=overview/getTimeTrendRpt' + '&metrics=' + metrics
    + '&start_date=' + start_date + '&end_date=' + end_date,
    'calendar.json',
)
|