怎样将爬虫爬取的今日头条信息以图表展示.docx
- 文档编号:8919288
- 上传时间:2023-02-02
- 格式:DOCX
- 页数:10
- 大小:16.63KB
怎样将爬虫爬取的今日头条信息以图表展示.docx
《怎样将爬虫爬取的今日头条信息以图表展示.docx》由会员分享,可在线阅读,更多相关《怎样将爬虫爬取的今日头条信息以图表展示.docx(10页珍藏版)》请在冰豆网上搜索。
怎样将爬虫爬取的今日头条信息以图表展示
如何使用Echarts将头条抓取结果以柱状图和饼形图展示?
后台框架:
Spring+SpringMVC+Mybatis
表结构:
CREATE TABLE `article` (
`id` int(20) NOT NULL AUTO_INCREMENT,
`title` varchar(255) DEFAULT NULL COMMENT '文章名',
`readcount` int(10) DEFAULT NULL COMMENT '阅读次数',
`showimgcount` int(1) DEFAULT NULL COMMENT '图片展示数',
`type` int(1) DEFAULT NULL COMMENT '类型:1-文章 2-图片 3-视频',
`commentcount` int(255) DEFAULT NULL COMMENT '评论数量',
`publishtime` timestamp NULL DEFAULT NULL COMMENT '发布时间',
`gallerycount` int(10) DEFAULT NULL COMMENT '图片数量-只针对相集',
`section` varchar(255) DEFAULT NULL COMMENT '领域',
`tagword` varchar(255) DEFAULT NULL COMMENT '关键字',
`author` int(20) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `t_s_index` (`type`,`section`)
)
编写查询语句:
<!--查询每个领域的平均阅读量-->
select a.section,avg(a.readcount) as readcount_avg,count(1) as section_count
from article a
where a.section is not null and a.section != ''
group by a.section
having count(1) > 100
<!--查询每个文章类型的平均阅读量-->
select a.type,avg(a.readcount) as readcount_sum,count(1) as type_count
from article a
group by a.type
<!--查询文章总量-->
select count(1)
from article
此处省略dao层和service层的查询代码,只展示controller层的代码
@Autowired
private TouTiaoService touTiaoService;
// Controller action mapped to "/section".
// NOTE(review): the statement that starts with `List` below is truncated in this
// copy (its generic type, variable name and service call are missing); presumably
// it declares `list` from a touTiaoService query — confirm against the original source.
@RequestMapping("/section")
public void section_avg(HttpServletResponse response){
// Select the DS_TOUTIAO data source before the query statement.
DataSourceContextHolder.setDbType(DataSourceType.DS_TOUTIAO);
List
// Select the DS_EASYWEB data source again after the query statement.
DataSourceContextHolder.setDbType(DataSourceType.DS_EASYWEB);
// Pass `list` and the response to PrintUtils.printMsg (presumably writes it out as JSON — verify).
PrintUtils.printMsg(response, list);
}
// Controller action mapped to "/type".
// NOTE(review): the statement that starts with `List` below is truncated in this
// copy (its generic type, variable name and service call are missing); presumably
// it declares `list` from a touTiaoService query — confirm against the original source.
@RequestMapping("/type")
public void type_avg(HttpServletResponse response){
// Select the DS_TOUTIAO data source before the query statement.
DataSourceContextHolder.setDbType(DataSourceType.DS_TOUTIAO);
List
// Select the DS_EASYWEB data source again after the query statement.
DataSourceContextHolder.setDbType(DataSourceType.DS_EASYWEB);
// Pass `list` and the response to PrintUtils.printMsg (presumably writes it out as JSON — verify).
PrintUtils.printMsg(response, list);
}
编写页面
本页统计数据来源于某头条,闲暇时刻写了一个爬虫,爬取头条的文章,只统计阅读量在1万以上的文章,并且单个类型下的文章数量大于100篇。
共爬取,
<!--引入echarts的js文件-->
toutiao.js代码:
// On DOM ready: resolve the four chart containers, create one ECharts instance
// per container, then hand the instances to the two chart initialisers.
$(function(){
    var containerIds = ["section_avg", "section_article", "type_count", "type_avg"];

    // Look up every container first, then initialise the charts in the same order.
    var containers = containerIds.map(function(id){
        return document.getElementById(id);
    });
    var charts = containers.map(function(container){
        return echarts.init(container);
    });

    // charts[0]/charts[1]: per-section bar charts (average reads, article count).
    initSectionChart(charts[0], charts[1]);
    // charts[2]/charts[3] are the "type_count" and "type_avg" containers, in that order.
    initTypeChart(charts[2], charts[3]);
});
/**
 * Fetch the per-section statistics and render them as two bar charts.
 *
 * Issues a GET request for JSON to BASE_PATH + '/toutiao/section.do'. Each item
 * of the response is read for its `section`, `readcount_avg` and
 * `section_count` fields. A response that is not an array (for example `null`)
 * is treated as an empty result instead of throwing inside the callback.
 *
 * @param {Object} chart  ECharts instance that receives the average-read-count bars.
 * @param {Object} chart1 ECharts instance that receives the article-count bars.
 */
function initSectionChart(chart,chart1){
    // Both charts share the same skeleton; only the series name and y axis differ.
    function buildBarOption(seriesName, yAxis){
        return {
            legend: {
                data: [seriesName]
            },
            xAxis: [
                {
                    type: 'category',
                    data: [],
                    // Vertical labels, with interval 0 so that no category label is skipped.
                    axisLabel: {
                        rotate: -90,
                        interval: 0
                    }
                }
            ],
            yAxis: [yAxis],
            series: [
                {
                    name: seriesName,
                    type: 'bar',
                    data: []
                }
            ]
        };
    }

    var option = buildBarOption('各分类平均阅读量', {
        type: 'value',
        name: '平均阅读量',
        min: 0,
        axisLabel: {
            // Show tick values in units of ten thousand ("万").
            formatter: function(data){
                return data/10000+"万";
            }
        }
    });
    var option1 = buildBarOption('各分类文章数量', {
        type: 'value',
        name: '文章数量',
        min: 0
    });

    $.ajax({
        url: BASE_PATH+'/toutiao/section.do',
        type: 'get',
        dataType: 'json',
        success: function(data){
            // Guard against null / non-array payloads so the charts still render (empty).
            var rows = Array.isArray(data) ? data : [];
            var sectionName = [];
            var section_avg_value = [];
            var section_count = [];
            for(var i = 0; i < rows.length; i++){
                sectionName.push(rows[i]['section']);
                section_avg_value.push(rows[i]['readcount_avg']);
                section_count.push(rows[i]['section_count']);
            }
            option.xAxis[0].data = sectionName;
            option.series[0].data = section_avg_value;
            chart.setOption(option);
            option1.xAxis[0].data = sectionName;
            option1.series[0].data = section_count;
            chart1.setOption(option1);
        }
    });
}
/**
 * Fetch the per-article-type statistics and render them as two pie charts.
 *
 * Issues a GET request for JSON to BASE_PATH + '/toutiao/type.do'. Each item of
 * the response is read for its `type`, `readcount_sum` and `type_count` fields.
 * The slice labels present `readcount_sum` as the average read count
 * ("平均阅读量") and `type_count` as the article count ("文章数量").
 * Slice values are normalised to each type's share of the respective total.
 *
 * Compared with the earlier version this one:
 *  - keeps every string literal on a single line (the split literals did not parse);
 *  - treats a non-array response and missing / non-numeric figures as empty / 0;
 *  - yields 0 instead of NaN when a total is 0.
 *
 * @param {Object} chart  ECharts instance that receives the read-count share pie.
 * @param {Object} chart1 ECharts instance that receives the article-count share pie.
 */
function initTypeChart(chart,chart1){
    // Display name for each known `type` code; any other code gets an empty name.
    var TYPE_LABELS = { 1: "文章", 2: "图册", 3: "视频" };

    // Both pies share the same skeleton; only the title / series name differs.
    function buildPieOption(title){
        return {
            title: {
                text: title,
                x: 'center'
            },
            legend: {
                data: []
            },
            series: [
                {
                    name: title,
                    type: 'pie',
                    data: [],
                    label: {
                        normal: {
                            position: 'inner'
                        }
                    }
                }
            ]
        };
    }

    // Coerce a response field to a finite number, falling back to 0.
    function toNumber(value){
        var n = Number(value);
        return isFinite(n) ? n : 0;
    }

    // Share of `part` in `total`; 0 when there is nothing to divide by.
    function share(part, total){
        return total === 0 ? 0 : part/total;
    }

    var option = buildPieOption('不同文章类型阅读量比例');
    var option1 = buildPieOption('不同文章类型文章数量比例');

    $.ajax({
        url: BASE_PATH+'/toutiao/type.do',
        type: 'get',
        dataType: 'json',
        success: function(data){
            var rows = Array.isArray(data) ? data : [];
            var type_readcount = [];
            var type_count = [];
            var sum = 0;
            var count = 0;
            var i;
            for(i = 0; i < rows.length; i++){
                var code = rows[i]['type'];
                var type_name = Object.prototype.hasOwnProperty.call(TYPE_LABELS, code) ? TYPE_LABELS[code] : "";
                var reads = toNumber(rows[i]['readcount_sum']);
                var articles = toNumber(rows[i]['type_count']);
                type_readcount.push({
                    "name": type_name+"\n平均阅读量:"+reads.toFixed(0),
                    "value": reads
                });
                type_count.push({
                    "name": type_name+"\n文章数量:"+articles.toFixed(0),
                    "value": articles
                });
                sum += reads;
                count += articles;
            }
            // Second pass: turn the absolute figures into shares of the totals.
            for(i = 0; i < type_readcount.length; i++){
                type_readcount[i]['value'] = share(type_readcount[i]['value'], sum);
                type_count[i]['value'] = share(type_count[i]['value'], count);
            }
            option.series[0].data = type_readcount;
            chart.setOption(option);
            option1.series[0].data = type_count;
            chart1.setOption(option1);
        }
    });
}
演示页面请XX微儿博客
- 配套讲稿:
如PPT文件的首页显示word图标,表示该PPT已包含配套word讲稿。双击word图标可打开word文档。
- 特殊限制:
部分文档作品中含有的国旗、国徽等图片,仅作为作品整体效果示例展示,禁止商用。设计者仅对作品中独创性部分享有著作权。
- 关 键 词:
- 怎样 爬虫 今日 信息 图表 展示