From dc5619f9f4121b036991469681b42de3d3d85443 Mon Sep 17 00:00:00 2001
From: chenss <2490758406@qq.com>
Date: Wed, 1 Apr 2020 21:15:29 +0800
Subject: [PATCH] =?UTF-8?q?=E9=99=88=E5=A7=97=E5=A7=97=E7=9A=84=E7=AC=AC?=
 =?UTF-8?q?=E4=B8=80=E6=AC=A1=E6=8F=90=E4=BA=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 chenshanshan/.vscode/launch.json | 17 +++++++
 chenshanshan/app.js              | 73 ++++++++++++++++++++++++++++++++
 chenshanshan/db.js               | 19 ++++++++
 chenshanshan/initDb.js           | 19 ++++++++
 chenshanshan/models/articles.js  | 23 ++++++++++
 5 files changed, 151 insertions(+)
 create mode 100644 chenshanshan/.vscode/launch.json
 create mode 100644 chenshanshan/app.js
 create mode 100644 chenshanshan/db.js
 create mode 100644 chenshanshan/initDb.js
 create mode 100644 chenshanshan/models/articles.js

diff --git a/chenshanshan/.vscode/launch.json b/chenshanshan/.vscode/launch.json
new file mode 100644
index 0000000..d1bb652
--- /dev/null
+++ b/chenshanshan/.vscode/launch.json
@@ -0,0 +1,17 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "type": "node",
+            "request": "launch",
+            "name": "启动程序",
+            "skipFiles": [
+                "/**"
+            ],
+            "program": "${workspaceFolder}\\app.js"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/chenshanshan/app.js b/chenshanshan/app.js
new file mode 100644
index 0000000..7c3475b
--- /dev/null
+++ b/chenshanshan/app.js
@@ -0,0 +1,73 @@
+'use strict'
+
+// initDb helper module required by this file; it is called once at load time.
+// NOTE(review): this patch adds initDb.js at the project root, not under
+// utils/ -- confirm that ./utils/initDb exists.
+var addTable = require('./utils/initDb')
+
+addTable();
+
+/**
+ * Drop and re-create the table behind the articles model (force: true).
+ *
+ * NOTE(review): initDb.js calls this export with a page URL, which is
+ * ignored here -- confirm what this module is meant to export.
+ *
+ * @returns {Promise} the promise returned by articles.sync()
+ */
+module.exports = () => {
+    // Was article.sync(...): no binding named article exists in this file, so
+    // every call threw a ReferenceError. articles is assigned further down,
+    // which has already happened once this module has finished loading.
+    return articles.sync({
+        force: true
+    })
+}
+
+// Crawler
+var articles = require('./models/articles')
+var Crawler = require('crawler');
+
+var c = new Crawler({
+    maxConnections: 10,
+    // Called for every crawled page.
+    callback: function (error, res, done) {
+        if (error) {
+            console.log(error);
+        } else {
+            var $ = res.$;
+
+            var list = $('#content .article table').toArray();
+
+            list.forEach(element => {
+                // Title, with spaces and line breaks removed.
+                var title = $(element).find(' .pl2 a').text().trim().replace(/[ \r\n]/g, '');
+                console.log(title);
+
+                // Writer.
+                var writer = $(element).find(' .pl').text().trim().replace(/[ \r\n]/g, '');
+                console.log(writer);
+
+                // Introduction.
+                var introdtion = $(element).find(' span.inq').text().trim().replace(/[ \r\n]/g, '');
+                console.log(introdtion);
+
+                // Store the record. create() is asynchronous, so the new row can
+                // only be read once its promise resolves; the original logged an
+                // undeclared row here, which threw before done() was reached.
+                articles.create({ title: title, writer: writer, introdtion: introdtion })
+                    .then(row => {
+                        console.log('添加的ID:' + row.id);
+                    })
+                    .catch(err => {
+                        console.log(err);
+                    });
+            })
+        }
+        done();
+    }
+});
+
+// Queue a single URL; it is handled by the callback above.
+c.queue('https://book.douban.com/top250?icn=index-book250-all')
+
diff --git a/chenshanshan/db.js b/chenshanshan/db.js
new file mode 100644
index 0000000..2f84003
--- /dev/null
+++ b/chenshanshan/db.js
@@ -0,0 +1,19 @@
+const Sequelize = require('sequelize');
+
+
+// Database connection settings.
+const sequelize = new Sequelize('mssql','sa','123456',{
+    host:'localhost',// local machine
+    dialect:'mssql'
+});
+
+// Check the connection at load time and report the outcome.
+sequelize.authenticate()
+.then(()=>{
+    console.log('链接成功了!')
+}).catch((err)=>{
+    // Print the cause as well; the original handler discarded it.
+    console.log('链接失败了!', err)
+});
+
+module.exports = sequelize
\ No newline at end of file
diff --git a/chenshanshan/initDb.js b/chenshanshan/initDb.js
new file mode 100644
index 0000000..2482c65
--- /dev/null
+++ b/chenshanshan/initDb.js
@@ -0,0 +1,19 @@
+'use strict'
+
+//Crawler
+var Reptile = require('reptile');
+var articles = require('./models/articles'); // create a table
+var initDb=require('./utils/initDb'); // check whether the table exists
+var cra=require('./app');
+// Base URL of the paged list; the loop appends the start offset. This line
+// was commented out, so url+i below threw a ReferenceError.
+var url='https://book.douban.com/top250?start=';
+initDb();
+
+// Offsets 0, 25, ..., 225; the original bound (i<=250) also requested 250.
+// NOTE(review): app.js exports a table-sync function that ignores its
+// argument -- confirm that cra is meant to receive a URL.
+for(let i=0;i<250;){
+    cra(url+i);
+    i=i+25
+}
diff --git a/chenshanshan/models/articles.js b/chenshanshan/models/articles.js
new file mode 100644
index 0000000..b8c8f3a
--- /dev/null
+++ b/chenshanshan/models/articles.js
@@ -0,0 +1,23 @@
+'use strict'
+
+let Sequelize = require('sequelize');
+let sequelize = require('../db');
+
+// Model for the 'articles' table, defined on the shared connection from ../db.
+const Article = sequelize.define('articles', {
+    // Required text column, declared as STRING(800).
+    title: {
+        type: Sequelize.STRING(800),
+        allowNull: false
+    },
+    writer: {
+        type: Sequelize.STRING
+    },
+    // The spelling matches the attribute name app.js passes to create().
+    introdtion: {
+        type: Sequelize.STRING
+    }
+})
+
+module.exports = Article;
+
-- 
Gitee