diff --git a/xie_wl/app.js b/xie_wl/app.js index 839103612257cd899717b13f556a1d08b30eb3b3..c0a4e0aec368b073fe316a9eb48c616979efd34a 100644 --- a/xie_wl/app.js +++ b/xie_wl/app.js @@ -1,153 +1,12 @@ 'use strict' -// const cralwer = require('./data'); -// const https = require('https'); -// var fs = require('fs'); - -const Crawler = require('crawler'); -const book = require('./model/booK'); -const force = require('./initDb/intbook') -const page = require('./data'); - -var arr = [0,25, 50, 75, 100, 125, 150, 175, 200, 225]; -force(); -for (var i of arr) { - var url = 'https://book.douban.com/top250?start=' + i; - page(url); -} - - -setTimeout(() => { - book.findAll().then((user)=>{ - console.log('All books:' ,JSON.stringify(user,null,4)) - }) -}, 5000); - - - - - - - - -// var url = 'https://book.douban.com/top250?start=0'; -// var startPage = 0; -// var endPage = 225; -// var arr = [0, 25, 50, 75, 100, 125, 150, 175, 200, 225]; -// var array=[]; -// var int = 1; -// var xls = 0; - -// const crawler = new Crawler({ -// maxConnections: 100, -// // This will be called for each crawled page -// callback: function (error, res, done) { -// if (error) { -// console.log(error); -// } else { -// var $ = res.$; -// // $ is Cheerio by default -// //a lean implementation of core jQuery designed specifically for the server -// //var list=$('#content .article .paginator ').toArray(); -// //console.log(list); -// var list = $('#content .article table').toArray(); -// // console.log('Grabbed', $('title').text(), 'bytes'); -// // console.log(list.length); -// if (startPage <= endPage) { -// list.forEach(element => { -// var txt = $(element); -// var title = txt.find('.pl2 a').text().trim().replace(/[ \n\r]/g, ''); -// var author = txt.find('p.pl').text().trim().replace(/([/][^/]+){3,4}$/, ''); -// var price = txt.find('p.pl').text().trim().match(/([^/]+)$/, '')[1].replace(/([^\d]+)$/, ''); -// var quote = txt.find('.quote .inq').text().trim().replace(/[ \n\r]/g, ''); -// console.log(title); -// // book.sync({ force: true }).then(() => { -// book.create({ -// title: title, -// //author:author, -// // price:price, -// // quote:quote -// }).then((row) => { -// console.log(`添加的记录id:${row.id}`); -// }) -// // }) -// // Book.sync({ force: true }).then(() => { -// // Book.create({ -// // title: title, -// // // author: author, -// // // price: price, -// // // quote: quote -// // }).then((row) => { -// // console.log(`添加的记录Id:${row.id}`) -// // }) -// // }) - -// }); -// url = 'https://book.douban.com/top250?start=' + arr[int++]; -// startPage += list.length -// xls += list.length -// crawler.queue(url); -// } -// } -// done(); -// } -// }); - -// crawler.queue(url) - - -// console.log(http.url); -//cralwer.queue(carurl) -//console.log(result); - -/** - * 抓取数据请求函数 - * @param {抓取地址} url - */ -/* - -const startPage = 1; // 开始页 -const endPage = 2; // 结束页 - -let page = startPage; // 当前抓取页 -let total = 0; // 数据总数 - -// 初始化url -var url='https://book.douban.com/top250?icn=index-book250-all' - -// 收集最终数据 -let result = [{ - name: '链家', - data: [ - ['链家网-上海市-闵行区-浦江镇-整租-一室'], - ['标题', '价格', '地点', '大小', '朝向', '格局', '层数', '来源', '发布时间', '标签'] - ] - }]; - - -getData(url) -function getData(url) { - https.get(url, res => { - let data = ''; - res.on('data',function(chunk){ - data += chunk; - // console.log(data); - }); - console.log(res) - // res.on('end',function(){ - // let formatData = filter(data); // 筛选出需要的数据 - // result[0].data = result[0].data.concat(formatData); // 拼接此次抓取到的数据 - // page++; - // if (page <= endPage) { // 继续抓取下一页 - // // 通过分析 url 规律,拼出下一页的 url - // let tempUrl = 'https://sh.lianjia.com/zufang/pujiang1/pg' + page + 'rt200600000001l0/'; - // getData(tempUrl); // 递归继续抓取 - // } else { // 结束抓取 - // result[0].data.push(['总数', total]); // 在最后添加一个总数 - // //writeData(result, 'LJ.xlsx'); // 写入文件 - // } - // }) - }); - } - - */ \ No newline at end of file +const domdate = require('./crawler/domdata'); +//const article=require('./model/article'); +const fs=require('fs'); +const url = 'https://www.cnblogs.com/sitehome/p/'; + +//article.sync({force:true}); +for (var i = 1; i <= 10; i++) { + var surl = url + i; + domdate(surl); +} \ No newline at end of file diff --git a/xie_wl/config.js b/xie_wl/config.js index b39ca7661e574a1707b2a32d275e47d6f1cf8196..264cb3dd869ce8ed0cdfc59878fb1f22418705b1 100644 --- a/xie_wl/config.js +++ b/xie_wl/config.js @@ -1,10 +1,11 @@ -'use strict' -const config={ - database:'CommonPermission', - userName:'sa', - password:'123456', - host:'localhost', - dialect:'mssql' +// 配置数据库信息 + +var config = { + database:'CommonPermission', //库名 + userName:'sa', //用户名 + password:'123456', // 密码 + host:'localhost', // 本地数据库 + dialect:'mssql' // 语言 } module.exports=config; \ No newline at end of file diff --git a/xie_wl/db.js b/xie_wl/db.js index 75f85b60e7472d2c1304894b5e5863fb434ca9b0..ef854955447475e89f77a1b446b5ab3fbfa92890 100644 --- a/xie_wl/db.js +++ b/xie_wl/db.js @@ -1,20 +1,18 @@ 'use strict' +// 链接数据库,测试数据库链接是否成功 -const Sequelize = require('sequelize'); -const config = require('./config'); +var config=require('./config'); +var Sequelize=require('sequelize'); -const sequelize = new Sequelize(config.database, config.userName, config.password, { - host: config.host, - dialect: config.dialect +const sequelize=new Sequelize(config.database,config.userName,config.password,{ + host:config.host, + dialect:config.dialect +}); +//authenticate +sequelize.authenticate().then(()=>{ + console.log(`链接数据库${config.database}成功!`) +}).catch((err)=>{ + console.log(`链接失败:${err}`); }); -sequelize - .authenticate() - .then(() => { - console.log(`链接数据库${config.database}成功`) - }) - .catch(err => { - console.log(`错误信息:${err}`) - }) - -module.exports = sequelize; \ No newline at end of file +module.exports=sequelize; \ No newline at end of file