crawler.js
1
data를 받아서 파일에 저장한다.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
const request = require('request');
const cheerio = require('cheerio');
var fs = require('fs');
// Baidu search-results URL that the crawler fetches.
// The `wd` query parameter is the percent-encoded search phrase
// "韩币对人民币汇率" (Korean won to Chinese yuan exchange rate).
// NOTE(review): the remaining rsv_*/oq/bs/sug parameters look like residue
// copied from a browser session and are presumably not required — confirm
// before trimming.
const news =
'https://www.baidu.com/s?wd=%E9%9F%A9%E5%B8%81%E5%AF%B9%E4%BA%BA%E6%B0%91%E5%B8%81%E6%B1%87%E7%8E%87&rsv_spt=1&rsv_iqid=0xdcac14d600269c84&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_dl=tb&rsv_enter=1&oq=nodejs%2520spider%2520baidu&rsv_btype=t&inputT=2284&rsv_t=43ff7iv1frVDf7S6pDhlILkdjpa9ixOuXiEu3oI5YBVf5yfNunJHNaM%2BbqDyz5736bp5&rsv_pq=fefa904e002cf515&rsv_sug3=28&rsv_sug1=5&rsv_sug7=100&sug=%25E9%259F%25A9%25E5%25B8%2581%25E5%25AF%25B9%25E4%25BA%25BA%25E6%25B0%2591%25E5%25B8%2581%25E6%25B1%2587%25E7%258E%2587&rsv_n=1&bs=nodejs%20spider%20baidu';
// Fetch the search-results page, extract the text of every element matching
// the exchange-rate selector, and save it to data2.json as a single line:
// "<YYYY-MM-DD>,<text 1>,<text 2>,...".
// NOTE: despite the .json extension the file holds comma-separated text,
// not JSON.
request({ uri: news }, (err, response, body) => {
  // On a transport failure `body` is undefined; bail out instead of parsing it.
  if (err) {
    console.error(err);
    return;
  }

  console.log(body);
  const $ = cheerio.load(body);

  // The first field is today's date; every matched element appends one field.
  const fields = [getFormatDate(new Date())];
  $('.c-border .c-row .op_exrate_result div').each((index, item) => {
    const text = $(item).text();
    console.log(text);
    fields.push(text);
  });

  fs.writeFile('data2.json', fields.join(','), 'utf8', (writeErr) => {
    // Only report success when the write actually succeeded.
    if (writeErr) {
      console.error(writeErr);
      return;
    }
    console.log('비동기적 파일 쓰기 완료');
  });
});
/**
 * Format a Date as "YYYY-MM-DD" using its local-time fields.
 * Month and day are left-padded with a single '0' when below 10;
 * the year is emitted as-is (no padding).
 *
 * @param {Date} date - The date to format.
 * @returns {string} The formatted date string.
 */
function getFormatDate(date) {
  // Prefix '0' unless the value is at least 10.
  const twoDigits = (value) => (value >= 10 ? String(value) : `0${value}`);

  const parts = [
    date.getFullYear(),
    twoDigits(date.getMonth() + 1), // getMonth() is zero-based
    twoDigits(date.getDate()),
  ];
  return parts.join('-');
}
index.js
1
2
웹에서 데이터를 읽으려고 했으나 CORS로 막혀 있다.
될 줄 알았는데 안 된다.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
const express = require('express');
const fs = require('fs');
const app = express();
const port = 3000;

// Serve the contents of data2.json (located next to this script) as HTML for
// every request path. The file is read on each request.
app.use('/', (req, response) => {
  fs.readFile(__dirname + '/data2.json', (err, data) => {
    if (err) {
      // Always finish the response; otherwise the client hangs until timeout.
      console.error(err);
      response.writeHead(500, { 'Content-Type': 'text/plain;charset=UTF-8' });
      response.end('Failed to read data2.json');
      return;
    }
    // Send the success header only once the file has actually been read.
    response.writeHead(200, { 'Content-Type': 'text/html;charset=UTF-8' });
    response.end(data, 'utf-8');
  });
});

app.listen(port, () => {
  console.log(`Example app listening at http://localhost:${port}`);
});
결론
1
CORS로 막혀 있는 곳은 프레임워크 같은 것을 써서 해야 될 것 같다.